Diffstat (limited to 'test')
-rw-r--r--test/Analysis/BlockFrequencyInfo/bad_input.ll50
-rw-r--r--test/Analysis/BlockFrequencyInfo/basic.ll55
-rw-r--r--test/Analysis/BlockFrequencyInfo/double_backedge.ll27
-rw-r--r--test/Analysis/BlockFrequencyInfo/double_exit.ll165
-rw-r--r--test/Analysis/BlockFrequencyInfo/irreducible.ll421
-rw-r--r--test/Analysis/BlockFrequencyInfo/loop_with_branch.ll44
-rw-r--r--test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll59
-rw-r--r--test/Analysis/BranchProbabilityInfo/loop.ll42
-rw-r--r--test/Analysis/BranchProbabilityInfo/pr18705.ll58
-rw-r--r--test/Analysis/CostModel/AArch64/lit.local.cfg 3
-rw-r--r--test/Analysis/CostModel/AArch64/select.ll (renamed from test/Analysis/CostModel/ARM64/select.ll)0
-rw-r--r--test/Analysis/CostModel/AArch64/store.ll (renamed from test/Analysis/CostModel/ARM64/store.ll)0
-rw-r--r--test/Analysis/CostModel/ARM64/lit.local.cfg 3
-rw-r--r--test/Analysis/CostModel/PowerPC/ext.ll2
-rw-r--r--test/Analysis/CostModel/PowerPC/insert_extract.ll4
-rw-r--r--test/Analysis/CostModel/PowerPC/load_store.ll8
-rw-r--r--test/Analysis/CostModel/X86/intrinsic-cost.ll28
-rw-r--r--test/Analysis/CostModel/X86/vdiv-cost.ll92
-rw-r--r--test/Analysis/CostModel/X86/vselect-cost.ll126
-rw-r--r--test/Analysis/Delinearization/a.ll11
-rw-r--r--test/Analysis/Delinearization/gcd_multiply_expr.ll153
-rw-r--r--test/Analysis/Delinearization/himeno_1.ll10
-rw-r--r--test/Analysis/Delinearization/himeno_2.ll10
-rw-r--r--test/Analysis/Delinearization/iv_times_constant_in_subscript.ll45
-rw-r--r--test/Analysis/Delinearization/lit.local.cfg 2
-rw-r--r--test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll10
-rw-r--r--test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll10
-rw-r--r--test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll10
-rw-r--r--test/Analysis/Delinearization/multidim_only_ivs_2d.ll5
-rw-r--r--test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll2
-rw-r--r--test/Analysis/Delinearization/multidim_only_ivs_3d.ll10
-rw-r--r--test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll10
-rw-r--r--test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll43
-rw-r--r--test/Analysis/Delinearization/undef.ll38
-rw-r--r--test/Analysis/DependenceAnalysis/Banerjee.ll22
-rw-r--r--test/Analysis/DependenceAnalysis/GCD.ll16
-rw-r--r--test/Analysis/LazyCallGraph/basic.ll50
-rw-r--r--test/Analysis/ScalarEvolution/max-trip-count.ll109
-rw-r--r--test/Assembler/2009-04-25-AliasGEP.ll8
-rw-r--r--test/Assembler/addrspacecast-alias.ll5
-rw-r--r--test/Assembler/alias-addrspace.ll6
-rw-r--r--test/Assembler/alias-redefinition.ll7
-rw-r--r--test/Assembler/alias-to-alias.ll5
-rw-r--r--test/Assembler/alias-to-alias2.ll7
-rw-r--r--test/Assembler/alias-type.ll6
-rw-r--r--test/Assembler/half-constprop.ll2
-rw-r--r--test/Assembler/half-conv.ll2
-rw-r--r--test/Assembler/internal-hidden-alias.ll6
-rw-r--r--test/Assembler/internal-hidden-function.ll7
-rw-r--r--test/Assembler/internal-hidden-variable.ll4
-rw-r--r--test/Assembler/internal-protected-alias.ll6
-rw-r--r--test/Assembler/internal-protected-function.ll7
-rw-r--r--test/Assembler/internal-protected-variable.ll4
-rw-r--r--test/Assembler/private-hidden-alias.ll6
-rw-r--r--test/Assembler/private-hidden-function.ll7
-rw-r--r--test/Assembler/private-hidden-variable.ll4
-rw-r--r--test/Assembler/private-protected-alias.ll6
-rw-r--r--test/Assembler/private-protected-function.ll7
-rw-r--r--test/Assembler/private-protected-variable.ll4
-rw-r--r--test/Bitcode/attributes.ll5
-rw-r--r--test/Bitcode/deprecated-linker_private-linker_private_weak.ll17
-rw-r--r--test/Bitcode/local-linkage-default-visibility.3.4.ll79
-rw-r--r--test/Bitcode/local-linkage-default-visibility.3.4.ll.bc bin 0 -> 924 bytes
-rw-r--r--test/Bitcode/old-aliases.ll22
-rw-r--r--test/Bitcode/old-aliases.ll.bc bin 0 -> 368 bytes
-rw-r--r--test/Bitcode/tailcall.ll17
-rw-r--r--test/Bitcode/upgrade-global-ctors.ll3
-rw-r--r--test/Bitcode/upgrade-global-ctors.ll.bc bin 0 -> 316 bytes
-rw-r--r--test/CMakeLists.txt 1
-rw-r--r--test/CodeGen/AArch64/128bit_load_store.ll20
-rw-r--r--test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll69
-rw-r--r--test/CodeGen/AArch64/adc.ll4
-rw-r--r--test/CodeGen/AArch64/addsub-shifted.ll18
-rw-r--r--test/CodeGen/AArch64/addsub.ll22
-rw-r--r--test/CodeGen/AArch64/addsub_ext.ll8
-rw-r--r--test/CodeGen/AArch64/alloca.ll121
-rw-r--r--test/CodeGen/AArch64/analyze-branch.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll (renamed from test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll (renamed from test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll (renamed from test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll (renamed from test/CodeGen/ARM64/2011-04-21-CPSRBug.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll31
-rw-r--r--test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll (renamed from test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll (renamed from test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll (renamed from test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll (renamed from test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll (renamed from test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll67
-rw-r--r--test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll (renamed from test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll19
-rw-r--r--test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll (renamed from test/CodeGen/ARM64/2013-01-23-frem-crash.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll (renamed from test/CodeGen/ARM64/2013-01-23-sext-crash.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll11
-rw-r--r--test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll23
-rw-r--r--test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll19
-rw-r--r--test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll23
-rw-r--r--test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll67
-rw-r--r--test/CodeGen/AArch64/arm64-aapcs.ll103
-rw-r--r--test/CodeGen/AArch64/arm64-abi-varargs.ll (renamed from test/CodeGen/ARM64/abi-varargs.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-abi.ll238
-rw-r--r--test/CodeGen/AArch64/arm64-abi_align.ll532
-rw-r--r--test/CodeGen/AArch64/arm64-addp.ll32
-rw-r--r--test/CodeGen/AArch64/arm64-addr-mode-folding.ll171
-rw-r--r--test/CodeGen/AArch64/arm64-addr-type-promotion.ll82
-rw-r--r--test/CodeGen/AArch64/arm64-addrmode.ll72
-rw-r--r--test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll (renamed from test/CodeGen/ARM64/alloc-no-stack-realign.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll (renamed from test/CodeGen/ARM64/alloca-frame-pointer-offset.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll (renamed from test/CodeGen/ARM64/andCmpBrToTBZ.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-ands-bad-peephole.ll31
-rw-r--r--test/CodeGen/AArch64/arm64-anyregcc-crash.ll (renamed from test/CodeGen/ARM64/anyregcc-crash.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-anyregcc.ll (renamed from test/CodeGen/ARM64/anyregcc.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-arith-saturating.ll153
-rw-r--r--test/CodeGen/AArch64/arm64-arith.ll262
-rw-r--r--test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-atomic-128.ll225
-rw-r--r--test/CodeGen/AArch64/arm64-atomic.ll331
-rw-r--r--test/CodeGen/AArch64/arm64-basic-pic.ll (renamed from test/CodeGen/ARM64/basic-pic.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll1101
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-eh.ll73
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-varargs.ll58
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll848
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll1100
-rw-r--r--test/CodeGen/AArch64/arm64-big-imm-offsets.ll (renamed from test/CodeGen/ARM64/big-imm-offsets.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-big-stack.ll21
-rw-r--r--test/CodeGen/AArch64/arm64-bitfield-extract.ll532
-rw-r--r--test/CodeGen/AArch64/arm64-blockaddress.ll (renamed from test/CodeGen/ARM64/blockaddress.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-build-vector.ll35
-rw-r--r--test/CodeGen/AArch64/arm64-call-tailcalls.ll (renamed from test/CodeGen/ARM64/call-tailcalls.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-cast-opt.ll31
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp-heuristics.ll190
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp.ll289
-rw-r--r--test/CodeGen/AArch64/arm64-clrsb.ll36
-rw-r--r--test/CodeGen/AArch64/arm64-coalesce-ext.ll17
-rw-r--r--test/CodeGen/AArch64/arm64-code-model-large-abs.ll (renamed from test/CodeGen/ARM64/code-model-large-abs.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll37
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh-str.ll23
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh.ll53
-rw-r--r--test/CodeGen/AArch64/arm64-complex-copy-noneon.ll21
-rw-r--r--test/CodeGen/AArch64/arm64-complex-ret.ll (renamed from test/CodeGen/ARM64/complex-ret.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-const-addr.ll23
-rw-r--r--test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll24
-rw-r--r--test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll29
-rw-r--r--test/CodeGen/AArch64/arm64-copy-tuple.ll146
-rw-r--r--test/CodeGen/AArch64/arm64-crc32.ll71
-rw-r--r--test/CodeGen/AArch64/arm64-crypto.ll135
-rw-r--r--test/CodeGen/AArch64/arm64-cse.ll59
-rw-r--r--test/CodeGen/AArch64/arm64-csel.ll230
-rw-r--r--test/CodeGen/AArch64/arm64-cvt.ll401
-rw-r--r--test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll (renamed from test/CodeGen/ARM64/dagcombiner-convergence.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll29
-rw-r--r--test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll46
-rw-r--r--test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll (renamed from test/CodeGen/ARM64/dagcombiner-load-slicing.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-dead-def-frame-index.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-dead-register-def-bug.ll32
-rw-r--r--test/CodeGen/AArch64/arm64-dup.ll323
-rw-r--r--test/CodeGen/AArch64/arm64-early-ifcvt.ll423
-rw-r--r--test/CodeGen/AArch64/arm64-elf-calls.ll (renamed from test/CodeGen/ARM64/elf-calls.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-elf-constpool.ll (renamed from test/CodeGen/ARM64/elf-constpool.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-elf-globals.ll115
-rw-r--r--test/CodeGen/AArch64/arm64-ext.ll118
-rw-r--r--test/CodeGen/AArch64/arm64-extend-int-to-fp.ll19
-rw-r--r--test/CodeGen/AArch64/arm64-extend.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-extern-weak.ll (renamed from test/CodeGen/ARM64/extern-weak.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-extload-knownzero.ll (renamed from test/CodeGen/ARM64/extload-knownzero.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-extract.ll58
-rw-r--r--test/CodeGen/AArch64/arm64-extract_subvector.ll51
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll47
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-alloca.ll25
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-br.ll155
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-call.ll100
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-conversion.ll442
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll146
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-gv.ll38
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-icmp.ll214
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll (renamed from test/CodeGen/ARM64/fast-isel-indirectbr.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll135
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-materialize.ll27
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll68
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-rem.ll44
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-ret.ll (renamed from test/CodeGen/ARM64/fast-isel-ret.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-select.ll (renamed from test/CodeGen/ARM64/fast-isel-select.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel.ll95
-rw-r--r--test/CodeGen/AArch64/arm64-fastcc-tailcall.ll (renamed from test/CodeGen/ARM64/fastcc-tailcall.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll (renamed from test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fcmp-opt.ll204
-rw-r--r--test/CodeGen/AArch64/arm64-fcopysign.ll51
-rw-r--r--test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-fmadd.ll92
-rw-r--r--test/CodeGen/AArch64/arm64-fmax.ll34
-rw-r--r--test/CodeGen/AArch64/arm64-fminv.ll101
-rw-r--r--test/CodeGen/AArch64/arm64-fmuladd.ll88
-rw-r--r--test/CodeGen/AArch64/arm64-fold-address.ll (renamed from test/CodeGen/ARM64/fold-address.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fold-lsl.ll79
-rw-r--r--test/CodeGen/AArch64/arm64-fp-contract-zero.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-fp-imm.ll (renamed from test/CodeGen/ARM64/fp-imm.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fp.ll (renamed from test/CodeGen/ARM64/fp.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fp128-folding.ll (renamed from test/CodeGen/ARM64/fp128-folding.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-fp128.ll273
-rw-r--r--test/CodeGen/AArch64/arm64-frame-index.ll (renamed from test/CodeGen/ARM64/frame-index.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-frameaddr.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-global-address.ll (renamed from test/CodeGen/ARM64/global-address.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-hello.ll38
-rw-r--r--test/CodeGen/AArch64/arm64-i16-subreg-extract.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-icmp-opt.ll17
-rw-r--r--test/CodeGen/AArch64/arm64-illegal-float-ops.ll (renamed from test/CodeGen/ARM64/illegal-float-ops.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-memory.ll351
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll40
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll6174
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-error-I.ll (renamed from test/CodeGen/ARM64/inline-asm-error-I.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-error-J.ll (renamed from test/CodeGen/ARM64/inline-asm-error-J.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-error-K.ll (renamed from test/CodeGen/ARM64/inline-asm-error-K.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-error-L.ll (renamed from test/CodeGen/ARM64/inline-asm-error-L.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-error-M.ll (renamed from test/CodeGen/ARM64/inline-asm-error-M.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-error-N.ll (renamed from test/CodeGen/ARM64/inline-asm-error-N.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll (renamed from test/CodeGen/ARM64/inline-asm-zero-reg-error.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm.ll230
-rw-r--r--test/CodeGen/AArch64/arm64-join-reserved.ll (renamed from test/CodeGen/ARM64/join-reserved.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-jumptable.ll (renamed from test/CodeGen/ARM64/jumptable.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-large-frame.ll69
-rw-r--r--test/CodeGen/AArch64/arm64-ld1.ll1345
-rw-r--r--test/CodeGen/AArch64/arm64-ldp.ll149
-rw-r--r--test/CodeGen/AArch64/arm64-ldur.ll (renamed from test/CodeGen/ARM64/ldur.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-ldxr-stxr.ll270
-rw-r--r--test/CodeGen/AArch64/arm64-leaf.ll (renamed from test/CodeGen/ARM64/leaf.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-long-shift.ll59
-rw-r--r--test/CodeGen/AArch64/arm64-memcpy-inline.ll112
-rw-r--r--test/CodeGen/AArch64/arm64-memset-inline.ll (renamed from test/CodeGen/ARM64/memset-inline.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-memset-to-bzero.ll108
-rw-r--r--test/CodeGen/AArch64/arm64-misched-basic-A53.ll124
-rw-r--r--test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll21
-rw-r--r--test/CodeGen/AArch64/arm64-movi.ll202
-rw-r--r--test/CodeGen/AArch64/arm64-mul.ll (renamed from test/CodeGen/ARM64/mul.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-named-reg-alloc.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-named-reg-notareg.ll13
-rw-r--r--test/CodeGen/AArch64/arm64-neg.ll (renamed from test/CodeGen/ARM64/neg.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-neon-2velem-high.ll341
-rw-r--r--test/CodeGen/AArch64/arm64-neon-2velem.ll2853
-rw-r--r--test/CodeGen/AArch64/arm64-neon-3vdiff.ll1829
-rw-r--r--test/CodeGen/AArch64/arm64-neon-aba-abd.ll236
-rw-r--r--test/CodeGen/AArch64/arm64-neon-across.ll460
-rw-r--r--test/CodeGen/AArch64/arm64-neon-add-pairwise.ll100
-rw-r--r--test/CodeGen/AArch64/arm64-neon-add-sub.ll237
-rw-r--r--test/CodeGen/AArch64/arm64-neon-compare-instructions.ll1191
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copy.ll1445
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll48
-rw-r--r--test/CodeGen/AArch64/arm64-neon-mul-div.ll797
-rw-r--r--test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll124
-rw-r--r--test/CodeGen/AArch64/arm64-neon-select_cc.ll206
-rw-r--r--test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll482
-rw-r--r--test/CodeGen/AArch64/arm64-neon-simd-shift.ll663
-rw-r--r--test/CodeGen/AArch64/arm64-neon-simd-vget.ll225
-rw-r--r--test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll74
-rw-r--r--test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll175
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint.ll171
-rw-r--r--test/CodeGen/AArch64/arm64-pic-local-symbol.ll22
-rw-r--r--test/CodeGen/AArch64/arm64-platform-reg.ll (renamed from test/CodeGen/ARM64/platform-reg.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-popcnt.ll43
-rw-r--r--test/CodeGen/AArch64/arm64-prefetch.ll (renamed from test/CodeGen/ARM64/prefetch.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-promote-const.ll255
-rw-r--r--test/CodeGen/AArch64/arm64-redzone.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-reg-copy-noneon.ll20
-rw-r--r--test/CodeGen/AArch64/arm64-register-offset-addressing.ll145
-rw-r--r--test/CodeGen/AArch64/arm64-register-pairing.ll53
-rw-r--r--test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll (renamed from test/CodeGen/ARM64/regress-f128csel-flags.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-regress-interphase-shift.ll33
-rw-r--r--test/CodeGen/AArch64/arm64-return-vector.ll (renamed from test/CodeGen/ARM64/return-vector.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-returnaddr.ll26
-rw-r--r--test/CodeGen/AArch64/arm64-rev.ll235
-rw-r--r--test/CodeGen/AArch64/arm64-rounding.ll208
-rw-r--r--test/CodeGen/AArch64/arm64-scaled_iv.ll (renamed from test/CodeGen/ARM64/scaled_iv.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-scvt.ll830
-rw-r--r--test/CodeGen/AArch64/arm64-shifted-sext.ll277
-rw-r--r--test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll22
-rw-r--r--test/CodeGen/AArch64/arm64-simplest-elf.ll (renamed from test/CodeGen/ARM64/simplest-elf.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-sincos.ll (renamed from test/CodeGen/ARM64/sincos.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll (renamed from test/CodeGen/ARM64/sitofp-combine-chains.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-sli-sri-opt.ll41
-rw-r--r--test/CodeGen/AArch64/arm64-smaxv.ll74
-rw-r--r--test/CodeGen/AArch64/arm64-sminv.ll74
-rw-r--r--test/CodeGen/AArch64/arm64-spill-lr.ll (renamed from test/CodeGen/ARM64/spill-lr.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-spill.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-st1.ll676
-rw-r--r--test/CodeGen/AArch64/arm64-stack-no-frame.ll (renamed from test/CodeGen/ARM64/stack-no-frame.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-stackmap.ll (renamed from test/CodeGen/ARM64/stackmap.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-stackpointer.ll24
-rw-r--r--test/CodeGen/AArch64/arm64-stacksave.ll (renamed from test/CodeGen/ARM64/stacksave.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-stp.ll101
-rw-r--r--test/CodeGen/AArch64/arm64-strict-align.ll26
-rw-r--r--test/CodeGen/AArch64/arm64-stur.ll98
-rw-r--r--test/CodeGen/AArch64/arm64-subsections.ll5
-rw-r--r--test/CodeGen/AArch64/arm64-subvector-extend.ll141
-rw-r--r--test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll (renamed from test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-tbl.ll132
-rw-r--r--test/CodeGen/AArch64/arm64-this-return.ll (renamed from test/CodeGen/ARM64/this-return.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-tls-darwin.ll (renamed from test/CodeGen/ARM64/tls-darwin.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamic-together.ll (renamed from test/CodeGen/ARM64/tls-dynamic-together.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamics.ll (renamed from test/CodeGen/ARM64/tls-dynamics.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-tls-execs.ll (renamed from test/CodeGen/ARM64/tls-execs.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-trap.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-trn.ll134
-rw-r--r--test/CodeGen/AArch64/arm64-trunc-store.ll75
-rw-r--r--test/CodeGen/AArch64/arm64-umaxv.ll92
-rw-r--r--test/CodeGen/AArch64/arm64-uminv.ll92
-rw-r--r--test/CodeGen/AArch64/arm64-umov.ll33
-rw-r--r--test/CodeGen/AArch64/arm64-unaligned_ldst.ll (renamed from test/CodeGen/ARM64/unaligned_ldst.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-uzp.ll107
-rw-r--r--test/CodeGen/AArch64/arm64-vaargs.ll (renamed from test/CodeGen/ARM64/vaargs.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-vabs.ll804
-rw-r--r--test/CodeGen/AArch64/arm64-vadd.ll941
-rw-r--r--test/CodeGen/AArch64/arm64-vaddlv.ll26
-rw-r--r--test/CodeGen/AArch64/arm64-vaddv.ll245
-rw-r--r--test/CodeGen/AArch64/arm64-variadic-aapcs.ll143
-rw-r--r--test/CodeGen/AArch64/arm64-vbitwise.ll91
-rw-r--r--test/CodeGen/AArch64/arm64-vclz.ll109
-rw-r--r--test/CodeGen/AArch64/arm64-vcmp.ll236
-rw-r--r--test/CodeGen/AArch64/arm64-vcnt.ll56
-rw-r--r--test/CodeGen/AArch64/arm64-vcombine.ll17
-rw-r--r--test/CodeGen/AArch64/arm64-vcvt.ll686
-rw-r--r--test/CodeGen/AArch64/arm64-vcvt_f.ll82
-rw-r--r--test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll73
-rw-r--r--test/CodeGen/AArch64/arm64-vcvt_n.ll49
-rw-r--r--test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll34
-rw-r--r--test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll11
-rw-r--r--test/CodeGen/AArch64/arm64-vecCmpBr.ll207
-rw-r--r--test/CodeGen/AArch64/arm64-vecFold.ll145
-rw-r--r--test/CodeGen/AArch64/arm64-vector-ext.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-vector-imm.ll134
-rw-r--r--test/CodeGen/AArch64/arm64-vector-insertion.ll33
-rw-r--r--test/CodeGen/AArch64/arm64-vector-ldst.ll601
-rw-r--r--test/CodeGen/AArch64/arm64-vext.ll464
-rw-r--r--test/CodeGen/AArch64/arm64-vext_reverse.ll172
-rw-r--r--test/CodeGen/AArch64/arm64-vfloatintrinsics.ll375
-rw-r--r--test/CodeGen/AArch64/arm64-vhadd.ll249
-rw-r--r--test/CodeGen/AArch64/arm64-vhsub.ll125
-rw-r--r--test/CodeGen/AArch64/arm64-virtual_base.ll (renamed from test/CodeGen/ARM64/virtual_base.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-vmax.ll679
-rw-r--r--test/CodeGen/AArch64/arm64-vminmaxnm.ll68
-rw-r--r--test/CodeGen/AArch64/arm64-vmovn.ll242
-rw-r--r--test/CodeGen/AArch64/arm64-vmul.ll2036
-rw-r--r--test/CodeGen/AArch64/arm64-volatile.ll (renamed from test/CodeGen/ARM64/volatile.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-vpopcnt.ll68
-rw-r--r--test/CodeGen/AArch64/arm64-vqadd.ll332
-rw-r--r--test/CodeGen/AArch64/arm64-vqsub.ll147
-rw-r--r--test/CodeGen/AArch64/arm64-vselect.ll25
-rw-r--r--test/CodeGen/AArch64/arm64-vsetcc_fp.ll11
-rw-r--r--test/CodeGen/AArch64/arm64-vshift.ll1917
-rw-r--r--test/CodeGen/AArch64/arm64-vshr.ll63
-rw-r--r--test/CodeGen/AArch64/arm64-vshuffle.ll115
-rw-r--r--test/CodeGen/AArch64/arm64-vsqrt.ll232
-rw-r--r--test/CodeGen/AArch64/arm64-vsra.ll150
-rw-r--r--test/CodeGen/AArch64/arm64-vsub.ll417
-rw-r--r--test/CodeGen/AArch64/arm64-weak-reference.ll (renamed from test/CodeGen/ARM64/weak-reference.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-xaluo.ll524
-rw-r--r--test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll (renamed from test/CodeGen/ARM64/zero-cycle-regmov.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll (renamed from test/CodeGen/ARM64/zero-cycle-zeroing.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-zext.ll (renamed from test/CodeGen/ARM64/zext.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-zextload-unscaled.ll (renamed from test/CodeGen/ARM64/zextload-unscaled.ll)0
-rw-r--r--test/CodeGen/AArch64/arm64-zip.ll107
-rw-r--r--test/CodeGen/AArch64/asm-large-immediate.ll10
-rw-r--r--test/CodeGen/AArch64/assertion-rc-mismatch.ll2
-rw-r--r--test/CodeGen/AArch64/atomic-ops.ll474
-rw-r--r--test/CodeGen/AArch64/basic-pic.ll12
-rw-r--r--test/CodeGen/AArch64/bitfield-insert-0.ll2
-rw-r--r--test/CodeGen/AArch64/bitfield-insert.ll25
-rw-r--r--test/CodeGen/AArch64/bitfield.ll10
-rw-r--r--test/CodeGen/AArch64/blockaddress.ll2
-rw-r--r--test/CodeGen/AArch64/bool-loads.ll22
-rw-r--r--test/CodeGen/AArch64/breg.ll4
-rw-r--r--test/CodeGen/AArch64/callee-save.ll18
-rw-r--r--test/CodeGen/AArch64/code-model-large-abs.ll2
-rw-r--r--test/CodeGen/AArch64/compare-branch.ll2
-rw-r--r--test/CodeGen/AArch64/concatvector-bugs.ll68
-rw-r--r--test/CodeGen/AArch64/cond-sel.ll38
-rw-r--r--test/CodeGen/AArch64/cpus.ll9
-rw-r--r--test/CodeGen/AArch64/directcond.ll24
-rw-r--r--test/CodeGen/AArch64/dp-3source.ll2
-rw-r--r--test/CodeGen/AArch64/dp1.ll2
-rw-r--r--test/CodeGen/AArch64/dp2.ll24
-rw-r--r--test/CodeGen/AArch64/eliminate-trunc.ll39
-rw-r--r--test/CodeGen/AArch64/extern-weak.ll28
-rw-r--r--test/CodeGen/AArch64/extract.ll6
-rw-r--r--test/CodeGen/AArch64/fastcc-reserved.ll16
-rw-r--r--test/CodeGen/AArch64/fastcc.ll47
-rw-r--r--test/CodeGen/AArch64/fcmp.ll2
-rw-r--r--test/CodeGen/AArch64/fcvt-fixed.ll6
-rw-r--r--test/CodeGen/AArch64/fcvt-int.ll2
-rw-r--r--test/CodeGen/AArch64/flags-multiuse.ll4
-rw-r--r--test/CodeGen/AArch64/floatdp_1source.ll2
-rw-r--r--test/CodeGen/AArch64/floatdp_2source.ll2
-rw-r--r--test/CodeGen/AArch64/fp-cond-sel.ll23
-rw-r--r--test/CodeGen/AArch64/fp-dp3.ll4
-rw-r--r--test/CodeGen/AArch64/fp128-folding.ll4
-rw-r--r--test/CodeGen/AArch64/fp128.ll279
-rw-r--r--test/CodeGen/AArch64/fpimm.ll6
-rw-r--r--test/CodeGen/AArch64/frameaddr.ll4
-rw-r--r--test/CodeGen/AArch64/free-zext.ll14
-rw-r--r--test/CodeGen/AArch64/func-argpassing.ll64
-rw-r--r--test/CodeGen/AArch64/func-calls.ll61
-rw-r--r--test/CodeGen/AArch64/global-alignment.ll26
-rw-r--r--test/CodeGen/AArch64/got-abuse.ll6
-rw-r--r--test/CodeGen/AArch64/i1-contents.ll55
-rw-r--r--test/CodeGen/AArch64/i128-align.ll6
-rw-r--r--test/CodeGen/AArch64/i128-shift.ll43
-rw-r--r--test/CodeGen/AArch64/illegal-float-ops.ll2
-rw-r--r--test/CodeGen/AArch64/init-array.ll4
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badI.ll4
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badK.ll2
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badK2.ll2
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badL.ll2
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints.ll137
-rw-r--r--test/CodeGen/AArch64/inline-asm-modifiers.ll147
-rw-r--r--test/CodeGen/AArch64/jump-table.ll10
-rw-r--r--test/CodeGen/AArch64/large-consts.ll9
-rw-r--r--test/CodeGen/AArch64/large-frame.ll119
-rw-r--r--test/CodeGen/AArch64/ldst-opt.ll301
-rw-r--r--test/CodeGen/AArch64/ldst-regoffset.ll80
-rw-r--r--test/CodeGen/AArch64/ldst-unscaledimm.ll6
-rw-r--r--test/CodeGen/AArch64/ldst-unsignedimm.ll62
-rw-r--r--test/CodeGen/AArch64/lit.local.cfg 7
-rw-r--r--test/CodeGen/AArch64/literal_pools.ll103
-rw-r--r--test/CodeGen/AArch64/literal_pools_float.ll46
-rw-r--r--test/CodeGen/AArch64/local_vars.ll21
-rw-r--r--test/CodeGen/AArch64/logical-imm.ll2
-rw-r--r--test/CodeGen/AArch64/logical_shifted_reg.ll6
-rw-r--r--test/CodeGen/AArch64/mature-mc-support.ll8
-rw-r--r--test/CodeGen/AArch64/misched-basic-A53.ll112
-rw-r--r--test/CodeGen/AArch64/movw-consts.ll32
-rw-r--r--test/CodeGen/AArch64/movw-shift-encoding.ll9
-rw-r--r--test/CodeGen/AArch64/mul-lohi.ll4
-rw-r--r--test/CodeGen/AArch64/neon-2velem-high.ll331
-rw-r--r--test/CodeGen/AArch64/neon-2velem.ll2853
-rw-r--r--test/CodeGen/AArch64/neon-3vdiff.ll1833
-rw-r--r--test/CodeGen/AArch64/neon-aba-abd.ll236
-rw-r--r--test/CodeGen/AArch64/neon-across.ll472
-rw-r--r--test/CodeGen/AArch64/neon-add-pairwise.ll101
-rw-r--r--test/CodeGen/AArch64/neon-add-sub.ll279
-rw-r--r--test/CodeGen/AArch64/neon-bitwise-instructions.ll519
-rw-r--r--test/CodeGen/AArch64/neon-bsl.ll235
-rw-r--r--test/CodeGen/AArch64/neon-compare-instructions.ll957
-rw-r--r--test/CodeGen/AArch64/neon-copy.ll1402
-rw-r--r--test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll47
-rw-r--r--test/CodeGen/AArch64/neon-crypto.ll144
-rw-r--r--test/CodeGen/AArch64/neon-diagnostics.ll2
-rw-r--r--test/CodeGen/AArch64/neon-extract.ll106
-rw-r--r--test/CodeGen/AArch64/neon-facge-facgt.ll56
-rw-r--r--test/CodeGen/AArch64/neon-frsqrt-frecp.ll54
-rw-r--r--test/CodeGen/AArch64/neon-halving-add-sub.ll207
-rw-r--r--test/CodeGen/AArch64/neon-idiv.ll13
-rw-r--r--test/CodeGen/AArch64/neon-load-store-v1i32.ll29
-rw-r--r--test/CodeGen/AArch64/neon-max-min-pairwise.ll346
-rw-r--r--test/CodeGen/AArch64/neon-max-min.ll310
-rw-r--r--test/CodeGen/AArch64/neon-misc-scalar.ll60
-rw-r--r--test/CodeGen/AArch64/neon-misc.ll2014
-rw-r--r--test/CodeGen/AArch64/neon-mov.ll127
-rw-r--r--test/CodeGen/AArch64/neon-mul-div.ll754
-rw-r--r--test/CodeGen/AArch64/neon-perm.ll838
-rw-r--r--test/CodeGen/AArch64/neon-rounding-halving-add.ll105
-rw-r--r--test/CodeGen/AArch64/neon-rounding-shift.ll121
-rw-r--r--test/CodeGen/AArch64/neon-saturating-add-sub.ll241
-rw-r--r--test/CodeGen/AArch64/neon-saturating-rounding-shift.ll121
-rw-r--r--test/CodeGen/AArch64/neon-saturating-shift.ll121
-rw-r--r--test/CodeGen/AArch64/neon-scalar-abs.ll61
-rw-r--r--test/CodeGen/AArch64/neon-scalar-add-sub.ll50
-rw-r--r--test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll28
-rw-r--r--test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll124
-rw-r--r--test/CodeGen/AArch64/neon-scalar-compare.ll343
-rw-r--r--test/CodeGen/AArch64/neon-scalar-copy.ll64
-rw-r--r--test/CodeGen/AArch64/neon-scalar-cvt.ll133
-rw-r--r--test/CodeGen/AArch64/neon-scalar-ext.ll113
-rw-r--r--test/CodeGen/AArch64/neon-scalar-extract-narrow.ll104
-rw-r--r--test/CodeGen/AArch64/neon-scalar-fabd.ll20
-rw-r--r--test/CodeGen/AArch64/neon-scalar-fcvt.ll233
-rw-r--r--test/CodeGen/AArch64/neon-scalar-fp-compare.ll282
-rw-r--r--test/CodeGen/AArch64/neon-scalar-mul.ll143
-rw-r--r--test/CodeGen/AArch64/neon-scalar-neg.ll61
-rw-r--r--test/CodeGen/AArch64/neon-scalar-recip.ll92
-rw-r--r--test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll215
-rw-r--r--test/CodeGen/AArch64/neon-scalar-rounding-shift.ll39
-rw-r--r--test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll242
-rw-r--r--test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll94
-rw-r--r--test/CodeGen/AArch64/neon-scalar-saturating-shift.ll88
-rw-r--r--test/CodeGen/AArch64/neon-scalar-shift-imm.ll531
-rw-r--r--test/CodeGen/AArch64/neon-scalar-shift.ll236
-rw-r--r--test/CodeGen/AArch64/neon-select_cc.ll202
-rw-r--r--test/CodeGen/AArch64/neon-shift.ll171
-rw-r--r--test/CodeGen/AArch64/neon-shl-ashr-lshr.ll333
-rw-r--r--test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll2314
-rw-r--r--test/CodeGen/AArch64/neon-simd-ldst-one.ll2299
-rw-r--r--test/CodeGen/AArch64/neon-simd-ldst.ll164
-rw-r--r--test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll354
-rw-r--r--test/CodeGen/AArch64/neon-simd-post-ldst-one.ll319
-rw-r--r--test/CodeGen/AArch64/neon-simd-shift.ll1556
-rw-r--r--test/CodeGen/AArch64/neon-simd-tbl.ll828
-rw-r--r--test/CodeGen/AArch64/neon-simd-vget.ll225
-rw-r--r--test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll30
-rw-r--r--test/CodeGen/AArch64/neon-truncStore-extLoad.ll8
-rw-r--r--test/CodeGen/AArch64/neon-v1i1-setcc.ll68
-rw-r--r--test/CodeGen/AArch64/neon-vector-list-spill.ll175
-rw-r--r--test/CodeGen/AArch64/nzcv-save.ll18
-rw-r--r--test/CodeGen/AArch64/pic-eh-stubs.ll6
-rw-r--r--test/CodeGen/AArch64/ragreedy-csr.ll6
-rw-r--r--test/CodeGen/AArch64/regress-bitcast-formals.ll2
-rw-r--r--test/CodeGen/AArch64/regress-f128csel-flags.ll2
-rw-r--r--test/CodeGen/AArch64/regress-fp128-livein.ll2
-rw-r--r--test/CodeGen/AArch64/regress-tail-livereg.ll2
-rw-r--r--test/CodeGen/AArch64/regress-tblgen-chains.ll13
-rw-r--r--test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll15
-rw-r--r--test/CodeGen/AArch64/regress-wzr-allocatable.ll41
-rw-r--r--test/CodeGen/AArch64/returnaddr.ll2
-rw-r--r--test/CodeGen/AArch64/setcc-takes-i32.ll2
-rw-r--r--test/CodeGen/AArch64/sext_inreg.ll198
-rw-r--r--test/CodeGen/AArch64/sibling-call.ll14
-rw-r--r--test/CodeGen/AArch64/sincos-expansion.ll2
-rw-r--r--test/CodeGen/AArch64/sincospow-vector-expansion.ll2
-rw-r--r--test/CodeGen/AArch64/tail-call.ll34
-rw-r--r--test/CodeGen/AArch64/tls-dynamic-together.ll18
-rw-r--r--test/CodeGen/AArch64/tls-dynamics.ll121
-rw-r--r--test/CodeGen/AArch64/tls-execs.ll63
-rw-r--r--test/CodeGen/AArch64/tst-br.ll12
-rw-r--r--test/CodeGen/AArch64/variadic.ll241
-rw-r--r--test/CodeGen/AArch64/zero-reg.ll9
-rw-r--r--test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll2
-rw-r--r--test/CodeGen/ARM/2010-08-04-StackVariable.ll2
-rw-r--r--test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll2
-rw-r--r--test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll50
-rw-r--r--test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll27
-rw-r--r--test/CodeGen/ARM/Windows/chkstk.ll24
-rw-r--r--test/CodeGen/ARM/Windows/frame-register.ll22
-rw-r--r--test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll74
-rw-r--r--test/CodeGen/ARM/Windows/memset.ll18
-rw-r--r--test/CodeGen/ARM/Windows/mov32t-bundling.ll28
-rw-r--r--test/CodeGen/ARM/Windows/movw-movt-relocations.ll27
-rw-r--r--test/CodeGen/ARM/Windows/no-aeabi.ll22
-rw-r--r--test/CodeGen/ARM/Windows/pic.ll16
-rw-r--r--test/CodeGen/ARM/Windows/read-only-data.ll15
-rw-r--r--test/CodeGen/ARM/aapcs-hfa-code.ll111
-rw-r--r--test/CodeGen/ARM/aapcs-hfa.ll164
-rw-r--r--test/CodeGen/ARM/aliases.ll2
-rw-r--r--test/CodeGen/ARM/argaddr.ll2
-rw-r--r--test/CodeGen/ARM/atomic-64bit.ll131
-rw-r--r--test/CodeGen/ARM/atomic-ops-v8.ll108
-rw-r--r--test/CodeGen/ARM/available_externally.ll6
-rw-r--r--test/CodeGen/ARM/big-endian-eh-unwind.ll73
-rw-r--r--test/CodeGen/ARM/big-endian-neon-bitconv.ll392
-rw-r--r--test/CodeGen/ARM/big-endian-vector-callee.ll1172
-rw-r--r--test/CodeGen/ARM/big-endian-vector-caller.ll1369
-rw-r--r--test/CodeGen/ARM/bswap16.ll42
-rw-r--r--test/CodeGen/ARM/build-attributes.ll10
-rw-r--r--test/CodeGen/ARM/dagcombine-concatvector.ll11
-rw-r--r--test/CodeGen/ARM/debug-frame-vararg.ll9
-rw-r--r--test/CodeGen/ARM/debug-frame.ll11
-rw-r--r--test/CodeGen/ARM/debug-segmented-stacks.ll8
-rw-r--r--test/CodeGen/ARM/dwarf-eh.ll71
-rw-r--r--test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll61
-rw-r--r--test/CodeGen/ARM/ehabi-handlerdata.ll59
-rw-r--r--test/CodeGen/ARM/ehabi.ll231
-rw-r--r--test/CodeGen/ARM/frame-register.ll38
-rw-r--r--test/CodeGen/ARM/func-argpassing-endian.ll122
-rw-r--r--test/CodeGen/ARM/hfa-in-contiguous-registers.ll94
-rw-r--r--test/CodeGen/ARM/hints.ll69
-rw-r--r--test/CodeGen/ARM/ifcvt-branch-weight-bug.ll3
-rw-r--r--test/CodeGen/ARM/indirect-hidden.ll22
-rw-r--r--test/CodeGen/ARM/interrupt-attr.ll38
-rw-r--r--test/CodeGen/ARM/intrinsics-overflow.ll57
-rw-r--r--test/CodeGen/ARM/intrinsics-v8.ll4
-rw-r--r--test/CodeGen/ARM/longMAC.ll21
-rw-r--r--test/CodeGen/ARM/long_shift.ll58
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll28
-rw-r--r--test/CodeGen/ARM/misched-copy-arm.ll2
-rw-r--r--test/CodeGen/ARM/movt.ll2
-rw-r--r--test/CodeGen/ARM/mul.ll14
-rw-r--r--test/CodeGen/ARM/mvn.ll3
-rw-r--r--test/CodeGen/ARM/named-reg-alloc.ll14
-rw-r--r--test/CodeGen/ARM/named-reg-notareg.ll13
-rw-r--r--test/CodeGen/ARM/phi.ll1
-rw-r--r--test/CodeGen/ARM/ret_i64_arg2.ll2
-rw-r--r--test/CodeGen/ARM/ret_i64_arg3.ll2
-rw-r--r--test/CodeGen/ARM/segmented-stacks-dynamic.ll16
-rw-r--r--test/CodeGen/ARM/segmented-stacks.ll36
-rw-r--r--test/CodeGen/ARM/smml.ll3
-rw-r--r--test/CodeGen/ARM/stack-frame.ll8
-rw-r--r--test/CodeGen/ARM/stackpointer.ll25
-rw-r--r--test/CodeGen/ARM/sub.ll21
-rw-r--r--test/CodeGen/ARM/t2-imm.ll2
-rw-r--r--test/CodeGen/ARM/thumb2-it-block.ll4
-rw-r--r--test/CodeGen/ARM/trap.ll1
-rw-r--r--test/CodeGen/ARM/undefined.ll14
-rw-r--r--test/CodeGen/ARM/vcombine.ll39
-rw-r--r--test/CodeGen/ARM/vfp-libcalls.ll11
-rw-r--r--test/CodeGen/ARM/vrev.ll8
-rw-r--r--test/CodeGen/ARM/zextload_demandedbits.ll2
-rw-r--r--test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll31
-rw-r--r--test/CodeGen/ARM64/2012-06-06-FPToUI.ll65
-rw-r--r--test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll19
-rw-r--r--test/CodeGen/ARM64/2013-02-12-shufv8i8.ll11
-rw-r--r--test/CodeGen/ARM64/AdvSIMD-Scalar.ll38
-rw-r--r--test/CodeGen/ARM64/aapcs.ll86
-rw-r--r--test/CodeGen/ARM64/abi.ll236
-rw-r--r--test/CodeGen/ARM64/abi_align.ll529
-rw-r--r--test/CodeGen/ARM64/addp.ll32
-rw-r--r--test/CodeGen/ARM64/addr-mode-folding.ll171
-rw-r--r--test/CodeGen/ARM64/addr-type-promotion.ll82
-rw-r--r--test/CodeGen/ARM64/addrmode.ll72
-rw-r--r--test/CodeGen/ARM64/arith-saturating.ll153
-rw-r--r--test/CodeGen/ARM64/arith.ll262
-rw-r--r--test/CodeGen/ARM64/atomic-128.ll213
-rw-r--r--test/CodeGen/ARM64/atomic.ll343
-rw-r--r--test/CodeGen/ARM64/big-stack.ll21
-rw-r--r--test/CodeGen/ARM64/bitfield-extract.ll406
-rw-r--r--test/CodeGen/ARM64/build-vector.ll35
-rw-r--r--test/CodeGen/ARM64/cast-opt.ll31
-rw-r--r--test/CodeGen/ARM64/ccmp-heuristics.ll190
-rw-r--r--test/CodeGen/ARM64/ccmp.ll289
-rw-r--r--test/CodeGen/ARM64/coalesce-ext.ll17
-rw-r--r--test/CodeGen/ARM64/collect-loh-garbage-crash.ll37
-rw-r--r--test/CodeGen/ARM64/collect-loh-str.ll23
-rw-r--r--test/CodeGen/ARM64/collect-loh.ll47
-rw-r--r--test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S 17
-rw-r--r--test/CodeGen/ARM64/convert-v2f64-v2i32.ll24
-rw-r--r--test/CodeGen/ARM64/convert-v2i32-v2f64.ll29
-rw-r--r--test/CodeGen/ARM64/copy-tuple.ll146
-rw-r--r--test/CodeGen/ARM64/crc32.ll71
-rw-r--r--test/CodeGen/ARM64/crypto.ll135
-rw-r--r--test/CodeGen/ARM64/cse.ll59
-rw-r--r--test/CodeGen/ARM64/csel.ll222
-rw-r--r--test/CodeGen/ARM64/cvt.ll401
-rw-r--r--test/CodeGen/ARM64/dup.ll322
-rw-r--r--test/CodeGen/ARM64/early-ifcvt.ll423
-rw-r--r--test/CodeGen/ARM64/elf-globals.ll115
-rw-r--r--test/CodeGen/ARM64/ext.ll101
-rw-r--r--test/CodeGen/ARM64/extend-int-to-fp.ll19
-rw-r--r--test/CodeGen/ARM64/extend.ll15
-rw-r--r--test/CodeGen/ARM64/extract.ll58
-rw-r--r--test/CodeGen/ARM64/extract_subvector.ll51
-rw-r--r--test/CodeGen/ARM64/fast-isel-addr-offset.ll47
-rw-r--r--test/CodeGen/ARM64/fast-isel-alloca.ll24
-rw-r--r--test/CodeGen/ARM64/fast-isel-br.ll155
-rw-r--r--test/CodeGen/ARM64/fast-isel-call.ll91
-rw-r--r--test/CodeGen/ARM64/fast-isel-conversion.ll416
-rw-r--r--test/CodeGen/ARM64/fast-isel-fcmp.ll146
-rw-r--r--test/CodeGen/ARM64/fast-isel-gv.ll38
-rw-r--r--test/CodeGen/ARM64/fast-isel-icmp.ll214
-rw-r--r--test/CodeGen/ARM64/fast-isel-intrinsic.ll135
-rw-r--r--test/CodeGen/ARM64/fast-isel-materialize.ll27
-rw-r--r--test/CodeGen/ARM64/fast-isel-noconvert.ll36
-rw-r--r--test/CodeGen/ARM64/fast-isel-rem.ll33
-rw-r--r--test/CodeGen/ARM64/fast-isel.ll95
-rw-r--r--test/CodeGen/ARM64/fcmp-opt.ll173
-rw-r--r--test/CodeGen/ARM64/fcopysign.ll51
-rw-r--r--test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll15
-rw-r--r--test/CodeGen/ARM64/fmadd.ll92
-rw-r--r--test/CodeGen/ARM64/fmax.ll21
-rw-r--r--test/CodeGen/ARM64/fminv.ll101
-rw-r--r--test/CodeGen/ARM64/fmuladd.ll88
-rw-r--r--test/CodeGen/ARM64/fold-lsl.ll79
-rw-r--r--test/CodeGen/ARM64/fp128.ll274
-rw-r--r--test/CodeGen/ARM64/frameaddr.ll15
-rw-r--r--test/CodeGen/ARM64/hello.ll38
-rw-r--r--test/CodeGen/ARM64/i16-subreg-extract.ll12
-rw-r--r--test/CodeGen/ARM64/icmp-opt.ll17
-rw-r--r--test/CodeGen/ARM64/indexed-memory.ll351
-rw-r--r--test/CodeGen/ARM64/inline-asm.ll230
-rw-r--r--test/CodeGen/ARM64/ld1.ll1345
-rw-r--r--test/CodeGen/ARM64/ldp.ll149
-rw-r--r--test/CodeGen/ARM64/ldxr-stxr.ll143
-rw-r--r--test/CodeGen/ARM64/leaf-compact-unwind.ll161
-rw-r--r--test/CodeGen/ARM64/lit.local.cfg 11
-rw-r--r--test/CodeGen/ARM64/long-shift.ll59
-rw-r--r--test/CodeGen/ARM64/memcpy-inline.ll112
-rw-r--r--test/CodeGen/ARM64/memset-to-bzero.ll101
-rw-r--r--test/CodeGen/ARM64/movi.ll202
-rw-r--r--test/CodeGen/ARM64/neon-compare-instructions.ll1191
-rw-r--r--test/CodeGen/ARM64/patchpoint.ll163
-rw-r--r--test/CodeGen/ARM64/popcnt.ll43
-rw-r--r--test/CodeGen/ARM64/promote-const.ll255
-rw-r--r--test/CodeGen/ARM64/redzone.ll18
-rw-r--r--test/CodeGen/ARM64/register-offset-addressing.ll12
-rw-r--r--test/CodeGen/ARM64/register-pairing.ll53
-rw-r--r--test/CodeGen/ARM64/regress-interphase-shift.ll29
-rw-r--r--test/CodeGen/ARM64/returnaddr.ll26
-rw-r--r--test/CodeGen/ARM64/rev.ll221
-rw-r--r--test/CodeGen/ARM64/rounding.ll208
-rw-r--r--test/CodeGen/ARM64/scvt.ll830
-rw-r--r--test/CodeGen/ARM64/shifted-sext.ll277
-rw-r--r--test/CodeGen/ARM64/simd-scalar-to-vector.ll22
-rw-r--r--test/CodeGen/ARM64/sli-sri-opt.ll41
-rw-r--r--test/CodeGen/ARM64/smaxv.ll74
-rw-r--r--test/CodeGen/ARM64/sminv.ll74
-rw-r--r--test/CodeGen/ARM64/spill.ll15
-rw-r--r--test/CodeGen/ARM64/st1.ll676
-rw-r--r--test/CodeGen/ARM64/stp.ll101
-rw-r--r--test/CodeGen/ARM64/strict-align.ll25
-rw-r--r--test/CodeGen/ARM64/stur.ll98
-rw-r--r--test/CodeGen/ARM64/subvector-extend.ll141
-rw-r--r--test/CodeGen/ARM64/tbl.ll132
-rw-r--r--test/CodeGen/ARM64/trap.ll8
-rw-r--r--test/CodeGen/ARM64/trn.ll134
-rw-r--r--test/CodeGen/ARM64/trunc-store.ll75
-rw-r--r--test/CodeGen/ARM64/umaxv.ll92
-rw-r--r--test/CodeGen/ARM64/uminv.ll92
-rw-r--r--test/CodeGen/ARM64/umov.ll33
-rw-r--r--test/CodeGen/ARM64/uzp.ll107
-rw-r--r--test/CodeGen/ARM64/vabs.ll804
-rw-r--r--test/CodeGen/ARM64/vadd.ll941
-rw-r--r--test/CodeGen/ARM64/vaddlv.ll26
-rw-r--r--test/CodeGen/ARM64/vaddv.ll233
-rw-r--r--test/CodeGen/ARM64/variadic-aapcs.ll143
-rw-r--r--test/CodeGen/ARM64/vbitwise.ll91
-rw-r--r--test/CodeGen/ARM64/vclz.ll109
-rw-r--r--test/CodeGen/ARM64/vcmp.ll227
-rw-r--r--test/CodeGen/ARM64/vcnt.ll56
-rw-r--r--test/CodeGen/ARM64/vcombine.ll17
-rw-r--r--test/CodeGen/ARM64/vcvt.ll686
-rw-r--r--test/CodeGen/ARM64/vcvt_f.ll82
-rw-r--r--test/CodeGen/ARM64/vcvt_f32_su32.ll73
-rw-r--r--test/CodeGen/ARM64/vcvt_n.ll49
-rw-r--r--test/CodeGen/ARM64/vcvt_su32_f32.ll34
-rw-r--r--test/CodeGen/ARM64/vcvtxd_f32_f64.ll11
-rw-r--r--test/CodeGen/ARM64/vecCmpBr.ll207
-rw-r--r--test/CodeGen/ARM64/vecFold.ll145
-rw-r--r--test/CodeGen/ARM64/vector-ext.ll16
-rw-r--r--test/CodeGen/ARM64/vector-imm.ll134
-rw-r--r--test/CodeGen/ARM64/vector-ldst.ll601
-rw-r--r--test/CodeGen/ARM64/vext.ll464
-rw-r--r--test/CodeGen/ARM64/vfloatintrinsics.ll375
-rw-r--r--test/CodeGen/ARM64/vhadd.ll249
-rw-r--r--test/CodeGen/ARM64/vhsub.ll125
-rw-r--r--test/CodeGen/ARM64/vmax.ll679
-rw-r--r--test/CodeGen/ARM64/vminmaxnm.ll68
-rw-r--r--test/CodeGen/ARM64/vmovn.ll242
-rw-r--r--test/CodeGen/ARM64/vmul.ll2003
-rw-r--r--test/CodeGen/ARM64/vqadd.ll332
-rw-r--r--test/CodeGen/ARM64/vqsub.ll147
-rw-r--r--test/CodeGen/ARM64/vselect.ll18
-rw-r--r--test/CodeGen/ARM64/vsetcc_fp.ll11
-rw-r--r--test/CodeGen/ARM64/vshift.ll1909
-rw-r--r--test/CodeGen/ARM64/vshr.ll63
-rw-r--r--test/CodeGen/ARM64/vshuffle.ll115
-rw-r--r--test/CodeGen/ARM64/vsqrt.ll232
-rw-r--r--test/CodeGen/ARM64/vsra.ll150
-rw-r--r--test/CodeGen/ARM64/vsub.ll417
-rw-r--r--test/CodeGen/ARM64/xaluo.ll524
-rw-r--r--test/CodeGen/ARM64/zip.ll107
-rw-r--r--test/CodeGen/Hexagon/hwloop-dbg.ll3
-rw-r--r--test/CodeGen/MSP430/fp.ll2
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll8
-rw-r--r--test/CodeGen/Mips/Fast-ISel/nullvoid.ll9
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestore.ll15
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestorei.ll65
-rw-r--r--test/CodeGen/Mips/abicalls.ll1
-rw-r--r--test/CodeGen/Mips/cconv/arguments-float.ll222
-rw-r--r--test/CodeGen/Mips/cconv/arguments-fp128.ll51
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll157
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-float.ll211
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-fp128.ll49
-rw-r--r--test/CodeGen/Mips/cconv/arguments.ll170
-rw-r--r--test/CodeGen/Mips/cconv/callee-saved-float.ll111
-rw-r--r--test/CodeGen/Mips/cconv/callee-saved.ll167
-rw-r--r--test/CodeGen/Mips/cconv/memory-layout.ll140
-rw-r--r--test/CodeGen/Mips/cconv/reserved-space.ll39
-rw-r--r--test/CodeGen/Mips/cconv/return-float.ll48
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-float.ll46
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-fp128.ll31
-rw-r--r--test/CodeGen/Mips/cconv/return.ll66
-rw-r--r--test/CodeGen/Mips/cconv/stack-alignment.ll28
-rw-r--r--test/CodeGen/Mips/cmov.ll3
-rw-r--r--test/CodeGen/Mips/eh-dwarf-cfa.ll2
-rw-r--r--test/CodeGen/Mips/eh-return64.ll1
-rw-r--r--test/CodeGen/Mips/elf_eflags.ll3
-rw-r--r--test/CodeGen/Mips/elf_st_other.ll12
-rw-r--r--test/CodeGen/Mips/fabs.ll50
-rw-r--r--test/CodeGen/Mips/fcopysign-f32-f64.ll1
-rw-r--r--test/CodeGen/Mips/fcopysign.ll1
-rw-r--r--test/CodeGen/Mips/fmadd1.ll15
-rw-r--r--test/CodeGen/Mips/fneg.ll27
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll16
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll4
-rw-r--r--test/CodeGen/Mips/inlineasm-operand-code.ll28
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint.ll30
-rw-r--r--test/CodeGen/Mips/int-to-float-conversion.ll1
-rw-r--r--test/CodeGen/Mips/largeimmprinting.ll4
-rw-r--r--test/CodeGen/Mips/load-store-left-right.ll434
-rw-r--r--test/CodeGen/Mips/longbranch.ll154
-rw-r--r--test/CodeGen/Mips/micromips-directives.ll16
-rw-r--r--test/CodeGen/Mips/micromips-long-branch.ll16437
-rw-r--r--test/CodeGen/Mips/mips32r6/compatibility.ll9
-rw-r--r--test/CodeGen/Mips/mips64-f128.ll4
-rw-r--r--test/CodeGen/Mips/mips64-sret.ll25
-rw-r--r--test/CodeGen/Mips/mips64countleading.ll11
-rw-r--r--test/CodeGen/Mips/mips64directive.ll1
-rw-r--r--test/CodeGen/Mips/mips64ext.ll3
-rw-r--r--test/CodeGen/Mips/mips64fpimm0.ll1
-rw-r--r--test/CodeGen/Mips/mips64fpldst.ll2
-rw-r--r--test/CodeGen/Mips/mips64imm.ll1
-rw-r--r--test/CodeGen/Mips/mips64instrs.ll18
-rw-r--r--test/CodeGen/Mips/mips64intldst.ll2
-rw-r--r--test/CodeGen/Mips/mips64lea.ll1
-rw-r--r--test/CodeGen/Mips/mips64load-store-left-right.ll73
-rw-r--r--test/CodeGen/Mips/mips64muldiv.ll1
-rw-r--r--test/CodeGen/Mips/mips64r6/compatibility.ll9
-rw-r--r--test/CodeGen/Mips/msa/basic_operations.ll358
-rw-r--r--test/CodeGen/Mips/msa/basic_operations_float.ll117
-rw-r--r--test/CodeGen/Mips/optimize-fp-math.ll1
-rw-r--r--test/CodeGen/Mips/remat-immed-load.ll1
-rw-r--r--test/CodeGen/Mips/sint-fp-store_pattern.ll1
-rw-r--r--test/CodeGen/Mips/start-asm-file.ll91
-rw-r--r--test/CodeGen/Mips/tls-alias.ll2
-rw-r--r--test/CodeGen/Mips/unalignedload.ll82
-rw-r--r--test/CodeGen/NVPTX/access-non-generic.ll91
-rw-r--r--test/CodeGen/NVPTX/addrspacecast-gvar.ll9
-rw-r--r--test/CodeGen/NVPTX/addrspacecast.ll4
-rw-r--r--test/CodeGen/NVPTX/local-stack-frame.ll18
-rw-r--r--test/CodeGen/NVPTX/surf-read.ll20
-rw-r--r--test/CodeGen/NVPTX/surf-write.ll16
-rw-r--r--test/CodeGen/NVPTX/tex-read.ll20
-rw-r--r--test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll1
-rw-r--r--test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll1
-rw-r--r--test/CodeGen/PowerPC/aa-tbaa.ll2
-rw-r--r--test/CodeGen/PowerPC/alias.ll31
-rw-r--r--test/CodeGen/PowerPC/cc.ll70
-rw-r--r--test/CodeGen/PowerPC/ctrloop-le.ll3
-rw-r--r--test/CodeGen/PowerPC/ctrloop-lt.ll3
-rw-r--r--test/CodeGen/PowerPC/ctrloop-sh.ll72
-rw-r--r--test/CodeGen/PowerPC/dbg.ll3
-rw-r--r--test/CodeGen/PowerPC/indexed-load.ll22
-rw-r--r--test/CodeGen/PowerPC/mcm-10.ll3
-rw-r--r--test/CodeGen/PowerPC/mcm-11.ll3
-rw-r--r--test/CodeGen/PowerPC/mcm-obj-2.ll4
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r0.ll15
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll18
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r1.ll20
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll18
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r13.ll18
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll17
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r2.ll18
-rw-r--r--test/CodeGen/PowerPC/rlwimi-dyn-and.ll48
-rw-r--r--test/CodeGen/PowerPC/splat-bug.ll18
-rw-r--r--test/CodeGen/R600/32-bit-local-address-space.ll8
-rw-r--r--test/CodeGen/R600/64bit-kernel-args.ll4
-rw-r--r--test/CodeGen/R600/add.ll25
-rw-r--r--test/CodeGen/R600/add_i64.ll2
-rw-r--r--test/CodeGen/R600/address-space.ll6
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i32.ll2
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i64.ll2
-rw-r--r--test/CodeGen/R600/call.ll33
-rw-r--r--test/CodeGen/R600/extload.ll11
-rw-r--r--test/CodeGen/R600/extract_vector_elt_i16.ll29
-rw-r--r--test/CodeGen/R600/fabs.ll11
-rw-r--r--test/CodeGen/R600/fconst64.ll4
-rw-r--r--test/CodeGen/R600/fneg.ll13
-rw-r--r--test/CodeGen/R600/fp_to_uint.f64.ll9
-rw-r--r--test/CodeGen/R600/gep-address-space.ll4
-rw-r--r--test/CodeGen/R600/gv-const-addrspace-fail.ll58
-rw-r--r--test/CodeGen/R600/gv-const-addrspace.ll57
-rw-r--r--test/CodeGen/R600/infinite-loop.ll2
-rw-r--r--test/CodeGen/R600/insert_vector_elt.ll28
-rw-r--r--test/CodeGen/R600/insert_vector_elt_f64.ll2
-rw-r--r--test/CodeGen/R600/kernel-args.ll32
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll388
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll514
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imad24.ll21
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imul24.ll15
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umad24.ll19
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umul24.ll17
-rw-r--r--test/CodeGen/R600/llvm.SI.tbuffer.store.ll8
-rw-r--r--test/CodeGen/R600/llvm.cos.ll43
-rw-r--r--test/CodeGen/R600/llvm.rint.f64.ll37
-rw-r--r--test/CodeGen/R600/llvm.rint.ll49
-rw-r--r--test/CodeGen/R600/llvm.sin.ll44
-rw-r--r--test/CodeGen/R600/llvm.sqrt.ll2
-rw-r--r--test/CodeGen/R600/load-i1.ll107
-rw-r--r--test/CodeGen/R600/local-64.ll52
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll4
-rw-r--r--test/CodeGen/R600/loop-idiom.ll2
-rw-r--r--test/CodeGen/R600/mad_int24.ll17
-rw-r--r--test/CodeGen/R600/mad_uint24.ll67
-rw-r--r--test/CodeGen/R600/mubuf.ll16
-rw-r--r--test/CodeGen/R600/mul.ll63
-rw-r--r--test/CodeGen/R600/mul_int24.ll17
-rw-r--r--test/CodeGen/R600/mul_uint24.ll61
-rw-r--r--test/CodeGen/R600/mulhu.ll2
-rw-r--r--test/CodeGen/R600/or.ll4
-rw-r--r--test/CodeGen/R600/private-memory.ll8
-rw-r--r--test/CodeGen/R600/pv.ll2
-rw-r--r--test/CodeGen/R600/register-count-comments.ll2
-rw-r--r--test/CodeGen/R600/salu-to-valu.ll42
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll2
-rw-r--r--test/CodeGen/R600/selectcc.ll19
-rw-r--r--test/CodeGen/R600/setcc.ll26
-rw-r--r--test/CodeGen/R600/setcc64.ll26
-rw-r--r--test/CodeGen/R600/seto.ll3
-rw-r--r--test/CodeGen/R600/setuo.ll3
-rw-r--r--test/CodeGen/R600/sext-in-reg.ll371
-rw-r--r--test/CodeGen/R600/sgpr-control-flow.ll27
-rw-r--r--test/CodeGen/R600/sgpr-copy-duplicate-operand.ll2
-rw-r--r--test/CodeGen/R600/sgpr-copy.ll2
-rw-r--r--test/CodeGen/R600/si-annotate-cf-assertion.ll2
-rw-r--r--test/CodeGen/R600/simplify-demanded-bits-build-pair.ll36
-rw-r--r--test/CodeGen/R600/smrd.ll28
-rw-r--r--test/CodeGen/R600/store-v3i64.ll2
-rw-r--r--test/CodeGen/R600/store-vector-ptrs.ll2
-rw-r--r--test/CodeGen/R600/store.ll66
-rw-r--r--test/CodeGen/R600/sub.ll55
-rw-r--r--test/CodeGen/R600/trunc-store-i1.ll2
-rw-r--r--test/CodeGen/R600/trunc.ll7
-rw-r--r--test/CodeGen/R600/uaddo.ll17
-rw-r--r--test/CodeGen/R600/udivrem64.ll82
-rw-r--r--test/CodeGen/R600/uint_to_fp.f64.ll9
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll2
-rw-r--r--test/CodeGen/R600/v_cndmask.ll3
-rw-r--r--test/CodeGen/R600/valu-i1.ll39
-rw-r--r--test/CodeGen/R600/work-item-intrinsics.ll16
-rw-r--r--test/CodeGen/R600/xor.ll18
-rw-r--r--test/CodeGen/R600/zero_extend.ll16
-rw-r--r--test/CodeGen/SPARC/2011-01-11-FrameAddr.ll6
-rw-r--r--test/CodeGen/SPARC/2011-01-19-DelaySlot.ll3
-rw-r--r--test/CodeGen/SPARC/64abi.ll4
-rw-r--r--test/CodeGen/SPARC/64bit.ll12
-rw-r--r--test/CodeGen/SPARC/64cond.ll6
-rw-r--r--test/CodeGen/SPARC/atomics.ll6
-rw-r--r--test/CodeGen/SPARC/exception.ll34
-rw-r--r--test/CodeGen/SPARC/leafproc.ll6
-rw-r--r--test/CodeGen/SPARC/parts.ll6
-rw-r--r--test/CodeGen/SPARC/sret-secondary.ll8
-rw-r--r--test/CodeGen/SystemZ/alias-01.ll3
-rw-r--r--test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll4
-rw-r--r--test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll2
-rw-r--r--test/CodeGen/Thumb/2010-07-15-debugOrdering.ll2
-rw-r--r--test/CodeGen/Thumb/DbgValueOtherTargets.test 2
-rw-r--r--test/CodeGen/Thumb/barrier.ll6
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll7
-rw-r--r--test/CodeGen/Thumb/fpconv.ll2
-rw-r--r--test/CodeGen/Thumb/fpow.ll2
-rw-r--r--test/CodeGen/Thumb/inlineasm-imm-thumb.ll2
-rw-r--r--test/CodeGen/Thumb/inlineasm-thumb.ll3
-rw-r--r--test/CodeGen/Thumb/ispositive.ll2
-rw-r--r--test/CodeGen/Thumb/ldr_ext.ll4
-rw-r--r--test/CodeGen/Thumb/ldr_frame.ll2
-rw-r--r--test/CodeGen/Thumb/long-setcc.ll9
-rw-r--r--test/CodeGen/Thumb/long.ll20
-rw-r--r--test/CodeGen/Thumb/long_shift.ll2
-rw-r--r--test/CodeGen/Thumb/mul.ll14
-rw-r--r--test/CodeGen/Thumb/rev.ll2
-rw-r--r--test/CodeGen/Thumb/segmented-stacks-dynamic.ll12
-rw-r--r--test/CodeGen/Thumb/segmented-stacks.ll36
-rw-r--r--test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll2
-rw-r--r--test/CodeGen/Thumb/stack-frame.ll5
-rw-r--r--test/CodeGen/Thumb/thumb-imm.ll6
-rw-r--r--test/CodeGen/Thumb/thumb-ldm.ll42
-rw-r--r--test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll37
-rw-r--r--test/CodeGen/Thumb/trap.ll2
-rw-r--r--test/CodeGen/Thumb/tst_teq.ll5
-rw-r--r--test/CodeGen/Thumb/vargs.ll11
-rw-r--r--test/CodeGen/Thumb2/bfi.ll2
-rw-r--r--test/CodeGen/Thumb2/bfx.ll2
-rw-r--r--test/CodeGen/Thumb2/carry.ll2
-rw-r--r--test/CodeGen/Thumb2/div.ll8
-rw-r--r--test/CodeGen/Thumb2/ifcvt-neon.ll2
-rw-r--r--test/CodeGen/Thumb2/longMACt.ll2
-rw-r--r--test/CodeGen/Thumb2/mul_const.ll2
-rw-r--r--test/CodeGen/Thumb2/segmented-stacks.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-adc.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add4.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add5.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-add6.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-and.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-and2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-asr.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-asr2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-bcc.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-bfc.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-bic.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-clz.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmn2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-eor.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-eor2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-jtb.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldm.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_ext.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_post.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_pre.ll12
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrb.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrh.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsl.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsl2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-lsr3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mla.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-mls.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mov.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mul.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mulhi.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mvn.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mvn2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-neg.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-orn.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-orn2.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-orr.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-orr2.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-pack.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-rev.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ror.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-rsb.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-rsb2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sbc.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-select.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-shifter.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-smla.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-smul.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-str.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_post.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_pre.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-strb.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-strh.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub4.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sub5.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sxt-uxt.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-sxt_rot.ll3
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-teq2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-tst2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxt_rot.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxtb.ll4
-rw-r--r--test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll10
-rw-r--r--test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll6
-rw-r--r--test/CodeGen/X86/2010-08-04-StackVariable.ll2
-rw-r--r--test/CodeGen/X86/MergeConsecutiveStores.ll1
-rw-r--r--test/CodeGen/X86/aliases.ll2
-rw-r--r--test/CodeGen/X86/atom-bypass-slow-division-64.ll4
-rw-r--r--test/CodeGen/X86/avoid_complex_am.ll40
-rw-r--r--test/CodeGen/X86/avx-blend.ll59
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll26
-rw-r--r--test/CodeGen/X86/avx.ll136
-rw-r--r--test/CodeGen/X86/avx1-logical-load-folding.ll60
-rw-r--r--test/CodeGen/X86/avx2-blend.ll11
-rw-r--r--test/CodeGen/X86/avx2-vector-shifts.ll10
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll32
-rw-r--r--test/CodeGen/X86/avx512-gather-scatter-intrin.ll206
-rw-r--r--test/CodeGen/X86/avx512-insert-extract.ll38
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll14
-rw-r--r--test/CodeGen/X86/avx512-mov.ll28
-rw-r--r--test/CodeGen/X86/avx512-shuffle.ll19
-rw-r--r--test/CodeGen/X86/blend-msb.ll22
-rw-r--r--test/CodeGen/X86/bmi.ll17
-rw-r--r--test/CodeGen/X86/br-fold.ll18
-rw-r--r--test/CodeGen/X86/bswap-vector.ll137
-rw-r--r--test/CodeGen/X86/cdecl-method-return.ll69
-rw-r--r--test/CodeGen/X86/cfi.ll27
-rw-r--r--test/CodeGen/X86/cmp.ll45
-rw-r--r--test/CodeGen/X86/codegen-prepare-addrmode-sext.ll20
-rw-r--r--test/CodeGen/X86/codegen-prepare-crash.ll14
-rw-r--r--test/CodeGen/X86/codegen-prepare.ll1
-rw-r--r--test/CodeGen/X86/combine-avx-intrinsics.ll119
-rw-r--r--test/CodeGen/X86/combine-avx2-intrinsics.ll164
-rw-r--r--test/CodeGen/X86/combine-sse2-intrinsics.ll53
-rw-r--r--test/CodeGen/X86/combine-sse41-intrinsics.ll182
-rw-r--r--test/CodeGen/X86/constant-hoisting-shift-immediate.ll25
-rw-r--r--test/CodeGen/X86/divide-by-constant.ll8
-rw-r--r--test/CodeGen/X86/dllexport-x86_64.ll68
-rw-r--r--test/CodeGen/X86/dllexport.ll85
-rw-r--r--test/CodeGen/X86/expand-opaque-const.ll21
-rw-r--r--test/CodeGen/X86/f16c-intrinsics.ll14
-rw-r--r--test/CodeGen/X86/fma-do-not-commute.ll30
-rw-r--r--test/CodeGen/X86/fold-load-vec.ll2
-rw-r--r--test/CodeGen/X86/gcc_except_table.ll4
-rw-r--r--test/CodeGen/X86/global-sections.ll80
-rw-r--r--test/CodeGen/X86/indirect-hidden.ll43
-rw-r--r--test/CodeGen/X86/isel-sink.ll1
-rw-r--r--test/CodeGen/X86/lit.local.cfg2
-rw-r--r--test/CodeGen/X86/live-out-reg-info.ll2
-rw-r--r--test/CodeGen/X86/lower-bitcast.ll155
-rw-r--r--test/CodeGen/X86/lower-vec-shift.ll125
-rw-r--r--test/CodeGen/X86/lzcnt-tzcnt.ll447
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll6
-rw-r--r--test/CodeGen/X86/merge_store.ll1
-rw-r--r--test/CodeGen/X86/mod128.ll26
-rw-r--r--test/CodeGen/X86/musttail-indirect.ll124
-rw-r--r--test/CodeGen/X86/musttail-thiscall.ll31
-rw-r--r--test/CodeGen/X86/musttail.ll90
-rw-r--r--test/CodeGen/X86/named-reg-alloc.ll14
-rw-r--r--test/CodeGen/X86/named-reg-notareg.ll13
-rw-r--r--test/CodeGen/X86/no-cfi.ll34
-rw-r--r--test/CodeGen/X86/peep-test-4.ll76
-rw-r--r--test/CodeGen/X86/peephole-multiple-folds.ll4
-rw-r--r--test/CodeGen/X86/ragreedy-last-chance-recoloring.ll13
-rw-r--r--test/CodeGen/X86/rdtsc.ll53
-rw-r--r--test/CodeGen/X86/remat-invalid-liveness.ll85
-rw-r--r--test/CodeGen/X86/ret-mmx.ll1
-rw-r--r--test/CodeGen/X86/rotate3.ll76
-rw-r--r--test/CodeGen/X86/segmented-stacks-dynamic.ll12
-rw-r--r--test/CodeGen/X86/segmented-stacks.ll89
-rw-r--r--test/CodeGen/X86/sse2.ll18
-rw-r--r--test/CodeGen/X86/sse3.ll2
-rw-r--r--test/CodeGen/X86/sse41-blend.ll40
-rw-r--r--test/CodeGen/X86/sse41.ll447
-rw-r--r--test/CodeGen/X86/stack-protector-dbginfo.ll2
-rw-r--r--test/CodeGen/X86/stack-protector.ll597
-rw-r--r--test/CodeGen/X86/stackpointer.ll28
-rw-r--r--test/CodeGen/X86/tls.ll87
-rw-r--r--test/CodeGen/X86/vec_shuffle-41.ll21
-rw-r--r--test/CodeGen/X86/vec_splat.ll16
-rw-r--r--test/CodeGen/X86/vector-idiv.ll217
-rw-r--r--test/CodeGen/X86/win32_sret.ll83
-rw-r--r--test/CodeGen/X86/x86-64-sret-return-2.ll18
-rw-r--r--test/CodeGen/XCore/epilogue_prologue.ll37
-rw-r--r--test/CodeGen/XCore/llvm-intrinsics.ll6
-rw-r--r--test/DebugInfo/2009-11-05-DeadGlobalVariable.ll4
-rw-r--r--test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll2
-rw-r--r--test/DebugInfo/2010-03-19-DbgDeclare.ll2
-rw-r--r--test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll20
-rw-r--r--test/DebugInfo/2010-07-19-Crash.ll2
-rw-r--r--test/DebugInfo/AArch64/cfi-frame.ll58
-rw-r--r--test/DebugInfo/AArch64/lit.local.cfg2
-rw-r--r--test/DebugInfo/AArch64/struct_by_value.ll (renamed from test/DebugInfo/ARM64/struct_by_value.ll)0
-rw-r--r--test/DebugInfo/AArch64/variable-loc.ll101
-rw-r--r--test/DebugInfo/ARM64/lit.local.cfg4
-rw-r--r--test/DebugInfo/COFF/asm.ll4
-rw-r--r--test/DebugInfo/COFF/multifile.ll4
-rw-r--r--test/DebugInfo/COFF/multifunction.ll4
-rw-r--r--test/DebugInfo/COFF/simple.ll4
-rw-r--r--test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll2
-rwxr-xr-xtest/DebugInfo/Inputs/llvm-symbolizer-dwo-testbin0 -> 9579 bytes
-rw-r--r--test/DebugInfo/Inputs/llvm-symbolizer-dwo-test.cc18
-rw-r--r--test/DebugInfo/Mips/delay-slot.ll75
-rw-r--r--test/DebugInfo/Mips/lit.local.cfg3
-rw-r--r--test/DebugInfo/SystemZ/variable-loc.ll23
-rw-r--r--test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll4
-rw-r--r--test/DebugInfo/X86/2011-12-16-BadStructRef.ll20
-rw-r--r--test/DebugInfo/X86/DW_AT_byte_size.ll4
-rw-r--r--test/DebugInfo/X86/DW_AT_linkage_name.ll116
-rw-r--r--test/DebugInfo/X86/DW_AT_location-reference.ll4
-rw-r--r--test/DebugInfo/X86/DW_AT_specification.ll16
-rw-r--r--test/DebugInfo/X86/arguments.ll3
-rw-r--r--test/DebugInfo/X86/array.ll101
-rw-r--r--test/DebugInfo/X86/array2.ll107
-rw-r--r--test/DebugInfo/X86/block-capture.ll26
-rw-r--r--test/DebugInfo/X86/coff_debug_info_type.ll2
-rw-r--r--test/DebugInfo/X86/concrete_out_of_line.ll52
-rw-r--r--test/DebugInfo/X86/cu-ranges.ll2
-rw-r--r--test/DebugInfo/X86/dbg-at-specficiation.ll2
-rw-r--r--test/DebugInfo/X86/dbg-const.ll2
-rw-r--r--test/DebugInfo/X86/dbg-declare-arg.ll2
-rw-r--r--test/DebugInfo/X86/dbg-declare.ll4
-rw-r--r--test/DebugInfo/X86/dbg-large-unsigned-const.ll62
-rw-r--r--test/DebugInfo/X86/dbg-subrange.ll4
-rw-r--r--test/DebugInfo/X86/dbg-value-const-byref.ll15
-rw-r--r--test/DebugInfo/X86/dbg-value-inlined-parameter.ll27
-rw-r--r--test/DebugInfo/X86/dbg-value-location.ll2
-rw-r--r--test/DebugInfo/X86/debug-dead-local-var.ll51
-rw-r--r--test/DebugInfo/X86/debug-info-blocks.ll21
-rw-r--r--test/DebugInfo/X86/debug-info-static-member.ll4
-rw-r--r--test/DebugInfo/X86/debug-loc-offset.ll4
-rw-r--r--test/DebugInfo/X86/debug-ranges-offset.ll241
-rw-r--r--test/DebugInfo/X86/elf-names.ll6
-rw-r--r--test/DebugInfo/X86/empty-and-one-elem-array.ll10
-rw-r--r--test/DebugInfo/X86/ending-run.ll4
-rw-r--r--test/DebugInfo/X86/fission-ranges.ll2
-rw-r--r--test/DebugInfo/X86/formal_parameter.ll83
-rw-r--r--test/DebugInfo/X86/gnu-public-names.ll36
-rw-r--r--test/DebugInfo/X86/inline-member-function.ll95
-rw-r--r--test/DebugInfo/X86/inline-seldag-test.ll77
-rw-r--r--test/DebugInfo/X86/instcombine-instrinsics.ll157
-rw-r--r--test/DebugInfo/X86/linkage-name.ll8
-rw-r--r--test/DebugInfo/X86/pr11300.ll15
-rw-r--r--test/DebugInfo/X86/pr12831.ll2
-rw-r--r--test/DebugInfo/X86/sret.ll25
-rw-r--r--test/DebugInfo/X86/type_units_with_addresses.ll151
-rw-r--r--test/DebugInfo/constant-pointers.ll51
-rw-r--r--test/DebugInfo/cross-cu-inlining.ll137
-rw-r--r--test/DebugInfo/cross-cu-linkonce.ll74
-rw-r--r--test/DebugInfo/cu-line-tables.ll7
-rw-r--r--test/DebugInfo/debug-info-qualifiers.ll2
-rw-r--r--test/DebugInfo/dwarfdump-inlining.test28
-rw-r--r--test/DebugInfo/dwarfdump-ranges.test10
-rw-r--r--test/DebugInfo/dwarfdump-test.test56
-rw-r--r--test/DebugInfo/dwarfdump-zlib.test12
-rw-r--r--test/DebugInfo/inline-scopes.ll130
-rw-r--r--test/DebugInfo/inlined-vars.ll3
-rw-r--r--test/DebugInfo/llvm-symbolizer-zlib.test7
-rw-r--r--test/DebugInfo/llvm-symbolizer.test58
-rw-r--r--test/DebugInfo/namespace.ll50
-rw-r--r--test/DebugInfo/namespace_function_definition.ll44
-rw-r--r--test/DebugInfo/namespace_inline_function_definition.ll92
-rw-r--r--test/DebugInfo/restrict.ll53
-rw-r--r--test/DebugInfo/sugared-constants.ll82
-rw-r--r--test/DebugInfo/two-cus-from-same-file.ll3
-rw-r--r--test/DebugInfo/typedef.ll32
-rw-r--r--test/DebugInfo/unconditional-branch.ll6
-rw-r--r--test/DebugInfo/varargs.ll14
-rw-r--r--test/ExecutionEngine/RuntimeDyld/macho_relocations.test (renamed from test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test)0
-rw-r--r--test/ExecutionEngine/lit.local.cfg7
-rw-r--r--test/Feature/alias2.ll19
-rw-r--r--test/Feature/aliases.ll4
-rw-r--r--test/Feature/instructions.ll2
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/asm_attr.ll20
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/asm_mov.ll44
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/asm_mov.s18
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s20
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s71
-rw-r--r--test/Instrumentation/AddressSanitizer/basic.ll33
-rw-r--r--test/Instrumentation/AddressSanitizer/coverage-dbg.ll33
-rw-r--r--test/Instrumentation/AddressSanitizer/coverage.ll14
-rw-r--r--test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll30
-rw-r--r--test/Instrumentation/AddressSanitizer/test64.ll10
-rw-r--r--test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll21
-rw-r--r--test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll47
-rw-r--r--test/LTO/attrs.ll15
-rw-r--r--test/LTO/keep-used-puts-during-instcombine.ll4
-rw-r--r--test/Linker/Inputs/PR8300.b.ll2
-rw-r--r--test/Linker/Inputs/alias.ll3
-rw-r--r--test/Linker/Inputs/cycle.ll2
-rw-r--r--test/Linker/Inputs/datalayout-b.ll2
-rw-r--r--test/Linker/Inputs/old_global_ctors.3.4.bcbin0 -> 368 bytes
-rw-r--r--test/Linker/alias.ll16
-rw-r--r--test/Linker/cycle.ll7
-rw-r--r--test/Linker/debug-info-version-a.ll16
-rw-r--r--test/Linker/debug-info-version-b.ll10
-rw-r--r--test/Linker/global_ctors.ll29
-rw-r--r--test/Linker/type-unique-odr-a.ll16
-rw-r--r--test/MC/AArch64/arm64-adr.s31
-rw-r--r--test/MC/AArch64/arm64-advsimd.s1997
-rw-r--r--test/MC/AArch64/arm64-aliases.s753
-rw-r--r--test/MC/AArch64/arm64-arithmetic-encoding.s615
-rw-r--r--test/MC/AArch64/arm64-arm64-fixup.s10
-rw-r--r--test/MC/AArch64/arm64-basic-a64-instructions.s18
-rw-r--r--test/MC/AArch64/arm64-be-datalayout.s4
-rw-r--r--test/MC/AArch64/arm64-bitfield-encoding.s38
-rw-r--r--test/MC/AArch64/arm64-branch-encoding.s159
-rw-r--r--test/MC/AArch64/arm64-condbr-without-dots.s37
-rw-r--r--test/MC/AArch64/arm64-crypto.s66
-rw-r--r--test/MC/AArch64/arm64-diagno-predicate.s24
-rw-r--r--test/MC/AArch64/arm64-diags.s392
-rw-r--r--test/MC/AArch64/arm64-directive_loh.s (renamed from test/MC/ARM64/directive_loh.s)0
-rw-r--r--test/MC/AArch64/arm64-elf-reloc-condbr.s10
-rw-r--r--test/MC/AArch64/arm64-elf-relocs.s249
-rw-r--r--test/MC/AArch64/arm64-fp-encoding.s443
-rw-r--r--test/MC/AArch64/arm64-large-relocs.s38
-rw-r--r--test/MC/AArch64/arm64-leaf-compact-unwind.s208
-rw-r--r--test/MC/AArch64/arm64-logical-encoding.s (renamed from test/MC/ARM64/logical-encoding.s)0
-rw-r--r--test/MC/AArch64/arm64-mapping-across-sections.s (renamed from test/MC/ARM64/mapping-across-sections.s)0
-rw-r--r--test/MC/AArch64/arm64-mapping-within-section.s (renamed from test/MC/ARM64/mapping-within-section.s)0
-rw-r--r--test/MC/AArch64/arm64-memory.s634
-rw-r--r--test/MC/AArch64/arm64-nv-cond.s11
-rw-r--r--test/MC/AArch64/arm64-optional-hash.s31
-rw-r--r--test/MC/AArch64/arm64-separator.s20
-rw-r--r--test/MC/AArch64/arm64-simd-ldst.s2404
-rw-r--r--test/MC/AArch64/arm64-small-data-fixups.s (renamed from test/MC/ARM64/small-data-fixups.s)0
-rw-r--r--test/MC/AArch64/arm64-spsel-sysreg.s24
-rw-r--r--test/MC/AArch64/arm64-system-encoding.s623
-rw-r--r--test/MC/AArch64/arm64-target-specific-sysreg.s10
-rw-r--r--test/MC/AArch64/arm64-tls-modifiers-darwin.s13
-rw-r--r--test/MC/AArch64/arm64-tls-relocs.s320
-rw-r--r--test/MC/AArch64/arm64-v128_lo-diagnostics.s11
-rw-r--r--test/MC/AArch64/arm64-variable-exprs.s (renamed from test/MC/ARM64/variable-exprs.s)0
-rw-r--r--test/MC/AArch64/arm64-vector-lists.s20
-rw-r--r--test/MC/AArch64/arm64-verbose-vector-case.s19
-rw-r--r--test/MC/AArch64/basic-a64-diagnostics.s861
-rw-r--r--test/MC/AArch64/basic-a64-instructions.s1845
-rw-r--r--test/MC/AArch64/elf-globaladdress.ll2
-rw-r--r--test/MC/AArch64/elf-reloc-addend.s8
-rw-r--r--test/MC/AArch64/elf-reloc-condbr.s10
-rw-r--r--test/MC/AArch64/gicv3-regs.s220
-rw-r--r--test/MC/AArch64/lit.local.cfg4
-rw-r--r--test/MC/AArch64/neon-2velem.s2
-rw-r--r--test/MC/AArch64/neon-3vdiff.s2
-rw-r--r--test/MC/AArch64/neon-across.s2
-rw-r--r--test/MC/AArch64/neon-compare-instructions.s70
-rw-r--r--test/MC/AArch64/neon-crypto.s5
-rw-r--r--test/MC/AArch64/neon-diagnostics.s736
-rw-r--r--test/MC/AArch64/neon-extract.s6
-rw-r--r--test/MC/AArch64/neon-mov.s143
-rw-r--r--test/MC/AArch64/neon-perm.s2
-rw-r--r--test/MC/AArch64/neon-scalar-compare.s10
-rw-r--r--test/MC/AArch64/neon-scalar-dup.s44
-rw-r--r--test/MC/AArch64/neon-simd-copy.s72
-rw-r--r--test/MC/AArch64/neon-simd-ldst-multi-elem.s786
-rw-r--r--test/MC/AArch64/neon-simd-ldst-one-elem.s514
-rw-r--r--test/MC/AArch64/neon-simd-misc.s6
-rw-r--r--test/MC/AArch64/neon-simd-post-ldst-multi-elem.s426
-rw-r--r--test/MC/AArch64/neon-tbl.s97
-rw-r--r--test/MC/AArch64/noneon-diagnostics.s13
-rw-r--r--test/MC/AArch64/optional-hash.s2
-rw-r--r--test/MC/AArch64/tls-relocs.s301
-rw-r--r--test/MC/AArch64/trace-regs.s765
-rw-r--r--test/MC/ARM/Windows/mov32t-range.s37
-rw-r--r--test/MC/ARM/arm-thumb-cpus-default.s23
-rw-r--r--test/MC/ARM/arm-thumb-cpus.s23
-rw-r--r--test/MC/ARM/arm_fixups.s6
-rw-r--r--test/MC/ARM/basic-thumb2-instructions.s6
-rw-r--r--test/MC/ARM/big-endian-arm-fixup.s107
-rw-r--r--test/MC/ARM/big-endian-thumb-fixup.s63
-rw-r--r--test/MC/ARM/big-endian-thumb2-fixup.s49
-rw-r--r--test/MC/ARM/coff-debugging-secrel.ll49
-rw-r--r--test/MC/ARM/coff-file.s47
-rw-r--r--test/MC/ARM/coff-function-type-info.ll45
-rw-r--r--test/MC/ARM/coff-relocations.s101
-rw-r--r--test/MC/ARM/complex-operands.s8
-rw-r--r--test/MC/ARM/diagnostics.s8
-rw-r--r--test/MC/ARM/dwarf-cfi-initial-state.s1
-rw-r--r--test/MC/ARM/eh-directive-save-diagnostics.s (renamed from test/MC/ARM/eh-directive-save-diagnoatics.s)0
-rw-r--r--test/MC/ARM/elf-thumbfunc-reloc.s13
-rw-r--r--test/MC/ARM/elf-thumbfunc.s12
-rw-r--r--test/MC/ARM/ldrd-strd-gnu-arm-bad-imm.s9
-rw-r--r--test/MC/ARM/ldrd-strd-gnu-arm.s20
-rw-r--r--test/MC/ARM/ldrd-strd-gnu-thumb-bad-regs.s10
-rw-r--r--test/MC/ARM/ldrd-strd-gnu-thumb.s20
-rw-r--r--test/MC/ARM/neon-vld-encoding.s2
-rw-r--r--test/MC/ARM/neon-vld-vst-align.s8354
-rw-r--r--test/MC/ARM/pool.s1
-rw-r--r--test/MC/ARM/symbol-variants.s6
-rw-r--r--test/MC/ARM/thumb2-diagnostics.s18
-rw-r--r--test/MC/ARM/thumb2-strd.s10
-rw-r--r--test/MC/ARM/thumb2be-b.w-encoding.s9
-rw-r--r--test/MC/ARM/thumb2be-beq.w-encoding.s9
-rw-r--r--test/MC/ARM/thumb2be-movt-encoding.s9
-rw-r--r--test/MC/ARM/thumb2be-movw-encoding.s9
-rw-r--r--test/MC/ARM/thumb_set.s45
-rw-r--r--test/MC/ARM/udf-arm-diagnostics.s19
-rw-r--r--test/MC/ARM/udf-arm.s11
-rw-r--r--test/MC/ARM/udf-thumb-2-diagnostics.s25
-rw-r--r--test/MC/ARM/udf-thumb-2.s13
-rw-r--r--test/MC/ARM/udf-thumb-diagnostics.s19
-rw-r--r--test/MC/ARM/udf-thumb.s11
-rw-r--r--test/MC/ARM/vmov-vmvn-byte-replicate.s31
-rw-r--r--test/MC/ARM/vmov-vmvn-illegal-cases.s30
-rw-r--r--test/MC/ARM/vorr-vbic-illegal-cases.s42
-rw-r--r--test/MC/ARM64/advsimd.s1997
-rw-r--r--test/MC/ARM64/aliases.s733
-rw-r--r--test/MC/ARM64/arithmetic-encoding.s631
-rw-r--r--test/MC/ARM64/arm64-fixup.s10
-rw-r--r--test/MC/ARM64/basic-a64-instructions.s18
-rw-r--r--test/MC/ARM64/bitfield-encoding.s30
-rw-r--r--test/MC/ARM64/branch-encoding.s159
-rw-r--r--test/MC/ARM64/crypto.s66
-rw-r--r--test/MC/ARM64/diags.s242
-rw-r--r--test/MC/ARM64/elf-relocs.s249
-rw-r--r--test/MC/ARM64/fp-encoding.s507
-rw-r--r--test/MC/ARM64/large-relocs.s38
-rw-r--r--test/MC/ARM64/lit.local.cfg6
-rw-r--r--test/MC/ARM64/memory.s634
-rw-r--r--test/MC/ARM64/separator.s20
-rw-r--r--test/MC/ARM64/simd-ldst.s2404
-rw-r--r--test/MC/ARM64/system-encoding.s679
-rw-r--r--test/MC/ARM64/tls-modifiers-darwin.s13
-rw-r--r--test/MC/ARM64/tls-relocs.s320
-rw-r--r--test/MC/AsmParser/cfi-invalid-startproc.s16
-rw-r--r--test/MC/AsmParser/directive_seh.s8
-rw-r--r--test/MC/AsmParser/invalid-input-assertion.s9
-rw-r--r--test/MC/AsmParser/macros-darwin-vararg.s8
-rw-r--r--test/MC/AsmParser/vararg-default-value.s15
-rw-r--r--test/MC/AsmParser/vararg.s41
-rw-r--r--test/MC/COFF/alias.s2
-rw-r--r--test/MC/COFF/comm.ll4
-rw-r--r--test/MC/COFF/comm.s4
-rw-r--r--test/MC/COFF/directive-section-characteristics.ll17
-rw-r--r--test/MC/COFF/file.s47
-rw-r--r--test/MC/COFF/global_ctors_dtors.ll28
-rw-r--r--test/MC/COFF/initialised-data.ll7
-rw-r--r--test/MC/COFF/invalid-def.s5
-rw-r--r--test/MC/COFF/invalid-endef.s4
-rw-r--r--test/MC/COFF/invalid-scl-range.s6
-rw-r--r--test/MC/COFF/invalid-scl.s4
-rw-r--r--test/MC/COFF/invalid-type-range.s6
-rw-r--r--test/MC/COFF/invalid-type.s4
-rw-r--r--test/MC/COFF/offset.s19
-rw-r--r--test/MC/COFF/symbol-alias.s2
-rw-r--r--test/MC/COFF/weak-symbol.ll8
-rw-r--r--test/MC/Disassembler/AArch64/a64-ignored-fields.txt1
-rw-r--r--test/MC/Disassembler/AArch64/arm64-advsimd.txt2283
-rw-r--r--test/MC/Disassembler/AArch64/arm64-arithmetic.txt526
-rw-r--r--test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt31
-rw-r--r--test/MC/Disassembler/AArch64/arm64-bitfield.txt29
-rw-r--r--test/MC/Disassembler/AArch64/arm64-branch.txt75
-rw-r--r--test/MC/Disassembler/AArch64/arm64-canonical-form.txt21
-rw-r--r--test/MC/Disassembler/AArch64/arm64-crc32.txt18
-rw-r--r--test/MC/Disassembler/AArch64/arm64-crypto.txt47
-rw-r--r--test/MC/Disassembler/AArch64/arm64-invalid-logical.txt (renamed from test/MC/Disassembler/ARM64/invalid-logical.txt)0
-rw-r--r--test/MC/Disassembler/AArch64/arm64-logical.txt223
-rw-r--r--test/MC/Disassembler/AArch64/arm64-memory.txt564
-rw-r--r--test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt7
-rw-r--r--test/MC/Disassembler/AArch64/arm64-scalar-fp.txt255
-rw-r--r--test/MC/Disassembler/AArch64/arm64-system.txt62
-rw-r--r--test/MC/Disassembler/AArch64/basic-a64-instructions.txt1355
-rw-r--r--test/MC/Disassembler/AArch64/basic-a64-undefined.txt67
-rw-r--r--test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt1
-rw-r--r--test/MC/Disassembler/AArch64/gicv3-regs.txt221
-rw-r--r--test/MC/Disassembler/AArch64/ldp-offset-predictable.txt1
-rw-r--r--test/MC/Disassembler/AArch64/ldp-postind.predictable.txt1
-rw-r--r--test/MC/Disassembler/AArch64/ldp-preind.predictable.txt1
-rw-r--r--test/MC/Disassembler/AArch64/lit.local.cfg2
-rw-r--r--test/MC/Disassembler/AArch64/neon-instructions.txt195
-rw-r--r--test/MC/Disassembler/AArch64/trace-regs.txt733
-rw-r--r--test/MC/Disassembler/ARM/invalid-thumbv7.txt39
-rw-r--r--test/MC/Disassembler/ARM64/advsimd.txt2282
-rw-r--r--test/MC/Disassembler/ARM64/arithmetic.txt522
-rw-r--r--test/MC/Disassembler/ARM64/bitfield.txt29
-rw-r--r--test/MC/Disassembler/ARM64/branch.txt75
-rw-r--r--test/MC/Disassembler/ARM64/crc32.txt18
-rw-r--r--test/MC/Disassembler/ARM64/crypto.txt47
-rw-r--r--test/MC/Disassembler/ARM64/lit.local.cfg5
-rw-r--r--test/MC/Disassembler/ARM64/logical.txt217
-rw-r--r--test/MC/Disassembler/ARM64/memory.txt558
-rw-r--r--test/MC/Disassembler/ARM64/scalar-fp.txt255
-rw-r--r--test/MC/Disassembler/ARM64/system.txt58
-rw-r--r--test/MC/Disassembler/Mips/mips32r6.txt116
-rw-r--r--test/MC/Disassembler/Mips/mips64r6.txt129
-rw-r--r--test/MC/Disassembler/Mips/msa/test_2r.txt17
-rw-r--r--test/MC/Disassembler/Mips/msa/test_2r_msa64.txt3
-rw-r--r--test/MC/Disassembler/Mips/msa/test_2rf.txt34
-rw-r--r--test/MC/Disassembler/Mips/msa/test_3r.txt244
-rw-r--r--test/MC/Disassembler/Mips/msa/test_3rf.txt84
-rw-r--r--test/MC/Disassembler/Mips/msa/test_bit.txt50
-rw-r--r--test/MC/Disassembler/Mips/msa/test_ctrlregs.txt35
-rw-r--r--test/MC/Disassembler/Mips/msa/test_dlsa.txt6
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm.txt17
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm_insert.txt5
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm_insert_msa64.txt3
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm_insve.txt6
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm_msa64.txt6
-rw-r--r--test/MC/Disassembler/Mips/msa/test_i10.txt6
-rw-r--r--test/MC/Disassembler/Mips/msa/test_i5.txt46
-rw-r--r--test/MC/Disassembler/Mips/msa/test_i8.txt12
-rw-r--r--test/MC/Disassembler/Mips/msa/test_lsa.txt6
-rw-r--r--test/MC/Disassembler/Mips/msa/test_mi10.txt28
-rw-r--r--test/MC/Disassembler/Mips/msa/test_vec.txt9
-rw-r--r--test/MC/Disassembler/Sparc/sparc-fp.txt6
-rw-r--r--test/MC/Disassembler/X86/prefixes.txt4
-rw-r--r--test/MC/Disassembler/X86/x86-32.txt3
-rw-r--r--test/MC/ELF/comdat.s6
-rw-r--r--test/MC/ELF/common.s12
-rw-r--r--test/MC/ELF/comp-dir.s1
-rw-r--r--test/MC/ELF/compression.s68
-rw-r--r--test/MC/ELF/file-double.s8
-rw-r--r--test/MC/ELF/gen-dwarf.s14
-rw-r--r--test/MC/ELF/lcomm.s4
-rw-r--r--test/MC/ELF/many-sections-2.s6
-rw-r--r--test/MC/ELF/noexec.s2
-rw-r--r--test/MC/ELF/offset.s59
-rw-r--r--test/MC/ELF/pic-diff.s2
-rw-r--r--test/MC/ELF/pr9292.s4
-rw-r--r--test/MC/ELF/relocation-386.s3
-rw-r--r--test/MC/ELF/relocation.s11
-rw-r--r--test/MC/ELF/set.s4
-rw-r--r--test/MC/ELF/strtab-suffix-opt.s21
-rw-r--r--test/MC/ELF/subtraction-error.s8
-rw-r--r--test/MC/ELF/symref.s142
-rw-r--r--test/MC/ELF/symver.s142
-rw-r--r--test/MC/ELF/tls-i386.s28
-rw-r--r--test/MC/ELF/tls.s14
-rw-r--r--test/MC/ELF/type.s24
-rw-r--r--test/MC/ELF/undef.s79
-rw-r--r--test/MC/ELF/weakref.s34
-rw-r--r--test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s (renamed from test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s)0
-rw-r--r--test/MC/MachO/AArch64/darwin-ARM64-reloc.s (renamed from test/MC/MachO/ARM64/darwin-ARM64-reloc.s)0
-rw-r--r--test/MC/MachO/AArch64/lit.local.cfg4
-rw-r--r--test/MC/MachO/ARM/bad-darwin-directives.s23
-rw-r--r--test/MC/MachO/ARM64/lit.local.cfg4
-rw-r--r--test/MC/MachO/bad-darwin-x86_64-reloc-expr.s6
-rw-r--r--test/MC/MachO/debug_frame.s1
-rw-r--r--test/MC/MachO/temp-labels.s2
-rw-r--r--test/MC/Mips/cpload-bad.s15
-rw-r--r--test/MC/Mips/cpload.s33
-rw-r--r--test/MC/Mips/cpsetup.s64
-rw-r--r--test/MC/Mips/elf-N64.s1
-rw-r--r--test/MC/Mips/elf-gprel-32-64.s3
-rw-r--r--test/MC/Mips/elf_eflags.s41
-rw-r--r--test/MC/Mips/elf_eflags_nan2008.s12
-rw-r--r--test/MC/Mips/elf_eflags_nanlegacy.s15
-rw-r--r--test/MC/Mips/llvm-mc-fixup-endianness.s6
-rw-r--r--test/MC/Mips/micromips-control-instructions.s8
-rw-r--r--test/MC/Mips/micromips-el-fixup-data.s2
-rw-r--r--test/MC/Mips/mips-control-instructions.s4
-rw-r--r--test/MC/Mips/mips1/invalid-mips2-wrong-error.s16
-rw-r--r--test/MC/Mips/mips1/invalid-mips2.s23
-rw-r--r--test/MC/Mips/mips1/invalid-mips3-wrong-error.s23
-rw-r--r--test/MC/Mips/mips1/invalid-mips3.s65
-rw-r--r--test/MC/Mips/mips1/invalid-mips4-wrong-error.s23
-rw-r--r--test/MC/Mips/mips1/invalid-mips4.s82
-rw-r--r--test/MC/Mips/mips1/invalid-mips5-wrong-error.s46
-rw-r--r--test/MC/Mips/mips1/invalid-mips5.s83
-rw-r--r--test/MC/Mips/mips1/valid-xfail.s14
-rw-r--r--test/MC/Mips/mips1/valid.s181
-rw-r--r--test/MC/Mips/mips2/invalid-mips3-wrong-error.s19
-rw-r--r--test/MC/Mips/mips2/invalid-mips3.s48
-rw-r--r--test/MC/Mips/mips2/invalid-mips32.s32
-rw-r--r--test/MC/Mips/mips2/invalid-mips32r2-xfail.s11
-rw-r--r--test/MC/Mips/mips2/invalid-mips32r2.s59
-rw-r--r--test/MC/Mips/mips2/invalid-mips4-wrong-error.s14
-rw-r--r--test/MC/Mips/mips2/invalid-mips4.s65
-rw-r--r--test/MC/Mips/mips2/invalid-mips5-wrong-error.s46
-rw-r--r--test/MC/Mips/mips2/invalid-mips5.s66
-rw-r--r--test/MC/Mips/mips2/valid-xfail.s17
-rw-r--r--test/MC/Mips/mips2/valid.s227
-rw-r--r--test/MC/Mips/mips3/invalid-mips4.s23
-rw-r--r--test/MC/Mips/mips3/invalid-mips5-wrong-error.s46
-rw-r--r--test/MC/Mips/mips3/invalid-mips5.s25
-rw-r--r--test/MC/Mips/mips3/valid-xfail.s15
-rw-r--r--test/MC/Mips/mips3/valid.s315
-rw-r--r--test/MC/Mips/mips32/invalid-mips32r2-xfail.s8
-rw-r--r--test/MC/Mips/mips32/invalid-mips32r2.s13
-rw-r--r--test/MC/Mips/mips32/invalid-mips64.s9
-rw-r--r--test/MC/Mips/mips32/valid-xfail.s68
-rw-r--r--test/MC/Mips/mips32/valid.s270
-rw-r--r--test/MC/Mips/mips32r2/invalid-mips64r2.s10
-rw-r--r--test/MC/Mips/mips32r2/valid-xfail.s608
-rw-r--r--test/MC/Mips/mips32r2/valid.s321
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s15
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips1.s8
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s20
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips2.s14
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s16
-rw-r--r--test/MC/Mips/mips32r6/relocations.s55
-rw-r--r--test/MC/Mips/mips32r6/valid-xfail.s19
-rw-r--r--test/MC/Mips/mips32r6/valid.s126
-rw-r--r--test/MC/Mips/mips4/invalid-mips5-wrong-error.s46
-rw-r--r--test/MC/Mips/mips4/invalid-mips5.s9
-rw-r--r--test/MC/Mips/mips4/invalid-mips64-xfail.s22
-rw-r--r--test/MC/Mips/mips4/invalid-mips64.s20
-rw-r--r--test/MC/Mips/mips4/invalid-mips64r2-xfail.s16
-rw-r--r--test/MC/Mips/mips4/invalid-mips64r2.s25
-rw-r--r--test/MC/Mips/mips4/valid-xfail.s89
-rw-r--r--test/MC/Mips/mips4/valid.s349
-rw-r--r--test/MC/Mips/mips5/invalid-mips64.s21
-rw-r--r--test/MC/Mips/mips5/invalid-mips64r2-xfail.s11
-rw-r--r--test/MC/Mips/mips5/invalid-mips64r2.s43
-rw-r--r--test/MC/Mips/mips5/valid-xfail.s163
-rw-r--r--test/MC/Mips/mips5/valid.s351
-rw-r--r--test/MC/Mips/mips64/invalid-mips64r2-xfail.s4
-rw-r--r--test/MC/Mips/mips64/invalid-mips64r2.s32
-rw-r--r--test/MC/Mips/mips64/valid-xfail.s176
-rw-r--r--test/MC/Mips/mips64/valid.s376
-rw-r--r--test/MC/Mips/mips64r2/valid-xfail.s611
-rw-r--r--test/MC/Mips/mips64r2/valid.s414
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s15
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips1.s8
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips2.s14
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s23
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips3.s14
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s44
-rw-r--r--test/MC/Mips/mips64r6/relocations.s55
-rw-r--r--test/MC/Mips/mips64r6/valid-xfail.s19
-rw-r--r--test/MC/Mips/mips64r6/valid.s139
-rw-r--r--test/MC/Mips/mips_directives.s2
-rw-r--r--test/MC/Mips/mips_gprel16.s3
-rw-r--r--test/MC/Mips/msa/test_2r.s20
-rw-r--r--test/MC/Mips/msa/test_2r_msa64.s6
-rw-r--r--test/MC/Mips/msa/test_2rf.s37
-rw-r--r--test/MC/Mips/msa/test_3r.s247
-rw-r--r--test/MC/Mips/msa/test_3rf.s87
-rw-r--r--test/MC/Mips/msa/test_bit.s53
-rw-r--r--test/MC/Mips/msa/test_cbranch.s20
-rw-r--r--test/MC/Mips/msa/test_ctrlregs.s38
-rw-r--r--test/MC/Mips/msa/test_dlsa.s9
-rw-r--r--test/MC/Mips/msa/test_elm.s20
-rw-r--r--test/MC/Mips/msa/test_elm_insert.s8
-rw-r--r--test/MC/Mips/msa/test_elm_insert_msa64.s6
-rw-r--r--test/MC/Mips/msa/test_elm_insve.s9
-rw-r--r--test/MC/Mips/msa/test_elm_msa64.s7
-rw-r--r--test/MC/Mips/msa/test_i10.s10
-rw-r--r--test/MC/Mips/msa/test_i5.s49
-rw-r--r--test/MC/Mips/msa/test_i8.s15
-rw-r--r--test/MC/Mips/msa/test_lsa.s9
-rw-r--r--test/MC/Mips/msa/test_mi10.s31
-rw-r--r--test/MC/Mips/msa/test_vec.s12
-rw-r--r--test/MC/Mips/octeon-instructions.s20
-rw-r--r--test/MC/PowerPC/ppc64-initial-cfa.s1
-rw-r--r--test/MC/Sparc/sparc-alu-instructions.s4
-rw-r--r--test/MC/Sparc/sparc-fp-instructions.s12
-rw-r--r--test/MC/X86/avx512-encodings.s1160
-rw-r--r--test/MC/X86/x86-64.s2
-rw-r--r--test/Object/Inputs/COFF/weak-external.yaml43
-rw-r--r--test/Object/Inputs/COFF/x86-64.yaml17
-rw-r--r--test/Object/Inputs/macho-text-data-bss.macho-x86_64bin0 -> 844 bytes
-rw-r--r--test/Object/Inputs/macho-universal-archive.x86_64.i386bin0 -> 1656 bytes
-rwxr-xr-xtest/Object/Inputs/relocation-dynamic.elf-i386bin0 -> 1504 bytes
-rw-r--r--test/Object/Inputs/relocation-relocatable.elf-i386bin0 -> 772 bytes
-rw-r--r--test/Object/Inputs/trivial-object-test.coff-x86-64bin347 -> 437 bytes
-rw-r--r--test/Object/X86/objdump-cfg-invalid-opcode.yaml2
-rw-r--r--test/Object/X86/objdump-disassembly-symbolic.test20
-rw-r--r--test/Object/X86/yaml2obj-elf-x86-rel.yaml41
-rw-r--r--test/Object/ar-error.test6
-rw-r--r--test/Object/archive-long-index.test26
-rw-r--r--test/Object/archive-symtab.test24
-rw-r--r--test/Object/archive-toc.test24
-rw-r--r--test/Object/extract.ll2
-rw-r--r--test/Object/nm-shared-object.test28
-rw-r--r--test/Object/nm-trivial-object.test46
-rw-r--r--test/Object/nm-universal-binary.test23
-rw-r--r--test/Object/obj2yaml-coff-weak-external.test3
-rw-r--r--test/Object/obj2yaml.test244
-rw-r--r--test/Object/size-trivial-macho.test15
-rw-r--r--test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml9
-rw-r--r--test/Object/yaml2obj-elf-rel.yaml118
-rw-r--r--test/Object/yaml2obj-elf-section-basic.yaml34
-rw-r--r--test/Object/yaml2obj-elf-section-invalid-size.yaml26
-rw-r--r--test/Object/yaml2obj-readobj.test3
-rw-r--r--test/Other/extract-alias.ll6
-rw-r--r--test/Other/optimization-remarks-inline.ll40
-rw-r--r--test/Other/pass-pipeline-parsing.ll36
-rw-r--r--test/TableGen/GeneralList.td1
-rw-r--r--test/TableGen/lisp.td1
-rw-r--r--test/TableGen/listconcat.td18
-rw-r--r--test/TableGen/strconcat.td14
-rw-r--r--test/Transforms/AddDiscriminators/no-discriminators.ll71
-rw-r--r--test/Transforms/ArgumentPromotion/inalloca.ll4
-rw-r--r--test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll340
-rw-r--r--test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll202
-rw-r--r--test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg4
-rw-r--r--test/Transforms/BBVectorize/simple-int.ll376
-rw-r--r--test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll37
-rw-r--r--test/Transforms/ConstProp/loads.ll34
-rw-r--r--test/Transforms/ConstantHoisting/AArch64/const-addr.ll23
-rw-r--r--test/Transforms/ConstantHoisting/AArch64/large-immediate.ll27
-rw-r--r--test/Transforms/ConstantHoisting/AArch64/lit.local.cfg3
-rw-r--r--test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll23
-rw-r--r--test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg4
-rw-r--r--test/Transforms/ConstantHoisting/PowerPC/masks.ll66
-rw-r--r--test/Transforms/ConstantHoisting/X86/cast-inst.ll29
-rw-r--r--test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll10
-rw-r--r--test/Transforms/ConstantHoisting/X86/large-immediate.ll27
-rw-r--r--test/Transforms/GVN/load-pre-nonlocal.ll87
-rw-r--r--test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll18
-rw-r--r--test/Transforms/GlobalDCE/global_ctors.ll14
-rw-r--r--test/Transforms/GlobalDCE/global_ctors_integration.ll45
-rw-r--r--test/Transforms/GlobalMerge/AArch64/arm64.ll (renamed from test/Transforms/GlobalMerge/ARM64/arm64.ll)0
-rw-r--r--test/Transforms/GlobalMerge/AArch64/lit.local.cfg4
-rw-r--r--test/Transforms/GlobalMerge/ARM64/lit.local.cfg4
-rw-r--r--test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll2
-rw-r--r--test/Transforms/GlobalOpt/alias-resolve.ll4
-rw-r--r--test/Transforms/GlobalOpt/alias-used-section.ll4
-rw-r--r--test/Transforms/GlobalOpt/atexit.ll2
-rw-r--r--test/Transforms/GlobalOpt/ctor-list-opt.ll19
-rw-r--r--test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll2
-rw-r--r--test/Transforms/IndVarSimplify/pr18223.ll30
-rw-r--r--test/Transforms/Inline/2010-05-31-ByvalTailcall.ll24
-rw-r--r--test/Transforms/Inline/always-inline.ll11
-rw-r--r--test/Transforms/Inline/byval-tail-call.ll38
-rw-r--r--test/Transforms/Inline/byval_lifetime.ll26
-rw-r--r--test/Transforms/Inline/inline-cold.ll116
-rw-r--r--test/Transforms/Inline/inline-tail.ll185
-rw-r--r--test/Transforms/Inline/inline-vla.ll38
-rw-r--r--test/Transforms/Inline/optimization-remarks.ll60
-rw-r--r--test/Transforms/Inline/switch.ll60
-rw-r--r--test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll20
-rw-r--r--test/Transforms/InstCombine/OverlappingInsertvalues.ll36
-rw-r--r--test/Transforms/InstCombine/alloca.ll21
-rw-r--r--test/Transforms/InstCombine/bitcast-alias-function.ll24
-rw-r--r--test/Transforms/InstCombine/blend_x86.ll55
-rw-r--r--test/Transforms/InstCombine/call-cast-target-inalloca.ll2
-rw-r--r--test/Transforms/InstCombine/div.ll19
-rw-r--r--test/Transforms/InstCombine/gep-addrspace.ll17
-rw-r--r--test/Transforms/InstCombine/icmp.ll9
-rw-r--r--test/Transforms/InstCombine/memcpy-from-global.ll67
-rw-r--r--test/Transforms/InstCombine/overflow-mul.ll164
-rw-r--r--test/Transforms/InstCombine/pr19420.ll67
-rw-r--r--test/Transforms/InstCombine/select.ll100
-rw-r--r--test/Transforms/InstCombine/shift.ll94
-rw-r--r--test/Transforms/InstCombine/strlen-1.ll12
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll366
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll162
-rw-r--r--test/Transforms/InstSimplify/compare.ll126
-rw-r--r--test/Transforms/InstSimplify/dead-code-removal.ll15
-rw-r--r--test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll11
-rw-r--r--test/Transforms/Internalize/local-visibility.ll25
-rw-r--r--test/Transforms/JumpThreading/phi-eq.ll2
-rw-r--r--test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg5
-rw-r--r--test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll (renamed from test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll)0
-rw-r--r--test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll (renamed from test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll)0
-rw-r--r--test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll70
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll3
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg5
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll6
-rw-r--r--test/Transforms/LoopUnroll/X86/partial.ll51
-rw-r--r--test/Transforms/LoopUnroll/loop-remarks.ll25
-rw-r--r--test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll42
-rw-r--r--test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll42
-rw-r--r--test/Transforms/LoopVectorize/AArch64/gather-cost.ll (renamed from test/Transforms/LoopVectorize/ARM64/gather-cost.ll)0
-rw-r--r--test/Transforms/LoopVectorize/AArch64/lit.local.cfg6
-rw-r--r--test/Transforms/LoopVectorize/ARM64/lit.local.cfg6
-rw-r--r--test/Transforms/LoopVectorize/X86/metadata-enable.ll20
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.ll93
-rw-r--r--test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll73
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks.ll67
-rw-r--r--test/Transforms/LoopVectorize/store-shuffle-bug.ll17
-rw-r--r--test/Transforms/LoopVectorize/vect.omp.persistence.ll88
-rw-r--r--test/Transforms/LoopVectorize/vect.stats.ll65
-rw-r--r--test/Transforms/MergeFunc/crash.ll14
-rw-r--r--test/Transforms/MergeFunc/inttoptr-address-space.ll6
-rw-r--r--test/Transforms/MergeFunc/inttoptr.ll14
-rw-r--r--test/Transforms/MergeFunc/mergefunc-struct-return.ll40
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/lit.local.cfg3
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll (renamed from test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll)0
-rw-r--r--test/Transforms/SLPVectorizer/ARM64/lit.local.cfg3
-rw-r--r--test/Transforms/SLPVectorizer/X86/align.ll27
-rw-r--r--test/Transforms/SLPVectorizer/X86/call.ll128
-rw-r--r--test/Transforms/SLPVectorizer/X86/consecutive-access.ll175
-rw-r--r--test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll31
-rw-r--r--test/Transforms/SLPVectorizer/X86/cse.ll30
-rw-r--r--test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll62
-rw-r--r--test/Transforms/SLPVectorizer/X86/intrinsic.ll44
-rw-r--r--test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll36
-rw-r--r--test/Transforms/SLPVectorizer/X86/value-bug.ll80
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg4
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll59
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll137
-rw-r--r--test/Transforms/SimplifyCFG/extract-cost.ll22
-rw-r--r--test/Transforms/TailCallElim/basic.ll31
-rw-r--r--test/Verifier/aliasing-chain.ll6
-rw-r--r--test/Verifier/bitcast-alias-address-space.ll10
-rw-r--r--test/Verifier/global-ctors.ll11
-rw-r--r--test/Verifier/inalloca3.ll13
-rw-r--r--test/Verifier/musttail-invalid.ll82
-rw-r--r--test/Verifier/musttail-valid.ll16
-rw-r--r--test/Verifier/sret.ll7
-rw-r--r--test/lit.cfg9
-rw-r--r--test/tools/llvm-cov/Inputs/range_based_for.gcdabin0 -> 164 bytes
-rw-r--r--test/tools/llvm-cov/Inputs/range_based_for.gcnobin0 -> 552 bytes
-rw-r--r--test/tools/llvm-cov/Inputs/test_long_file_names.output8
-rw-r--r--test/tools/llvm-cov/Inputs/test_long_paths.output8
-rw-r--r--test/tools/llvm-cov/Inputs/test_missing.cpp.gcov77
-rw-r--r--test/tools/llvm-cov/Inputs/test_missing.h.gcov6
-rw-r--r--test/tools/llvm-cov/Inputs/test_missing.output8
-rw-r--r--test/tools/llvm-cov/Inputs/test_no_output.output6
-rw-r--r--test/tools/llvm-cov/lit.local.cfg2
-rw-r--r--test/tools/llvm-cov/llvm-cov.test19
-rw-r--r--test/tools/llvm-cov/range_based_for.cpp29
-rw-r--r--test/tools/llvm-objdump/Inputs/file-aux-record.yaml21
-rwxr-xr-xtest/tools/llvm-objdump/Inputs/file.obj.coff-armbin0 -> 374 bytes
-rw-r--r--test/tools/llvm-objdump/coff-file.test6
-rw-r--r--test/tools/llvm-objdump/coff-non-null-terminated-file.test5
-rw-r--r--test/tools/llvm-profdata/Inputs/no-counts.profdata3
-rw-r--r--test/tools/llvm-profdata/errors.test11
-rw-r--r--test/tools/llvm-profdata/raw-two-profiles.test64
-rw-r--r--test/tools/llvm-profdata/simple.test27
-rwxr-xr-xtest/tools/llvm-readobj/Inputs/dynamic-table-exe.x86bin0 -> 6555 bytes
-rw-r--r--test/tools/llvm-readobj/Inputs/file-aux-record.yaml21
-rw-r--r--test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml21
-rw-r--r--test/tools/llvm-readobj/coff-file-sections-reading.test18
-rw-r--r--test/tools/llvm-readobj/coff-non-null-terminated-file.test20
-rw-r--r--test/tools/llvm-readobj/dynamic.test45
1762 files changed, 114031 insertions, 97773 deletions
diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll
new file mode 100644
index 0000000..bcdc1e6
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+declare void @g(i32 %x)
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'branch_weight_0':
+; CHECK-NEXT: block-frequency-info: branch_weight_0
+define void @branch_weight_0(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br label %for.body
+
+; Check that we get 1,4 instead of 0,3.
+; CHECK-NEXT: for.body: float = 4.0,
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ call void @g(i32 %i)
+ %inc = add i32 %i, 1
+ %cmp = icmp ugt i32 %inc, %a
+ br i1 %cmp, label %for.end, label %for.body, !prof !0
+
+; CHECK-NEXT: for.end: float = 1.0, int = [[ENTRY]]
+for.end:
+ ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 3}
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'infinite_loop'
+; CHECK-NEXT: block-frequency-info: infinite_loop
+define void @infinite_loop(i1 %x) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br i1 %x, label %for.body, label %for.end, !prof !1
+
+; Check that the loop scale maxes out at 4096, giving 2048 here.
+; CHECK-NEXT: for.body: float = 2048.0,
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ call void @g(i32 %i)
+ %inc = add i32 %i, 1
+ br label %for.body
+
+; Check that the exit weight is half of entry, since half is lost in the
+; infinite loop above.
+; CHECK-NEXT: for.end: float = 0.5,
+for.end:
+ ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
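
The expectations in infinite_loop follow directly from the capped loop-scale
model the comments describe (assuming the 4096 cap and an even 1:1 entry split):

    mass entering for.body = 1/2             ; branch weights 1:1
    loop scale (no exit)   = 4096            ; capped
    for.body = 1/2 * 4096  = 2048.0
    for.end  = 1/2         = 0.5             ; the mass that bypassed the loop
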
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index ce29fb5..006e6ab 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -1,13 +1,14 @@
; RUN: opt < %s -analyze -block-freq | FileCheck %s
define i32 @test1(i32 %i, i32* %a) {
-; CHECK: Printing analysis {{.*}} for function 'test1'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test1':
+; CHECK-NEXT: block-frequency-info: test1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
entry:
br label %body
; Loop backedges are weighted and thus their bodies have a greater frequency.
-; CHECK: body = 32.0
+; CHECK-NEXT: body: float = 32.0,
body:
%iv = phi i32 [ 0, %entry ], [ %next, %body ]
%base = phi i32 [ 0, %entry ], [ %sum, %body ]
@@ -18,29 +19,29 @@ body:
%exitcond = icmp eq i32 %next, %i
br i1 %exitcond, label %exit, label %body
-; CHECK: exit = 1.0
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
exit:
ret i32 %sum
}
define i32 @test2(i32 %i, i32 %a, i32 %b) {
-; CHECK: Printing analysis {{.*}} for function 'test2'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test2':
+; CHECK-NEXT: block-frequency-info: test2
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
entry:
%cond = icmp ult i32 %i, 42
br i1 %cond, label %then, label %else, !prof !0
; The 'then' branch is predicted more likely via branch weight metadata.
-; CHECK: then = 0.94116
+; CHECK-NEXT: then: float = 0.9411{{[0-9]*}},
then:
br label %exit
-; CHECK: else = 0.05877
+; CHECK-NEXT: else: float = 0.05882{{[0-9]*}},
else:
br label %exit
-; FIXME: It may be a bug that we don't sum back to 1.0.
-; CHECK: exit = 0.99993
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
exit:
%result = phi i32 [ %a, %then ], [ %b, %else ]
ret i32 %result
@@ -49,37 +50,37 @@ exit:
!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
-; CHECK: Printing analysis {{.*}} for function 'test3'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test3':
+; CHECK-NEXT: block-frequency-info: test3
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
entry:
switch i32 %i, label %case_a [ i32 1, label %case_b
i32 2, label %case_c
i32 3, label %case_d
i32 4, label %case_e ], !prof !1
-; CHECK: case_a = 0.04998
+; CHECK-NEXT: case_a: float = 0.05,
case_a:
br label %exit
-; CHECK: case_b = 0.04998
+; CHECK-NEXT: case_b: float = 0.05,
case_b:
br label %exit
; The 'case_c' branch is predicted more likely via branch weight metadata.
-; CHECK: case_c = 0.79998
+; CHECK-NEXT: case_c: float = 0.8,
case_c:
br label %exit
-; CHECK: case_d = 0.04998
+; CHECK-NEXT: case_d: float = 0.05,
case_d:
br label %exit
-; CHECK: case_e = 0.04998
+; CHECK-NEXT: case_e: float = 0.05,
case_e:
br label %exit
-; FIXME: It may be a bug that we don't sum back to 1.0.
-; CHECK: exit = 0.99993
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
exit:
%result = phi i32 [ %a, %case_a ],
[ %b, %case_b ],
@@ -91,44 +92,50 @@ exit:
!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
-; CHECK: Printing analysis {{.*}} for function 'nested_loops'
-; CHECK: entry = 1.0
-; This test doesn't seem to be assigning sensible frequencies to nested loops.
define void @nested_loops(i32 %a) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loops':
+; CHECK-NEXT: block-frequency-info: nested_loops
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
entry:
br label %for.cond1.preheader
+; CHECK-NEXT: for.cond1.preheader: float = 4001.0,
for.cond1.preheader:
%x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
br label %for.cond4.preheader
+; CHECK-NEXT: for.cond4.preheader: float = 16008001.0,
for.cond4.preheader:
%y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
%add = add i32 %y.023, %x.024
br label %for.body6
+; CHECK-NEXT: for.body6: float = 64048012001.0,
for.body6:
%z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
%add7 = add i32 %add, %z.022
- tail call void @g(i32 %add7) #2
+ tail call void @g(i32 %add7)
%inc = add i32 %z.022, 1
%cmp5 = icmp ugt i32 %inc, %a
br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
+; CHECK-NEXT: for.inc8: float = 16008001.0,
for.inc8:
%inc9 = add i32 %y.023, 1
%cmp2 = icmp ugt i32 %inc9, %a
br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2
+; CHECK-NEXT: for.inc11: float = 4001.0,
for.inc11:
%inc12 = add i32 %x.024, 1
%cmp = icmp ugt i32 %inc12, %a
br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2
+; CHECK-NEXT: for.end13: float = 1.0, int = [[ENTRY]]
for.end13:
ret void
}
-declare void @g(i32) #1
+declare void @g(i32)
!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}
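
Two sanity checks on the updated expectations above, assuming each block's
frequency is its normalized branch weight scaled by the loop scale
(1 / exit mass):

    test2:  then = 64/68 = 16/17 = 0.94117...    else = 4/68 = 1/17 = 0.05882...
    nested_loops (weights 1:4000, so each loop's scale is 4001):
        for.cond1.preheader = 4001
        for.cond4.preheader = 4001^2 = 16008001
        for.body6           = 4001^3 = 64048012001
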
diff --git a/test/Analysis/BlockFrequencyInfo/double_backedge.ll b/test/Analysis/BlockFrequencyInfo/double_backedge.ll
new file mode 100644
index 0000000..df8217c
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/double_backedge.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+define void @double_backedge(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_backedge':
+; CHECK-NEXT: block-frequency-info: double_backedge
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 10.0,
+ br i1 %x, label %exit, label %loop.1, !prof !0
+
+loop.1:
+; CHECK-NEXT: loop.1: float = 9.0,
+ br i1 %x, label %loop, label %loop.2, !prof !1
+
+loop.2:
+; CHECK-NEXT: loop.2: float = 5.0,
+ br label %loop
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 9}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 5}
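
The double_backedge expectations can be recovered from the weights alone,
assuming loop scale = 1 / (exit mass):

    exit mass per iteration = 1/10               ; weights 1:9 at 'loop'
    loop   = 1 * 10           = 10.0
    loop.1 = 10 * 9/10        = 9.0
    loop.2 = 10 * 9/10 * 5/9  = 5.0              ; weights 4:5 at 'loop.1'
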
diff --git a/test/Analysis/BlockFrequencyInfo/double_exit.ll b/test/Analysis/BlockFrequencyInfo/double_exit.ll
new file mode 100644
index 0000000..75f664d
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/double_exit.ll
@@ -0,0 +1,165 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_exit':
+; CHECK-NEXT: block-frequency-info: double_exit
+define i32 @double_exit(i32 %N) {
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br label %outer
+
+; Mass = 1
+; Backedge mass = 1/3, exit mass = 2/3
+; Loop scale = 3/2
+; Pseudo-edges = exit
+; Pseudo-mass = 1
+; Frequency = 1*3/2*1 = 3/2
+; CHECK-NEXT: outer: float = 1.5,
+outer:
+ %I.0 = phi i32 [ 0, %entry ], [ %inc6, %outer.inc ]
+ %Return.0 = phi i32 [ 0, %entry ], [ %Return.1, %outer.inc ]
+ %cmp = icmp slt i32 %I.0, %N
+ br i1 %cmp, label %inner, label %exit, !prof !2 ; 2:1
+
+; Mass = 1
+; Backedge mass = 3/5, exit mass = 2/5
+; Loop scale = 5/2
+; Pseudo-edges = outer.inc @ 1/5, exit @ 1/5
+; Pseudo-mass = 2/3
+; Frequency = 3/2*1*5/2*2/3 = 5/2
+; CHECK-NEXT: inner: float = 2.5,
+inner:
+ %Return.1 = phi i32 [ %Return.0, %outer ], [ %call4, %inner.inc ]
+ %J.0 = phi i32 [ %I.0, %outer ], [ %inc, %inner.inc ]
+ %cmp2 = icmp slt i32 %J.0, %N
+ br i1 %cmp2, label %inner.body, label %outer.inc, !prof !1 ; 4:1
+
+; Mass = 4/5
+; Frequency = 5/2*4/5 = 2
+; CHECK-NEXT: inner.body: float = 2.0,
+inner.body:
+ %call = call i32 @c2(i32 %I.0, i32 %J.0)
+ %tobool = icmp ne i32 %call, 0
+ br i1 %tobool, label %exit, label %inner.inc, !prof !0 ; 3:1
+
+; Mass = 3/5
+; Frequency = 5/2*3/5 = 3/2
+; CHECK-NEXT: inner.inc: float = 1.5,
+inner.inc:
+ %call4 = call i32 @logic2(i32 %Return.1, i32 %I.0, i32 %J.0)
+ %inc = add nsw i32 %J.0, 1
+ br label %inner
+
+; Mass = 1/3
+; Frequency = 3/2*1/3 = 1/2
+; CHECK-NEXT: outer.inc: float = 0.5,
+outer.inc:
+ %inc6 = add nsw i32 %I.0, 1
+ br label %outer
+
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+ %Return.2 = phi i32 [ %Return.1, %inner.body ], [ %Return.0, %outer ]
+ ret i32 %Return.2
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+
+declare i32 @c2(i32, i32)
+declare i32 @logic2(i32, i32, i32)
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_exit_in_loop':
+; CHECK-NEXT: block-frequency-info: double_exit_in_loop
+define i32 @double_exit_in_loop(i32 %N) {
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br label %outer
+
+; Mass = 1
+; Backedge mass = 1/2, exit mass = 1/2
+; Loop scale = 2
+; Pseudo-edges = exit
+; Pseudo-mass = 1
+; Frequency = 1*2*1 = 2
+; CHECK-NEXT: outer: float = 2.0,
+outer:
+ %I.0 = phi i32 [ 0, %entry ], [ %inc12, %outer.inc ]
+ %Return.0 = phi i32 [ 0, %entry ], [ %Return.3, %outer.inc ]
+ %cmp = icmp slt i32 %I.0, %N
+ br i1 %cmp, label %middle, label %exit, !prof !3 ; 1:1
+
+; Mass = 1
+; Backedge mass = 1/3, exit mass = 2/3
+; Loop scale = 3/2
+; Pseudo-edges = outer.inc
+; Pseudo-mass = 1/2
+; Frequency = 2*1*3/2*1/2 = 3/2
+; CHECK-NEXT: middle: float = 1.5,
+middle:
+ %J.0 = phi i32 [ %I.0, %outer ], [ %inc9, %middle.inc ]
+ %Return.1 = phi i32 [ %Return.0, %outer ], [ %Return.2, %middle.inc ]
+ %cmp2 = icmp slt i32 %J.0, %N
+ br i1 %cmp2, label %inner, label %outer.inc, !prof !2 ; 2:1
+
+; Mass = 1
+; Backedge mass = 3/5, exit mass = 2/5
+; Loop scale = 5/2
+; Pseudo-edges = middle.inc @ 1/5, outer.inc @ 1/5
+; Pseudo-mass = 2/3
+; Frequency = 3/2*1*5/2*2/3 = 5/2
+; CHECK-NEXT: inner: float = 2.5,
+inner:
+ %Return.2 = phi i32 [ %Return.1, %middle ], [ %call7, %inner.inc ]
+ %K.0 = phi i32 [ %J.0, %middle ], [ %inc, %inner.inc ]
+ %cmp5 = icmp slt i32 %K.0, %N
+ br i1 %cmp5, label %inner.body, label %middle.inc, !prof !1 ; 4:1
+
+; Mass = 4/5
+; Frequency = 5/2*4/5 = 2
+; CHECK-NEXT: inner.body: float = 2.0,
+inner.body:
+ %call = call i32 @c3(i32 %I.0, i32 %J.0, i32 %K.0)
+ %tobool = icmp ne i32 %call, 0
+ br i1 %tobool, label %outer.inc, label %inner.inc, !prof !0 ; 3:1
+
+; Mass = 3/5
+; Frequency = 5/2*3/5 = 3/2
+; CHECK-NEXT: inner.inc: float = 1.5,
+inner.inc:
+ %call7 = call i32 @logic3(i32 %Return.2, i32 %I.0, i32 %J.0, i32 %K.0)
+ %inc = add nsw i32 %K.0, 1
+ br label %inner
+
+; Mass = 1/3
+; Frequency = 3/2*1/3 = 1/2
+; CHECK-NEXT: middle.inc: float = 0.5,
+middle.inc:
+ %inc9 = add nsw i32 %J.0, 1
+ br label %middle
+
+; Mass = 1/2
+; Frequency = 2*1/2 = 1
+; CHECK-NEXT: outer.inc: float = 1.0,
+outer.inc:
+ %Return.3 = phi i32 [ %Return.2, %inner.body ], [ %Return.1, %middle ]
+ %inc12 = add nsw i32 %I.0, 1
+ br label %outer
+
+; Mass = 1
+; Frequency = 1
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+ ret i32 %Return.0
+}
+
+!3 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+
+declare i32 @c3(i32, i32, i32)
+declare i32 @logic3(i32, i32, i32, i32)
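
Each "Frequency =" line above is the product
(parent frequency) * (entry mass) * (loop scale) * (pseudo-mass),
with loop scale = 1 / (exit mass). For example, for double_exit:

    outer: exit mass = 2/3  =>  scale = 3/2  =>  freq = 1 * 3/2 * 1      = 1.5
    inner: exit mass = 2/5  =>  scale = 5/2  =>  freq = 3/2 * 5/2 * 2/3  = 2.5
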
diff --git a/test/Analysis/BlockFrequencyInfo/irreducible.ll b/test/Analysis/BlockFrequencyInfo/irreducible.ll
new file mode 100644
index 0000000..af4ad15
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/irreducible.ll
@@ -0,0 +1,421 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; A loop with multiple exits isn't irreducible. It should be handled
+; correctly.
+;
+; CHECK-LABEL: Printing analysis {{.*}} for function 'multiexit':
+; CHECK-NEXT: block-frequency-info: multiexit
+define void @multiexit(i1 %x) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br label %loop.1
+
+; CHECK-NEXT: loop.1: float = 2.0,
+loop.1:
+ br i1 %x, label %exit.1, label %loop.2, !prof !0
+
+; CHECK-NEXT: loop.2: float = 1.75,
+loop.2:
+ br i1 %x, label %exit.2, label %loop.1, !prof !1
+
+; CHECK-NEXT: exit.1: float = 0.25,
+exit.1:
+ br label %return
+
+; CHECK-NEXT: exit.2: float = 0.75,
+exit.2:
+ br label %return
+
+; CHECK-NEXT: return: float = 1.0, int = [[ENTRY]]
+return:
+ ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 7}
+!1 = metadata !{metadata !"branch_weights", i32 3, i32 4}
+
+; Irreducible control flow
+; ========================
+;
+; LoopInfo defines a loop as a non-trivial SCC dominated by a single block,
+; called the header. A given loop, L, can have sub-loops, which are loops
+; within the subgraph of L that excludes the header.
+;
+; In addition to loops, -block-freq has limited support for irreducible SCCs,
+; which are SCCs with multiple entry blocks. Irreducible SCCs are discovered
+; on the fly, and modelled as loops with multiple headers.
+;
+; The headers of an irreducible sub-SCC consist of its entry blocks and all
+; nodes that are targets of a backedge within it (excluding backedges within
+; true sub-loops).
+;
+; -block-freq currently models irreducible SCCs as if a block were inserted
+; that intercepts all the edges to the headers. All backedges and entries
+; point to this block. Its successors are the headers, which split the
+; frequency evenly.
+;
+; There are a number of testcases below. Only the first two have detailed
+; explanations.
+;
+; Testcase #1
+; ===========
+;
+; In this case c1 and c2 should have frequencies of 15/7 and 13/7,
+; respectively. To calculate this, consider assigning 1.0 to entry, and
+; distributing frequency iteratively (to infinity). At the first iteration,
+; entry gives 3/4 to c1 and 1/4 to c2. At every step after, c1 and c2 give 3/4
+; of what they have to each other. In the limit, all of the mass flows out to exit.
+;
+; c1 = 3/4 + 1/4*3/4 + 3/4*3^2/4^2 + 1/4*3^3/4^3 + 3/4*3^3/4^3 + ...
+; c2 = 1/4 + 3/4*3/4 + 1/4*3^2/4^2 + 3/4*3^3/4^3 + 1/4*3^3/4^3 + ...
+;
+; Simplify by splitting up the odd and even terms of the series and taking out
+; factors so that the infinite series matches:
+;
+; c1 = 3/4 *(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+; + 3/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+; c2 = 1/4 *(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+; + 9/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;
+; c1 = 15/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+; c2 = 13/16*(9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ...)
+;
+; Since this geometric series sums to 16/7:
+;
+; c1 = 15/7
+; c2 = 13/7
+;
+; If we treat c1 and c2 as members of the same loop, the exit frequency of the
+; loop as a whole is 1/4, so the loop scale should be 4. Summing c1 and c2
+; gives 28/7, or 4.0, which is nice confirmation of the math above.
+;
+; -block-freq currently treats the two nodes as equals.
+define void @multientry(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'multientry':
+; CHECK-NEXT: block-frequency-info: multientry
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br i1 %x, label %c1, label %c2, !prof !2
+
+c1:
+; CHECK-NEXT: c1: float = 2.0,
+; The "correct" answer is: float = 2.142857{{[0-9]*}},
+ br i1 %x, label %c2, label %exit, !prof !2
+
+c2:
+; CHECK-NEXT: c2: float = 2.0,
+; The "correct" answer is: float = 1.857142{{[0-9]*}},
+ br i1 %x, label %c1, label %exit, !prof !2
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+
+!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
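
Closing the geometric series above: since 9/16 < 1,

    9^0/16^0 + 9^1/16^1 + 9^2/16^2 + ... = 1 / (1 - 9/16) = 16/7

so c1 = 15/16 * 16/7 = 15/7 = 2.142857... and c2 = 13/16 * 16/7 = 13/7 =
1.857142..., and c1 + c2 = 4.0, matching the loop scale of 4 derived above.
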
+
+; Testcase #2
+; ===========
+;
+; In this case c1 and c2 should be treated as equals in a single loop. The
+; exit frequency is 1/3, so the scaling factor for the loop should be 3.0. The
+; loop is entered 2/3 of the time, and c1 and c2 split the total loop frequency
+; evenly (1/2), so they should each have frequencies of 1.0 (3.0*2/3*1/2).
+; Another way of computing this result is by assigning 1.0 to entry and showing
+; that c1 and c2 should accumulate frequencies of:
+;
+; 1/3 + 2/9 + 4/27 + 8/81 + ...
+; 2^0/3^1 + 2^1/3^2 + 2^2/3^3 + 2^3/3^4 + ...
+;
+; At the first step, c1 and c2 each get 1/3 of the entry. At each subsequent
+; step, c1 and c2 each get 1/3 of what's left in c1 and c2 combined. This
+; infinite series sums to 1.
+;
+; Since the current algorithm *always* assumes entry blocks are equal,
+; -block-freq gets the right answers here.
+define void @crossloops(i2 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'crossloops':
+; CHECK-NEXT: block-frequency-info: crossloops
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ switch i2 %x, label %exit [ i2 1, label %c1
+ i2 2, label %c2 ], !prof !3
+
+c1:
+; CHECK-NEXT: c1: float = 1.0,
+ switch i2 %x, label %exit [ i2 1, label %c1
+ i2 2, label %c2 ], !prof !3
+
+c2:
+; CHECK-NEXT: c2: float = 1.0,
+ switch i2 %x, label %exit [ i2 1, label %c1
+ i2 2, label %c2 ], !prof !3
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+
+!3 = metadata !{metadata !"branch_weights", i32 2, i32 2, i32 2}
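
The series for crossloops also closes as the comments claim:

    2^0/3^1 + 2^1/3^2 + 2^2/3^3 + ... = (1/3) * 1/(1 - 2/3) = 1

so c1 = c2 = 1.0, agreeing with the direct computation 3.0 * 2/3 * 1/2 = 1.0.
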
+
+; A true loop with irreducible control flow inside.
+define void @loop_around_irreducible(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_irreducible':
+; CHECK-NEXT: block-frequency-info: loop_around_irreducible
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 4.0, int = [[HEAD:[0-9]+]]
+ br i1 %x, label %left, label %right, !prof !4
+
+left:
+; CHECK-NEXT: left: float = 8.0,
+ br i1 %x, label %right, label %loop.end, !prof !5
+
+right:
+; CHECK-NEXT: right: float = 8.0,
+ br i1 %x, label %left, label %loop.end, !prof !5
+
+loop.end:
+; CHECK-NEXT: loop.end: float = 4.0, int = [[HEAD]]
+ br i1 %x, label %loop, label %exit, !prof !5
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!4 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!5 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+; Two unrelated irreducible SCCs.
+define void @two_sccs(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'two_sccs':
+; CHECK-NEXT: block-frequency-info: two_sccs
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br i1 %x, label %a, label %b, !prof !6
+
+a:
+; CHECK-NEXT: a: float = 0.75,
+ br i1 %x, label %a.left, label %a.right, !prof !7
+
+a.left:
+; CHECK-NEXT: a.left: float = 1.5,
+ br i1 %x, label %a.right, label %exit, !prof !6
+
+a.right:
+; CHECK-NEXT: a.right: float = 1.5,
+ br i1 %x, label %a.left, label %exit, !prof !6
+
+b:
+; CHECK-NEXT: b: float = 0.25,
+ br i1 %x, label %b.left, label %b.right, !prof !7
+
+b.left:
+; CHECK-NEXT: b.left: float = 0.625,
+ br i1 %x, label %b.right, label %exit, !prof !8
+
+b.right:
+; CHECK-NEXT: b.right: float = 0.625,
+ br i1 %x, label %b.left, label %exit, !prof !8
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!6 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!7 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!8 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+
+; A true loop inside irreducible control flow.
+define void @loop_inside_irreducible(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_inside_irreducible':
+; CHECK-NEXT: block-frequency-info: loop_inside_irreducible
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br i1 %x, label %left, label %right, !prof !9
+
+left:
+; CHECK-NEXT: left: float = 2.0,
+ br i1 %x, label %right, label %exit, !prof !10
+
+right:
+; CHECK-NEXT: right: float = 2.0, int = [[RIGHT:[0-9]+]]
+ br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 6.0,
+ br i1 %x, label %loop, label %right.end, !prof !11
+
+right.end:
+; CHECK-NEXT: right.end: float = 2.0, int = [[RIGHT]]
+ br i1 %x, label %left, label %exit, !prof !10
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!9 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!10 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!11 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+
+; Irreducible control flow in a branch that's in a true loop.
+define void @loop_around_branch_with_irreducible(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_branch_with_irreducible':
+; CHECK-NEXT: block-frequency-info: loop_around_branch_with_irreducible
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 2.0, int = [[LOOP:[0-9]+]]
+ br i1 %x, label %normal, label %irreducible.entry, !prof !12
+
+normal:
+; CHECK-NEXT: normal: float = 1.5,
+ br label %loop.end
+
+irreducible.entry:
+; CHECK-NEXT: irreducible.entry: float = 0.5, int = [[IRREDUCIBLE:[0-9]+]]
+ br i1 %x, label %left, label %right, !prof !13
+
+left:
+; CHECK-NEXT: left: float = 1.0,
+ br i1 %x, label %right, label %irreducible.exit, !prof !12
+
+right:
+; CHECK-NEXT: right: float = 1.0,
+ br i1 %x, label %left, label %irreducible.exit, !prof !12
+
+irreducible.exit:
+; CHECK-NEXT: irreducible.exit: float = 0.5, int = [[IRREDUCIBLE]]
+ br label %loop.end
+
+loop.end:
+; CHECK-NEXT: loop.end: float = 2.0, int = [[LOOP]]
+ br i1 %x, label %loop, label %exit, !prof !13
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!12 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!13 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+
+; Irreducible control flow between two true loops.
+define void @loop_around_branch_with_irreducible_around_loop(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_around_branch_with_irreducible_around_loop':
+; CHECK-NEXT: block-frequency-info: loop_around_branch_with_irreducible_around_loop
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 3.0, int = [[LOOP:[0-9]+]]
+ br i1 %x, label %normal, label %irreducible, !prof !14
+
+normal:
+; CHECK-NEXT: normal: float = 2.0,
+ br label %loop.end
+
+irreducible:
+; CHECK-NEXT: irreducible: float = 1.0,
+ br i1 %x, label %left, label %right, !prof !15
+
+left:
+; CHECK-NEXT: left: float = 2.0,
+ br i1 %x, label %right, label %loop.end, !prof !16
+
+right:
+; CHECK-NEXT: right: float = 2.0, int = [[RIGHT:[0-9]+]]
+ br label %right.loop
+
+right.loop:
+; CHECK-NEXT: right.loop: float = 10.0,
+ br i1 %x, label %right.loop, label %right.end, !prof !17
+
+right.end:
+; CHECK-NEXT: right.end: float = 2.0, int = [[RIGHT]]
+ br i1 %x, label %left, label %loop.end, !prof !16
+
+loop.end:
+; CHECK-NEXT: loop.end: float = 3.0, int = [[LOOP]]
+ br i1 %x, label %loop, label %exit, !prof !14
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!14 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+!15 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!16 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!17 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+
+; An irreducible SCC with a non-header.
+define void @nonheader(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nonheader':
+; CHECK-NEXT: block-frequency-info: nonheader
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br i1 %x, label %left, label %right, !prof !18
+
+left:
+; CHECK-NEXT: left: float = 1.0,
+ br i1 %x, label %bottom, label %exit, !prof !19
+
+right:
+; CHECK-NEXT: right: float = 1.0,
+ br i1 %x, label %bottom, label %exit, !prof !20
+
+bottom:
+; CHECK-NEXT: bottom: float = 1.0,
+ br i1 %x, label %left, label %right, !prof !18
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!18 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!19 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!20 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+; An irreducible SCC with an irreducible sub-SCC. In the current version of
+; -block-freq, this means an extra header.
+;
+; This testcase uses non-trivial branch weights. The CHECK statements here
+; will start to fail if we change -block-freq to be more accurate. Currently,
+; we expect left, right and top to be treated as equal headers.
+define void @nonentry_header(i1 %x, i2 %y) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nonentry_header':
+; CHECK-NEXT: block-frequency-info: nonentry_header
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+ br i1 %x, label %left, label %right, !prof !21
+
+left:
+; CHECK-NEXT: left: float = 3.0,
+ br i1 %x, label %top, label %bottom, !prof !22
+
+right:
+; CHECK-NEXT: right: float = 3.0,
+ br i1 %x, label %top, label %bottom, !prof !22
+
+top:
+; CHECK-NEXT: top: float = 3.0,
+ switch i2 %y, label %exit [ i2 0, label %left
+ i2 1, label %right
+ i2 2, label %bottom ], !prof !23
+
+bottom:
+; CHECK-NEXT: bottom: float = 4.5,
+ br label %top
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+ ret void
+}
+!21 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+!22 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!23 = metadata !{metadata !"branch_weights", i32 8, i32 1, i32 3, i32 12}
diff --git a/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll b/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
new file mode 100644
index 0000000..9d27b6b
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_with_branch':
+; CHECK-NEXT: block-frequency-info: loop_with_branch
+define void @loop_with_branch(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ %skip_loop = call i1 @foo0(i32 %a)
+ br i1 %skip_loop, label %skip, label %header, !prof !0
+
+; CHECK-NEXT: skip: float = 0.25,
+skip:
+ br label %exit
+
+; CHECK-NEXT: header: float = 4.5,
+header:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %back ]
+ %i.next = add i32 %i, 1
+ %choose = call i2 @foo1(i32 %i)
+ switch i2 %choose, label %exit [ i2 0, label %left
+ i2 1, label %right ], !prof !1
+
+; CHECK-NEXT: left: float = 1.5,
+left:
+ br label %back
+
+; CHECK-NEXT: right: float = 2.25,
+right:
+ br label %back
+
+; CHECK-NEXT: back: float = 3.75,
+back:
+ br label %header
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+ ret void
+}
+
+declare i1 @foo0(i32)
+declare i2 @foo1(i32)
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 3}
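The CHECK values in this file follow from straightforward loop-scale
arithmetic. A short sketch of that computation (it mirrors the numbers, not
-block-freq's actual implementation):

    # Reproduce the CHECK values for @loop_with_branch.
    enter_loop = 3.0 / 4.0                 # !0: 1/4 to skip, 3/4 to header
    exit_prob = 1.0 / 6.0                  # !1: weights 1 exit, 2 left, 3 right
    header = enter_loop / exit_prob        # 4.5; loop scale is 1/exit_prob
    left = header * (2.0 / 6.0)            # 1.5
    right = header * (3.0 / 6.0)           # 2.25
    back = left + right                    # 3.75
    exit_freq = 0.25 + header * exit_prob  # 1.0 = skip + loop exits
    print(header, left, right, back, exit_freq)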
diff --git a/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll b/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
new file mode 100644
index 0000000..d93ffce
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loop_with_branches':
+; CHECK-NEXT: block-frequency-info: nested_loop_with_branches
+define void @nested_loop_with_branches(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ %v0 = call i1 @foo0(i32 %a)
+ br i1 %v0, label %exit, label %outer, !prof !0
+
+; CHECK-NEXT: outer: float = 12.0,
+outer:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %inner.end ], [ %i.next, %no_inner ]
+ %i.next = add i32 %i, 1
+ %do_inner = call i1 @foo1(i32 %i)
+ br i1 %do_inner, label %no_inner, label %inner, !prof !0
+
+; CHECK-NEXT: inner: float = 36.0,
+inner:
+ %j = phi i32 [ 0, %outer ], [ %j.next, %inner.end ]
+ %side = call i1 @foo3(i32 %j)
+ br i1 %side, label %left, label %right, !prof !0
+
+; CHECK-NEXT: left: float = 9.0,
+left:
+ %v4 = call i1 @foo4(i32 %j)
+ br label %inner.end
+
+; CHECK-NEXT: right: float = 27.0,
+right:
+ %v5 = call i1 @foo5(i32 %j)
+ br label %inner.end
+
+; CHECK-NEXT: inner.end: float = 36.0,
+inner.end:
+ %stay_inner = phi i1 [ %v4, %left ], [ %v5, %right ]
+ %j.next = add i32 %j, 1
+ br i1 %stay_inner, label %inner, label %outer, !prof !1
+
+; CHECK-NEXT: no_inner: float = 3.0,
+no_inner:
+ %continue = call i1 @foo6(i32 %i)
+ br i1 %continue, label %outer, label %exit, !prof !1
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+ ret void
+}
+
+declare i1 @foo0(i32)
+declare i1 @foo1(i32)
+declare i1 @foo2(i32)
+declare i1 @foo3(i32)
+declare i1 @foo4(i32)
+declare i1 @foo5(i32)
+declare i1 @foo6(i32)
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!1 = metadata !{metadata !"branch_weights", i32 3, i32 1}
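For the nested loop, the CHECK values satisfy a small system of flow
equations; the sketch below solves it by fixed-point iteration. This mirrors
the math, not the loop-by-loop scaling -block-freq actually performs.

    # Flow equations for @nested_loop_with_branches, with entry = 1.0.
    # !0 splits 1/4 vs. 3/4; !1 splits 3/4 vs. 1/4.
    outer = inner = inner_end = no_inner = 0.0
    for _ in range(5000):
        outer = 0.75 + 0.25 * inner_end + 0.75 * no_inner
        inner = 0.75 * outer + 0.75 * inner_end
        inner_end = inner                  # left and right rejoin here
        no_inner = 0.25 * outer
    print(outer, inner, no_inner)          # ~12.0, ~36.0, ~3.0
    print(0.25 * inner, 0.75 * inner)      # left ~9.0, right ~27.0
    print(0.25 + 0.25 * no_inner)          # exit ~1.0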
diff --git a/test/Analysis/BranchProbabilityInfo/loop.ll b/test/Analysis/BranchProbabilityInfo/loop.ll
index b648cbb..40f1111 100644
--- a/test/Analysis/BranchProbabilityInfo/loop.ll
+++ b/test/Analysis/BranchProbabilityInfo/loop.ll
@@ -15,7 +15,7 @@ do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc3, %do.end ]
call void @g1()
br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
do.body1:
%j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.body1 ]
@@ -55,8 +55,8 @@ for.body:
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc5, %for.end ]
call void @g1()
br i1 %cmp27, label %for.body3, label %for.end
-; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
-; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.body3 probability is 20 / 32 = 62.5%
+; CHECK: edge for.body -> for.end probability is 12 / 32 = 37.5%
for.body3:
%j.08 = phi i32 [ %inc, %for.body3 ], [ 0, %for.body ]
@@ -91,8 +91,8 @@ do.body:
%0 = load i32* %c, align 4
%cmp = icmp slt i32 %0, 42
br i1 %cmp, label %do.body1, label %if.end
-; CHECK: edge do.body -> do.body1 probability is 62 / 124 = 50%
-; CHECK: edge do.body -> if.end probability is 62 / 124 = 50%
+; CHECK: edge do.body -> do.body1 probability is 16 / 32 = 50%
+; CHECK: edge do.body -> if.end probability is 16 / 32 = 50%
do.body1:
%j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
@@ -165,7 +165,7 @@ do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
call void @g1()
br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
do.body1:
%j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ]
@@ -209,7 +209,7 @@ do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
call void @g1()
br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
do.body1:
%j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ]
@@ -261,14 +261,14 @@ for.body:
%0 = load i32* %c, align 4
%cmp1 = icmp eq i32 %0, %i.011
br i1 %cmp1, label %for.inc5, label %if.end
-; CHECK: edge for.body -> for.inc5 probability is 62 / 124 = 50%
-; CHECK: edge for.body -> if.end probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.inc5 probability is 16 / 32 = 50%
+; CHECK: edge for.body -> if.end probability is 16 / 32 = 50%
if.end:
call void @g1()
br i1 %cmp38, label %for.body4, label %for.end
-; CHECK: edge if.end -> for.body4 probability is 62 / 124 = 50%
-; CHECK: edge if.end -> for.end probability is 62 / 124 = 50%
+; CHECK: edge if.end -> for.body4 probability is 20 / 32 = 62.5%
+; CHECK: edge if.end -> for.end probability is 12 / 32 = 37.5%
for.body4:
%j.09 = phi i32 [ %inc, %for.body4 ], [ 0, %if.end ]
@@ -282,7 +282,7 @@ for.body4:
for.end:
call void @g3()
br label %for.inc5
-; CHECK: edge for.end -> for.inc5 probability is 124 / 124 = 100%
+; CHECK: edge for.end -> for.inc5 probability is 16 / 16 = 100%
for.inc5:
%inc6 = add nsw i32 %i.011, 1
@@ -314,35 +314,35 @@ for.body:
%i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc14, %for.end ]
call void @g1()
br i1 %cmp216, label %for.body3, label %for.end
-; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
-; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.body3 probability is 20 / 32 = 62.5%
+; CHECK: edge for.body -> for.end probability is 12 / 32 = 37.5%
for.body3:
%j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%0 = load i32* %c, align 4
%cmp4 = icmp eq i32 %0, %j.017
br i1 %cmp4, label %for.inc, label %if.end
-; CHECK: edge for.body3 -> for.inc probability is 62 / 124 = 50%
-; CHECK: edge for.body3 -> if.end probability is 62 / 124 = 50%
+; CHECK: edge for.body3 -> for.inc probability is 16 / 32 = 50%
+; CHECK: edge for.body3 -> if.end probability is 16 / 32 = 50%
if.end:
%1 = load i32* %arrayidx5, align 4
%cmp6 = icmp eq i32 %1, %j.017
br i1 %cmp6, label %for.inc, label %if.end8
-; CHECK: edge if.end -> for.inc probability is 62 / 124 = 50%
-; CHECK: edge if.end -> if.end8 probability is 62 / 124 = 50%
+; CHECK: edge if.end -> for.inc probability is 16 / 32 = 50%
+; CHECK: edge if.end -> if.end8 probability is 16 / 32 = 50%
if.end8:
%2 = load i32* %arrayidx9, align 4
%cmp10 = icmp eq i32 %2, %j.017
br i1 %cmp10, label %for.inc, label %if.end12
-; CHECK: edge if.end8 -> for.inc probability is 62 / 124 = 50%
-; CHECK: edge if.end8 -> if.end12 probability is 62 / 124 = 50%
+; CHECK: edge if.end8 -> for.inc probability is 16 / 32 = 50%
+; CHECK: edge if.end8 -> if.end12 probability is 16 / 32 = 50%
if.end12:
call void @g2()
br label %for.inc
-; CHECK: edge if.end12 -> for.inc probability is 124 / 124 = 100%
+; CHECK: edge if.end12 -> for.inc probability is 16 / 16 = 100%
for.inc:
%inc = add nsw i32 %j.017, 1
diff --git a/test/Analysis/BranchProbabilityInfo/pr18705.ll b/test/Analysis/BranchProbabilityInfo/pr18705.ll
new file mode 100644
index 0000000..9f239b4
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/pr18705.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+; Since neither of while.body's out-edges is an exit or a back edge,
+; calcLoopBranchHeuristics should return early without setting the weights.
+; calcFloatingPointHeuristics, which is run later, sets the weights.
+;
+; CHECK: edge while.body -> if.then probability is 20 / 32 = 62.5%
+; CHECK: edge while.body -> if.else probability is 12 / 32 = 37.5%
+
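The 62.5/37.5 split is just the floating-point heuristic's fixed weights
normalized, assuming the taken/not-taken weights of 20 and 12 used by
BranchProbabilityInfo at this revision (une is considered likely because
floating-point equality rarely holds):

    # Normalize the assumed FP-heuristic weights into the printed form.
    taken, not_taken = 20, 12
    total = taken + not_taken              # 32
    print(100.0 * taken / total)           # 62.5 -> "20 / 32 = 62.5%"
    print(100.0 * not_taken / total)       # 37.5 -> "12 / 32 = 37.5%"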
+define void @foo1(i32 %n, i32* nocapture %b, i32* nocapture %c, i32* nocapture %d, float* nocapture readonly %f0, float* nocapture readonly %f1) {
+entry:
+ %tobool8 = icmp eq i32 %n, 0
+ br i1 %tobool8, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:
+ %0 = sext i32 %n to i64
+ br label %while.body
+
+while.body:
+ %indvars.iv = phi i64 [ %0, %while.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+ %b.addr.011 = phi i32* [ %b, %while.body.lr.ph ], [ %b.addr.1, %if.end ]
+ %d.addr.010 = phi i32* [ %d, %while.body.lr.ph ], [ %incdec.ptr4, %if.end ]
+ %c.addr.09 = phi i32* [ %c, %while.body.lr.ph ], [ %c.addr.1, %if.end ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %arrayidx = getelementptr inbounds float* %f0, i64 %indvars.iv.next
+ %1 = load float* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds float* %f1, i64 %indvars.iv.next
+ %2 = load float* %arrayidx2, align 4
+ %cmp = fcmp une float %1, %2
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %incdec.ptr = getelementptr inbounds i32* %b.addr.011, i64 1
+ %3 = load i32* %b.addr.011, align 4
+ %add = add nsw i32 %3, 12
+ store i32 %add, i32* %b.addr.011, align 4
+ br label %if.end
+
+if.else:
+ %incdec.ptr3 = getelementptr inbounds i32* %c.addr.09, i64 1
+ %4 = load i32* %c.addr.09, align 4
+ %sub = add nsw i32 %4, -13
+ store i32 %sub, i32* %c.addr.09, align 4
+ br label %if.end
+
+if.end:
+ %c.addr.1 = phi i32* [ %c.addr.09, %if.then ], [ %incdec.ptr3, %if.else ]
+ %b.addr.1 = phi i32* [ %incdec.ptr, %if.then ], [ %b.addr.011, %if.else ]
+ %incdec.ptr4 = getelementptr inbounds i32* %d.addr.010, i64 1
+ store i32 14, i32* %d.addr.010, align 4
+ %5 = trunc i64 %indvars.iv.next to i32
+ %tobool = icmp eq i32 %5, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+ ret void
+}
+
diff --git a/test/Analysis/CostModel/AArch64/lit.local.cfg b/test/Analysis/CostModel/AArch64/lit.local.cfg
new file mode 100644
index 0000000..c420349
--- /dev/null
+++ b/test/Analysis/CostModel/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM64/select.ll b/test/Analysis/CostModel/AArch64/select.ll
index 216dc5d..216dc5d 100644
--- a/test/Analysis/CostModel/ARM64/select.ll
+++ b/test/Analysis/CostModel/AArch64/select.ll
diff --git a/test/Analysis/CostModel/ARM64/store.ll b/test/Analysis/CostModel/AArch64/store.ll
index 0c9883c..0c9883c 100644
--- a/test/Analysis/CostModel/ARM64/store.ll
+++ b/test/Analysis/CostModel/AArch64/store.ll
diff --git a/test/Analysis/CostModel/ARM64/lit.local.cfg b/test/Analysis/CostModel/ARM64/lit.local.cfg
deleted file mode 100644
index 84ac981..0000000
--- a/test/Analysis/CostModel/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll
index daaa8f5..7d6a14e 100644
--- a/test/Analysis/CostModel/PowerPC/ext.ll
+++ b/test/Analysis/CostModel/PowerPC/ext.ll
@@ -13,7 +13,7 @@ define void @exts() {
; CHECK: cost of 1 {{.*}} sext
%v3 = sext <4 x i16> undef to <4 x i32>
- ; CHECK: cost of 216 {{.*}} sext
+ ; CHECK: cost of 112 {{.*}} sext
%v4 = sext <8 x i16> undef to <8 x i32>
ret void
diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll
index f51963d..8dc0031 100644
--- a/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -3,13 +3,13 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "powerpc64-unknown-linux-gnu"
define i32 @insert(i32 %arg) {
- ; CHECK: cost of 13 {{.*}} insertelement
+ ; CHECK: cost of 10 {{.*}} insertelement
%x = insertelement <4 x i32> undef, i32 %arg, i32 0
ret i32 undef
}
define i32 @extract(<4 x i32> %arg) {
- ; CHECK: cost of 13 {{.*}} extractelement
+ ; CHECK: cost of 3 {{.*}} extractelement
%x = extractelement <4 x i32> %arg, i32 0
ret i32 %x
}
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 8145a1d..368f0a7 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -31,9 +31,15 @@ define i32 @loads(i32 %arg) {
; FIXME: There actually are sub-vector Altivec loads, and so we could handle
; this with a small expense, but we don't currently.
- ; CHECK: cost of 60 {{.*}} load
+ ; CHECK: cost of 48 {{.*}} load
load <4 x i16>* undef, align 2
+ ; CHECK: cost of 1 {{.*}} load
+ load <4 x i32>* undef, align 4
+
+ ; CHECK: cost of 46 {{.*}} load
+ load <3 x float>* undef, align 1
+
ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
index 8eeee81..3b27b52 100644
--- a/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -58,3 +58,31 @@ for.end: ; preds = %vector.body
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+
+define void @test3(float* nocapture %f, <4 x float> %b, <4 x float> %c) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds float* %f, i64 %index
+ %1 = bitcast float* %0 to <4 x float>*
+ %wide.load = load <4 x float>* %1, align 4
+ %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+ store <4 x float> %2, <4 x float>* %1, align 4
+ %index.next = add i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 %3, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; CORE2: Printing analysis 'Cost Model Analysis' for function 'test3':
+; CORE2: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+
+; COREI7: Printing analysis 'Cost Model Analysis' for function 'test3':
+; COREI7: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+
+}
+
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
diff --git a/test/Analysis/CostModel/X86/vdiv-cost.ll b/test/Analysis/CostModel/X86/vdiv-cost.ll
new file mode 100644
index 0000000..c8e4557
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+define <4 x i32> @test1(<4 x i32> %a) {
+ %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test1':
+; SSE2: Found an estimated cost of 15 for instruction: %div
+; AVX2: Found an estimated cost of 15 for instruction: %div
+}
+
+define <8 x i32> @test2(<8 x i32> %a) {
+ %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test2':
+; SSE2: Found an estimated cost of 30 for instruction: %div
+; AVX2: Found an estimated cost of 15 for instruction: %div
+}
+
+define <8 x i16> @test3(<8 x i16> %a) {
+ %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test3':
+; SSE2: Found an estimated cost of 6 for instruction: %div
+; AVX2: Found an estimated cost of 6 for instruction: %div
+}
+
+define <16 x i16> @test4(<16 x i16> %a) {
+ %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+ ret <16 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test4':
+; SSE2: Found an estimated cost of 12 for instruction: %div
+; AVX2: Found an estimated cost of 6 for instruction: %div
+}
+
+define <8 x i16> @test5(<8 x i16> %a) {
+ %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test5':
+; SSE2: Found an estimated cost of 6 for instruction: %div
+; AVX2: Found an estimated cost of 6 for instruction: %div
+}
+
+define <16 x i16> @test6(<16 x i16> %a) {
+ %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+ ret <16 x i16> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test6':
+; SSE2: Found an estimated cost of 12 for instruction: %div
+; AVX2: Found an estimated cost of 6 for instruction: %div
+}
+
+define <16 x i8> @test7(<16 x i8> %a) {
+ %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test7':
+; SSE2: Found an estimated cost of 320 for instruction: %div
+; AVX2: Found an estimated cost of 320 for instruction: %div
+}
+
+define <4 x i32> @test8(<4 x i32> %a) {
+ %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test8':
+; SSE2: Found an estimated cost of 19 for instruction: %div
+; AVX2: Found an estimated cost of 15 for instruction: %div
+}
+
+define <8 x i32> @test9(<8 x i32> %a) {
+ %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test9':
+; SSE2: Found an estimated cost of 38 for instruction: %div
+; AVX2: Found an estimated cost of 15 for instruction: %div
+}
+
+define <8 x i32> @test10(<8 x i32> %a) {
+ %div = sdiv <8 x i32> %a, <i32 8, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %div
+
+; CHECK: 'Cost Model Analysis' for function 'test10':
+; SSE2: Found an estimated cost of 160 for instruction: %div
+; AVX2: Found an estimated cost of 160 for instruction: %div
+}
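Divides by a constant splat such as 7 are comparatively cheap because the
backend can lower them to a multiply by a magic reciprocal plus a few shifts
per lane, whereas the <16 x i8> and non-uniform cases scalarize (320 is
consistent with 16 lanes at 20 apiece). Here is a sketch of the underlying
scalar trick in the Hacker's Delight style; it illustrates the technique,
not LLVM's lowering code:

    # 32-bit unsigned divide by 7 via multiply-and-shift.
    def udiv7(n):
        assert 0 <= n < 2**32
        q = (n * 0x24924925) >> 32         # high half of the magic multiply
        return (((n - q) >> 1) + q) >> 2

    assert all(udiv7(n) == n // 7 for n in range(1 << 16))
    assert udiv7(2**32 - 1) == (2**32 - 1) // 7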
diff --git a/test/Analysis/CostModel/X86/vselect-cost.ll b/test/Analysis/CostModel/X86/vselect-cost.ll
new file mode 100644
index 0000000..2416777
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vselect-cost.ll
@@ -0,0 +1,126 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+
+
+; Verify the cost of vector select instructions.
+
+; SSE41 added blend instructions with an immediate for <2 x double> and
+; <4 x float>. Integers of the same size should also use those instructions.
+
+define <2 x i64> @test_2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2i64':
+; SSE2: Cost Model: {{.*}} 4 for instruction: %sel = select <2 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
+ %sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x double> @test_2double(<2 x double> %a, <2 x double> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2double':
+; SSE2: Cost Model: {{.*}} 3 for instruction: %sel = select <2 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <2 x i1>
+ %sel = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
+ ret <2 x double> %sel
+}
+
+define <4 x i32> @test_4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i32':
+; SSE2: Cost Model: {{.*}} 8 for instruction: %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+ %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x float> @test_4float(<4 x float> %a, <4 x float> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4float':
+; SSE2: Cost Model: {{.*}} 7 for instruction: %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+ %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %sel
+}
+
+define <16 x i8> @test_16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_16i8':
+; SSE2: Cost Model: {{.*}} 32 for instruction: %sel = select <16 x i1>
+; SSE41: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
+ %sel = select <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+; AVX added blend instructions with an immediate for <4 x double> and
+; <8 x float>. Integers of the same size should also use those instructions.
+define <4 x i64> @test_4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i64':
+; SSE2: Cost Model: {{.*}} 8 for instruction: %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+ %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x double> @test_4double(<4 x double> %a, <4 x double> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4double':
+; SSE2: Cost Model: {{.*}} 6 for instruction: %sel = select <4 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <4 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <4 x i1>
+ %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %sel
+}
+
+define <8 x i32> @test_8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8i32':
+; SSE2: Cost Model: {{.*}} 16 for instruction: %sel = select <8 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <8 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
+ %sel = select <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x float> @test_8float(<8 x float> %a, <8 x float> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8float':
+; SSE2: Cost Model: {{.*}} 14 for instruction: %sel = select <8 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <8 x i1>
+; AVX: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <8 x i1>
+ %sel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %a, <8 x float> %b
+ ret <8 x float> %sel
+}
+
+; AVX2
+define <16 x i16> @test_16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_16i16':
+; SSE2: Cost Model: {{.*}} 32 for instruction: %sel = select <16 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <16 x i1>
+;;; FIXME: This AVX cost is obviously wrong. We shouldn't be scalarizing.
+; AVX: Cost Model: {{.*}} 32 for instruction: %sel = select <16 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <16 x i1>
+ %sel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <32 x i8> @test_32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_32i8':
+; SSE2: Cost Model: {{.*}} 64 for instruction: %sel = select <32 x i1>
+; SSE41: Cost Model: {{.*}} 2 for instruction: %sel = select <32 x i1>
+;;; FIXME: This AVX cost is obviously wrong. We shouldn't be scalarizing.
+; AVX: Cost Model: {{.*}} 64 for instruction: %sel = select <32 x i1>
+; AVX2: Cost Model: {{.*}} 1 for instruction: %sel = select <32 x i1>
+ %sel = select <32 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
diff --git a/test/Analysis/Delinearization/a.ll b/test/Analysis/Delinearization/a.ll
index 9308749..efebcc4 100644
--- a/test/Analysis/Delinearization/a.ll
+++ b/test/Analysis/Delinearization/a.ll
@@ -12,17 +12,6 @@
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(i32) bytes.
; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
-; AddRec: {{(8 + ((4 + (12 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(i32) bytes.
-; CHECK: ArrayRef[{(1 + (3 * %m)),+,(2 * %m)}<%for.i>][{2,+,(3 * %o)}<%for.j>]
-
-; AddRec: {(8 + ((-8 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of 2 bytes.
-; CHECK: ArrayRef[{((1 + ((-1 + (3 * %m)) * %o)) * sizeof(i32)),+,(%m * %o * sizeof(i32))}<%for.i>]
-
-; Function Attrs: nounwind uwtable
define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
entry:
%cmp32 = icmp sgt i64 %n, 0
diff --git a/test/Analysis/Delinearization/gcd_multiply_expr.ll b/test/Analysis/Delinearization/gcd_multiply_expr.ll
new file mode 100644
index 0000000..f962f6d
--- /dev/null
+++ b/test/Analysis/Delinearization/gcd_multiply_expr.ll
@@ -0,0 +1,153 @@
+; RUN: opt < %s -basicaa -da -analyze -delinearize
+;
+; a, b, c, d, g, h;
+; char *f;
+; static fn1(p1) {
+; char *e = p1;
+; for (; d;) {
+; a = 0;
+; for (;; ++a)
+; for (; b; ++b)
+; c = e[b + a];
+; }
+; }
+;
+; fn2() {
+; for (;;)
+; fn1(&f[g * h]);
+; }
+
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+@f = common global i8* null, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+
+define i32 @fn2() {
+entry:
+ %.pr = load i32* @d, align 4
+ %phitmp = icmp eq i32 %.pr, 0
+ br label %for.cond
+
+for.cond:
+ %0 = phi i1 [ true, %for.cond ], [ %phitmp, %entry ]
+ br i1 %0, label %for.cond, label %for.cond2thread-pre-split.preheader.i
+
+for.cond2thread-pre-split.preheader.i:
+ %1 = load i32* @g, align 4
+ %2 = load i32* @h, align 4
+ %mul = mul nsw i32 %2, %1
+ %3 = load i8** @f, align 4
+ %.pr.pre.i = load i32* @b, align 4
+ br label %for.cond2thread-pre-split.i
+
+for.cond2thread-pre-split.i:
+ %.pr.i = phi i32 [ 0, %for.inc5.i ], [ %.pr.pre.i, %for.cond2thread-pre-split.preheader.i ]
+ %storemerge.i = phi i32 [ %inc6.i, %for.inc5.i ], [ 0, %for.cond2thread-pre-split.preheader.i ]
+ store i32 %storemerge.i, i32* @a, align 4
+ %tobool31.i = icmp eq i32 %.pr.i, 0
+ br i1 %tobool31.i, label %for.inc5.i, label %for.body4.preheader.i
+
+for.body4.preheader.i:
+ %4 = icmp slt i32 %.pr.i, -7
+ %add.i = add i32 %storemerge.i, %mul
+ br i1 %4, label %for.body4.i.preheader, label %for.body4.ur.i.preheader
+
+for.body4.i.preheader:
+ %5 = sub i32 -8, %.pr.i
+ %6 = lshr i32 %5, 3
+ %7 = mul i32 %6, 8
+ br label %for.body4.i
+
+for.body4.i:
+ %8 = phi i32 [ %inc.7.i, %for.body4.i ], [ %.pr.i, %for.body4.i.preheader ]
+ %arrayidx.sum1 = add i32 %add.i, %8
+ %arrayidx.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum1
+ %9 = load i8* %arrayidx.i, align 1
+ %conv.i = sext i8 %9 to i32
+ store i32 %conv.i, i32* @c, align 4
+ %inc.i = add nsw i32 %8, 1
+ store i32 %inc.i, i32* @b, align 4
+ %arrayidx.sum2 = add i32 %add.i, %inc.i
+ %arrayidx.1.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum2
+ %10 = load i8* %arrayidx.1.i, align 1
+ %conv.1.i = sext i8 %10 to i32
+ store i32 %conv.1.i, i32* @c, align 4
+ %inc.1.i = add nsw i32 %8, 2
+ store i32 %inc.1.i, i32* @b, align 4
+ %arrayidx.sum3 = add i32 %add.i, %inc.1.i
+ %arrayidx.2.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum3
+ %11 = load i8* %arrayidx.2.i, align 1
+ %conv.2.i = sext i8 %11 to i32
+ store i32 %conv.2.i, i32* @c, align 4
+ %inc.2.i = add nsw i32 %8, 3
+ store i32 %inc.2.i, i32* @b, align 4
+ %arrayidx.sum4 = add i32 %add.i, %inc.2.i
+ %arrayidx.3.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum4
+ %12 = load i8* %arrayidx.3.i, align 1
+ %conv.3.i = sext i8 %12 to i32
+ store i32 %conv.3.i, i32* @c, align 4
+ %inc.3.i = add nsw i32 %8, 4
+ store i32 %inc.3.i, i32* @b, align 4
+ %arrayidx.sum5 = add i32 %add.i, %inc.3.i
+ %arrayidx.4.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum5
+ %13 = load i8* %arrayidx.4.i, align 1
+ %conv.4.i = sext i8 %13 to i32
+ store i32 %conv.4.i, i32* @c, align 4
+ %inc.4.i = add nsw i32 %8, 5
+ store i32 %inc.4.i, i32* @b, align 4
+ %arrayidx.sum6 = add i32 %add.i, %inc.4.i
+ %arrayidx.5.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum6
+ %14 = load i8* %arrayidx.5.i, align 1
+ %conv.5.i = sext i8 %14 to i32
+ store i32 %conv.5.i, i32* @c, align 4
+ %inc.5.i = add nsw i32 %8, 6
+ store i32 %inc.5.i, i32* @b, align 4
+ %arrayidx.sum7 = add i32 %add.i, %inc.5.i
+ %arrayidx.6.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum7
+ %15 = load i8* %arrayidx.6.i, align 1
+ %conv.6.i = sext i8 %15 to i32
+ store i32 %conv.6.i, i32* @c, align 4
+ %inc.6.i = add nsw i32 %8, 7
+ store i32 %inc.6.i, i32* @b, align 4
+ %arrayidx.sum8 = add i32 %add.i, %inc.6.i
+ %arrayidx.7.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum8
+ %16 = load i8* %arrayidx.7.i, align 1
+ %conv.7.i = sext i8 %16 to i32
+ store i32 %conv.7.i, i32* @c, align 4
+ %inc.7.i = add nsw i32 %8, 8
+ store i32 %inc.7.i, i32* @b, align 4
+ %tobool3.7.i = icmp sgt i32 %inc.7.i, -8
+ br i1 %tobool3.7.i, label %for.inc5.loopexit.ur-lcssa.i, label %for.body4.i
+
+for.inc5.loopexit.ur-lcssa.i:
+ %17 = add i32 %.pr.i, 8
+ %18 = add i32 %17, %7
+ %19 = icmp eq i32 %18, 0
+ br i1 %19, label %for.inc5.i, label %for.body4.ur.i.preheader
+
+for.body4.ur.i.preheader:
+ %.ph = phi i32 [ %18, %for.inc5.loopexit.ur-lcssa.i ], [ %.pr.i, %for.body4.preheader.i ]
+ br label %for.body4.ur.i
+
+for.body4.ur.i:
+ %20 = phi i32 [ %inc.ur.i, %for.body4.ur.i ], [ %.ph, %for.body4.ur.i.preheader ]
+ %arrayidx.sum = add i32 %add.i, %20
+ %arrayidx.ur.i = getelementptr inbounds i8* %3, i32 %arrayidx.sum
+ %21 = load i8* %arrayidx.ur.i, align 1
+ %conv.ur.i = sext i8 %21 to i32
+ store i32 %conv.ur.i, i32* @c, align 4
+ %inc.ur.i = add nsw i32 %20, 1
+ store i32 %inc.ur.i, i32* @b, align 4
+ %tobool3.ur.i = icmp eq i32 %inc.ur.i, 0
+ br i1 %tobool3.ur.i, label %for.inc5.i.loopexit, label %for.body4.ur.i
+
+for.inc5.i.loopexit:
+ br label %for.inc5.i
+
+for.inc5.i:
+ %inc6.i = add nsw i32 %storemerge.i, 1
+ br label %for.cond2thread-pre-split.i
+}
diff --git a/test/Analysis/Delinearization/himeno_1.ll b/test/Analysis/Delinearization/himeno_1.ll
index 9458bd2..c94ca7a 100644
--- a/test/Analysis/Delinearization/himeno_1.ll
+++ b/test/Analysis/Delinearization/himeno_1.ll
@@ -31,16 +31,6 @@
; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
-; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
-
-; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
-
%struct.Mat = type { float*, i32, i32, i32, i32 }
define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
diff --git a/test/Analysis/Delinearization/himeno_2.ll b/test/Analysis/Delinearization/himeno_2.ll
index a290066..c256384 100644
--- a/test/Analysis/Delinearization/himeno_2.ll
+++ b/test/Analysis/Delinearization/himeno_2.ll
@@ -31,16 +31,6 @@
; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
; CHECK: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
-; AddRec: {{(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.deps to i64)] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(1 + (sext i32 %a.cols to i64)),+,(sext i32 %a.cols to i64)}<%for.i>][{(-1 + (sext i32 (-1 + %p.deps) to i64)),+,(sext i32 %a.deps to i64)}<%for.j>]
-
-; AddRec: {(-4 + (4 * (sext i32 (-1 + %p.deps) to i64)) + ((sext i32 %a.deps to i64) * (-4 + (4 * (sext i32 (-1 + %p.cols) to i64)) + (4 * (sext i32 %a.cols to i64)))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>
-; CHECK: Base offset: %a.base
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(float) bytes.
-; CHECK: ArrayRef[{(-1 + (sext i32 (-1 + %p.deps) to i64) + ((sext i32 %a.deps to i64) * (-1 + (sext i32 (-1 + %p.cols) to i64) + (sext i32 %a.cols to i64)))),+,((sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>]
-
%struct.Mat = type { float*, i32, i32, i32, i32 }
define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
diff --git a/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
new file mode 100644
index 0000000..01a4b96
--- /dev/null
+++ b/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -analyze -delinearize | FileCheck %s
+
+; Derived from the following code:
+;
+; void foo(long n, long m, long b, double A[n][m]) {
+; for (long i = 0; i < n; i++)
+; for (long j = 0; j < m; j++)
+;       A[2*i+b][2*j] = 1.0;
+; }
+
+; AddRec: {{((%m * %b * sizeof(double)) + %A),+,(2 * %m * sizeof(double))}<%for.i>,+,(2 * sizeof(double))}<%for.j>
+; CHECK: Base offset: %A
+; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
+; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>]
+
+
+define void @foo(i64 %n, i64 %m, i64 %b, double* %A) {
+entry:
+ br label %for.i
+
+for.i:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ %outerdim = mul nsw i64 %i, 2
+ %outerdim2 = add nsw i64 %outerdim, %b
+ %tmp = mul nsw i64 %outerdim2, %m
+ br label %for.j
+
+for.j:
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+ %prodj = mul i64 %j, 2
+ %vlaarrayidx.sum = add i64 %prodj, %tmp
+ %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+ store double 1.0, double* %arrayidx
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, %m
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, %n
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
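The delinearization asserts that the flat offset computed by the IR,
((2*i + b) * m + 2*j) * sizeof(double), equals %A plus the AddRec above. A
quick editorial sketch checking that identity numerically:

    # The IR's linear byte offset equals the delinearized AddRec
    #   (m*b*8) + i*(2*m*8) + j*(2*8), relative to the base %A.
    import random
    for _ in range(1000):
        m = random.randint(1, 50)
        b = random.randint(0, 10)
        i, j = random.randint(0, 50), random.randint(0, m - 1)
        linear = ((2 * i + b) * m + 2 * j) * 8
        addrec = b * m * 8 + i * (2 * m * 8) + j * (2 * 8)
        assert linear == addrec
    print("offsets match")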
diff --git a/test/Analysis/Delinearization/lit.local.cfg b/test/Analysis/Delinearization/lit.local.cfg
index 19eebc0..c6106e4 100644
--- a/test/Analysis/Delinearization/lit.local.cfg
+++ b/test/Analysis/Delinearization/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll']
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
index 82cab16..ae80ebc 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -13,16 +13,6 @@
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>]
-; AddRec: {{(48 + ((-24 + (24 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-3 + (3 * %m)),+,%m}<%for.i>][{6,+,%o}<%for.j>]
-
-; AddRec: {(48 + ((-32 + (32 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(6 + ((-4 + (4 * %m)) * %o)),+,(%m * %o)}<%for.i>]
-
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
entry:
br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
index a1e779f..75080da 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
@@ -13,16 +13,6 @@
; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of sizeof(double) bytes.
; CHECK: ArrayRef[{3,+,1}<nw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
-; AddRec: {{(48 + (8 * %o) + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][(%o + %p)] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-4 + (3 * %m)),+,%m}<%for.cond4.preheader.lr.ph.us>][{(6 + %o),+,(%o + %p)}<%for.body6.lr.ph.us.us>]
-
-; AddRec: {(48 + (8 * %o) + ((-40 + (32 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(6 + ((-5 + (4 * %m)) * (%o + %p)) + %o),+,((%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>]
-
define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable {
entry:
%add = add nsw i64 %p, %o
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
index a52a4c9..e921444 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -13,16 +13,6 @@
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>]
-; AddRec: {{(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][%o] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(1 + (%m * %p) + %q),+,%m}<%for.i>][{(-1 + %r),+,%o}<%for.j>]
-
-; AddRec: {(-8 + (8 * ((((%m * %p) + %q) * %o) + %r)) + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-1 + ((((1 + %p) * %m) + %q) * %o) + %r),+,(%m * %o)}<%for.i>]
-
define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
entry:
br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index d68a158..48bec08 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -13,11 +13,6 @@
; CHECK: ArrayDecl[UnknownSize][%m] with elements of sizeof(double) bytes.
; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
-; AddRec: {(-8 + (8 * %m) + %A),+,(8 * %m)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-1 + %m),+,%m}<%for.i>]
-
define void @foo(i64 %n, i64 %m, double* %A) {
entry:
br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
index 7207420..810188f 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
@@ -1,4 +1,6 @@
; RUN: opt < %s -analyze -delinearize | FileCheck %s
+; XFAIL: *
+; We no longer recognize variable-size arrays.
; extern void bar(long n, long m, double A[n][m]);
;
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
index 24f9583..aad0f09 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
@@ -13,16 +13,6 @@
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
-; AddRec: {{(-8 + (8 * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][(%m * %o)] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{(-1 + %o),+,%o}<%for.j>]
-
-; AddRec: {(-8 + (8 * %m * %o) + %A),+,(8 * %m * %o)}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of sizeof(double) bytes.
-; CHECK: ArrayRef[{(-1 + (%m * %o)),+,(%m * %o)}<%for.i>]
-
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
entry:
br label %for.i
diff --git a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
index e151610..9e406d1 100644
--- a/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
+++ b/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
@@ -12,16 +12,6 @@
; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
-; AddRec: {{((8 * (zext i32 (-1 + %o) to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize][((zext i32 %m to i64) * (zext i32 %o to i64))] with elements of 8 bytes.
-; CHECK: ArrayRef[{0,+,1}<%for.i>][{(zext i32 (-1 + %o) to i64),+,(zext i32 %o to i64)}<%for.j>]
-
-; AddRec: {((8 * (zext i32 (-1 + %o) to i64)) + (8 * (zext i32 (-1 + %m) to i64) * (zext i32 %o to i64)) + %A),+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>
-; CHECK: Base offset: %A
-; CHECK: ArrayDecl[UnknownSize] with elements of 8 bytes.
-; CHECK: ArrayRef[{((zext i32 (-1 + %o) to i64) + ((zext i32 (-1 + %m) to i64) * (zext i32 %o to i64))),+,((zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>]
-
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
new file mode 100644
index 0000000..6a98507
--- /dev/null
+++ b/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
@@ -0,0 +1,43 @@
+; RUN: opt -basicaa -da -analyze -da-delinearize < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Derived from the following code:
+;
+; void foo(long n, long m, double *A) {
+;   for (long i = 0; i < n; i++)
+;     for (long j = 0; j < m; j++) {
+;       *(A + i * m + j) = 1.0;
+;       *(A + j * n + i) = 1.0;
+;     }
+; }
+
+define void @foo(i64 %n, i64 %m, double* %A) {
+entry:
+ br label %for.i
+
+for.i:
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ br label %for.j
+
+for.j:
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+ %tmp = mul nsw i64 %i, %m
+ %vlaarrayidx.sum = add i64 %j, %tmp
+ %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+ store double 1.0, double* %arrayidx
+ %tmp1 = mul nsw i64 %j, %n
+ %vlaarrayidx.sum1 = add i64 %i, %tmp1
+ %arrayidx1 = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum1
+ store double 1.0, double* %arrayidx1
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, %m
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, %n
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
diff --git a/test/Analysis/Delinearization/undef.ll b/test/Analysis/Delinearization/undef.ll
new file mode 100644
index 0000000..8ee64e3
--- /dev/null
+++ b/test/Analysis/Delinearization/undef.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -analyze -delinearize
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(double* %Ey) {
+entry:
+ br i1 undef, label %for.cond55.preheader, label %for.end324
+
+for.cond55.preheader:
+ %iz.069 = phi i64 [ %inc323, %for.inc322 ], [ 0, %entry ]
+ br i1 undef, label %for.cond58.preheader, label %for.inc322
+
+for.cond58.preheader:
+ %iy.067 = phi i64 [ %inc320, %for.end ], [ 0, %for.cond55.preheader ]
+ br i1 undef, label %for.body60, label %for.end
+
+for.body60:
+ %ix.062 = phi i64 [ %inc, %for.body60 ], [ 0, %for.cond58.preheader ]
+ %0 = mul i64 %iz.069, undef
+ %tmp5 = add i64 %iy.067, %0
+ %tmp6 = mul i64 %tmp5, undef
+ %arrayidx69.sum = add i64 undef, %tmp6
+ %arrayidx70 = getelementptr inbounds double* %Ey, i64 %arrayidx69.sum
+ %1 = load double* %arrayidx70, align 8
+ %inc = add nsw i64 %ix.062, 1
+ br i1 false, label %for.body60, label %for.end
+
+for.end:
+ %inc320 = add nsw i64 %iy.067, 1
+ br i1 undef, label %for.cond58.preheader, label %for.inc322
+
+for.inc322:
+ %inc323 = add nsw i64 %iz.069, 1
+ br i1 undef, label %for.cond55.preheader, label %for.end324
+
+for.end324:
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index 5c17064..883a06d 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -24,7 +24,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee0':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [0 1]!
+; DELIN: da analyze - flow [<= <>]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
@@ -83,10 +83,10 @@ entry:
; CHECK: da analyze - output [* *]!
; DELIN: 'Dependence Analysis' for function 'banerjee1':
-; DELIN: da analyze - none
-; DELIN: da analyze - consistent flow [0 1]!
+; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - flow [* <>]!
; DELIN: da analyze - confused!
-; DELIN: da analyze - none
+; DELIN: da analyze - input [* *]!
; DELIN: da analyze - confused!
; DELIN: da analyze - output [* *]!
@@ -218,7 +218,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee3':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [-9 -9]!
+; DELIN: da analyze - flow [> >]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
@@ -336,7 +336,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee5':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [9 9]!
+; DELIN: da analyze - flow [< <]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
@@ -395,7 +395,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee6':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [0 -9]!
+; DELIN: da analyze - flow [=> <>]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
@@ -454,7 +454,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee7':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [-1 0]!
+; DELIN: da analyze - flow [> <=]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
@@ -513,7 +513,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee8':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [-1 -1]!
+; DELIN: da analyze - flow [> <>]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
@@ -571,7 +571,7 @@ entry:
; CHECK: da analyze - none!
; DELIN: 'Dependence Analysis' for function 'banerjee9':
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
; DELIN: da analyze - flow [<= =|<]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
@@ -750,7 +750,7 @@ entry:
; DELIN: 'Dependence Analysis' for function 'banerjee12':
; DELIN: da analyze - none!
-; DELIN: da analyze - consistent flow [0 -11]!
+; DELIN: da analyze - flow [= <>]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index 7efa8b5..7eca18e 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -24,10 +24,10 @@ entry:
; CHECK: da analyze - none!
; DELIN: 'Dependence Analysis' for function 'gcd0'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
; DELIN: da analyze - flow [=> *|<]!
; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
@@ -85,10 +85,10 @@ entry:
; CHECK: da analyze - none!
; DELIN: 'Dependence Analysis' for function 'gcd1'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
@@ -147,10 +147,10 @@ entry:
; CHECK: da analyze - none!
; DELIN: 'Dependence Analysis' for function 'gcd2'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
@@ -410,10 +410,10 @@ entry:
; CHECK: da analyze - output [* *]!
; DELIN: 'Dependence Analysis' for function 'gcd6'
-; DELIN: da analyze - none!
+; DELIN: da analyze - output [* *]!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
-; DELIN: da analyze - none!
+; DELIN: da analyze - input [* *]!
; DELIN: da analyze - confused!
; DELIN: da analyze - output [* *]!
diff --git a/test/Analysis/LazyCallGraph/basic.ll b/test/Analysis/LazyCallGraph/basic.ll
index ebadb75..b8108d9 100644
--- a/test/Analysis/LazyCallGraph/basic.ll
+++ b/test/Analysis/LazyCallGraph/basic.ll
@@ -124,3 +124,53 @@ define void @test2() {
load i8** bitcast (void ()** @h to i8**)
ret void
}
+
+; Verify the SCCs formed.
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f7
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f6
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f5
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f4
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f3
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f2
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f1
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: test2
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f12
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f11
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f10
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f9
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f8
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: test1
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: f
+;
+; CHECK-LABEL: SCC with 1 functions:
+; CHECK-NEXT: test0
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index 0cdbdf5..31f06a4 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -98,3 +98,112 @@ for.end: ; preds = %for.cond.for.end_cr
; CHECK: Determining loop execution counts for: @test
; CHECK-NEXT: backedge-taken count is
; CHECK-NEXT: max backedge-taken count is -1
+
+; PR19799: Indvars miscompile due to an incorrect max backedge taken count from SCEV.
+; CHECK-LABEL: @pr19799
+; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count.
+; CHECK: Loop %for.body.i: max backedge-taken count is 1
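+;
+; Trace: %storemerge1.i steps -1, 1, 3, ... so the early exit (icmp eq 0)
+; never fires, while the latch exit (icmp slt 0) permits exactly one
+; backedge (-1 is negative, 1 is not), bounding the count at 1.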
+@a = common global i32 0, align 4
+
+define i32 @pr19799() {
+entry:
+ store i32 -1, i32* @a, align 4
+ br label %for.body.i
+
+for.body.i: ; preds = %for.cond.i, %entry
+ %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+ %tobool.i = icmp eq i32 %storemerge1.i, 0
+ %add.i.i = add nsw i32 %storemerge1.i, 2
+ br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i: ; preds = %for.body.i
+ store i32 %add.i.i, i32* @a, align 4
+ %cmp.i = icmp slt i32 %storemerge1.i, 0
+ br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit: ; preds = %for.cond.i, %for.body.i
+ ret i32 0
+}
+
+; PR18886: Indvars miscompile due to an incorrect max backedge taken count from SCEV.
+; CHECK-LABEL: @pr18886
+; CHECK: Loop %for.body: <multiple exits> Unpredictable backedge-taken count.
+; CHECK: Loop %for.body: max backedge-taken count is 3
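+;
+; Trace: %add steps -13, -5, 3, 11, ... so the latch exit (icmp slt 9)
+; permits backedges only for -13, -5 and 3, and the early exit
+; (icmp eq 0) never fires: at most 3 backedges.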
+@aa = global i64 0, align 8
+
+define i32 @pr18886() {
+entry:
+ store i64 -21, i64* @aa, align 8
+ br label %for.body
+
+for.body:
+ %storemerge1 = phi i64 [ -21, %entry ], [ %add, %for.cond ]
+ %tobool = icmp eq i64 %storemerge1, 0
+ %add = add nsw i64 %storemerge1, 8
+ br i1 %tobool, label %return, label %for.cond
+
+for.cond:
+ store i64 %add, i64* @aa, align 8
+ %cmp = icmp slt i64 %add, 9
+ br i1 %cmp, label %for.body, label %return
+
+return:
+ %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+ ret i32 %retval.0
+}
+
+; Here we have a must-exit loop latch that is not computable and a
+; may-exit early exit that can only have one non-exiting iteration
+; before the check is forever skipped.
+;
+; CHECK-LABEL: @cannot_compute_mustexit
+; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count.
+; CHECK: Loop %for.body.i: Unpredictable max backedge-taken count.
+@b = common global i32 0, align 4
+
+define i32 @cannot_compute_mustexit() {
+entry:
+ store i32 -1, i32* @a, align 4
+ br label %for.body.i
+
+for.body.i: ; preds = %for.cond.i, %entry
+ %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+ %tobool.i = icmp eq i32 %storemerge1.i, 0
+ %add.i.i = add nsw i32 %storemerge1.i, 2
+ br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i: ; preds = %for.body.i
+ store i32 %add.i.i, i32* @a, align 4
+ %ld = load volatile i32* @b
+ %cmp.i = icmp ne i32 %ld, 0
+ br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit: ; preds = %for.cond.i, %for.body.i
+ ret i32 0
+}
+
+; This loop has two must-exits, both of which dominate the latch. The
+; MaxBECount should be the minimum of them.
+;
+; CHECK-LABEL: @two_mustexit
+; CHECK: Loop %for.body.i: <multiple exits> Unpredictable backedge-taken count.
+; CHECK: Loop %for.body.i: max backedge-taken count is 1
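+;
+; Trace: %storemerge1.i steps -1, 1, 3, ... so the body exit (icmp sgt 0)
+; allows at most 1 backedge and the latch exit (icmp slt 3) allows at
+; most 2; the minimum, and hence the max backedge-taken count, is 1.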
+define i32 @two_mustexit() {
+entry:
+ store i32 -1, i32* @a, align 4
+ br label %for.body.i
+
+for.body.i: ; preds = %for.cond.i, %entry
+ %storemerge1.i = phi i32 [ -1, %entry ], [ %add.i.i, %for.cond.i ]
+ %tobool.i = icmp sgt i32 %storemerge1.i, 0
+ %add.i.i = add nsw i32 %storemerge1.i, 2
+ br i1 %tobool.i, label %bar.exit, label %for.cond.i
+
+for.cond.i: ; preds = %for.body.i
+ store i32 %add.i.i, i32* @a, align 4
+ %cmp.i = icmp slt i32 %storemerge1.i, 3
+ br i1 %cmp.i, label %for.body.i, label %bar.exit
+
+bar.exit: ; preds = %for.cond.i, %for.body.i
+ ret i32 0
+}
diff --git a/test/Assembler/2009-04-25-AliasGEP.ll b/test/Assembler/2009-04-25-AliasGEP.ll
deleted file mode 100644
index 6d07208..0000000
--- a/test/Assembler/2009-04-25-AliasGEP.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
-; PR4066
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9"
- %struct.i2c_device_id = type { }
-@w83l785ts_id = internal constant [0 x %struct.i2c_device_id] zeroinitializer, align 1 ; <[0 x %struct.i2c_device_id]*> [#uses=1]
-
-@__mod_i2c_device_table = alias getelementptr ([0 x %struct.i2c_device_id]* @w83l785ts_id, i32 0, i32 0) ; <%struct.i2c_device_id*> [#uses=0]
diff --git a/test/Assembler/addrspacecast-alias.ll b/test/Assembler/addrspacecast-alias.ll
index 6623a25..052a141 100644
--- a/test/Assembler/addrspacecast-alias.ll
+++ b/test/Assembler/addrspacecast-alias.ll
@@ -1,6 +1,7 @@
-; RUN: llvm-as -disable-output %s
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; Test that global aliases are allowed to be constant addrspacecast
@i = internal addrspace(1) global i8 42
-@ia = alias internal i8 addrspace(2)* addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)*)
+@ia = alias internal addrspace(2) i8 addrspace(3)*, i8 addrspace(1)* @i
+; CHECK: @ia = alias internal addrspace(2) i8 addrspace(3)*, i8 addrspace(1)* @i
diff --git a/test/Assembler/alias-addrspace.ll b/test/Assembler/alias-addrspace.ll
new file mode 100644
index 0000000..6d378e4
--- /dev/null
+++ b/test/Assembler/alias-addrspace.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as %s 2>&1 | FileCheck %s
+
+@foo = global i32 42
+@bar = alias internal addrspace(1) i32* @foo
+
; CHECK: error: A type is required if addrspace is given
diff --git a/test/Assembler/alias-redefinition.ll b/test/Assembler/alias-redefinition.ll
new file mode 100644
index 0000000..19ad85b
--- /dev/null
+++ b/test/Assembler/alias-redefinition.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s 2>&1 | FileCheck %s
+
+; CHECK: error: redefinition of global named '@bar'
+
+@foo = global i32 0
+@bar = alias i32* @foo
+@bar = alias i32* @foo
diff --git a/test/Assembler/alias-to-alias.ll b/test/Assembler/alias-to-alias.ll
new file mode 100644
index 0000000..1ea99bb
--- /dev/null
+++ b/test/Assembler/alias-to-alias.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Alias must point to function or variable
+
+@b1 = alias i32* @c1
+@c1 = alias i32* @b1
diff --git a/test/Assembler/alias-to-alias2.ll b/test/Assembler/alias-to-alias2.ll
new file mode 100644
index 0000000..a8a0196
--- /dev/null
+++ b/test/Assembler/alias-to-alias2.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: error: Alias is pointed by alias b1
+
+@g = global i32 42
+
+@b1 = alias i32* @c1
+@c1 = alias i32* @g
diff --git a/test/Assembler/alias-type.ll b/test/Assembler/alias-type.ll
new file mode 100644
index 0000000..ead3e95
--- /dev/null
+++ b/test/Assembler/alias-type.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as %s 2>&1 | FileCheck %s
+
+@foo = global i32 42
+@bar = alias i32 @foo
+
; CHECK: error: An alias must have pointer type
diff --git a/test/Assembler/half-constprop.ll b/test/Assembler/half-constprop.ll
index 03ccdda..9e24f72 100644
--- a/test/Assembler/half-constprop.ll
+++ b/test/Assembler/half-constprop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -O3 | llvm-dis | FileCheck %s
+; RUN: opt < %s -O3 -S | FileCheck %s
; Testing half constant propagation.
define half @abc() nounwind {
diff --git a/test/Assembler/half-conv.ll b/test/Assembler/half-conv.ll
index bf9ae57..70a6b86 100644
--- a/test/Assembler/half-conv.ll
+++ b/test/Assembler/half-conv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -O3 | llvm-dis | FileCheck %s
+; RUN: opt < %s -O3 -S | FileCheck %s
; Testing half to float conversion.
define float @abc() nounwind {
diff --git a/test/Assembler/internal-hidden-alias.ll b/test/Assembler/internal-hidden-alias.ll
new file mode 100644
index 0000000..660514b
--- /dev/null
+++ b/test/Assembler/internal-hidden-alias.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@global = global i32 0
+
+@alias = hidden alias internal i32* @global
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/internal-hidden-function.ll b/test/Assembler/internal-hidden-function.ll
new file mode 100644
index 0000000..193ed7c
--- /dev/null
+++ b/test/Assembler/internal-hidden-function.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+define internal hidden void @function() {
+; CHECK: symbol with local linkage must have default visibility
+entry:
+ ret void
+}
diff --git a/test/Assembler/internal-hidden-variable.ll b/test/Assembler/internal-hidden-variable.ll
new file mode 100644
index 0000000..eddd067
--- /dev/null
+++ b/test/Assembler/internal-hidden-variable.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@var = internal hidden global i32 0
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/internal-protected-alias.ll b/test/Assembler/internal-protected-alias.ll
new file mode 100644
index 0000000..d785826
--- /dev/null
+++ b/test/Assembler/internal-protected-alias.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@global = global i32 0
+
+@alias = protected alias internal i32* @global
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/internal-protected-function.ll b/test/Assembler/internal-protected-function.ll
new file mode 100644
index 0000000..944cb75
--- /dev/null
+++ b/test/Assembler/internal-protected-function.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+define internal protected void @function() {
+; CHECK: symbol with local linkage must have default visibility
+entry:
+ ret void
+}
diff --git a/test/Assembler/internal-protected-variable.ll b/test/Assembler/internal-protected-variable.ll
new file mode 100644
index 0000000..df02275
--- /dev/null
+++ b/test/Assembler/internal-protected-variable.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@var = internal protected global i32 0
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/private-hidden-alias.ll b/test/Assembler/private-hidden-alias.ll
new file mode 100644
index 0000000..58be92a
--- /dev/null
+++ b/test/Assembler/private-hidden-alias.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@global = global i32 0
+
+@alias = hidden alias private i32* @global
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/private-hidden-function.ll b/test/Assembler/private-hidden-function.ll
new file mode 100644
index 0000000..dd73f04
--- /dev/null
+++ b/test/Assembler/private-hidden-function.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+define private hidden void @function() {
+; CHECK: symbol with local linkage must have default visibility
+entry:
+ ret void
+}
diff --git a/test/Assembler/private-hidden-variable.ll b/test/Assembler/private-hidden-variable.ll
new file mode 100644
index 0000000..ce6bfa9
--- /dev/null
+++ b/test/Assembler/private-hidden-variable.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@var = private hidden global i32 0
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/private-protected-alias.ll b/test/Assembler/private-protected-alias.ll
new file mode 100644
index 0000000..a72c248
--- /dev/null
+++ b/test/Assembler/private-protected-alias.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@global = global i32 0
+
+@alias = protected alias private i32* @global
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/private-protected-function.ll b/test/Assembler/private-protected-function.ll
new file mode 100644
index 0000000..5dbb420
--- /dev/null
+++ b/test/Assembler/private-protected-function.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+define private protected void @function() {
+; CHECK: symbol with local linkage must have default visibility
+entry:
+ ret void
+}
diff --git a/test/Assembler/private-protected-variable.ll b/test/Assembler/private-protected-variable.ll
new file mode 100644
index 0000000..c4458f5
--- /dev/null
+++ b/test/Assembler/private-protected-variable.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@var = private protected global i32 0
+; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 545f1cb..02e1bb1 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -218,6 +218,11 @@ define void @f36(i8* inalloca) {
ret void
}
+define nonnull i8* @f37(i8* nonnull %a) {
+; CHECK: define nonnull i8* @f37(i8* nonnull %a) {
+ ret i8* %a
+}
+
; CHECK: attributes #0 = { noreturn }
; CHECK: attributes #1 = { nounwind }
; CHECK: attributes #2 = { readnone }
diff --git a/test/Bitcode/deprecated-linker_private-linker_private_weak.ll b/test/Bitcode/deprecated-linker_private-linker_private_weak.ll
new file mode 100644
index 0000000..12a527c
--- /dev/null
+++ b/test/Bitcode/deprecated-linker_private-linker_private_weak.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as -o - %s | llvm-dis | FileCheck %s
+; RUN: llvm-as -o /dev/null %s 2>&1 | FileCheck %s -check-prefix CHECK-WARNINGS
+
+@.linker_private = linker_private unnamed_addr constant [15 x i8] c"linker_private\00", align 64
+@.linker_private_weak = linker_private_weak unnamed_addr constant [20 x i8] c"linker_private_weak\00", align 64
+
+; CHECK: @.linker_private = private unnamed_addr constant [15 x i8] c"linker_private\00", align 64
+; CHECK: @.linker_private_weak = private unnamed_addr constant [20 x i8] c"linker_private_weak\00", align 64
+
+; CHECK-WARNINGS: warning: '.linker_private' is deprecated, treating as PrivateLinkage
+; CHECK-WARNINGS: @.linker_private = linker_private unnamed_addr constant [15 x i8] c"linker_private\00", align 64
+; CHECK-WARNINGS: ^
+
+; CHECK-WARNINGS: warning: '.linker_private_weak' is deprecated, treating as PrivateLinkage
+; CHECK-WARNINGS: @.linker_private_weak = linker_private_weak unnamed_addr constant [20 x i8] c"linker_private_weak\00", align 64
+; CHECK-WARNINGS: ^
+
diff --git a/test/Bitcode/local-linkage-default-visibility.3.4.ll b/test/Bitcode/local-linkage-default-visibility.3.4.ll
new file mode 100644
index 0000000..45a7b12
--- /dev/null
+++ b/test/Bitcode/local-linkage-default-visibility.3.4.ll
@@ -0,0 +1,79 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; local-linkage-default-visibility.3.4.ll.bc was generated by passing this file
+; to llvm-as-3.4. The test checks that LLVM upgrades visibility of symbols
+; with local linkage to default visibility.
+
+@default.internal.var = internal global i32 0
+; CHECK: @default.internal.var = internal global i32 0
+
+@hidden.internal.var = internal hidden global i32 0
+; CHECK: @hidden.internal.var = internal global i32 0
+
+@protected.internal.var = internal protected global i32 0
+; CHECK: @protected.internal.var = internal global i32 0
+
+@default.private.var = private global i32 0
+; CHECK: @default.private.var = private global i32 0
+
+@hidden.private.var = private hidden global i32 0
+; CHECK: @hidden.private.var = private global i32 0
+
+@protected.private.var = private protected global i32 0
+; CHECK: @protected.private.var = private global i32 0
+
+@global = global i32 0
+
+@default.internal.alias = alias internal i32* @global
+; CHECK: @default.internal.alias = alias internal i32* @global
+
+@hidden.internal.alias = hidden alias internal i32* @global
+; CHECK: @hidden.internal.alias = alias internal i32* @global
+
+@protected.internal.alias = protected alias internal i32* @global
+; CHECK: @protected.internal.alias = alias internal i32* @global
+
+@default.private.alias = alias private i32* @global
+; CHECK: @default.private.alias = alias private i32* @global
+
+@hidden.private.alias = hidden alias private i32* @global
+; CHECK: @hidden.private.alias = alias private i32* @global
+
+@protected.private.alias = protected alias private i32* @global
+; CHECK: @protected.private.alias = alias private i32* @global
+
+define internal void @default.internal() {
+; CHECK: define internal void @default.internal
+entry:
+ ret void
+}
+
+define internal hidden void @hidden.internal() {
+; CHECK: define internal void @hidden.internal
+entry:
+ ret void
+}
+
+define internal protected void @protected.internal() {
+; CHECK: define internal void @protected.internal
+entry:
+ ret void
+}
+
+define private void @default.private() {
+; CHECK: define private void @default.private
+entry:
+ ret void
+}
+
+define private hidden void @hidden.private() {
+; CHECK: define private void @hidden.private
+entry:
+ ret void
+}
+
+define private protected void @protected.private() {
+; CHECK: define private void @protected.private
+entry:
+ ret void
+}
diff --git a/test/Bitcode/local-linkage-default-visibility.3.4.ll.bc b/test/Bitcode/local-linkage-default-visibility.3.4.ll.bc
new file mode 100644
index 0000000..6e49f7e
--- /dev/null
+++ b/test/Bitcode/local-linkage-default-visibility.3.4.ll.bc
Binary files differ
diff --git a/test/Bitcode/old-aliases.ll b/test/Bitcode/old-aliases.ll
new file mode 100644
index 0000000..4ef47c0
--- /dev/null
+++ b/test/Bitcode/old-aliases.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; old-aliases.ll.bc consists of this file assembled with an old llvm-as (3.5 trunk)
+; from when aliases contained a ConstantExpr.
+
+@v1 = global i32 0
+; CHECK: @v1 = global i32 0
+
+@v2 = global [1 x i32] zeroinitializer
+; CHECK: @v2 = global [1 x i32] zeroinitializer
+
+@v3 = alias bitcast (i32* @v1 to i16*)
+; CHECK: @v3 = alias i16, i32* @v1
+
+@v4 = alias getelementptr ([1 x i32]* @v2, i32 0, i32 0)
+; CHECK: @v4 = alias i32, [1 x i32]* @v2
+
+@v5 = alias i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*)
+; CHECK: @v5 = alias addrspace(2) i32, i32* @v1
+
+@v6 = alias i16* @v3
+; CHECK: @v6 = alias i16, i32* @v1
diff --git a/test/Bitcode/old-aliases.ll.bc b/test/Bitcode/old-aliases.ll.bc
new file mode 100644
index 0000000..1f157b2
--- /dev/null
+++ b/test/Bitcode/old-aliases.ll.bc
Binary files differ
diff --git a/test/Bitcode/tailcall.ll b/test/Bitcode/tailcall.ll
new file mode 100644
index 0000000..765b470
--- /dev/null
+++ b/test/Bitcode/tailcall.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Check that musttail and tail roundtrip.
+
+declare cc8191 void @t1_callee()
+define cc8191 void @t1() {
+; CHECK: tail call cc8191 void @t1_callee()
+ tail call cc8191 void @t1_callee()
+ ret void
+}
+
+declare cc8191 void @t2_callee()
+define cc8191 void @t2() {
+; CHECK: musttail call cc8191 void @t2_callee()
+ musttail call cc8191 void @t2_callee()
+ ret void
+}
diff --git a/test/Bitcode/upgrade-global-ctors.ll b/test/Bitcode/upgrade-global-ctors.ll
new file mode 100644
index 0000000..bd253a8
--- /dev/null
+++ b/test/Bitcode/upgrade-global-ctors.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-dis < %s.bc| FileCheck %s
+
+; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
diff --git a/test/Bitcode/upgrade-global-ctors.ll.bc b/test/Bitcode/upgrade-global-ctors.ll.bc
new file mode 100644
index 0000000..927fd91
--- /dev/null
+++ b/test/Bitcode/upgrade-global-ctors.ll.bc
Binary files differ
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 827cd76..3e08a16 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -37,6 +37,7 @@ set(LLVM_TEST_DEPENDS
llvm-mc
llvm-mcmarkup
llvm-nm
+ llvm-size
llvm-objdump
llvm-profdata
llvm-readobj
diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll
index 502fd70..a6f0776 100644
--- a/test/CodeGen/AArch64/128bit_load_store.ll
+++ b/test/CodeGen/AArch64/128bit_load_store.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK
define void @test_store_f128(fp128* %ptr, fp128 %val) #0 {
-; CHECK: test_store_f128
+; CHECK-LABEL: test_store_f128
; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}]
entry:
store fp128 %val, fp128* %ptr, align 16
@@ -9,7 +9,7 @@ entry:
}
define fp128 @test_load_f128(fp128* readonly %ptr) #2 {
-; CHECK: test_load_f128
+; CHECK-LABEL: test_load_f128
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
entry:
%0 = load fp128* %ptr, align 16
@@ -17,9 +17,9 @@ entry:
}
define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 {
-; CHECK: test_vstrq_p128
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
-; CHECK-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-LABEL: test_vstrq_p128
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+
entry:
%0 = bitcast i128* %ptr to fp128*
%1 = bitcast i128 %val to fp128
@@ -28,9 +28,9 @@ entry:
}
define i128 @test_vldrq_p128(i128* readonly %ptr) #2 {
-; CHECK: test_vldrq_p128
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
-; CHECK-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+; CHECK-LABEL: test_vldrq_p128
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+
entry:
%0 = bitcast i128* %ptr to fp128*
%1 = load fp128* %0, align 16
@@ -39,7 +39,7 @@ entry:
}
define void @test_ld_st_p128(i128* nocapture %ptr) #0 {
-; CHECK: test_ld_st_p128
+; CHECK-LABEL: test_ld_st_p128
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
entry:
diff --git a/test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll b/test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll
new file mode 100644
index 0000000..c932253
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-neon-v1i1-setcc.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; arm64 has a separate copy as aarch64-neon-v1i1-setcc.ll
+
+; This file tests DAG nodes like "v1i1 SETCC v1i64, v1i64". As the v1i1 type
+; is illegal in the AArch64 backend, the legalizer tries to scalarize this
+; node. As the v1i64 operands of SETCC are legal types, they will not be
+; scalarized. Currently the type legalizer will hit an assertion failure,
+; as it assumes all operands of SETCC have been legalized.
+; FIXME: If the type-scalarization algorithm is improved so that it can
+; legalize "v1i1 SETCC" correctly, these test cases will no longer be needed.
+
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_0:
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+ %1 = icmp sge <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_1:
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_0:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
+; CHECK-LABEL: test_select_v1i1_2:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
+ ret <1 x double> %res
+}
+
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_br_extr_cmp:
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ br i1 %2, label %if.end, label %if.then
+
+if.then:
+ ret i32 0;
+
+if.end:
+ ret i32 1;
+}
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll
index 29637d3..892573b 100644
--- a/test/CodeGen/AArch64/adc.ll
+++ b/test/CodeGen/AArch64/adc.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
; CHECK-LABEL: test_simple:
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
index 269c1e8..0a93edd 100644
--- a/test/CodeGen/AArch64/addsub-shifted.ll
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s
@var32 = global i32 0
@var64 = global i64 0
@@ -35,7 +35,7 @@ define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
%shift4a = shl i32 %lhs4a, 15
%val4a = sub i32 0, %shift4a
store volatile i32 %val4a, i32* @var32
-; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15
+; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, lsl #15
%rhs5 = load volatile i64* @var64
%shift5 = shl i64 %rhs5, 18
@@ -66,7 +66,7 @@ define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
%shift8a = shl i64 %lhs8a, 60
%val8a = sub i64 0, %shift8a
store volatile i64 %val8a, i64* @var64
-; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60
+; CHECK: neg {{x[0-9]+}}, {{x[0-9]+}}, lsl #60
ret void
; CHECK: ret
@@ -99,7 +99,7 @@ define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
%shift4a = lshr i32 %lhs32, 15
%val4a = sub i32 0, %shift4a
store volatile i32 %val4a, i32* @var32
-; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15
+; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, lsr #15
%shift5 = lshr i64 %rhs64, 18
%val5 = add i64 %lhs64, %shift5
@@ -125,7 +125,7 @@ define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
%shift8a = lshr i64 %lhs64, 45
%val8a = sub i64 0, %shift8a
store volatile i64 %val8a, i64* @var64
-; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45
+; CHECK: neg {{x[0-9]+}}, {{x[0-9]+}}, lsr #45
ret void
; CHECK: ret
@@ -158,7 +158,7 @@ define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
%shift4a = ashr i32 %lhs32, 15
%val4a = sub i32 0, %shift4a
store volatile i32 %val4a, i32* @var32
-; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15
+; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, asr #15
%shift5 = ashr i64 %rhs64, 18
%val5 = add i64 %lhs64, %shift5
@@ -184,7 +184,7 @@ define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
%shift8a = ashr i64 %lhs64, 45
%val8a = sub i64 0, %shift8a
store volatile i64 %val8a, i64* @var64
-; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45
+; CHECK: neg {{x[0-9]+}}, {{x[0-9]+}}, asr #45
ret void
; CHECK: ret
@@ -245,7 +245,7 @@ define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
br i1 %tst1, label %t2, label %end
; Important that this isn't lowered to a cmn instruction because if %rhs32 ==
; 0 then the results will differ.
-; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13
+; CHECK: neg [[RHS:w[0-9]+]], {{w[0-9]+}}, lsl #13
; CHECK: cmp {{w[0-9]+}}, [[RHS]]
t2:
@@ -268,7 +268,7 @@ t4:
%tst4 = icmp slt i64 %lhs64, %val4
br i1 %tst4, label %t5, label %end
; Again, it's important that cmn isn't used here in case %rhs64 == 0.
-; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43
+; CHECK: neg [[RHS:x[0-9]+]], {{x[0-9]+}}, lsl #43
; CHECK: cmp {{x[0-9]+}}, [[RHS]]
t5:
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
index 4d46d04..b85fdbb 100644
--- a/test/CodeGen/AArch64/addsub.ll
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu | FileCheck %s
; Note that this should be refactored (for efficiency if nothing else)
; when the PCS is implemented so we don't have to worry about the
@@ -28,12 +28,12 @@ define void @add_small() {
define void @add_med() {
; CHECK-LABEL: add_med:
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
%val32 = load i32* @var_i32
%newval32 = add i32 %val32, 14610432 ; =0xdef000
store i32 %newval32, i32* @var_i32
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{#4095, lsl #12|#16773120}}
%val64 = load i64* @var_i64
%newval64 = add i64 %val64, 16773120 ; =0xfff000
store i64 %newval64, i64* @var_i64
@@ -62,12 +62,12 @@ define void @sub_small() {
define void @sub_med() {
; CHECK-LABEL: sub_med:
-; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
%val32 = load i32* @var_i32
%newval32 = sub i32 %val32, 14610432 ; =0xdef000
store i32 %newval32, i32* @var_i32
-; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{#4095, lsl #12|#16773120}}
%val64 = load i64* @var_i64
%newval64 = sub i64 %val64, 16773120 ; =0xfff000
store i64 %newval64, i64* @var_i64
@@ -80,13 +80,13 @@ define void @testing() {
%val = load i32* @var_i32
; CHECK: cmp {{w[0-9]+}}, #4095
-; CHECK: b.ne .LBB4_6
+; CHECK: b.ne [[RET:.?LBB[0-9]+_[0-9]+]]
%cmp_pos_small = icmp ne i32 %val, 4095
br i1 %cmp_pos_small, label %ret, label %test2
test2:
-; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12
-; CHECK: b.lo .LBB4_6
+; CHECK: cmp {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
+; CHECK: b.lo [[RET]]
%newval2 = add i32 %val, 1
store i32 %newval2, i32* @var_i32
%cmp_pos_big = icmp ult i32 %val, 14610432
@@ -94,7 +94,7 @@ test2:
test3:
; CHECK: cmp {{w[0-9]+}}, #123
-; CHECK: b.lt .LBB4_6
+; CHECK: b.lt [[RET]]
%newval3 = add i32 %val, 2
store i32 %newval3, i32* @var_i32
%cmp_pos_slt = icmp slt i32 %val, 123
@@ -102,7 +102,7 @@ test3:
test4:
; CHECK: cmp {{w[0-9]+}}, #321
-; CHECK: b.gt .LBB4_6
+; CHECK: b.gt [[RET]]
%newval4 = add i32 %val, 3
store i32 %newval4, i32* @var_i32
%cmp_pos_sgt = icmp sgt i32 %val, 321
@@ -110,7 +110,7 @@ test4:
test5:
; CHECK: cmn {{w[0-9]+}}, #444
-; CHECK: b.gt .LBB4_6
+; CHECK: b.gt [[RET]]
%newval5 = add i32 %val, 4
store i32 %newval5, i32* @var_i32
%cmp_neg_uge = icmp sgt i32 %val, -444
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index f0e11c6..a2266b1 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -1,11 +1,11 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck %s
@var8 = global i8 0
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0
-define void @addsub_i8rhs() {
+define void @addsub_i8rhs() minsize {
; CHECK-LABEL: addsub_i8rhs:
%val8_tmp = load i8* @var8
%lhs32 = load i32* @var32
@@ -80,7 +80,7 @@ end:
ret void
}
-define void @addsub_i16rhs() {
+define void @addsub_i16rhs() minsize {
; CHECK-LABEL: addsub_i16rhs:
%val16_tmp = load i16* @var16
%lhs32 = load i32* @var32
@@ -158,7 +158,7 @@ end:
; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
; example), but the remaining instructions are probably not idiomatic
; in the face of "add/sub (shifted register)" so I don't intend to.
-define void @addsub_i32rhs() {
+define void @addsub_i32rhs() minsize {
; CHECK-LABEL: addsub_i32rhs:
%val32_tmp = load i32* @var32
%lhs64 = load i64* @var64
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
index 1d3c0a0..f93efbc 100644
--- a/test/CodeGen/AArch64/alloca.ll
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
declare void @use_addr(i8*)
@@ -8,23 +8,22 @@ define void @test_simple_alloca(i64 %n) {
%buf = alloca i8, i64 %n
; Make sure we align the stack change to 16 bytes:
-; CHECK-DAG: add [[SPDELTA:x[0-9]+]], x0, #15
-; CHECK-DAG: and x0, [[SPDELTA]], #0xfffffffffffffff0
+; CHECK: {{mov|add}} x29
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: add [[SPDELTA_TMP:x[0-9]+]], x0, #15
+; CHECK: and [[SPDELTA:x[0-9]+]], [[SPDELTA_TMP]], #0xfffffffffffffff0
; Make sure we change SP. It would be surprising if anything but x0 were used
; for the final sp, but it could be if it was then moved into x0.
-; CHECK-DAG: mov [[TMP:x[0-9]+]], sp
-; CHECK-DAG: sub x0, [[TMP]], [[SPDELTA]]
-; CHECK: mov sp, x0
+; CHECK: sub [[NEWSP:x[0-9]+]], [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, [[NEWSP]]
call void @use_addr(i8* %buf)
; CHECK: bl use_addr
ret void
; Make sure epilogue restores sp from fp
-; CHECK: sub sp, x29, #16
-; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: add sp, sp, #32
+; CHECK: {{sub|mov}} sp, x29
; CHECK: ret
}
@@ -32,57 +31,70 @@ declare void @use_addr_loc(i8*, i64*)
define i64 @test_alloca_with_local(i64 %n) {
; CHECK-LABEL: test_alloca_with_local:
-; CHECK: sub sp, sp, #32
-; CHECK: stp x29, x30, [sp, #16]
+; CHECK-DAG: sub sp, sp, [[LOCAL_STACK:#[0-9]+]]
+; CHECK-DAG: {{mov|add}} x29, sp
%loc = alloca i64
%buf = alloca i8, i64 %n
; Make sure we align the stack change to 16 bytes:
-; CHECK-DAG: add [[SPDELTA:x[0-9]+]], x0, #15
-; CHECK-DAG: and x0, [[SPDELTA]], #0xfffffffffffffff0
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: add [[SPDELTA_TMP:x[0-9]+]], x0, #15
+; CHECK: and [[SPDELTA:x[0-9]+]], [[SPDELTA_TMP]], #0xfffffffffffffff0
; Make sure we change SP. It would be surprising if anything but x0 were used
; for the final sp, but it could be if it was then moved into x0.
-; CHECK-DAG: mov [[TMP:x[0-9]+]], sp
-; CHECK-DAG: sub x0, [[TMP]], [[SPDELTA]]
-; CHECK: mov sp, x0
+; CHECK: sub [[NEWSP:x[0-9]+]], [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, [[NEWSP]]
- ; Obviously suboptimal code here, but it to get &local in x1
-; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]]
-; CHECK: add x1, [[TMP]], #0
+; CHECK: sub {{x[0-9]+}}, x29, #[[LOC_FROM_FP:[0-9]+]]
call void @use_addr_loc(i8* %buf, i64* %loc)
; CHECK: bl use_addr
%val = load i64* %loc
-; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]]
-; CHECK: ldr x0, [x[[TMP]]]
+
+; CHECK: ldur x0, [x29, #-[[LOC_FROM_FP]]]
ret i64 %val
; Make sure epilogue restores sp from fp
-; CHECK: sub sp, x29, #16
-; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: add sp, sp, #32
+; CHECK: {{sub|mov}} sp, x29
; CHECK: ret
}
define void @test_variadic_alloca(i64 %n, ...) {
-; CHECK: test_variadic_alloca:
-
-; CHECK: sub sp, sp, #208
-; CHECK: stp x29, x30, [sp, #192]
-; CHECK: add x29, sp, #192
-; CHECK: sub [[TMP:x[0-9]+]], x29, #192
-; CHECK: add x8, [[TMP]], #0
-; CHECK-FP: str q7, [x8, #112]
+; CHECK-LABEL: test_variadic_alloca:
+
; [...]
-; CHECK-FP: str q1, [x8, #16]
-; CHECK-NOFP: sub sp, sp, #80
-; CHECK-NOFP: stp x29, x30, [sp, #64]
-; CHECK-NOFP: add x29, sp, #64
-; CHECK-NOFP: sub [[TMP:x[0-9]+]], x29, #64
-; CHECK-NOFP: add x8, [[TMP]], #0
+
+; CHECK-NOFP-AARCH64: sub sp, sp, #80
+; CHECK-NOFP-AARCH64: stp x29, x30, [sp, #64]
+; CHECK-NOFP-AARCH64: add x29, sp, #64
+; CHECK-NOFP-AARCH64: sub [[TMP:x[0-9]+]], x29, #64
+; CHECK-NOFP-AARCH64: add x8, [[TMP]], #0
+
+
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; CHECK: sub sp, sp, #192
+; CHECK: stp q6, q7, [x29, #-96]
+; [...]
+; CHECK: stp q0, q1, [x29, #-192]
+
+; CHECK: stp x6, x7, [x29, #-16]
+; [...]
+; CHECK: stp x2, x3, [x29, #-48]
+
+; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]!
+; CHECK-NOFP-ARM64: mov x29, sp
+; CHECK-NOFP-ARM64: sub sp, sp, #64
+; CHECK-NOFP-ARM64: stp x6, x7, [x29, #-16]
+; [...]
+; CHECK-NOFP-ARM64: stp x4, x5, [x29, #-32]
+; [...]
+; CHECK-NOFP-ARM64: stp x2, x3, [x29, #-48]
+; [...]
+; CHECK-NOFP-ARM64: mov x8, sp
%addr = alloca i8, i64 %n
@@ -90,23 +102,24 @@ define void @test_variadic_alloca(i64 %n, ...) {
; CHECK: bl use_addr
ret void
-; CHECK: sub sp, x29, #192
-; CHECK: ldp x29, x30, [sp, #192]
-; CHECK: add sp, sp, #208
-; CHECK-NOFP: sub sp, x29, #64
-; CHECK-NOFP: ldp x29, x30, [sp, #64]
-; CHECK-NOFP: add sp, sp, #80
+; CHECK-NOFP-AARCH64: sub sp, x29, #64
+; CHECK-NOFP-AARCH64: ldp x29, x30, [sp, #64]
+; CHECK-NOFP-AARCH64: add sp, sp, #80
+
+; CHECK-NOFP-ARM64: mov sp, x29
+; CHECK-NOFP-ARM64: ldp x29, x30, [sp], #16
}
define void @test_alloca_large_frame(i64 %n) {
; CHECK-LABEL: test_alloca_large_frame:
-; CHECK: sub sp, sp, #496
-; CHECK: stp x29, x30, [sp, #480]
-; CHECK: add x29, sp, #480
-; CHECK: sub sp, sp, #48
-; CHECK: sub sp, sp, #1953, lsl #12
+
+; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; CHECK: sub sp, sp, #1953, lsl #12
+; CHECK: sub sp, sp, #512
%addr1 = alloca i8, i64 %n
%addr2 = alloca i64, i64 1000000
@@ -114,9 +127,10 @@ define void @test_alloca_large_frame(i64 %n) {
call void @use_addr_loc(i8* %addr1, i64* %addr2)
ret void
-; CHECK: sub sp, x29, #480
-; CHECK: ldp x29, x30, [sp, #480]
-; CHECK: add sp, sp, #496
+
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
}
declare i8* @llvm.stacksave()
@@ -124,7 +138,6 @@ declare void @llvm.stackrestore(i8*)
define void @test_scoped_alloca(i64 %n) {
; CHECK-LABEL: test_scoped_alloca:
-; CHECK: sub sp, sp, #32
%sp = call i8* @llvm.stacksave()
; CHECK: mov [[SAVED_SP:x[0-9]+]], sp
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
index 36bc2e0..6616b27 100644
--- a/test/CodeGen/AArch64/analyze-branch.ll
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -168,7 +168,7 @@ define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind {
%tst = icmp eq i64 %bit, 0
br i1 %tst, label %true, label %false, !prof !1
-; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK: tbz {{[wx][0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false
@@ -213,7 +213,7 @@ define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind {
%tst = icmp ne i64 %bit, 0
br i1 %tst, label %true, label %false, !prof !1
-; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK: tbnz {{[wx][0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: // BB#
; CHECK-NEXT: bl test_false
diff --git a/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll b/test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll
index 6fb7c3f..6fb7c3f 100644
--- a/test/CodeGen/ARM64/2011-03-09-CPSRSpill.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-09-CPSRSpill.ll
diff --git a/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index 2b083d8..2b083d8 100644
--- a/test/CodeGen/ARM64/2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
diff --git a/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
index 6f0ec34..6f0ec34 100644
--- a/test/CodeGen/ARM64/2011-03-21-Unaligned-Frame-Index.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
diff --git a/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll b/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
index 88232fc..88232fc 100644
--- a/test/CodeGen/ARM64/2011-04-21-CPSRBug.ll
+++ b/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
diff --git a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
new file mode 100644
index 0000000..8f99bc3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
+
+; Can't fold the increment by 1<<12 into a post-increment load
+; rdar://10301335
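+; (A post-indexed load on AArch64 encodes only a signed 9-bit immediate,
+; -256..255, so a 4096-byte step cannot be folded and must remain a
+; separate add, as checked below.)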
+
+@test_data = common global i32 0, align 4
+
+define void @t() nounwind ssp {
+; CHECK-LABEL: t:
+entry:
+ br label %for.body
+
+for.body:
+; CHECK: for.body
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: add x[[REG:[0-9]+]],
+; CHECK: x[[REG]], #1, lsl #12
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 12
+ %add = add nsw i64 %0, 34628173824
+ %1 = inttoptr i64 %add to i32*
+ %2 = load volatile i32* %1, align 4096
+ store volatile i32 %2, i32* @test_data, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
index d47dbb2..d47dbb2 100644
--- a/test/CodeGen/ARM64/2012-01-11-ComparisonDAGCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
diff --git a/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll b/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll
index a4d37e4..a4d37e4 100644
--- a/test/CodeGen/ARM64/2012-05-07-DAGCombineVectorExtract.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-07-DAGCombineVectorExtract.ll
diff --git a/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
index d59b0d0..d59b0d0 100644
--- a/test/CodeGen/ARM64/2012-05-07-MemcpyAlignBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
diff --git a/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
index d1840d3..d1840d3 100644
--- a/test/CodeGen/ARM64/2012-05-09-LOADgot-bug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-09-LOADgot-bug.ll
diff --git a/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
index 4b037db..4b037db 100644
--- a/test/CodeGen/ARM64/2012-05-22-LdStOptBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
new file mode 100644
index 0000000..168e921
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=arm64 -O0 < %s | FileCheck %s
+; RUN: llc -march=arm64 -O3 < %s | FileCheck %s
+
+@.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1
+@.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1
+@.str2 = private unnamed_addr constant [8 x i8] c"%f %lu\0A\00", align 1
+@.str3 = private unnamed_addr constant [7 x i8] c"%f %u\0A\00", align 1
+
+define void @testDouble(double %d) ssp {
+; CHECK-LABEL: testDouble:
+; CHECK: fcvtzu x{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}}
+entry:
+ %d.addr = alloca double, align 8
+ store double %d, double* %d.addr, align 8
+ %0 = load double* %d.addr, align 8
+ %1 = load double* %d.addr, align 8
+ %conv = fptoui double %1 to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
+ %2 = load double* %d.addr, align 8
+ %3 = load double* %d.addr, align 8
+ %conv1 = fptoui double %3 to i32
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+define void @testFloat(float %f) ssp {
+; CHECK-LABEL: testFloat:
+; CHECK: fcvtzu x{{[0-9]+}}, s{{[0-9]+}}
+; CHECK: fcvtzu w{{[0-9]+}}, s{{[0-9]+}}
+entry:
+ %f.addr = alloca float, align 4
+ store float %f, float* %f.addr, align 4
+ %0 = load float* %f.addr, align 4
+ %conv = fpext float %0 to double
+ %1 = load float* %f.addr, align 4
+ %conv1 = fptoui float %1 to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
+ %2 = load float* %f.addr, align 4
+ %conv2 = fpext float %2 to double
+ %3 = load float* %f.addr, align 4
+ %conv3 = fptoui float %3 to i32
+ %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
+ ret void
+}
+
+define i32 @main(i32 %argc, i8** %argv) ssp {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @testDouble(double 1.159198e+01)
+ call void @testFloat(float 0x40272F1800000000)
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll b/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll
index 55ecfb5..55ecfb5 100644
--- a/test/CodeGen/ARM64/2012-07-11-InstrEmitterBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll
diff --git a/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll
new file mode 100644
index 0000000..e2c43d9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2013-01-13-ffast-fcmp.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
+;FAST-LABEL: _Z9example25v:
+;FAST: fcmgt.4s
+;FAST: ret
+
+;CHECK-LABEL: _Z9example25v:
+;CHECK: fcmgt.4s
+;CHECK: ret
+
+define <4 x i32> @_Z9example25v( <4 x float> %N0, <4 x float> %N1) {
+ %A = fcmp olt <4 x float> %N0, %N1
+ %B = zext <4 x i1> %A to <4 x i32>
+ ret <4 x i32> %B
+}
diff --git a/test/CodeGen/ARM64/2013-01-23-frem-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll
index 9451124..9451124 100644
--- a/test/CodeGen/ARM64/2013-01-23-frem-crash.ll
+++ b/test/CodeGen/AArch64/arm64-2013-01-23-frem-crash.ll
diff --git a/test/CodeGen/ARM64/2013-01-23-sext-crash.ll b/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll
index 404027b..404027b 100644
--- a/test/CodeGen/ARM64/2013-01-23-sext-crash.ll
+++ b/test/CodeGen/AArch64/arm64-2013-01-23-sext-crash.ll
diff --git a/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll
new file mode 100644
index 0000000..a350ba1
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2013-02-12-shufv8i8.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple
+
+;CHECK-LABEL: Shuff:
+;CHECK: tbl.8b
+;CHECK: ret
+define <8 x i8 > @Shuff(<8 x i8> %in, <8 x i8>* %out) nounwind ssp {
+ %value = shufflevector <8 x i8> %in, <8 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i8> %value
+}
+
+
diff --git a/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll b/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll
new file mode 100644
index 0000000..a73b707
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2014-04-16-AnInfiniteLoopInDAGCombine.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=arm64
+
+; This test case covers an infinite-loop bug in the DAG combiner, which
+; kept performing the following pair of replacements endlessly:
+; (1) Replacing.3 0x2c509f0: v4i32 = any_extend 0x2c4cd08 [ORD=4]
+; With: 0x2c4d128: v4i32 = sign_extend 0x2c4cd08 [ORD=4]
+;
+; (2) Replacing.2 0x2c4d128: v4i32 = sign_extend 0x2c4cd08 [ORD=4]
+; With: 0x2c509f0: v4i32 = any_extend 0x2c4cd08 [ORD=4]
+; Since optimization (2), from SIGN_EXTEND to ANY_EXTEND, exists to replace
+; unused bits with undefined bits, we remove optimization (1) instead: it
+; doesn't make sense to replace undefined bits with signed bits.
+
+define <4 x i32> @infiniteLoop(<4 x i32> %in0, <4 x i16> %in1) {
+entry:
+ %cmp.i = icmp sge <4 x i16> %in1, <i16 32767, i16 32767, i16 -1, i16 -32768>
+ %sext.i = sext <4 x i1> %cmp.i to <4 x i32>
+ %mul.i = mul <4 x i32> %in0, %sext.i
+ %sext = shl <4 x i32> %mul.i, <i32 16, i32 16, i32 16, i32 16>
+ %vmovl.i.i = ashr <4 x i32> %sext, <i32 16, i32 16, i32 16, i32 16>
+ ret <4 x i32> %vmovl.i.i
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll b/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll
new file mode 100644
index 0000000..3949b85
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2014-04-28-sqshl-uqshl-i64Contant.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -verify-machineinstrs -march=arm64 | FileCheck %s
+
+; Check that sqshl/uqshl with a constant shift amount can be selected.
+define i64 @test_vqshld_s64_i(i64 %a) {
+; CHECK-LABEL: test_vqshld_s64_i:
+; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
+ %1 = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 36)
+ ret i64 %1
+}
+
+define i64 @test_vqshld_u64_i(i64 %a) {
+; CHECK-LABEL: test_vqshld_u64_i:
+; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
+ %1 = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 36)
+ ret i64 %1
+}
+
+declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64)
+declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64)
diff --git a/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll
new file mode 100644
index 0000000..1b2d543
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-2014-04-29-EXT-undef-mask.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; The following test cases check shufflevector with a leading UNDEF mask.
+define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) {
+;CHECK-LABEL: test_vext_undef_traverse:
+;CHECK: {{ext.16b.*v0, #4}}
+ %vext = shufflevector <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0>, <8 x i16> %in, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9>
+ ret <8 x i16> %vext
+}
+
+define <8 x i16> @test_vext_undef_traverse2(<8 x i16> %in) {
+;CHECK-LABEL: test_vext_undef_traverse2:
+;CHECK: {{ext.16b.*v0, #6}}
+ %vext = shufflevector <8 x i16> %in, <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2>
+ ret <8 x i16> %vext
+}
+
+define <8 x i8> @test_vext_undef_traverse3(<8 x i8> %in) {
+;CHECK-LABEL: test_vext_undef_traverse3:
+;CHECK: {{ext.8b.*v0, #6}}
+ %vext = shufflevector <8 x i8> %in, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
+ ret <8 x i8> %vext
+}
diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
new file mode 100644
index 0000000..c4597d5
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC
+
+define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: bar:
+; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
+; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
+; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
+; GENERIC-LABEL: bar:
+; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
+; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
+; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
+ %add = add <2 x i64> %a, %b
+ %vgetq_lane = extractelement <2 x i64> %add, i32 0
+ %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
+ %add3 = add i64 %vgetq_lane, %vgetq_lane2
+ %sub = sub i64 %vgetq_lane, %vgetq_lane2
+ %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
+ %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
+ ret <2 x i64> %vecinit8
+}
+
+define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: subdd_su64:
+; CHECK: sub d0, d1, d0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: subdd_su64:
+; GENERIC: sub d0, d1, d0
+; GENERIC-NEXT: ret
+ %vecext = extractelement <2 x i64> %a, i32 0
+ %vecext1 = extractelement <2 x i64> %b, i32 0
+ %sub.i = sub nsw i64 %vecext1, %vecext
+ %retval = bitcast i64 %sub.i to double
+ ret double %retval
+}
+
+define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: vaddd_su64:
+; CHECK: add d0, d1, d0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: vaddd_su64:
+; GENERIC: add d0, d1, d0
+; GENERIC-NEXT: ret
+ %vecext = extractelement <2 x i64> %a, i32 0
+ %vecext1 = extractelement <2 x i64> %b, i32 0
+ %add.i = add nsw i64 %vecext1, %vecext
+ %retval = bitcast i64 %add.i to double
+ ret double %retval
+}
+
+; sub MI doesn't access dsub register.
+define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: add_sub_su64:
+; CHECK: add d0, d1, d0
+; CHECK: sub d0, {{d[0-9]+}}, d0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: add_sub_su64:
+; GENERIC: add d0, d1, d0
+; GENERIC: sub d0, {{d[0-9]+}}, d0
+; GENERIC-NEXT: ret
+ %vecext = extractelement <2 x i64> %a, i32 0
+ %vecext1 = extractelement <2 x i64> %b, i32 0
+ %add.i = add i64 %vecext1, %vecext
+ %sub.i = sub i64 0, %add.i
+ %retval = bitcast i64 %sub.i to double
+ ret double %retval
+}
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
new file mode 100644
index 0000000..b713f0d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -0,0 +1,103 @@
+; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s
+
+@var = global i32 0, align 4
+
+define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
+ store i32 %after, i32* @var, align 4
+; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
+
+ ret i128 %arg
+; CHECK: mov x0, x2
+; CHECK: mov x1, x3
+}
+
+@var64 = global i64 0, align 8
+
+ ; Check stack slots are 64-bit at all times.
+define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
+ i32 %int, i64 %long) {
+ ; Part of last store. Blasted scheduler.
+; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
+
+ %ext_bool = zext i1 %bool to i64
+ store volatile i64 %ext_bool, i64* @var64, align 8
+; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
+; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
+; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
+
+ %ext_char = zext i8 %char to i64
+ store volatile i64 %ext_char, i64* @var64, align 8
+; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8]
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ %ext_short = zext i16 %short to i64
+ store volatile i64 %ext_short, i64* @var64, align 8
+; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16]
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ %ext_int = zext i32 %int to i64
+ store volatile i64 %ext_int, i64* @var64, align 8
+; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ store volatile i64 %long, i64* @var64, align 8
+; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64]
+
+ ret void
+}
+
+; Make sure the callee does extensions (in the absence of zext/sext
+; keyword on args) while we're here.
+
+define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
+ %ext_bool = zext i1 %bool to i64
+ store volatile i64 %ext_bool, i64* @var64
+; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
+; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ %ext_char = sext i8 %char to i64
+ store volatile i64 %ext_char, i64* @var64
+; CHECK: sxtb [[EXT:x[0-9]+]], w1
+; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ %ext_short = zext i16 %short to i64
+ store volatile i64 %ext_short, i64* @var64
+; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
+; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ %ext_int = zext i32 %int to i64
+ store volatile i64 %ext_int, i64* @var64
+; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32
+; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+
+ ret void
+}
+
+declare void @variadic(i32 %a, ...)
+
+ ; Under AAPCS, variadic functions have the same calling convention as
+ ; non-variadic ones: the extra arguments go in registers rather than on the stack.
+define void @test_variadic() {
+ call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0)
+; CHECK: fmov d0, #2.0
+; CHECK: orr w1, wzr, #0x1
+; CHECK: bl variadic
+ ret void
+}
+
+; We weren't marking x7 as used after deciding that the i128 didn't fit into
+; registers and putting the first half on the stack, so the *second* half went
+; into x7. Yuck!
+define i128 @test_i128_shadow([7 x i64] %x0_x6, i128 %sp) {
+; CHECK-LABEL: test_i128_shadow:
+; CHECK: ldp x0, x1, [sp]
+
+ ret i128 %sp
+}
+
+; This test checks that an fp128 argument is correctly passed on the stack.
+define fp128 @test_fp128([8 x float] %arg0, fp128 %arg1) {
+; CHECK-LABEL: test_fp128:
+; CHECK: ldr {{q[0-9]+}}, [sp]
+ ret fp128 %arg1
+}
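+
+; As a reading aid, the stack layout implied by the checks in test_stack_slots
+; above (a sketch inferred from the ldrb/ldrh/ldr offsets, not part of the
+; test): under AAPCS every stack-passed argument occupies a full 8-byte slot,
+; regardless of its type.
+;   [sp]      i1  %bool
+;   [sp, #8]  i8  %char
+;   [sp, #16] i16 %short
+;   [sp, #24] i32 %int
+;   [sp, #32] i64 %long
+; The Darwin tests elsewhere in this patch (arm64-abi.ll) instead pack stack
+; arguments to their natural alignment.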
diff --git a/test/CodeGen/ARM64/abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 92db392..92db392 100644
--- a/test/CodeGen/ARM64/abi-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
new file mode 100644
index 0000000..e2de434
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -0,0 +1,238 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
+; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
+target triple = "arm64-apple-darwin"
+
+; rdar://9932559
+define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline {
+entry:
+; CHECK-LABEL: i8i16callee:
+; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
+; They are i8, i16, i8 and i8.
+; CHECK: ldrsb {{w[0-9]+}}, [sp, #5]
+; CHECK: ldrsh {{w[0-9]+}}, [sp, #2]
+; CHECK: ldrsb {{w[0-9]+}}, [sp]
+; CHECK: ldrsb {{w[0-9]+}}, [sp, #4]
+; FAST-LABEL: i8i16callee:
+; FAST: ldrb {{w[0-9]+}}, [sp, #5]
+; FAST: ldrb {{w[0-9]+}}, [sp, #4]
+; FAST: ldrh {{w[0-9]+}}, [sp, #2]
+; FAST: ldrb {{w[0-9]+}}, [sp]
+ %conv = sext i8 %a4 to i64
+ %conv3 = sext i16 %a5 to i64
+ %conv8 = sext i8 %b1 to i64
+ %conv9 = sext i16 %b2 to i64
+ %conv11 = sext i8 %b3 to i64
+ %conv13 = sext i8 %b4 to i64
+ %add10 = add i64 %a2, %a1
+ %add12 = add i64 %add10, %a3
+ %add14 = add i64 %add12, %conv
+ %add = add i64 %add14, %conv3
+ %add1 = add i64 %add, %a6
+ %add2 = add i64 %add1, %a7
+ %add4 = add i64 %add2, %a8
+ %add5 = add i64 %add4, %conv8
+ %add6 = add i64 %add5, %conv9
+ %add7 = add i64 %add6, %conv11
+ %add15 = add i64 %add7, %conv13
+ %sext = shl i64 %add15, 32
+ %conv17 = ashr exact i64 %sext, 32
+ ret i64 %conv17
+}
+
+define i32 @i8i16caller() nounwind readnone {
+entry:
+; CHECK: i8i16caller
+; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
+; They are i8, i16, i8 and i8.
+; CHECK: strb {{w[0-9]+}}, [sp, #5]
+; CHECK: strb {{w[0-9]+}}, [sp, #4]
+; CHECK: strh {{w[0-9]+}}, [sp, #2]
+; CHECK: strb {{w[0-9]+}}, [sp]
+; CHECK: bl
+; FAST: i8i16caller
+; FAST: strb {{w[0-9]+}}, [sp]
+; FAST: strh {{w[0-9]+}}, [sp, #2]
+; FAST: strb {{w[0-9]+}}, [sp, #4]
+; FAST: strb {{w[0-9]+}}, [sp, #5]
+; FAST: bl
+ %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 signext 97, i16 signext 98, i8 signext 99, i8 signext 100)
+ %conv = trunc i64 %call to i32
+ ret i32 %conv
+}
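+
+; For contrast with AAPCS, Darwin packs stack arguments to their natural
+; alignment, which is how the four extra arguments above fit into six bytes.
+; A sketch of the layout implied by the strb/strh offsets (not part of the
+; test itself):
+;   [sp]     i8  97  (%b1)
+;   [sp, #2] i16 98  (%b2)
+;   [sp, #4] i8  99  (%b3)
+;   [sp, #5] i8  100 (%b4)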
+
+; rdar://12651543
+define double @circle_center([2 x float] %a) nounwind ssp {
+ %call = tail call double @ext([2 x float] %a) nounwind
+; CHECK: circle_center
+; CHECK: bl
+ ret double %call
+}
+declare double @ext([2 x float])
+
+; rdar://12656141
+; A 16-byte vector should be 16-byte aligned when passed on the stack.
+; A double argument will be passed on the stack, so the vector should be at sp+16.
+define double @fixed_4i(<4 x i32>* nocapture %in) nounwind {
+entry:
+; CHECK: fixed_4i
+; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
+; FAST: fixed_4i
+; FAST: sub sp, sp, #64
+; FAST: mov x[[ADDR:[0-9]+]], sp
+; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
+ %0 = load <4 x i32>* %in, align 16
+ %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
+ ret double %call
+}
+declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, double, <4 x i32>, i8 signext)
+
+; rdar://12695237
+; d8 at sp, i in register w0.
+@g_d = common global double 0.000000e+00, align 8
+define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4,
+ double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp {
+entry:
+; CHECK: test1
+; CHECK: ldr [[REG_1:d[0-9]+]], [sp]
+; CHECK: scvtf [[REG_2:s[0-9]+]], w0
+; CHECK: fadd s0, [[REG_2]], s0
+ %conv = sitofp i32 %i to float
+ %add = fadd float %conv, %f1
+ %conv1 = fpext float %add to double
+ %add2 = fadd double %conv1, %d7
+ %add3 = fadd double %add2, %d8
+ store double %add3, double* @g_d, align 8
+ ret void
+}
+
+; i9 at sp, d1 in register s0.
+define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp {
+entry:
+; CHECK: test2
+; CHECK: scvtf [[REG_2:s[0-9]+]], w0
+; CHECK: fadd s0, [[REG_2]], s0
+; CHECK: ldr [[REG_1:s[0-9]+]], [sp]
+ %conv = sitofp i32 %i1 to float
+ %add = fadd float %conv, %d1
+ %conv1 = fpext float %add to double
+ %conv2 = sitofp i32 %i8 to double
+ %add3 = fadd double %conv2, %conv1
+ %conv4 = sitofp i32 %i9 to double
+ %add5 = fadd double %conv4, %add3
+ store double %add5, double* @g_d, align 8
+ ret void
+}
+
+; rdar://12648441
+; Check alignment on stack for v64, f64, i64, f32, i32.
+define double @test3(<2 x i32>* nocapture %in) nounwind {
+entry:
+; CHECK: test3
+; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
+; FAST: test3
+; FAST: sub sp, sp, #32
+; FAST: mov x[[ADDR:[0-9]+]], sp
+; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
+ %0 = load <2 x i32>* %in, align 8
+ %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0,
+ <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0,
+ <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3)
+ ret double %call
+}
+declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>,
+ <2 x i32>, <2 x i32>, <2 x i32>, float, <2 x i32>, i8 signext)
+
+define double @test4(double* nocapture %in) nounwind {
+entry:
+; CHECK: test4
+; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
+; CHECK: str [[REG_2:w[0-9]+]], [sp]
+; CHECK: orr w0, wzr, #0x3
+ %0 = load double* %in, align 8
+ %call = tail call double @args_f64(double 3.000000e+00, double %0, double %0,
+ double %0, double %0, double %0, double %0, double %0,
+ float 3.000000e+00, double %0, i8 signext 3)
+ ret double %call
+}
+declare double @args_f64(double, double, double, double, double, double, double,
+ double, float, double, i8 signext)
+
+define i64 @test5(i64* nocapture %in) nounwind {
+entry:
+; CHECK: test5
+; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
+; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
+; CHECK: str [[REG_2:w[0-9]+]], [sp]
+ %0 = load i64* %in, align 8
+ %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0,
+ i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3)
+ ret i64 %call
+}
+declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64,
+ i8 signext)
+
+define i32 @test6(float* nocapture %in) nounwind {
+entry:
+; CHECK: test6
+; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
+; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
+; CHECK: strh [[REG_3:w[0-9]+]], [sp]
+ %0 = load float* %in, align 4
+ %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
+ i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
+ float 6.0, float 7.0, float 8.0, i16 signext 3, float %0,
+ i8 signext 3)
+ ret i32 %call
+}
+declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32,
+ float, float, float, float, float, float, float, float,
+ i16 signext, float, i8 signext)
+
+define i32 @test7(i32* nocapture %in) nounwind {
+entry:
+; CHECK: test7
+; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
+; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
+; CHECK: strh [[REG_3:w[0-9]+]], [sp]
+ %0 = load i32* %in, align 4
+ %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0,
+ i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4)
+ ret i32 %call
+}
+declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32,
+ i8 signext)
+
+define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+; CHECK: test8
+; CHECK: strb {{w[0-9]+}}, [sp, #3]
+; CHECK: strb wzr, [sp, #2]
+; CHECK: strb {{w[0-9]+}}, [sp, #1]
+; CHECK: strb wzr, [sp]
+; CHECK: bl
+; FAST: test8
+; FAST: strb {{w[0-9]+}}, [sp]
+; FAST: strb {{w[0-9]+}}, [sp, #1]
+; FAST: strb {{w[0-9]+}}, [sp, #2]
+; FAST: strb {{w[0-9]+}}, [sp, #3]
+; FAST: bl
+ tail call void @args_i1(i1 zeroext false, i1 zeroext true, i1 zeroext false,
+ i1 zeroext true, i1 zeroext false, i1 zeroext true,
+ i1 zeroext false, i1 zeroext true, i1 zeroext false,
+ i1 zeroext true, i1 zeroext false, i1 zeroext true)
+ ret i32 0
+}
+
+declare void @args_i1(i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
+ i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
+ i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext)
+
+define i32 @i1_stack_incoming(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
+ i64 %g, i64 %h, i64 %i, i1 zeroext %j) {
+; CHECK-LABEL: i1_stack_incoming:
+; CHECK: ldrb w0, [sp, #8]
+; CHECK: ret
+ %v = zext i1 %j to i32
+ ret i32 %v
+}
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
new file mode 100644
index 0000000..44c5a07
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -0,0 +1,532 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
+; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
+target triple = "arm64-apple-darwin"
+
+; rdar://12648441
+; Generated from arm64-arguments.c with -O2.
+; Test passing structs with size < 8, < 16 and > 16
+; with alignment of 16 and without
+
+; Structs with size < 8
+%struct.s38 = type { i32, i16 }
+; With alignment of 16, the size will be padded to a multiple of 16 bytes.
+%struct.s39 = type { i32, i16, [10 x i8] }
+; Structs with size < 16
+%struct.s40 = type { i32, i16, i32, i16 }
+%struct.s41 = type { i32, i16, i32, i16 }
+; Structs with size > 16
+%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
+%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
+
+@g38 = common global %struct.s38 zeroinitializer, align 4
+@g38_2 = common global %struct.s38 zeroinitializer, align 4
+@g39 = common global %struct.s39 zeroinitializer, align 16
+@g39_2 = common global %struct.s39 zeroinitializer, align 16
+@g40 = common global %struct.s40 zeroinitializer, align 4
+@g40_2 = common global %struct.s40 zeroinitializer, align 4
+@g41 = common global %struct.s41 zeroinitializer, align 16
+@g41_2 = common global %struct.s41 zeroinitializer, align 16
+@g42 = common global %struct.s42 zeroinitializer, align 4
+@g42_2 = common global %struct.s42 zeroinitializer, align 4
+@g43 = common global %struct.s43 zeroinitializer, align 16
+@g43_2 = common global %struct.s43 zeroinitializer, align 16
+
+; structs with size < 8 bytes, passed via i64 in x1 and x2
+define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
+entry:
+; CHECK: f38
+; CHECK: add w[[A:[0-9]+]], w1, w0
+; CHECK: add {{w[0-9]+}}, w[[A]], w2
+ %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
+ %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
+ %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
+ %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
+ %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
+ %sext = trunc i64 %sext8 to i32
+ %conv = ashr exact i32 %sext, 16
+ %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
+ %sext10 = trunc i64 %sext1011 to i32
+ %conv6 = ashr exact i32 %sext10, 16
+ %add = add i32 %s1.sroa.0.0.extract.trunc, %i
+ %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
+ %add4 = add i32 %add3, %conv
+ %add7 = add i32 %add4, %conv6
+ ret i32 %add7
+}
+
+define i32 @caller38() #1 {
+entry:
+; CHECK: caller38
+; CHECK: ldr x1,
+; CHECK: ldr x2,
+ %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
+ %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
+ %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
+ ret i32 %call
+}
+
+declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
+
+; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
+; i9 at [sp]
+define i32 @caller38_stack() #1 {
+entry:
+; CHECK: caller38_stack
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
+; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: str w[[C]], [sp]
+ %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
+ %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
+ %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
+ i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
+ ret i32 %call
+}
+
+; structs with size < 8 bytes, alignment of 16
+; passed via i128 in x1 and x3
+define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
+entry:
+; CHECK: f39
+; CHECK: add w[[A:[0-9]+]], w1, w0
+; CHECK: add {{w[0-9]+}}, w[[A]], w3
+ %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
+ %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
+ %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
+ %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
+ %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
+ %sext = trunc i128 %sext8 to i32
+ %conv = ashr exact i32 %sext, 16
+ %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
+ %sext10 = trunc i128 %sext1011 to i32
+ %conv6 = ashr exact i32 %sext10, 16
+ %add = add i32 %s1.sroa.0.0.extract.trunc, %i
+ %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
+ %add4 = add i32 %add3, %conv
+ %add7 = add i32 %add4, %conv6
+ ret i32 %add7
+}
+
+define i32 @caller39() #1 {
+entry:
+; CHECK: caller39
+; CHECK: ldp x1, x2,
+; CHECK: ldp x3, x4,
+ %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
+ %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
+ %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
+ ret i32 %call
+}
+
+declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
+
+; structs with size < 8 bytes, alignment 16
+; passed on stack at [sp+16] and [sp+32]
+define i32 @caller39_stack() #1 {
+entry:
+; CHECK: caller39_stack
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: str w[[C]], [sp]
+ %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
+ %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
+ %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
+ i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
+ ret i32 %call
+}
+
+; structs with size < 16 bytes
+; passed via i128 in x1 and x3
+define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
+entry:
+; CHECK: f40
+; CHECK: add w[[A:[0-9]+]], w1, w0
+; CHECK: add {{w[0-9]+}}, w[[A]], w3
+ %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
+ %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
+ %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
+ %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
+ %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
+ %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
+ %sext = trunc i64 %sext8 to i32
+ %conv = ashr exact i32 %sext, 16
+ %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
+ %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
+ %sext10 = trunc i64 %sext1011 to i32
+ %conv6 = ashr exact i32 %sext10, 16
+ %add = add i32 %s1.sroa.0.0.extract.trunc, %i
+ %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
+ %add4 = add i32 %add3, %conv
+ %add7 = add i32 %add4, %conv6
+ ret i32 %add7
+}
+
+define i32 @caller40() #1 {
+entry:
+; CHECK: caller40
+; CHECK: ldp x1, x2,
+; CHECK: ldp x3, x4,
+ %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+ %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
+ %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
+ ret i32 %call
+}
+
+declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
+
+; structs with size < 16 bytes
+; passed on stack at [sp+8] and [sp+24]
+define i32 @caller40_stack() #1 {
+entry:
+; CHECK: caller40_stack
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
+; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: str w[[C]], [sp]
+ %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+ %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
+ %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
+ i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
+ ret i32 %call
+}
+
+; structs with size < 16 bytes, alignment of 16
+; passed via i128 in x1 and x3
+define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
+entry:
+; CHECK: f41
+; CHECK: add w[[A:[0-9]+]], w1, w0
+; CHECK: add {{w[0-9]+}}, w[[A]], w3
+ %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
+ %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
+ %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
+ %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
+ %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
+ %sext = trunc i128 %sext8 to i32
+ %conv = ashr exact i32 %sext, 16
+ %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
+ %sext10 = trunc i128 %sext1011 to i32
+ %conv6 = ashr exact i32 %sext10, 16
+ %add = add i32 %s1.sroa.0.0.extract.trunc, %i
+ %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
+ %add4 = add i32 %add3, %conv
+ %add7 = add i32 %add4, %conv6
+ ret i32 %add7
+}
+
+define i32 @caller41() #1 {
+entry:
+; CHECK: caller41
+; CHECK: ldp x1, x2,
+; CHECK: ldp x3, x4,
+ %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
+ %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
+ %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
+ ret i32 %call
+}
+
+declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
+
+; structs with size < 16 bytes, alignment of 16
+; passed on stack at [sp+16] and [sp+32]
+define i32 @caller41_stack() #1 {
+entry:
+; CHECK: caller41_stack
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: str w[[C]], [sp]
+ %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
+ %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
+ %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
+ i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
+ ret i32 %call
+}
+
+; structs with size of 22 bytes, passed indirectly in x1 and x2
+define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
+entry:
+; CHECK: f42
+; CHECK: ldr w[[A:[0-9]+]], [x1]
+; CHECK: ldr w[[B:[0-9]+]], [x2]
+; CHECK: add w[[C:[0-9]+]], w[[A]], w0
+; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
+; FAST: f42
+; FAST: ldr w[[A:[0-9]+]], [x1]
+; FAST: ldr w[[B:[0-9]+]], [x2]
+; FAST: add w[[C:[0-9]+]], w[[A]], w0
+; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
+ %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
+ %0 = load i32* %i1, align 4, !tbaa !0
+ %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
+ %1 = load i32* %i2, align 4, !tbaa !0
+ %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
+ %2 = load i16* %s, align 2, !tbaa !3
+ %conv = sext i16 %2 to i32
+ %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
+ %3 = load i16* %s5, align 2, !tbaa !3
+ %conv6 = sext i16 %3 to i32
+ %add = add i32 %0, %i
+ %add3 = add i32 %add, %1
+ %add4 = add i32 %add3, %conv
+ %add7 = add i32 %add4, %conv6
+ ret i32 %add7
+}
+
+; For s1, we allocate a 22-byte space and pass its address via x1.
+define i32 @caller42() #3 {
+entry:
+; CHECK: caller42
+; CHECK: str {{x[0-9]+}}, [sp, #48]
+; CHECK: str {{q[0-9]+}}, [sp, #32]
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK: str {{q[0-9]+}}, [sp]
+; CHECK: add x1, sp, #32
+; CHECK: mov x2, sp
+; Space for s1 is allocated at sp+32
+; Space for s2 is allocated at sp
+
+; FAST: caller42
+; FAST: sub sp, sp, #96
+; Space for s1 is allocated at fp-24 = sp+72
+; Space for s2 is allocated at sp+48
+; FAST: sub x[[A:[0-9]+]], x29, #24
+; FAST: add x[[A:[0-9]+]], sp, #48
+; Call memcpy with size = 24 (0x18)
+; FAST: orr {{x[0-9]+}}, xzr, #0x18
+ %tmp = alloca %struct.s42, align 4
+ %tmp1 = alloca %struct.s42, align 4
+ %0 = bitcast %struct.s42* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+ %1 = bitcast %struct.s42* %tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+ %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
+ ret i32 %call
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
+
+declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
+ %struct.s42* nocapture %s2) #2
+
+define i32 @caller42_stack() #3 {
+entry:
+; CHECK: caller42_stack
+; CHECK: mov x29, sp
+; CHECK: sub sp, sp, #96
+; CHECK: stur {{x[0-9]+}}, [x29, #-16]
+; CHECK: stur {{q[0-9]+}}, [x29, #-32]
+; CHECK: str {{x[0-9]+}}, [sp, #48]
+; CHECK: str {{q[0-9]+}}, [sp, #32]
+; Space for s1 is allocated at x29-32 = sp+64
+; Space for s2 is allocated at sp+32
+; CHECK: add x[[B:[0-9]+]], sp, #32
+; CHECK: str x[[B]], [sp, #16]
+; CHECK: sub x[[A:[0-9]+]], x29, #32
+; Address of s1 is passed on stack at sp+8
+; CHECK: str x[[A]], [sp, #8]
+; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: str w[[C]], [sp]
+
+; FAST: caller42_stack
+; Space for s1 is allocated at fp-24
+; Space for s2 is allocated at fp-48
+; FAST: sub x[[A:[0-9]+]], x29, #24
+; FAST: sub x[[B:[0-9]+]], x29, #48
+; Call memcpy with size = 24 (0x18)
+; FAST: orr {{x[0-9]+}}, xzr, #0x18
+; FAST: str {{w[0-9]+}}, [sp]
+; Address of s1 is passed on stack at sp+8
+; FAST: str {{x[0-9]+}}, [sp, #8]
+; FAST: str {{x[0-9]+}}, [sp, #16]
+ %tmp = alloca %struct.s42, align 4
+ %tmp1 = alloca %struct.s42, align 4
+ %0 = bitcast %struct.s42* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+ %1 = bitcast %struct.s42* %tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
+ %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
+ ret i32 %call
+}
+
+; structs with size of 22 bytes, alignment of 16
+; passed indirectly in x1 and x2
+define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
+entry:
+; CHECK: f43
+; CHECK: ldr w[[A:[0-9]+]], [x1]
+; CHECK: ldr w[[B:[0-9]+]], [x2]
+; CHECK: add w[[C:[0-9]+]], w[[A]], w0
+; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
+; FAST: f43
+; FAST: ldr w[[A:[0-9]+]], [x1]
+; FAST: ldr w[[B:[0-9]+]], [x2]
+; FAST: add w[[C:[0-9]+]], w[[A]], w0
+; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
+ %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
+ %0 = load i32* %i1, align 4, !tbaa !0
+ %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
+ %1 = load i32* %i2, align 4, !tbaa !0
+ %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
+ %2 = load i16* %s, align 2, !tbaa !3
+ %conv = sext i16 %2 to i32
+ %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
+ %3 = load i16* %s5, align 2, !tbaa !3
+ %conv6 = sext i16 %3 to i32
+ %add = add i32 %0, %i
+ %add3 = add i32 %add, %1
+ %add4 = add i32 %add3, %conv
+ %add7 = add i32 %add4, %conv6
+ ret i32 %add7
+}
+
+define i32 @caller43() #3 {
+entry:
+; CHECK: caller43
+; CHECK: str {{q[0-9]+}}, [sp, #48]
+; CHECK: str {{q[0-9]+}}, [sp, #32]
+; CHECK: str {{q[0-9]+}}, [sp, #16]
+; CHECK: str {{q[0-9]+}}, [sp]
+; CHECK: add x1, sp, #32
+; CHECK: mov x2, sp
+; Space for s1 is allocated at sp+32
+; Space for s2 is allocated at sp
+
+; FAST: caller43
+; FAST: mov x29, sp
+; Space for s1 is allocated at sp+32
+; Space for s2 is allocated at sp
+; FAST: add x1, sp, #32
+; FAST: mov x2, sp
+; FAST: str {{x[0-9]+}}, [sp, #32]
+; FAST: str {{x[0-9]+}}, [sp, #40]
+; FAST: str {{x[0-9]+}}, [sp, #48]
+; FAST: str {{x[0-9]+}}, [sp, #56]
+; FAST: str {{x[0-9]+}}, [sp]
+; FAST: str {{x[0-9]+}}, [sp, #8]
+; FAST: str {{x[0-9]+}}, [sp, #16]
+; FAST: str {{x[0-9]+}}, [sp, #24]
+ %tmp = alloca %struct.s43, align 16
+ %tmp1 = alloca %struct.s43, align 16
+ %0 = bitcast %struct.s43* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+ %1 = bitcast %struct.s43* %tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+ %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
+ ret i32 %call
+}
+
+declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
+ i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
+ %struct.s43* nocapture %s2) #2
+
+define i32 @caller43_stack() #3 {
+entry:
+; CHECK: caller43_stack
+; CHECK: mov x29, sp
+; CHECK: sub sp, sp, #96
+; CHECK: stur {{q[0-9]+}}, [x29, #-16]
+; CHECK: stur {{q[0-9]+}}, [x29, #-32]
+; CHECK: str {{q[0-9]+}}, [sp, #48]
+; CHECK: str {{q[0-9]+}}, [sp, #32]
+; Space for s1 is allocated at x29-32 = sp+64
+; Space for s2 is allocated at sp+32
+; CHECK: add x[[B:[0-9]+]], sp, #32
+; CHECK: str x[[B]], [sp, #16]
+; CHECK: sub x[[A:[0-9]+]], x29, #32
+; Address of s1 is passed on stack at sp+8
+; CHECK: str x[[A]], [sp, #8]
+; CHECK: movz w[[C:[0-9]+]], #0x9
+; CHECK: str w[[C]], [sp]
+
+; FAST: caller43_stack
+; FAST: sub sp, sp, #96
+; Space for s1 is allocated at fp-32 = sp+64
+; Space for s2 is allocated at sp+32
+; FAST: sub x[[A:[0-9]+]], x29, #32
+; FAST: add x[[B:[0-9]+]], sp, #32
+; FAST: stur {{x[0-9]+}}, [x29, #-32]
+; FAST: stur {{x[0-9]+}}, [x29, #-24]
+; FAST: stur {{x[0-9]+}}, [x29, #-16]
+; FAST: stur {{x[0-9]+}}, [x29, #-8]
+; FAST: str {{x[0-9]+}}, [sp, #32]
+; FAST: str {{x[0-9]+}}, [sp, #40]
+; FAST: str {{x[0-9]+}}, [sp, #48]
+; FAST: str {{x[0-9]+}}, [sp, #56]
+; FAST: str {{w[0-9]+}}, [sp]
+; Address of s1 is passed on stack at sp+8
+; FAST: str {{x[0-9]+}}, [sp, #8]
+; FAST: str {{x[0-9]+}}, [sp, #16]
+ %tmp = alloca %struct.s43, align 16
+ %tmp1 = alloca %struct.s43, align 16
+ %0 = bitcast %struct.s43* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+ %1 = bitcast %struct.s43* %tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
+ %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
+ ret i32 %call
+}
+
+; rdar://13668927
+; Check that we don't split an i128.
+declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
+ i32 %i6, i32 %i7, i128 %s1, i32 %i8)
+
+define i32 @i128_split() {
+entry:
+; CHECK: i128_split
+; "i128 %0" should be on stack at [sp].
+; "i32 8" should be on stack at [sp, #16].
+; CHECK: str {{w[0-9]+}}, [sp, #16]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
+; FAST: i128_split
+; FAST: sub sp, sp, #48
+; FAST: mov x[[ADDR:[0-9]+]], sp
+; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
+; Load/Store opt is disabled with -O0, so the i128 is split.
+; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
+; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
+ %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
+ %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
+ i32 6, i32 7, i128 %0, i32 8) #5
+ ret i32 %call
+}
+
+declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
+ i32 %i6, i32 %i7, i64 %s1, i32 %i8)
+
+define i32 @i64_split() {
+entry:
+; CHECK: i64_split
+; "i64 %0" should be in register x7.
+; "i32 8" should be on stack at [sp].
+; CHECK: ldr x7, [{{x[0-9]+}}]
+; CHECK: str {{w[0-9]+}}, [sp]
+; FAST: i64_split
+; FAST: ldr x7, [{{x[0-9]+}}]
+; FAST: str {{w[0-9]+}}, [sp]
+ %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
+ %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
+ i32 6, i32 7, i64 %0, i32 8) #5
+ ret i32 %call
+}
+
+attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #4 = { nounwind }
+attributes #5 = { nobuiltin }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
+!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}
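+
+; For orientation, a hedged C-level reconstruction of the structs exercised
+; above; the field names and the aligned(16) attributes are inferred from the
+; IR type definitions, not taken from arm64-arguments.c itself:
+;   struct s38 { int i; short s; };                                // size 6
+;   struct s39 { int i; short s; } __attribute__((aligned(16)));   // padded to 16
+;   struct s40 { int i; short s; int i2; short s2; };              // size 12
+;   struct s41 { int i; short s; int i2; short s2; } __attribute__((aligned(16)));
+;   struct s42 { int i; short s; int i2; short s2; int i3; short s3; };  // size 22, sizeof 24
+;   struct s43 { int i; short s; int i2; short s2; int i3; short s3; } __attribute__((aligned(16))); // sizeof 32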
diff --git a/test/CodeGen/AArch64/arm64-addp.ll b/test/CodeGen/AArch64/arm64-addp.ll
new file mode 100644
index 0000000..3f1e5c5
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-addp.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+
+define double @foo(<2 x double> %a) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: faddp.2d d0, v0
+; CHECK-NEXT: ret
+ %lane0.i = extractelement <2 x double> %a, i32 0
+ %lane1.i = extractelement <2 x double> %a, i32 1
+ %vpaddd.i = fadd double %lane0.i, %lane1.i
+ ret double %vpaddd.i
+}
+
+define i64 @foo0(<2 x i64> %a) nounwind {
+; CHECK-LABEL: foo0:
+; CHECK: addp.2d d0, v0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+ %lane0.i = extractelement <2 x i64> %a, i32 0
+ %lane1.i = extractelement <2 x i64> %a, i32 1
+ %vpaddd.i = add i64 %lane0.i, %lane1.i
+ ret i64 %vpaddd.i
+}
+
+define float @foo1(<2 x float> %a) nounwind {
+; CHECK-LABEL: foo1:
+; CHECK: faddp.2s
+; CHECK-NEXT: ret
+ %lane0.i = extractelement <2 x float> %a, i32 0
+ %lane1.i = extractelement <2 x float> %a, i32 1
+ %vpaddd.i = fadd float %lane0.i, %lane1.i
+ ret float %vpaddd.i
+}
diff --git a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
new file mode 100644
index 0000000..08fb8c9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -0,0 +1,171 @@
+; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
+; <rdar://problem/13621857>
+
+@block = common global i8* null, align 8
+
+define i32 @fct(i32 %i1, i32 %i2) {
+; CHECK: @fct
+; The sign extension is used more than once, so folding it into each
+; addressing mode would duplicate the extension work; it should not be folded.
+; However, CodeGenPrepare does not share the sext across uses, so it is
+; currently folded anyway, which is why the check below is disabled.
+; _CHECK-NOT_: , sxtw]
+entry:
+ %idxprom = sext i32 %i1 to i64
+ %0 = load i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
+ %1 = load i8* %arrayidx, align 1
+ %idxprom1 = sext i32 %i2 to i64
+ %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
+ %2 = load i8* %arrayidx2, align 1
+ %cmp = icmp eq i8 %1, %2
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %cmp7 = icmp ugt i8 %1, %2
+ %conv8 = zext i1 %cmp7 to i32
+ br label %return
+
+if.end: ; preds = %entry
+ %inc = add nsw i32 %i1, 1
+ %inc9 = add nsw i32 %i2, 1
+ %idxprom10 = sext i32 %inc to i64
+ %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
+ %3 = load i8* %arrayidx11, align 1
+ %idxprom12 = sext i32 %inc9 to i64
+ %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
+ %4 = load i8* %arrayidx13, align 1
+ %cmp16 = icmp eq i8 %3, %4
+ br i1 %cmp16, label %if.end23, label %if.then18
+
+if.then18: ; preds = %if.end
+ %cmp21 = icmp ugt i8 %3, %4
+ %conv22 = zext i1 %cmp21 to i32
+ br label %return
+
+if.end23: ; preds = %if.end
+ %inc24 = add nsw i32 %i1, 2
+ %inc25 = add nsw i32 %i2, 2
+ %idxprom26 = sext i32 %inc24 to i64
+ %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
+ %5 = load i8* %arrayidx27, align 1
+ %idxprom28 = sext i32 %inc25 to i64
+ %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
+ %6 = load i8* %arrayidx29, align 1
+ %cmp32 = icmp eq i8 %5, %6
+ br i1 %cmp32, label %return, label %if.then34
+
+if.then34: ; preds = %if.end23
+ %cmp37 = icmp ugt i8 %5, %6
+ %conv38 = zext i1 %cmp37 to i32
+ br label %return
+
+return: ; preds = %if.end23, %if.then34, %if.then18, %if.then
+ %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
+ ret i32 %retval.0
+}
+
+define i32 @fct1(i32 %i1, i32 %i2) optsize {
+; CHECK: @fct1
+; Addressing modes are folded when optimizing for code size.
+; CHECK: , sxtw]
+; CHECK: , sxtw]
+entry:
+ %idxprom = sext i32 %i1 to i64
+ %0 = load i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
+ %1 = load i8* %arrayidx, align 1
+ %idxprom1 = sext i32 %i2 to i64
+ %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
+ %2 = load i8* %arrayidx2, align 1
+ %cmp = icmp eq i8 %1, %2
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %cmp7 = icmp ugt i8 %1, %2
+ %conv8 = zext i1 %cmp7 to i32
+ br label %return
+
+if.end: ; preds = %entry
+ %inc = add nsw i32 %i1, 1
+ %inc9 = add nsw i32 %i2, 1
+ %idxprom10 = sext i32 %inc to i64
+ %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
+ %3 = load i8* %arrayidx11, align 1
+ %idxprom12 = sext i32 %inc9 to i64
+ %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
+ %4 = load i8* %arrayidx13, align 1
+ %cmp16 = icmp eq i8 %3, %4
+ br i1 %cmp16, label %if.end23, label %if.then18
+
+if.then18: ; preds = %if.end
+ %cmp21 = icmp ugt i8 %3, %4
+ %conv22 = zext i1 %cmp21 to i32
+ br label %return
+
+if.end23: ; preds = %if.end
+ %inc24 = add nsw i32 %i1, 2
+ %inc25 = add nsw i32 %i2, 2
+ %idxprom26 = sext i32 %inc24 to i64
+ %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
+ %5 = load i8* %arrayidx27, align 1
+ %idxprom28 = sext i32 %inc25 to i64
+ %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
+ %6 = load i8* %arrayidx29, align 1
+ %cmp32 = icmp eq i8 %5, %6
+ br i1 %cmp32, label %return, label %if.then34
+
+if.then34: ; preds = %if.end23
+ %cmp37 = icmp ugt i8 %5, %6
+ %conv38 = zext i1 %cmp37 to i32
+ br label %return
+
+return: ; preds = %if.end23, %if.then34, %if.then18, %if.then
+ %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
+ ret i32 %retval.0
+}
+
+; CHECK: @test
+; CHECK-NOT: , uxtw #2]
+define i32 @test(i32* %array, i8 zeroext %c, i32 %arg) {
+entry:
+ %conv = zext i8 %c to i32
+ %add = sub i32 0, %arg
+ %tobool = icmp eq i32 %conv, %add
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %idxprom = zext i8 %c to i64
+ %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
+ %0 = load volatile i32* %arrayidx, align 4
+ %1 = load volatile i32* %arrayidx, align 4
+ %add3 = add nsw i32 %1, %0
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
+ ret i32 %res.0
+}
+
+
+; CHECK: @test2
+; CHECK: , uxtw #2]
+; CHECK: , uxtw #2]
+define i32 @test2(i32* %array, i8 zeroext %c, i32 %arg) optsize {
+entry:
+ %conv = zext i8 %c to i32
+ %add = sub i32 0, %arg
+ %tobool = icmp eq i32 %conv, %add
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %idxprom = zext i8 %c to i64
+ %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
+ %0 = load volatile i32* %arrayidx, align 4
+ %1 = load volatile i32* %arrayidx, align 4
+ %add3 = add nsw i32 %1, %0
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
+ ret i32 %res.0
+}
diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
new file mode 100644
index 0000000..1a3ca8b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -0,0 +1,82 @@
+; RUN: llc -march arm64 < %s | FileCheck %s
+; rdar://13452552
+; ModuleID = 'reduced_test.ll'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "arm64-apple-ios3.0.0"
+
+@block = common global i8* null, align 8
+
+define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
+; CHECK: fullGtU
+; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
+; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
+; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
+; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw]
+; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
+; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
+; CHECK-NEXT: b.ne
+; Next BB
+; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
+; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
+; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
+; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
+; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
+; CHECK-NEXT: b.ne
+; Next BB
+; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
+; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
+; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
+entry:
+ %idxprom = sext i32 %i1 to i64
+ %tmp = load i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom
+ %tmp1 = load i8* %arrayidx, align 1
+ %idxprom1 = sext i32 %i2 to i64
+ %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1
+ %tmp2 = load i8* %arrayidx2, align 1
+ %cmp = icmp eq i8 %tmp1, %tmp2
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %cmp7 = icmp ugt i8 %tmp1, %tmp2
+ %conv9 = zext i1 %cmp7 to i8
+ br label %return
+
+if.end: ; preds = %entry
+ %inc = add nsw i32 %i1, 1
+ %inc10 = add nsw i32 %i2, 1
+ %idxprom11 = sext i32 %inc to i64
+ %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11
+ %tmp3 = load i8* %arrayidx12, align 1
+ %idxprom13 = sext i32 %inc10 to i64
+ %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13
+ %tmp4 = load i8* %arrayidx14, align 1
+ %cmp17 = icmp eq i8 %tmp3, %tmp4
+ br i1 %cmp17, label %if.end25, label %if.then19
+
+if.then19: ; preds = %if.end
+ %cmp22 = icmp ugt i8 %tmp3, %tmp4
+ %conv24 = zext i1 %cmp22 to i8
+ br label %return
+
+if.end25: ; preds = %if.end
+ %inc26 = add nsw i32 %i1, 2
+ %inc27 = add nsw i32 %i2, 2
+ %idxprom28 = sext i32 %inc26 to i64
+ %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28
+ %tmp5 = load i8* %arrayidx29, align 1
+ %idxprom30 = sext i32 %inc27 to i64
+ %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30
+ %tmp6 = load i8* %arrayidx31, align 1
+ %cmp34 = icmp eq i8 %tmp5, %tmp6
+ br i1 %cmp34, label %return, label %if.then36
+
+if.then36: ; preds = %if.end25
+ %cmp39 = icmp ugt i8 %tmp5, %tmp6
+ %conv41 = zext i1 %cmp39 to i8
+ br label %return
+
+return: ; preds = %if.then36, %if.end25, %if.then19, %if.then
+ %retval.0 = phi i8 [ %conv9, %if.then ], [ %conv24, %if.then19 ], [ %conv41, %if.then36 ], [ 0, %if.end25 ]
+ ret i8 %retval.0
+}
diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll
new file mode 100644
index 0000000..700fba8
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -0,0 +1,72 @@
+; RUN: llc -march=arm64 < %s | FileCheck %s
+; rdar://10232252
+
+@object = external hidden global i64, section "__DATA, __objc_ivar", align 8
+
+; base + offset (imm9)
+; CHECK: @t1
+; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
+; CHECK: ret
+define void @t1() {
+ %incdec.ptr = getelementptr inbounds i64* @object, i64 1
+ %tmp = load volatile i64* %incdec.ptr, align 8
+ ret void
+}
+
+; base + offset (> imm9)
+; CHECK: @t2
+; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
+; CHECK: ldr xzr, [
+; CHECK: [[ADDREG]]]
+; CHECK: ret
+define void @t2() {
+ %incdec.ptr = getelementptr inbounds i64* @object, i64 -33
+ %tmp = load volatile i64* %incdec.ptr, align 8
+ ret void
+}
+
+; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
+; CHECK: @t3
+; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
+; CHECK: ret
+define void @t3() {
+ %incdec.ptr = getelementptr inbounds i64* @object, i64 4095
+ %tmp = load volatile i64* %incdec.ptr, align 8
+ ret void
+}
+
+; base + unsigned offset (> imm12 * size of type in bytes)
+; CHECK: @t4
+; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #8, lsl #12
+; CHECK: ldr xzr, [
+; CHECK: [[ADDREG]]]
+; CHECK: ret
+define void @t4() {
+ %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
+ %tmp = load volatile i64* %incdec.ptr, align 8
+ ret void
+}
+
+; base + reg
+; CHECK: @t5
+; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3]
+; CHECK: ret
+define void @t5(i64 %a) {
+ %incdec.ptr = getelementptr inbounds i64* @object, i64 %a
+ %tmp = load volatile i64* %incdec.ptr, align 8
+ ret void
+}
+
+; base + reg + imm
+; CHECK: @t6
+; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
+; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #8, lsl #12
+; CHECK: ldr xzr, [
+; CHECK: [[ADDREG]]]
+; CHECK: ret
+define void @t6(i64 %a) {
+ %tmp1 = getelementptr inbounds i64* @object, i64 %a
+ %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096
+ %tmp = load volatile i64* %incdec.ptr, align 8
+ ret void
+}
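+
+; Taken together, t1-t6 cover the AArch64 load/store addressing forms; a
+; summary of the ranges the tests imply for 8-byte accesses (a sketch, not a
+; full ISA reference):
+;   [base, #imm]          signed 9-bit offset, -256..255        (t1; t2 exceeds it)
+;   [base, #uimm scaled]  unsigned 12-bit offset * 8, 0..32760  (t3; t4 exceeds it)
+;   [base, reg, lsl #3]   register offset scaled by access size (t5)
+;   add/sub scratch reg   for out-of-range offsets              (t2, t4, t6)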
diff --git a/test/CodeGen/ARM64/alloc-no-stack-realign.ll b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
index f396bc9..f396bc9 100644
--- a/test/CodeGen/ARM64/alloc-no-stack-realign.ll
+++ b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
diff --git a/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
index 3750f31..3750f31 100644
--- a/test/CodeGen/ARM64/alloca-frame-pointer-offset.ll
+++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
diff --git a/test/CodeGen/ARM64/andCmpBrToTBZ.ll b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
index 4194977..4194977 100644
--- a/test/CodeGen/ARM64/andCmpBrToTBZ.ll
+++ b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
diff --git a/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
new file mode 100644
index 0000000..34d6287
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ands-bad-peephole.ll
@@ -0,0 +1,31 @@
+; RUN: llc %s -o - | FileCheck %s
+; Check that ANDS (tst) is not merged with ADD when the immediate
+; is not 0.
+; <rdar://problem/16693089>
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+; CHECK-LABEL: tst1:
+; CHECK: add [[REG:w[0-9]+]], w{{[0-9]+}}, #1
+; CHECK: tst [[REG]], #0x1
+define void @tst1() {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %result.09 = phi i32 [ %add2.result.0, %for.body ], [ 1, %entry ]
+ %i.08 = phi i32 [ %inc, %for.body ], [ 2, %entry ]
+ %and = and i32 %i.08, 1
+ %cmp1 = icmp eq i32 %and, 0
+ %add2.result.0 = select i1 %cmp1, i32 undef, i32 %result.09
+ %inc = add nsw i32 %i.08, 1
+ %cmp = icmp slt i32 %i.08, undef
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ %add2.result.0.lcssa = phi i32 [ %add2.result.0, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret void
+}
diff --git a/test/CodeGen/ARM64/anyregcc-crash.ll b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
index 241cf97..241cf97 100644
--- a/test/CodeGen/ARM64/anyregcc-crash.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
diff --git a/test/CodeGen/ARM64/anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll
index e26875d..e26875d 100644
--- a/test/CodeGen/ARM64/anyregcc.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc.ll
diff --git a/test/CodeGen/AArch64/arm64-arith-saturating.ll b/test/CodeGen/AArch64/arm64-arith-saturating.ll
new file mode 100644
index 0000000..78cd1fc
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-arith-saturating.ll
@@ -0,0 +1,153 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+
+define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: qadds:
+; CHECK: sqadd s0, s0, s1
+ %vecext = extractelement <4 x i32> %b, i32 0
+ %vecext1 = extractelement <4 x i32> %c, i32 0
+ %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
+ ret i32 %vqadd.i
+}
+
+define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: qaddd:
+; CHECK: sqadd d0, d0, d1
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vecext1 = extractelement <2 x i64> %c, i32 0
+ %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
+ ret i64 %vqadd.i
+}
+
+define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: uqadds:
+; CHECK: uqadd s0, s0, s1
+ %vecext = extractelement <4 x i32> %b, i32 0
+ %vecext1 = extractelement <4 x i32> %c, i32 0
+ %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
+ ret i32 %vqadd.i
+}
+
+define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: uqaddd:
+; CHECK: uqadd d0, d0, d1
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vecext1 = extractelement <2 x i64> %c, i32 0
+ %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
+ ret i64 %vqadd.i
+}
+
+declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone
+declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone
+
+define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: qsubs:
+; CHECK: sqsub s0, s0, s1
+ %vecext = extractelement <4 x i32> %b, i32 0
+ %vecext1 = extractelement <4 x i32> %c, i32 0
+ %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
+ ret i32 %vqsub.i
+}
+
+define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: qsubd:
+; CHECK: sqsub d0, d0, d1
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vecext1 = extractelement <2 x i64> %c, i32 0
+ %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
+ ret i64 %vqsub.i
+}
+
+define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: uqsubs:
+; CHECK: uqsub s0, s0, s1
+ %vecext = extractelement <4 x i32> %b, i32 0
+ %vecext1 = extractelement <4 x i32> %c, i32 0
+ %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
+ ret i32 %vqsub.i
+}
+
+define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: uqsubd:
+; CHECK: uqsub d0, d0, d1
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vecext1 = extractelement <2 x i64> %c, i32 0
+ %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
+ ret i64 %vqsub.i
+}
+
+declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone
+declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone
+
+define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
+; CHECK-LABEL: qabss:
+; CHECK: sqabs s0, s0
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %b, i32 0
+ %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind
+ ret i32 %vqabs.i
+}
+
+define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
+; CHECK-LABEL: qabsd:
+; CHECK: sqabs d0, d0
+; CHECK: ret
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind
+ ret i64 %vqabs.i
+}
+
+define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
+; CHECK-LABEL: qnegs:
+; CHECK: sqneg s0, s0
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %b, i32 0
+ %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind
+ ret i32 %vqneg.i
+}
+
+define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
+; CHECK-LABEL: qnegd:
+; CHECK: sqneg d0, d0
+; CHECK: ret
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind
+ ret i64 %vqneg.i
+}
+
+declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone
+declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone
+
+define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: vqmovund:
+; CHECK: sqxtun s0, d0
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
+ ret i32 %vqmovun.i
+}
+
+define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: vqmovnd_s:
+; CHECK: sqxtn s0, d0
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
+ ret i32 %vqmovn.i
+}
+
+define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: vqmovnd_u:
+; CHECK: uqxtn s0, d0
+ %vecext = extractelement <2 x i64> %b, i32 0
+ %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
+ ret i32 %vqmovn.i
+}
+
+declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll
new file mode 100644
index 0000000..ed9b569
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-arith.ll
@@ -0,0 +1,262 @@
+; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s
+
+define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: add w0, w1, w0
+; CHECK: ret
+ %add = add i32 %b, %a
+ ret i32 %add
+}
+
+define i32 @t2(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: udiv w0, w0, w1
+; CHECK: ret
+ %udiv = udiv i32 %a, %b
+ ret i32 %udiv
+}
+
+define i64 @t3(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: udiv x0, x0, x1
+; CHECK: ret
+ %udiv = udiv i64 %a, %b
+ ret i64 %udiv
+}
+
+define i32 @t4(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: sdiv w0, w0, w1
+; CHECK: ret
+ %sdiv = sdiv i32 %a, %b
+ ret i32 %sdiv
+}
+
+define i64 @t5(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: sdiv x0, x0, x1
+; CHECK: ret
+ %sdiv = sdiv i64 %a, %b
+ ret i64 %sdiv
+}
+
+define i32 @t6(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t6:
+; CHECK: lsl w0, w0, w1
+; CHECK: ret
+ %shl = shl i32 %a, %b
+ ret i32 %shl
+}
+
+define i64 @t7(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t7:
+; CHECK: lsl x0, x0, x1
+; CHECK: ret
+ %shl = shl i64 %a, %b
+ ret i64 %shl
+}
+
+define i32 @t8(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t8:
+; CHECK: lsr w0, w0, w1
+; CHECK: ret
+ %lshr = lshr i32 %a, %b
+ ret i32 %lshr
+}
+
+define i64 @t9(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t9:
+; CHECK: lsr x0, x0, x1
+; CHECK: ret
+ %lshr = lshr i64 %a, %b
+ ret i64 %lshr
+}
+
+define i32 @t10(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t10:
+; CHECK: asr w0, w0, w1
+; CHECK: ret
+ %ashr = ashr i32 %a, %b
+ ret i32 %ashr
+}
+
+define i64 @t11(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: asr x0, x0, x1
+; CHECK: ret
+ %ashr = ashr i64 %a, %b
+ ret i64 %ashr
+}
+
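+; t12-t14: a sign/zero extension feeding an add should fold into the add's
+; extended-register form (sxth/uxth), optionally combined with a left shift.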
+define i32 @t12(i16 %a, i32 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: t12:
+; CHECK: add w0, w1, w0, sxth
+; CHECK: ret
+ %c = sext i16 %a to i32
+ %e = add i32 %x, %c
+ ret i32 %e
+}
+
+define i32 @t13(i16 %a, i32 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: t13:
+; CHECK: add w0, w1, w0, sxth #2
+; CHECK: ret
+ %c = sext i16 %a to i32
+ %d = shl i32 %c, 2
+ %e = add i32 %x, %d
+ ret i32 %e
+}
+
+define i64 @t14(i16 %a, i64 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: t14:
+; CHECK: add x0, x1, w0, uxth #3
+; CHECK: ret
+ %c = zext i16 %a to i64
+ %d = shl i64 %c, 3
+ %e = add i64 %x, %d
+ ret i64 %e
+}
+
+; rdar://9160598
+define i64 @t15(i64 %a, i64 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: t15:
+; CHECK: add x0, x1, w0, uxtw
+; CHECK: ret
+ %b = and i64 %a, 4294967295
+ %c = add i64 %x, %b
+ ret i64 %c
+}
+
+define i64 @t16(i64 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: t16:
+; CHECK: lsl x0, x0, #1
+; CHECK: ret
+ %a = shl i64 %x, 1
+ ret i64 %a
+}
+
+; rdar://9166974
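+; t17: -4294967296 is -(1 << 32), so the multiply can be lowered to a
+; sign-extend followed by a negate-with-shift instead of an actual mul.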
+define i64 @t17(i16 %a, i64 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: t17:
+; CHECK: sxth [[REG:x[0-9]+]], w0
+; CHECK: neg x0, [[REG]], lsl #32
+; CHECK: ret
+ %tmp16 = sext i16 %a to i64
+ %tmp17 = mul i64 %tmp16, -4294967296
+ ret i64 %tmp17
+}
+
+define i32 @t18(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t18:
+; CHECK: sdiv w0, w0, w1
+; CHECK: ret
+ %sdiv = call i32 @llvm.aarch64.sdiv.i32(i32 %a, i32 %b)
+ ret i32 %sdiv
+}
+
+define i64 @t19(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t19:
+; CHECK: sdiv x0, x0, x1
+; CHECK: ret
+ %sdiv = call i64 @llvm.aarch64.sdiv.i64(i64 %a, i64 %b)
+ ret i64 %sdiv
+}
+
+define i32 @t20(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t20:
+; CHECK: udiv w0, w0, w1
+; CHECK: ret
+ %udiv = call i32 @llvm.aarch64.udiv.i32(i32 %a, i32 %b)
+ ret i32 %udiv
+}
+
+define i64 @t21(i64 %a, i64 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: t21:
+; CHECK: udiv x0, x0, x1
+; CHECK: ret
+ %udiv = call i64 @llvm.aarch64.udiv.i64(i64 %a, i64 %b)
+ ret i64 %udiv
+}
+
+declare i32 @llvm.aarch64.sdiv.i32(i32, i32) nounwind readnone
+declare i64 @llvm.aarch64.sdiv.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.udiv.i32(i32, i32) nounwind readnone
+declare i64 @llvm.aarch64.udiv.i64(i64, i64) nounwind readnone
+
+; 32-bit not.
+define i32 @inv_32(i32 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: inv_32:
+; CHECK: mvn w0, w0
+; CHECK: ret
+ %inv = xor i32 %x, -1
+ ret i32 %inv
+}
+
+; 64-bit not.
+define i64 @inv_64(i64 %x) nounwind ssp {
+entry:
+; CHECK-LABEL: inv_64:
+; CHECK: mvn x0, x0
+; CHECK: ret
+ %inv = xor i64 %x, -1
+ ret i64 %inv
+}
+
+; Multiplying by a power of two plus or minus one is better done via a shift
+; and an add/sub than via the madd/msub instructions: the latter take 4+
+; cycles, while the former take two in total (counting both instructions of
+; the shift-and-subtract sequence).
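+; For example, f0 below multiplies by 9 = 8 + 1, which becomes a single
+; shifted add: add w0, w0, w0, lsl #3.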
+define i32 @f0(i32 %a) nounwind readnone ssp {
+; CHECK-LABEL: f0:
+; CHECK-NEXT: add w0, w0, w0, lsl #3
+; CHECK-NEXT: ret
+ %res = mul i32 %a, 9
+ ret i32 %res
+}
+
+define i64 @f1(i64 %a) nounwind readnone ssp {
+; CHECK-LABEL: f1:
+; CHECK-NEXT: lsl x8, x0, #4
+; CHECK-NEXT: sub x0, x8, x0
+; CHECK-NEXT: ret
+ %res = mul i64 %a, 15
+ ret i64 %res
+}
+
+define i32 @f2(i32 %a) nounwind readnone ssp {
+; CHECK-LABEL: f2:
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: sub w0, w8, w0
+; CHECK-NEXT: ret
+ %res = mul nsw i32 %a, 7
+ ret i32 %res
+}
+
+define i64 @f3(i64 %a) nounwind readnone ssp {
+; CHECK-LABEL: f3:
+; CHECK-NEXT: add x0, x0, x0, lsl #4
+; CHECK-NEXT: ret
+ %res = mul nsw i64 %a, 17
+ ret i64 %res
+}
diff --git a/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll
new file mode 100644
index 0000000..0904b62
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-arm64-dead-def-elimination-flag.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=arm64 -aarch64-dead-def-elimination=false < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
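+; Sanity check for the -aarch64-dead-def-elimination flag: with the pass
+; disabled, the stack-address computation for the alloca must still be
+; emitted (the adds against sp checked below).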
+; Function Attrs: nounwind ssp uwtable
+define i32 @test1() nounwind ssp uwtable {
+ %tmp1 = alloca i8
+ %tmp2 = icmp eq i8* %tmp1, null
+ %tmp3 = zext i1 %tmp2 to i32
+
+ ret i32 %tmp3
+
+ ; CHECK-LABEL: test1
+ ; CHECK: adds {{x[0-9]+}}, sp, #15
+}
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
new file mode 100644
index 0000000..3b43aa1
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+
+@var = global i128 0
+
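+; There is no single 128-bit atomic instruction on this target, so i128
+; cmpxchg and atomicrmw expand to ldaxp/stxp (load/store exclusive pair)
+; retry loops, as the patterns below check.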
+define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
+; CHECK-LABEL: val_compare_and_swap:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]
+; CHECK-DAG: eor [[MISMATCH_LO:x[0-9]+]], [[RESULTLO]], x2
+; CHECK-DAG: eor [[MISMATCH_HI:x[0-9]+]], [[RESULTHI]], x3
+; CHECK: orr [[MISMATCH:x[0-9]+]], [[MISMATCH_LO]], [[MISMATCH_HI]]
+; CHECK: cbnz [[MISMATCH]], [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+; CHECK: [[DONE]]:
+ %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+ ret i128 %val
+}
+
+define void @fetch_and_nand(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_nand:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK-DAG: bic [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK-DAG: bic [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw nand i128* %p, i128 %bits release
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_or(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_or:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK-DAG: orr [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK-DAG: orr [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw or i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_add(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_add:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK: adds [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK: adcs [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw add i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_sub(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_sub:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK: subs [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK: sbcs [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw sub i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_min(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_min:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK: cmp [[DEST_REGLO]], x2
+; CHECK: cset [[LOCMP:w[0-9]+]], ls
+; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: cset [[HICMP:w[0-9]+]], le
+; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp [[CMP]], #0
+; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw min i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_max(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_max:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK: cmp [[DEST_REGLO]], x2
+; CHECK: cset [[LOCMP:w[0-9]+]], hi
+; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: cset [[HICMP:w[0-9]+]], gt
+; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp [[CMP]], #0
+; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw max i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_umin(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_umin:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK: cmp [[DEST_REGLO]], x2
+; CHECK: cset [[LOCMP:w[0-9]+]], ls
+; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: cset [[HICMP:w[0-9]+]], ls
+; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp [[CMP]], #0
+; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw umin i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_umax(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_umax:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
+; CHECK: cmp [[DEST_REGLO]], x2
+; CHECK: cset [[LOCMP:w[0-9]+]], hi
+; CHECK: cmp [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: cset [[HICMP:w[0-9]+]], hi
+; CHECK: csel [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp [[CMP]], #0
+; CHECK-DAG: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
+; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
+; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
+
+; CHECK-DAG: str [[DEST_REGHI]]
+; CHECK-DAG: str [[DEST_REGLO]]
+ %val = atomicrmw umax i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define i128 @atomic_load_seq_cst(i128* %p) {
+; CHECK-LABEL: atomic_load_seq_cst:
+; CHECK-NOT: dmb
+; CHECK: ldaxp
+; CHECK-NOT: dmb
+ %r = load atomic i128* %p seq_cst, align 16
+ ret i128 %r
+}
+
+define i128 @atomic_load_relaxed(i128* %p) {
+; CHECK-LABEL: atomic_load_relaxed:
+; CHECK-NOT: dmb
+; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
+; CHECK-NOT: dmb
+ %r = load atomic i128* %p monotonic, align 16
+ ret i128 %r
+}
+
+define void @atomic_store_seq_cst(i128 %in, i128* %p) {
+; CHECK-LABEL: atomic_store_seq_cst:
+; CHECK-NOT: dmb
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp xzr, xzr, [x2]
+; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
+; CHECK: cbnz [[SUCCESS]], [[LABEL]]
+; CHECK-NOT: dmb
+ store atomic i128 %in, i128* %p seq_cst, align 16
+ ret void
+}
+
+define void @atomic_store_release(i128 %in, i128* %p) {
+; CHECK-LABEL: atomic_store_release:
+; CHECK-NOT: dmb
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxp xzr, xzr, [x2]
+; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
+; CHECK: cbnz [[SUCCESS]], [[LABEL]]
+; CHECK-NOT: dmb
+ store atomic i128 %in, i128* %p release, align 16
+ ret void
+}
+
+define void @atomic_store_relaxed(i128 %in, i128* %p) {
+; CHECK-LABEL: atomic_store_relaxed:
+; CHECK-NOT: dmb
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxp xzr, xzr, [x2]
+; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
+; CHECK: cbnz [[SUCCESS]], [[LABEL]]
+; CHECK-NOT: dmb
+ store atomic i128 %in, i128* %p unordered, align 16
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
new file mode 100644
index 0000000..aa9b284
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -0,0 +1,331 @@
+; RUN: llc < %s -march=arm64 -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+
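+; An acquire cmpxchg expands to a load-exclusive/store-exclusive loop: ldaxr
+; provides the acquire ordering, and a failed stxr branches back to retry.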
+define i32 @val_compare_and_swap(i32* %p) {
+; CHECK-LABEL: val_compare_and_swap:
+; CHECK: orr [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK: cmp [[RESULT]], #7
+; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
+; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: [[LABEL2]]:
+ %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
+ ret i32 %val
+}
+
+define i64 @val_compare_and_swap_64(i64* %p) {
+; CHECK-LABEL: val_compare_and_swap_64:
+; CHECK: orr w[[NEWVAL_REG:[0-9]+]], wzr, #0x4
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxr [[RESULT:x[0-9]+]], [x0]
+; CHECK: cmp [[RESULT]], #7
+; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NOT: stxr x[[NEWVAL_REG]], x[[NEWVAL_REG]]
+; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
+; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: [[LABEL2]]:
+ %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
+ ret i64 %val
+}
+
+define i32 @fetch_and_nand(i32* %p) {
+; CHECK-LABEL: fetch_and_nand:
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
+; CHECK: and [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8
+; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
+; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
+; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: mov x0, x[[DEST_REG]]
+ %val = atomicrmw nand i32* %p, i32 7 release
+ ret i32 %val
+}
+
+define i64 @fetch_and_nand_64(i64* %p) {
+; CHECK-LABEL: fetch_and_nand_64:
+; CHECK: mov x[[ADDR:[0-9]+]], x0
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
+; CHECK: and [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8
+; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
+; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+
+ %val = atomicrmw nand i64* %p, i64 7 acq_rel
+ ret i64 %val
+}
+
+define i32 @fetch_and_or(i32* %p) {
+; CHECK-LABEL: fetch_and_or:
+; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0]
+; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
+; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
+; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
+; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: mov x0, x[[DEST_REG]]
+ %val = atomicrmw or i32* %p, i32 5 seq_cst
+ ret i32 %val
+}
+
+define i64 @fetch_and_or_64(i64* %p) {
+; CHECK-LABEL: fetch_and_or_64:
+; CHECK: mov x[[ADDR:[0-9]+]], x0
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
+; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
+; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
+; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+ %val = atomicrmw or i64* %p, i64 7 monotonic
+ ret i64 %val
+}
+
+define void @acquire_fence() {
+ fence acquire
+ ret void
+ ; CHECK-LABEL: acquire_fence:
+ ; CHECK: dmb ishld
+}
+
+define void @release_fence() {
+ fence release
+ ret void
+ ; CHECK-LABEL: release_fence:
+ ; CHECK: dmb ish{{$}}
+}
+
+define void @seq_cst_fence() {
+ fence seq_cst
+ ret void
+ ; CHECK-LABEL: seq_cst_fence:
+ ; CHECK: dmb ish{{$}}
+}
+
+define i32 @atomic_load(i32* %p) {
+ %r = load atomic i32* %p seq_cst, align 4
+ ret i32 %r
+ ; CHECK-LABEL: atomic_load:
+ ; CHECK: ldar
+}
+
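+; Monotonic and unordered atomic loads need no barriers, so they should use
+; the same addressing modes as ordinary loads: scaled unsigned immediates,
+; sign-extended register offsets, unscaled negative offsets (ldur*), and a
+; separate ADD for offsets that do not fit the immediate field.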
+define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
+; CHECK-LABEL: atomic_load_relaxed_8:
+ %ptr_unsigned = getelementptr i8* %p, i32 4095
+ %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1
+; CHECK: ldrb {{w[0-9]+}}, [x0, #4095]
+
+ %ptr_regoff = getelementptr i8* %p, i32 %off32
+ %val_regoff = load atomic i8* %ptr_regoff unordered, align 1
+ %tot1 = add i8 %val_unsigned, %val_regoff
+; CHECK: ldrb {{w[0-9]+}}, [x0, w1, sxtw]
+
+ %ptr_unscaled = getelementptr i8* %p, i32 -256
+ %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1
+ %tot2 = add i8 %tot1, %val_unscaled
+; CHECK: ldurb {{w[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
+ %val_random = load atomic i8* %ptr_random unordered, align 1
+ %tot3 = add i8 %tot2, %val_random
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]]
+
+ ret i8 %tot3
+}
+
+define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
+; CHECK-LABEL: atomic_load_relaxed_16:
+ %ptr_unsigned = getelementptr i16* %p, i32 4095
+ %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2
+; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
+
+ %ptr_regoff = getelementptr i16* %p, i32 %off32
+ %val_regoff = load atomic i16* %ptr_regoff unordered, align 2
+ %tot1 = add i16 %val_unsigned, %val_regoff
+; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1]
+
+ %ptr_unscaled = getelementptr i16* %p, i32 -128
+ %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2
+ %tot2 = add i16 %tot1, %val_unscaled
+; CHECK: ldurh {{w[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
+ %val_random = load atomic i16* %ptr_random unordered, align 2
+ %tot3 = add i16 %tot2, %val_random
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]]
+
+ ret i16 %tot3
+}
+
+define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
+; CHECK-LABEL: atomic_load_relaxed_32:
+ %ptr_unsigned = getelementptr i32* %p, i32 4095
+ %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4
+; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
+
+ %ptr_regoff = getelementptr i32* %p, i32 %off32
+ %val_regoff = load atomic i32* %ptr_regoff unordered, align 4
+ %tot1 = add i32 %val_unsigned, %val_regoff
+; CHECK: ldr {{w[0-9]+}}, [x0, w1, sxtw #2]
+
+ %ptr_unscaled = getelementptr i32* %p, i32 -64
+ %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4
+ %tot2 = add i32 %tot1, %val_unscaled
+; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
+ %val_random = load atomic i32* %ptr_random unordered, align 4
+ %tot3 = add i32 %tot2, %val_random
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]]
+
+ ret i32 %tot3
+}
+
+define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
+; CHECK-LABEL: atomic_load_relaxed_64:
+ %ptr_unsigned = getelementptr i64* %p, i32 4095
+ %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8
+; CHECK: ldr {{x[0-9]+}}, [x0, #32760]
+
+ %ptr_regoff = getelementptr i64* %p, i32 %off32
+ %val_regoff = load atomic i64* %ptr_regoff unordered, align 8
+ %tot1 = add i64 %val_unsigned, %val_regoff
+; CHECK: ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
+
+ %ptr_unscaled = getelementptr i64* %p, i32 -32
+ %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8
+ %tot2 = add i64 %tot1, %val_unscaled
+; CHECK: ldur {{x[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
+ %val_random = load atomic i64* %ptr_random unordered, align 8
+ %tot3 = add i64 %tot2, %val_random
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]]
+
+ ret i64 %tot3
+}
+
+define void @atomic_store(i32* %p) {
+ store atomic i32 4, i32* %p seq_cst, align 4
+ ret void
+ ; CHECK-LABEL: atomic_store:
+ ; CHECK: stlr
+}
+
+define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) {
+; CHECK-LABEL: atomic_store_relaxed_8:
+ %ptr_unsigned = getelementptr i8* %p, i32 4095
+ store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
+; CHECK: strb {{w[0-9]+}}, [x0, #4095]
+
+ %ptr_regoff = getelementptr i8* %p, i32 %off32
+ store atomic i8 %val, i8* %ptr_regoff unordered, align 1
+; CHECK: strb {{w[0-9]+}}, [x0, w1, sxtw]
+
+ %ptr_unscaled = getelementptr i8* %p, i32 -256
+ store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1
+; CHECK: sturb {{w[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
+ store atomic i8 %val, i8* %ptr_random unordered, align 1
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]]
+
+ ret void
+}
+
+define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) {
+; CHECK-LABEL: atomic_store_relaxed_16:
+ %ptr_unsigned = getelementptr i16* %p, i32 4095
+ store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
+; CHECK: strh {{w[0-9]+}}, [x0, #8190]
+
+ %ptr_regoff = getelementptr i16* %p, i32 %off32
+ store atomic i16 %val, i16* %ptr_regoff unordered, align 2
+; CHECK: strh {{w[0-9]+}}, [x0, w1, sxtw #1]
+
+ %ptr_unscaled = getelementptr i16* %p, i32 -128
+ store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2
+; CHECK: sturh {{w[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
+ store atomic i16 %val, i16* %ptr_random unordered, align 2
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]]
+
+ ret void
+}
+
+define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) {
+; CHECK-LABEL: atomic_store_relaxed_32:
+ %ptr_unsigned = getelementptr i32* %p, i32 4095
+ store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
+; CHECK: str {{w[0-9]+}}, [x0, #16380]
+
+ %ptr_regoff = getelementptr i32* %p, i32 %off32
+ store atomic i32 %val, i32* %ptr_regoff unordered, align 4
+; CHECK: str {{w[0-9]+}}, [x0, w1, sxtw #2]
+
+ %ptr_unscaled = getelementptr i32* %p, i32 -64
+ store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4
+; CHECK: stur {{w[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
+ store atomic i32 %val, i32* %ptr_random unordered, align 4
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: str {{w[0-9]+}}, [x[[ADDR]]]
+
+ ret void
+}
+
+define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) {
+; CHECK-LABEL: atomic_store_relaxed_64:
+ %ptr_unsigned = getelementptr i64* %p, i32 4095
+ store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
+; CHECK: str {{x[0-9]+}}, [x0, #32760]
+
+ %ptr_regoff = getelementptr i64* %p, i32 %off32
+ store atomic i64 %val, i64* %ptr_regoff unordered, align 8
+; CHECK: str {{x[0-9]+}}, [x0, w1, sxtw #3]
+
+ %ptr_unscaled = getelementptr i64* %p, i32 -32
+ store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8
+; CHECK: stur {{x[0-9]+}}, [x0, #-256]
+
+ %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
+ store atomic i64 %val, i64* %ptr_random unordered, align 8
+; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK: str {{x[0-9]+}}, [x[[ADDR]]]
+
+ ret void
+}
+
+; rdar://11531169
+; rdar://11531308
+
+%"class.X::Atomic" = type { %struct.x_atomic_t }
+%struct.x_atomic_t = type { i32 }
+
+@counter = external hidden global %"class.X::Atomic", align 4
+
+define i32 @next_id() nounwind optsize ssp align 2 {
+entry:
+ %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
+ %add.i = add i32 %0, 1
+ %tobool = icmp eq i32 %add.i, 0
+ br i1 %tobool, label %if.else, label %return
+
+if.else: ; preds = %entry
+ %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
+ %add.i2 = add i32 %1, 1
+ br label %return
+
+return: ; preds = %if.else, %entry
+ %retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/ARM64/basic-pic.ll b/test/CodeGen/AArch64/arm64-basic-pic.ll
index 9fdb1e9..9fdb1e9 100644
--- a/test/CodeGen/ARM64/basic-pic.ll
+++ b/test/CodeGen/AArch64/arm64-basic-pic.ll
diff --git a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
new file mode 100644
index 0000000..f0e968b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -0,0 +1,1101 @@
+; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O1 -o - | FileCheck %s
+; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s
+
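+; On big endian, ld1/st1 access vector elements in array order while ldr/str
+; are byte-ordered, so bitcasts between types with different element sizes
+; need rev16/rev32/rev64 (or ext for 128-bit values) to reshuffle the lanes.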
+; CHECK-LABEL: test_i64_f64:
+define void @test_i64_f64(double* %p, i64* %q) {
+; CHECK: ldr
+; CHECK: str
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = bitcast double %2 to i64
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v1i64:
+define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
+; CHECK: ldr
+; CHECK: str
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = bitcast <1 x i64> %2 to i64
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v2f32:
+define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: str
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = bitcast <2 x float> %2 to i64
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v2i32:
+define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: str
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = bitcast <2 x i32> %2 to i64
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v4i16:
+define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: str
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = bitcast <4 x i16> %2 to i64
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v8i8:
+define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8b }
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: str
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = bitcast <8 x i8> %2 to i64
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_i64:
+define void @test_f64_i64(i64* %p, double* %q) {
+; CHECK: ldr
+; CHECK: str
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = bitcast i64 %2 to double
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v1i64:
+define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
+; CHECK: ldr
+; CHECK: str
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = bitcast <1 x i64> %2 to double
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v2f32:
+define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: str
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = bitcast <2 x float> %2 to double
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v2i32:
+define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: str
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = bitcast <2 x i32> %2 to double
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v4i16:
+define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: str
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = bitcast <4 x i16> %2 to double
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v8i8:
+define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8b }
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: str
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = bitcast <8 x i8> %2 to double
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_i64:
+define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
+; CHECK: ldr
+; CHECK: str
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = bitcast i64 %2 to <1 x i64>
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_f64:
+define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
+; CHECK: ldr
+; CHECK: str
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = bitcast double %2 to <1 x i64>
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v2f32:
+define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: str
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = bitcast <2 x float> %2 to <1 x i64>
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v2i32:
+define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: str
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v4i16:
+define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: str
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v8i8:
+define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8b }
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: str
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = bitcast <8 x i8> %2 to <1 x i64>
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_i64:
+define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = bitcast i64 %2 to <2 x float>
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_f64:
+define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = bitcast double %2 to <2 x float>
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v1i64:
+define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = bitcast <1 x i64> %2 to <2 x float>
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v2i32:
+define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = bitcast <2 x i32> %2 to <2 x float>
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v4i16:
+define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK: rev32 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = bitcast <4 x i16> %2 to <2 x float>
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v8i8:
+define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8b }
+; CHECK: rev32 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = bitcast <8 x i8> %2 to <2 x float>
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_i64:
+define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = bitcast i64 %2 to <2 x i32>
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_f64:
+define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = bitcast double %2 to <2 x i32>
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v1i64:
+define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v2f32:
+define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = bitcast <2 x float> %2 to <2 x i32>
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v4i16:
+define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK: rev32 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = bitcast <4 x i16> %2 to <2 x i32>
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v8i8:
+define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8b }
+; CHECK: rev32 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.2s }
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = bitcast <8 x i8> %2 to <2 x i32>
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_i64:
+define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.4h }
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = bitcast i64 %2 to <4 x i16>
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_f64:
+define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.4h }
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = bitcast double %2 to <4 x i16>
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v1i64:
+define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.4h }
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v2f32:
+define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev32 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.4h }
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = bitcast <2 x float> %2 to <4 x i16>
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v2i32:
+define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev32 v{{[0-9]+}}.4h
+; CHECK: st1 { v{{[0-9]+}}.4h }
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = bitcast <2 x i32> %2 to <4 x i16>
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v8i8:
+define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8b }
+; CHECK: rev16 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.4h }
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = bitcast <8 x i8> %2 to <4 x i16>
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_i64:
+define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.8b }
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = bitcast i64 %2 to <8 x i8>
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_f64:
+define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.8b }
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = bitcast double %2 to <8 x i8>
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v1i64:
+define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.8b }
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v2f32:
+define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev32 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.8b }
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = bitcast <2 x float> %2 to <8 x i8>
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v2i32:
+define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2s }
+; CHECK: rev32 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.8b }
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = bitcast <2 x i32> %2 to <8 x i8>
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v4i16:
+define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK: rev16 v{{[0-9]+}}.8b
+; CHECK: st1 { v{{[0-9]+}}.8b }
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = bitcast <4 x i16> %2 to <8 x i8>
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v2f64:
+define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: ext
+; CHECK: str
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = bitcast <2 x double> %2 to fp128
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v2i64:
+define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: ext
+; CHECK: str
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = bitcast <2 x i64> %2 to fp128
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v4f32:
+define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: str q
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = bitcast <4 x float> %2 to fp128
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v4i32:
+define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4s }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: str
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = bitcast <4 x i32> %2 to fp128
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v8i16:
+define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8h }
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: str
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = bitcast <8 x i16> %2 to fp128
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v16i8:
+define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.16b }
+; CHECK: ext
+; CHECK: str q
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = bitcast <16 x i8> %2 to fp128
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_f128:
+define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
+; CHECK: ldr
+; CHECK: ext
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = bitcast fp128 %2 to <2 x double>
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v2i64:
+define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = bitcast <2 x i64> %2 to <2 x double>
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v4f32:
+define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = bitcast <4 x float> %2 to <2 x double>
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v4i32:
+define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4s }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = bitcast <4 x i32> %2 to <2 x double>
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v8i16:
+define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8h }
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = bitcast <8 x i16> %2 to <2 x double>
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v16i8:
+define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.16b }
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = bitcast <16 x i8> %2 to <2 x double>
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_f128:
+define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
+; CHECK: ldr
+; CHECK: ext
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = bitcast fp128 %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v2f64:
+define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = bitcast <2 x double> %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v4f32:
+define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = bitcast <4 x float> %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v4i32:
+define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4s }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v8i16:
+define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8h }
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v16i8:
+define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.16b }
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_f128:
+define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
+; CHECK: ldr q
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = bitcast fp128 %2 to <4 x float>
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v2f64:
+define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = bitcast <2 x double> %2 to <4 x float>
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v2i64:
+define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = bitcast <2 x i64> %2 to <4 x float>
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v4i32:
+define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4s }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = bitcast <4 x i32> %2 to <4 x float>
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v8i16:
+define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8h }
+; CHECK: rev32 v{{[0-9]+}}.8h
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = bitcast <8 x i16> %2 to <4 x float>
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v16i8:
+define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.16b }
+; CHECK: rev32 v{{[0-9]+}}.16b
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.2d }
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = bitcast <16 x i8> %2 to <4 x float>
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_f128:
+define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: st1 { v{{[0-9]+}}.4s }
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = bitcast fp128 %2 to <4 x i32>
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v2f64:
+define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.4s }
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = bitcast <2 x double> %2 to <4 x i32>
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v2i64:
+define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.4s }
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = bitcast <2 x i64> %2 to <4 x i32>
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v4f32:
+define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: st1 { v{{[0-9]+}}.4s }
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = bitcast <4 x float> %2 to <4 x i32>
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v8i16:
+define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8h }
+; CHECK: rev32 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.4s }
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = bitcast <8 x i16> %2 to <4 x i32>
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v16i8:
+define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.16b }
+; CHECK: rev32 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.4s }
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = bitcast <16 x i8> %2 to <4 x i32>
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_f128:
+define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
+; CHECK: ldr
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: st1 { v{{[0-9]+}}.8h }
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = bitcast fp128 %2 to <8 x i16>
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v2f64:
+define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.8h }
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = bitcast <2 x double> %2 to <8 x i16>
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v2i64:
+define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.8h }
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = bitcast <2 x i64> %2 to <8 x i16>
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v4f32:
+define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev32 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.8h }
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = bitcast <4 x float> %2 to <8 x i16>
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v4i32:
+define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4s }
+; CHECK: rev32 v{{[0-9]+}}.8h
+; CHECK: st1 { v{{[0-9]+}}.8h }
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = bitcast <4 x i32> %2 to <8 x i16>
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v16i8:
+define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.16b }
+; CHECK: rev16 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.8h }
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = bitcast <16 x i8> %2 to <8 x i16>
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_f128:
+define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
+; CHECK: ldr q
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: st1 { v{{[0-9]+}}.16b }
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = bitcast fp128 %2 to <16 x i8>
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v2f64:
+define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.16b }
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = bitcast <2 x double> %2 to <16 x i8>
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v2i64:
+define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.16b }
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = bitcast <2 x i64> %2 to <16 x i8>
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v4f32:
+define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.2d }
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: rev32 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.16b }
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = bitcast <4 x float> %2 to <16 x i8>
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v4i32:
+define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.4s }
+; CHECK: rev32 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.16b }
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = bitcast <4 x i32> %2 to <16 x i8>
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v8i16:
+define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
+; CHECK: ld1 { v{{[0-9]+}}.8h }
+; CHECK: rev16 v{{[0-9]+}}.16b
+; CHECK: st1 { v{{[0-9]+}}.16b }
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = bitcast <8 x i16> %2 to <16 x i8>
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-big-endian-eh.ll b/test/CodeGen/AArch64/arm64-big-endian-eh.ll
new file mode 100644
index 0000000..93e7da9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-big-endian-eh.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple arm64_be-linux-gnu -filetype obj < %s | llvm-objdump -s - | FileCheck %s
+
+; Exception handling tables for big endian
+; This test checks that the CIE length record in .eh_frame is laid out in
+; big-endian format.
+;
+; This is the LLVM assembly generated from the following C++ code:
+;
+; extern void foo(int);
+; void test(int a, int b) {
+; try {
+; foo(a);
+; } catch (...) {
+; foo(b);
+; }
+; }
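+;
+; The CIE length field is the first 32-bit word of .eh_frame, so on a
+; big-endian target the length 0x1c must appear as the byte sequence
+; 00 00 00 1c at offset 0. The CHECK lines at the bottom of this file verify
+; that layout in the llvm-objdump section dump.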
+
+define void @_Z4testii(i32 %a, i32 %b) #0 {
+entry:
+ invoke void @_Z3fooi(i32 %a)
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2
+ invoke void @_Z3fooi(i32 %b)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %lpad
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont: ; preds = %entry, %invoke.cont2
+ ret void
+
+lpad1: ; preds = %lpad
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1
+ resume { i8*, i32 } %3
+
+terminate.lpad: ; preds = %lpad1
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %5 = extractvalue { i8*, i32 } %4, 0
+ tail call void @__clang_call_terminate(i8* %5) #3
+ unreachable
+}
+
+declare void @_Z3fooi(i32) #0
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: noinline noreturn nounwind
+define linkonce_odr hidden void @__clang_call_terminate(i8*) #1 {
+ %2 = tail call i8* @__cxa_begin_catch(i8* %0) #2
+ tail call void @_ZSt9terminatev() #3
+ unreachable
+}
+
+declare void @_ZSt9terminatev()
+
+; CHECK-LABEL: Contents of section .eh_frame:
+; CHECK-NEXT: 0000 0000001c
+
diff --git a/test/CodeGen/AArch64/arm64-big-endian-varargs.ll b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
new file mode 100644
index 0000000..d7b26b9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s | FileCheck %s
+
+; Vararg handling must save the Q registers using the equivalent of STR/STP.
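+;
+; A sketch of the reasoning (explanatory note, not checked by FileCheck):
+; STR/STP store the whole 128-bit register as a single unit, so the byte
+; image in the register save area is independent of lane ordering. An ST1
+; store writes lane-by-lane and would produce a different byte layout on a
+; big-endian target, breaking va_arg's view of the save area. The six STP
+; checks below therefore require paired whole-register stores.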
+
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "arm64_be-arm-none-eabi"
+
+%struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
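+; (note) These are the AAPCS64 __va_list fields: __stack, __gr_top, __vr_top,
+; __gr_offs, __vr_offs.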
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+define double @callee(i32 %a, ...) {
+; CHECK: stp
+; CHECK: stp
+; CHECK: stp
+; CHECK: stp
+; CHECK: stp
+; CHECK: stp
+entry:
+ %vl = alloca %struct.__va_list, align 8
+ %vl1 = bitcast %struct.__va_list* %vl to i8*
+ call void @llvm.va_start(i8* %vl1)
+ %vr_offs_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 4
+ %vr_offs = load i32* %vr_offs_p, align 4
+ %0 = icmp sgt i32 %vr_offs, -1
+ br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+vaarg.maybe_reg: ; preds = %entry
+ %new_reg_offs = add i32 %vr_offs, 16
+ store i32 %new_reg_offs, i32* %vr_offs_p, align 4
+ %inreg = icmp slt i32 %new_reg_offs, 1
+ br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+vaarg.in_reg: ; preds = %vaarg.maybe_reg
+ %reg_top_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 2
+ %reg_top = load i8** %reg_top_p, align 8
+ %1 = sext i32 %vr_offs to i64
+ %2 = getelementptr i8* %reg_top, i64 %1
+ %3 = ptrtoint i8* %2 to i64
+ %align_be = add i64 %3, 8
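+ ; Note (assumption): each FP/SIMD register save slot is 16 bytes; on big
+ ; endian the 8-byte double occupies the high-addressed half of its slot,
+ ; hence the +8 adjustment.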
+ %4 = inttoptr i64 %align_be to i8*
+ br label %vaarg.end
+
+vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
+ %stack_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 0
+ %stack = load i8** %stack_p, align 8
+ %new_stack = getelementptr i8* %stack, i64 8
+ store i8* %new_stack, i8** %stack_p, align 8
+ br label %vaarg.end
+
+vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
+ %.sink = phi i8* [ %4, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+ %5 = bitcast i8* %.sink to double*
+ %6 = load double* %5, align 8
+ call void @llvm.va_end(i8* %vl1)
+ ret double %6
+}
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
new file mode 100644
index 0000000..1dcccf1
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
@@ -0,0 +1,848 @@
+; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple arm64_be < %s -fast-isel=true -aarch64-load-store-opt=false -o - | FileCheck %s
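+;
+; Note (an explanatory sketch inferred from the CHECK patterns below): on a
+; big-endian target the in-register lane order of a vector depends on its
+; element size, so a bitcast between types with different element sizes is
+; not a no-op. The expected lowering is a REV whose granularity matches the
+; two element sizes (e.g. rev64 ... .4h between 64-bit types and <4 x i16>),
+; and 128-bit types additionally need an EXT to swap the 64-bit halves.
+; Bitcasts among i64, f64 and <1 x i64> need no REV, as the CHECK-NOTs
+; assert.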
+
+; CHECK-LABEL: test_i64_f64:
+define i64 @test_i64_f64(double %p) {
+; CHECK-NOT: rev
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+}
+
+; CHECK-LABEL: test_i64_v1i64:
+define i64 @test_i64_v1i64(<1 x i64> %p) {
+; CHECK-NOT: rev
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+}
+
+; CHECK-LABEL: test_i64_v2f32:
+define i64 @test_i64_v2f32(<2 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+}
+
+; CHECK-LABEL: test_i64_v2i32:
+define i64 @test_i64_v2i32(<2 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+}
+
+; CHECK-LABEL: test_i64_v4i16:
+define i64 @test_i64_v4i16(<4 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+}
+
+; CHECK-LABEL: test_i64_v8i8:
+define i64 @test_i64_v8i8(<8 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+}
+
+; CHECK-LABEL: test_f64_i64:
+define double @test_f64_i64(i64 %p) {
+; CHECK-NOT: rev
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+}
+
+; CHECK-LABEL: test_f64_v1i64:
+define double @test_f64_v1i64(<1 x i64> %p) {
+; CHECK-NOT: rev
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+}
+
+; CHECK-LABEL: test_f64_v2f32:
+define double @test_f64_v2f32(<2 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+}
+
+; CHECK-LABEL: test_f64_v2i32:
+define double @test_f64_v2i32(<2 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+}
+
+; CHECK-LABEL: test_f64_v4i16:
+define double @test_f64_v4i16(<4 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+}
+
+; CHECK-LABEL: test_f64_v8i8:
+define double @test_f64_v8i8(<8 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+}
+
+; CHECK-LABEL: test_v1i64_i64:
+define <1 x i64> @test_v1i64_i64(i64 %p) {
+; CHECK-NOT: rev
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+}
+
+; CHECK-LABEL: test_v1i64_f64:
+define <1 x i64> @test_v1i64_f64(double %p) {
+; CHECK-NOT: rev
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+}
+
+; CHECK-LABEL: test_v1i64_v2f32:
+define <1 x i64> @test_v1i64_v2f32(<2 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+}
+
+; CHECK-LABEL: test_v1i64_v2i32:
+define <1 x i64> @test_v1i64_v2i32(<2 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+}
+
+; CHECK-LABEL: test_v1i64_v4i16:
+define <1 x i64> @test_v1i64_v4i16(<4 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+}
+
+; CHECK-LABEL: test_v1i64_v8i8:
+define <1 x i64> @test_v1i64_v8i8(<8 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+}
+
+; CHECK-LABEL: test_v2f32_i64:
+define <2 x float> @test_v2f32_i64(i64 %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+}
+
+; CHECK-LABEL: test_v2f32_f64:
+define <2 x float> @test_v2f32_f64(double %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+}
+
+; CHECK-LABEL: test_v2f32_v1i64:
+define <2 x float> @test_v2f32_v1i64(<1 x i64> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+}
+
+; CHECK-LABEL: test_v2f32_v2i32:
+define <2 x float> @test_v2f32_v2i32(<2 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+}
+
+; CHECK-LABEL: test_v2f32_v4i16:
+define <2 x float> @test_v2f32_v4i16(<4 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+}
+
+; CHECK-LABEL: test_v2f32_v8i8:
+define <2 x float> @test_v2f32_v8i8(<8 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+}
+
+; CHECK-LABEL: test_v2i32_i64:
+define <2 x i32> @test_v2i32_i64(i64 %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+}
+
+; CHECK-LABEL: test_v2i32_f64:
+define <2 x i32> @test_v2i32_f64(double %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+}
+
+; CHECK-LABEL: test_v2i32_v1i64:
+define <2 x i32> @test_v2i32_v1i64(<1 x i64> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+}
+
+; CHECK-LABEL: test_v2i32_v2f32:
+define <2 x i32> @test_v2i32_v2f32(<2 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+}
+
+; CHECK-LABEL: test_v2i32_v4i16:
+define <2 x i32> @test_v2i32_v4i16(<4 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+}
+
+; CHECK-LABEL: test_v2i32_v8i8:
+define <2 x i32> @test_v2i32_v8i8(<8 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+}
+
+; CHECK-LABEL: test_v4i16_i64:
+define <4 x i16> @test_v4i16_i64(i64 %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+}
+
+; CHECK-LABEL: test_v4i16_f64:
+define <4 x i16> @test_v4i16_f64(double %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+}
+
+; CHECK-LABEL: test_v4i16_v1i64:
+define <4 x i16> @test_v4i16_v1i64(<1 x i64> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+}
+
+; CHECK-LABEL: test_v4i16_v2f32:
+define <4 x i16> @test_v4i16_v2f32(<2 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+}
+
+; CHECK-LABEL: test_v4i16_v2i32:
+define <4 x i16> @test_v4i16_v2i32(<2 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+}
+
+; CHECK-LABEL: test_v4i16_v8i8:
+define <4 x i16> @test_v4i16_v8i8(<8 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+}
+
+; CHECK-LABEL: test_v8i8_i64:
+define <8 x i8> @test_v8i8_i64(i64 %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+}
+
+; CHECK-LABEL: test_v8i8_f64:
+define <8 x i8> @test_v8i8_f64(double %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+}
+
+; CHECK-LABEL: test_v8i8_v1i64:
+define <8 x i8> @test_v8i8_v1i64(<1 x i64> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+}
+
+; CHECK-LABEL: test_v8i8_v2f32:
+define <8 x i8> @test_v8i8_v2f32(<2 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+}
+
+; CHECK-LABEL: test_v8i8_v2i32:
+define <8 x i8> @test_v8i8_v2i32(<2 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+}
+
+; CHECK-LABEL: test_v8i8_v4i16:
+define <8 x i8> @test_v8i8_v4i16(<4 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+}
+
+; CHECK-LABEL: test_f128_v2f64:
+define fp128 @test_f128_v2f64(<2 x double> %p) {
+; CHECK: ext
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+}
+
+; CHECK-LABEL: test_f128_v2i64:
+define fp128 @test_f128_v2i64(<2 x i64> %p) {
+; CHECK: ext
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+}
+
+; CHECK-LABEL: test_f128_v4f32:
+define fp128 @test_f128_v4f32(<4 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+}
+
+; CHECK-LABEL: test_f128_v4i32:
+define fp128 @test_f128_v4i32(<4 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+}
+
+; CHECK-LABEL: test_f128_v8i16:
+define fp128 @test_f128_v8i16(<8 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+}
+
+; CHECK-LABEL: test_f128_v16i8:
+define fp128 @test_f128_v16i8(<16 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+}
+
+; CHECK-LABEL: test_v2f64_f128:
+define <2 x double> @test_v2f64_f128(fp128 %p) {
+; CHECK: ext
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+}
+
+; CHECK-LABEL: test_v2f64_v2i64:
+define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) {
+; CHECK: ext
+; CHECK: ext
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+}
+
+; CHECK-LABEL: test_v2f64_v4f32:
+define <2 x double> @test_v2f64_v4f32(<4 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+}
+
+; CHECK-LABEL: test_v2f64_v4i32:
+define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+}
+
+; CHECK-LABEL: test_v2f64_v8i16:
+define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: ext
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+}
+
+; CHECK-LABEL: test_v2f64_v16i8:
+define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: ext
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+}
+
+; CHECK-LABEL: test_v2i64_f128:
+define <2 x i64> @test_v2i64_f128(fp128 %p) {
+; CHECK: ext
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+}
+
+; CHECK-LABEL: test_v2i64_v2f64:
+define <2 x i64> @test_v2i64_v2f64(<2 x double> %p) {
+; CHECK: ext
+; CHECK: ext
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+}
+
+; CHECK-LABEL: test_v2i64_v4f32:
+define <2 x i64> @test_v2i64_v4f32(<4 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+}
+
+; CHECK-LABEL: test_v2i64_v4i32:
+define <2 x i64> @test_v2i64_v4i32(<4 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+}
+
+; CHECK-LABEL: test_v2i64_v8i16:
+define <2 x i64> @test_v2i64_v8i16(<8 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: ext
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+}
+
+; CHECK-LABEL: test_v2i64_v16i8:
+define <2 x i64> @test_v2i64_v16i8(<16 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: ext
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+}
+
+; CHECK-LABEL: test_v4f32_f128:
+define <4 x float> @test_v4f32_f128(fp128 %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+}
+
+; CHECK-LABEL: test_v4f32_v2f64:
+define <4 x float> @test_v4f32_v2f64(<2 x double> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+}
+
+; CHECK-LABEL: test_v4f32_v2i64:
+define <4 x float> @test_v4f32_v2i64(<2 x i64> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+}
+
+; CHECK-LABEL: test_v4f32_v4i32:
+define <4 x float> @test_v4f32_v4i32(<4 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+}
+
+; CHECK-LABEL: test_v4f32_v8i16:
+define <4 x float> @test_v4f32_v8i16(<8 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+}
+
+; CHECK-LABEL: test_v4f32_v16i8:
+define <4 x float> @test_v4f32_v16i8(<16 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+}
+
+; CHECK-LABEL: test_v4i32_f128:
+define <4 x i32> @test_v4i32_f128(fp128 %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+}
+
+; CHECK-LABEL: test_v4i32_v2f64:
+define <4 x i32> @test_v4i32_v2f64(<2 x double> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+}
+
+; CHECK-LABEL: test_v4i32_v2i64:
+define <4 x i32> @test_v4i32_v2i64(<2 x i64> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+}
+
+; CHECK-LABEL: test_v4i32_v4f32:
+define <4 x i32> @test_v4i32_v4f32(<4 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+}
+
+; CHECK-LABEL: test_v4i32_v8i16:
+define <4 x i32> @test_v4i32_v8i16(<8 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+}
+
+; CHECK-LABEL: test_v4i32_v16i8:
+define <4 x i32> @test_v4i32_v16i8(<16 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+}
+
+; CHECK-LABEL: test_v8i16_f128:
+define <8 x i16> @test_v8i16_f128(fp128 %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; CHECK-LABEL: test_v8i16_v2f64:
+define <8 x i16> @test_v8i16_v2f64(<2 x double> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; CHECK-LABEL: test_v8i16_v2i64:
+define <8 x i16> @test_v8i16_v2i64(<2 x i64> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; CHECK-LABEL: test_v8i16_v4f32:
+define <8 x i16> @test_v8i16_v4f32(<4 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; CHECK-LABEL: test_v8i16_v4i32:
+define <8 x i16> @test_v8i16_v4i32(<4 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; CHECK-LABEL: test_v8i16_v16i8:
+define <8 x i16> @test_v8i16_v16i8(<16 x i8> %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; CHECK-LABEL: test_v16i8_f128:
+define <16 x i8> @test_v16i8_f128(fp128 %p) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+}
+
+; CHECK-LABEL: test_v16i8_v2f64:
+define <16 x i8> @test_v16i8_v2f64(<2 x double> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+}
+
+; CHECK-LABEL: test_v16i8_v2i64:
+define <16 x i8> @test_v16i8_v2i64(<2 x i64> %p) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+}
+
+; CHECK-LABEL: test_v16i8_v4f32:
+define <16 x i8> @test_v16i8_v4f32(<4 x float> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+}
+
+; CHECK-LABEL: test_v16i8_v4i32:
+define <16 x i8> @test_v16i8_v4i32(<4 x i32> %p) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+}
+
+; CHECK-LABEL: test_v16i8_v8i16:
+define <16 x i8> @test_v16i8_v8i16(<8 x i16> %p) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+}
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
new file mode 100644
index 0000000..9a12b7a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
@@ -0,0 +1,1100 @@
+; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
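+;
+; Note: this is the caller-side counterpart of
+; arm64-big-endian-vector-callee.ll. Each test loads a value, passes it to an
+; external helper and stores the result, so the REV/EXT checks cover lane
+; reordering both for the outgoing argument and for the returned value.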
+
+; CHECK-LABEL: test_i64_f64:
+declare i64 @test_i64_f64_helper(double %p)
+define void @test_i64_f64(double* %p, i64* %q) {
+; CHECK-NOT: rev
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call i64 @test_i64_f64_helper(double %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v1i64:
+declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
+define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
+; CHECK-NOT: rev
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v2f32:
+declare i64 @test_i64_v2f32_helper(<2 x float> %p)
+define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v2i32:
+declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
+define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v4i16:
+declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
+define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_i64_v8i8:
+declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
+define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_i64:
+declare double @test_f64_i64_helper(i64 %p)
+define void @test_f64_i64(i64* %p, double* %q) {
+; CHECK-NOT: rev
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call double @test_f64_i64_helper(i64 %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v1i64:
+declare double @test_f64_v1i64_helper(<1 x i64> %p)
+define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
+; CHECK-NOT: rev
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v2f32:
+declare double @test_f64_v2f32_helper(<2 x float> %p)
+define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v2i32:
+declare double @test_f64_v2i32_helper(<2 x i32> %p)
+define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v4i16:
+declare double @test_f64_v4i16_helper(<4 x i16> %p)
+define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f64_v8i8:
+declare double @test_f64_v8i8_helper(<8 x i8> %p)
+define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_i64:
+declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
+define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
+; CHECK-NOT: rev
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_f64:
+declare <1 x i64> @test_v1i64_f64_helper(double %p)
+define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
+; CHECK-NOT: rev
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v2f32:
+declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
+define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v2i32:
+declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
+define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v4i16:
+declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
+define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v1i64_v8i8:
+declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
+define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_i64:
+declare <2 x float> @test_v2f32_i64_helper(i64 %p)
+define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_f64:
+declare <2 x float> @test_v2f32_f64_helper(double %p)
+define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v1i64:
+declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
+define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v2i32:
+declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
+define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v4i16:
+declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
+define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f32_v8i8:
+declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
+define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_i64:
+declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
+define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_f64:
+declare <2 x i32> @test_v2i32_f64_helper(double %p)
+define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v1i64:
+declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
+define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v2f32:
+declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
+define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v4i16:
+declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
+define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i32_v8i8:
+declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
+define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: rev64 v{{[0-9]+}}.2s
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_i64:
+declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
+define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_f64:
+declare <4 x i16> @test_v4i16_f64_helper(double %p)
+define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v1i64:
+declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
+define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v2f32:
+declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
+define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v2i32:
+declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
+define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i16_v8i8:
+declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
+define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+; CHECK: rev64 v{{[0-9]+}}.4h
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_i64:
+declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
+define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_f64:
+declare <8 x i8> @test_v8i8_f64_helper(double %p)
+define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v1i64:
+declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
+define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v2f32:
+declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
+define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v2i32:
+declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
+define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.2s
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i8_v4i16:
+declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
+define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4h
+; CHECK: rev64 v{{[0-9]+}}.8b
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v2f64:
+declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
+define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
+; CHECK: ext
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v2i64:
+declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
+define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
+; CHECK: ext
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v4f32:
+declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
+define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v4i32:
+declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
+define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v8i16:
+declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
+define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_f128_v16i8:
+declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
+define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_f128:
+declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
+define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
+; CHECK: ext
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v2i64:
+declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
+define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
+; CHECK: ext
+; CHECK: ext
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v4f32:
+declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
+define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v4i32:
+declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
+define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v8i16:
+declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
+define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: ext
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2f64_v16i8:
+declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
+define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: ext
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_f128:
+declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
+define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
+; CHECK: ext
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v2f64:
+declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
+define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
+; CHECK: ext
+; CHECK: ext
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v4f32:
+declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
+define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v4i32:
+declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
+define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: ext
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v8i16:
+declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
+define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: ext
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v2i64_v16i8:
+declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
+define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: ext
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_f128:
+declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
+define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v2f64:
+declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
+define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v2i64:
+declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
+define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v4i32:
+declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
+define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v8i16:
+declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
+define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4f32_v16i8:
+declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
+define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_f128:
+declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
+define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v2f64:
+declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
+define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v2i64:
+declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
+define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v4f32:
+declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
+define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v8i16:
+declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
+define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v4i32_v16i8:
+declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
+define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_f128:
+declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
+define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v2f64:
+declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
+define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v2i64:
+declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
+define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v4f32:
+declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
+define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v4i32:
+declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
+define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v8i16_v16i8:
+declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
+define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_f128:
+declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
+define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v2f64:
+declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
+define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v2i64:
+declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
+define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v4f32:
+declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
+define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v4i32:
+declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
+define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.4s
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
+
+; CHECK-LABEL: test_v16i8_v8i16:
+declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
+define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
+; CHECK: rev64 v{{[0-9]+}}.8h
+; CHECK: ext
+; CHECK: rev64 v{{[0-9]+}}.16b
+; CHECK: ext
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+}
diff --git a/test/CodeGen/ARM64/big-imm-offsets.ll b/test/CodeGen/AArch64/arm64-big-imm-offsets.ll
index a56df07..a56df07 100644
--- a/test/CodeGen/ARM64/big-imm-offsets.ll
+++ b/test/CodeGen/AArch64/arm64-big-imm-offsets.ll
diff --git a/test/CodeGen/AArch64/arm64-big-stack.ll b/test/CodeGen/AArch64/arm64-big-stack.ll
new file mode 100644
index 0000000..3f91bb3c2
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-big-stack.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s | FileCheck %s
+target triple = "arm64-apple-macosx10"
+
+; Check that big stacks are generated correctly.
+; Currently, this is done by a sequence of sub instructions,
+; which can encode an immediate with a 12-bit mask and optionally
+; shift it left (by up to 12). I.e., 16773120 is the biggest value.
+; <rdar://12513931>
+; CHECK-LABEL: foo:
+; CHECK: sub sp, sp, #4095, lsl #12
+; CHECK: sub sp, sp, #4095, lsl #12
+; CHECK: sub sp, sp, #2, lsl #12
+define void @foo() nounwind ssp {
+entry:
+ %buffer = alloca [33554432 x i8], align 1
+ %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0, i64 0
+ call void @doit(i8* %arraydecay) nounwind
+ ret void
+}
+
+declare void @doit(i8*)
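A quick arithmetic check of the decomposition the CHECK lines expect (a C sketch, not part of the test suite): two maximal chunks of 4095 << 12 plus a 2 << 12 remainder cover the 32 MiB buffer exactly.

#include <assert.h>
#include <stdio.h>

int main(void) {
    /* Each "sub sp, sp, #imm, lsl #12" subtracts at most 4095 << 12 bytes. */
    long long max_chunk = 4095LL << 12;                /* 16773120 */
    long long buffer    = 33554432;                    /* the [33554432 x i8] alloca */
    long long covered   = 2 * max_chunk + (2LL << 12);
    printf("covered=%lld buffer=%lld\n", covered, buffer);
    assert(covered == buffer);                         /* exact decomposition */
    return 0;
}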
diff --git a/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
new file mode 100644
index 0000000..112efdd
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -0,0 +1,532 @@
+; RUN: opt -codegenprepare -mtriple=arm64-apple-ios -S -o - %s | FileCheck --check-prefix=OPT %s
+; RUN: llc < %s -march=arm64 | FileCheck %s
+%struct.X = type { i8, i8, [2 x i8] }
+%struct.Y = type { i32, i8 }
+%struct.Z = type { i8, i8, [2 x i8], i16 }
+%struct.A = type { i64, i8 }
+
+define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp {
+; CHECK-LABEL: foo:
+; CHECK: ubfx
+; CHECK-NOT: and
+; CHECK: ret
+
+ %tmp = bitcast %struct.X* %x to i32*
+ %tmp1 = load i32* %tmp, align 4
+ %b = getelementptr inbounds %struct.Y* %y, i64 0, i32 1
+ %bf.clear = lshr i32 %tmp1, 3
+ %bf.clear.lobit = and i32 %bf.clear, 1
+ %frombool = trunc i32 %bf.clear.lobit to i8
+ store i8 %frombool, i8* %b, align 1
+ ret void
+}
+
+define i32 @baz(i64 %cav1.coerce) nounwind {
+; CHECK-LABEL: baz:
+; CHECK: sbfx w0, w0, #0, #4
+ %tmp = trunc i64 %cav1.coerce to i32
+ %tmp1 = shl i32 %tmp, 28
+ %bf.val.sext = ashr exact i32 %tmp1, 28
+ ret i32 %bf.val.sext
+}
+
+define i32 @bar(i64 %cav1.coerce) nounwind {
+; CHECK-LABEL: bar:
+; CHECK: sbfx w0, w0, #4, #6
+ %tmp = trunc i64 %cav1.coerce to i32
+ %cav1.sroa.0.1.insert = shl i32 %tmp, 22
+ %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26
+ ret i32 %tmp1
+}
+
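The shl/ashr pair in @bar is the classic signed bitfield extract idiom; assuming the usual arithmetic right shift for signed types, a C sketch of the same computation (hypothetical helper name):

#include <stdint.h>

/* Move bits [4..9] to the top, then arithmetic-shift them back down:
 * a 6-bit field at bit 4, sign-extended, i.e. sbfx w0, w0, #4, #6.
 * The left shift is done on an unsigned copy to avoid signed-overflow UB. */
int32_t sbfx_4_6(int64_t cav1) {
    int32_t t = (int32_t)cav1;
    return (int32_t)((uint32_t)t << 22) >> 26;
}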
+define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
+; CHECK-LABEL: fct1:
+; CHECK: ubfx
+; CHECK-NOT: and
+; CHECK: ret
+
+ %tmp = bitcast %struct.Z* %x to i64*
+ %tmp1 = load i64* %tmp, align 4
+ %b = getelementptr inbounds %struct.A* %y, i64 0, i32 0
+ %bf.clear = lshr i64 %tmp1, 3
+ %bf.clear.lobit = and i64 %bf.clear, 1
+ store i64 %bf.clear.lobit, i64* %b, align 8
+ ret void
+}
+
+define i64 @fct2(i64 %cav1.coerce) nounwind {
+; CHECK-LABEL: fct2:
+; CHECK: sbfx x0, x0, #0, #36
+ %tmp = shl i64 %cav1.coerce, 28
+ %bf.val.sext = ashr exact i64 %tmp, 28
+ ret i64 %bf.val.sext
+}
+
+define i64 @fct3(i64 %cav1.coerce) nounwind {
+; CHECK-LABEL: fct3:
+; CHECK: sbfx x0, x0, #4, #38
+ %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22
+ %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26
+ ret i64 %tmp1
+}
+
+define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct4:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], x1, #16, #24
+; CHECK-NEXT: str [[REG1]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, -16777216
+ %shr = lshr i64 %x, 16
+ %and1 = and i64 %shr, 16777215
+ %or = or i64 %and, %and1
+ store i64 %or, i64* %y, align 8
+ ret void
+}
+
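The and/lshr/and/or sequence in @fct4 is a read-modify-write bitfield insert, which is exactly what bfxil encodes; a C sketch with the same masks (hypothetical helper name):

#include <stdint.h>

/* Keep the high bits of *y and insert bits [16..39] of x into bits
 * [0..23]: bfxil reg, x1, #16, #24. */
void bfxil_16_24(uint64_t *y, uint64_t x) {
    uint64_t kept     = *y & ~(uint64_t)0xFFFFFF;  /* and %0, -16777216      */
    uint64_t inserted = (x >> 16) & 0xFFFFFF;      /* lshr 16, and 16777215  */
    *y = kept | inserted;
}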
+define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct5:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
+; CHECK-NEXT: str [[REG1]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, -8
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ store i32 %or, i32* %y, align 8
+ ret void
+}
+
+; Check if we can still catch bfm instruction when we drop some low bits
+define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct6:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
+; lsr is an alias of ubfm
+; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, -8
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ %shr1 = lshr i32 %or, 2
+ store i32 %shr1, i32* %y, align 8
+ ret void
+}
+
+
+; Check if we can still catch bfm instruction when we drop some high bits
+define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct7:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
+; lsl is an alias of ubfm
+; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, -8
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ %shl = shl i32 %or, 2
+ store i32 %shl, i32* %y, align 8
+ ret void
+}
+
+
+; Check if we can still catch bfm instruction when we drop some low bits
+; (i64 version)
+define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct8:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
+; lsr is an alias of ubfm
+; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, -8
+ %shr = lshr i64 %x, 16
+ %and1 = and i64 %shr, 7
+ %or = or i64 %and, %and1
+ %shr1 = lshr i64 %or, 2
+ store i64 %shr1, i64* %y, align 8
+ ret void
+}
+
+
+; Check if we can still catch bfm instruction when we drop some high bits
+; (i64 version)
+define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct9:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
+; lsl is an alias of ubfm
+; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, -8
+ %shr = lshr i64 %x, 16
+ %and1 = and i64 %shr, 7
+ %or = or i64 %and, %and1
+ %shl = shl i64 %or, 2
+ store i64 %shl, i64* %y, align 8
+ ret void
+}
+
+; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
+; (i32 version)
+define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct10:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], w1, #0, #3
+; lsl is an alias of ubfm
+; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, -8
+ %and1 = and i32 %x, 7
+ %or = or i32 %and, %and1
+ %shl = shl i32 %or, 2
+ store i32 %shl, i32* %y, align 8
+ ret void
+}
+
+; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr)
+; (i64 version)
+define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct11:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], x1, #0, #3
+; lsl is an alias of ubfm
+; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, -8
+ %and1 = and i64 %x, 7
+ %or = or i64 %and, %and1
+ %shl = shl i64 %or, 2
+ store i64 %shl, i64* %y, align 8
+ ret void
+}
+
+define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 {
+; CHECK-LABEL: fct12bis:
+; CHECK-NOT: and
+; CHECK: ubfx w0, w0, #11, #1
+ %and.i.i = and i32 %tmp2, 2048
+ %tobool.i.i = icmp ne i32 %and.i.i, 0
+ ret i1 %tobool.i.i
+}
+
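@fct12bis is the single-bit case: a mask-and-compare of bit 11 (2048 == 1 << 11) folds to one ubfx of width 1. In C (sketch):

#include <stdbool.h>

/* (tmp2 & (1 << 11)) != 0 becomes ubfx w0, w0, #11, #1; no "and" is needed. */
bool bit11_set(int tmp2) {
    return (tmp2 & 2048) != 0;
}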
+; Check if we can still catch bfm instruction when we drop some high bits
+; and some low bits
+define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct12:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
+; ubfx is an alias of ubfm
+; CHECK-NEXT: ubfx [[REG2:w[0-9]+]], [[REG1]], #2, #28
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, -8
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ %shl = shl i32 %or, 2
+ %shr2 = lshr i32 %shl, 4
+ store i32 %shr2, i32* %y, align 8
+ ret void
+}
+
+; Check if we can still catch bfm instruction when we drop some high bits
+; and some low bits
+; (i64 version)
+define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct13:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], x1, #16, #3
+; ubfx is an alias of ubfm
+; CHECK-NEXT: ubfx [[REG2:x[0-9]+]], [[REG1]], #2, #60
+; CHECK-NEXT: str [[REG2]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, -8
+ %shr = lshr i64 %x, 16
+ %and1 = and i64 %shr, 7
+ %or = or i64 %and, %and1
+ %shl = shl i64 %or, 2
+ %shr2 = lshr i64 %shl, 4
+ store i64 %shr2, i64* %y, align 8
+ ret void
+}
+
+
+; Check if we can still catch bfm instruction when we drop some high bits
+; and some low bits
+define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct14:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], w1, #16, #8
+; lsr is an alias of ubfm
+; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4
+; CHECK-NEXT: bfxil [[REG2]], w2, #5, #3
+; lsl is an alias of ubfm
+; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
+; CHECK-NEXT: str [[REG3]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, -256
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 255
+ %or = or i32 %and, %and1
+ %shl = lshr i32 %or, 4
+ %and2 = and i32 %shl, -8
+ %shr1 = lshr i32 %x1, 5
+ %and3 = and i32 %shr1, 7
+ %or1 = or i32 %and2, %and3
+ %shl1 = shl i32 %or1, 2
+ store i32 %shl1, i32* %y, align 8
+ ret void
+}
+
+; Check if we can still catch bfm instruction when we drop some high bits
+; and some low bits
+; (i64 version)
+define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct15:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; CHECK-NEXT: bfxil [[REG1]], x1, #16, #8
+; lsr is an alias of ubfm
+; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4
+; CHECK-NEXT: bfxil [[REG2]], x2, #5, #3
+; lsl is an alias of ubfm
+; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
+; CHECK-NEXT: str [[REG3]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, -256
+ %shr = lshr i64 %x, 16
+ %and1 = and i64 %shr, 255
+ %or = or i64 %and, %and1
+ %shl = lshr i64 %or, 4
+ %and2 = and i64 %shl, -8
+ %shr1 = lshr i64 %x1, 5
+ %and3 = and i64 %shr1, 7
+ %or1 = or i64 %and2, %and3
+ %shl1 = shl i64 %or1, 2
+ store i64 %shl1, i64* %y, align 8
+ ret void
+}
+
+; Check if we can still catch bfm instruction when we drop some high bits
+; and some low bits and a masking operation has to be kept
+define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct16:
+; CHECK: ldr [[REG1:w[0-9]+]],
+; Create the constant
+; CHECK: movz [[REGCST:w[0-9]+]], #0x1a, lsl #16
+; CHECK: movk [[REGCST]], #0x8160
+; Do the masking
+; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
+; CHECK-NEXT: bfxil [[REG2]], w1, #16, #3
+; ubfx is an alias of ubfm
+; CHECK-NEXT: ubfx [[REG3:w[0-9]+]], [[REG2]], #2, #28
+; CHECK-NEXT: str [[REG3]],
+; CHECK-NEXT: ret
+ %0 = load i32* %y, align 8
+ %and = and i32 %0, 1737056
+ %shr = lshr i32 %x, 16
+ %and1 = and i32 %shr, 7
+ %or = or i32 %and, %and1
+ %shl = shl i32 %or, 2
+ %shr2 = lshr i32 %shl, 4
+ store i32 %shr2, i32* %y, align 8
+ ret void
+}
+
+
+; Check if we can still catch bfm instruction when we drop some high bits
+; and some low bits and a masking operation has to be kept
+; (i64 version)
+define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
+entry:
+; CHECK-LABEL: fct17:
+; CHECK: ldr [[REG1:x[0-9]+]],
+; Create the constant
+; CHECK: movz w[[REGCST:[0-9]+]], #0x1a, lsl #16
+; CHECK: movk w[[REGCST]], #0x8160
+; Do the masking
+; CHECK: and [[REG2:x[0-9]+]], [[REG1]], x[[REGCST]]
+; CHECK-NEXT: bfxil [[REG2]], x1, #16, #3
+; ubfx is an alias of ubfm
+; CHECK-NEXT: ubfx [[REG3:x[0-9]+]], [[REG2]], #2, #60
+; CHECK-NEXT: str [[REG3]],
+; CHECK-NEXT: ret
+ %0 = load i64* %y, align 8
+ %and = and i64 %0, 1737056
+ %shr = lshr i64 %x, 16
+ %and1 = and i64 %shr, 7
+ %or = or i64 %and, %and1
+ %shl = shl i64 %or, 2
+ %shr2 = lshr i64 %shl, 4
+ store i64 %shr2, i64* %y, align 8
+ ret void
+}
+
+define i64 @fct18(i32 %xor72) nounwind ssp {
+; CHECK-LABEL: fct18:
+; CHECK: ubfx x0, x0, #9, #8
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %result = and i64 %conv82, 255
+ ret i64 %result
+}
+
+; The accesses to the global array keep the instructions and the control flow alive.
+@first_ones = external global [65536 x i8]
+
+; Function Attrs: nounwind readonly ssp
+define i32 @fct19(i64 %arg1) nounwind readonly ssp {
+; CHECK-LABEL: fct19:
+entry:
+ %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16
+ %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16
+ %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32
+ %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48
+ %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx3 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
+ %0 = load i8* %arrayidx3, align 1
+ %conv = zext i8 %0 to i32
+ br label %return
+
+; OPT-LABEL: if.end
+if.end: ; preds = %entry
+; OPT: lshr
+; CHECK: ubfx [[REG1:x[0-9]+]], [[REG2:x[0-9]+]], #32, #16
+ %x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16
+ %tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0
+; CHECK: cbz
+ br i1 %tobool6, label %if.end13, label %if.then7
+
+; OPT-LABEL: if.then7
+if.then7: ; preds = %if.end
+; OPT: lshr
+; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
+; So neither of them should be in the assemble code.
+; CHECK-NOT: and
+; CHECK-NOT: ubfm
+ %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
+ %arrayidx11 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
+ %1 = load i8* %arrayidx11, align 1
+ %conv12 = zext i8 %1 to i32
+ %add = add nsw i32 %conv12, 16
+ br label %return
+
+; OPT-LABEL: if.end13
+if.end13: ; preds = %if.end
+; OPT: lshr
+; OPT: trunc
+; CHECK: ubfx [[REG3:x[0-9]+]], [[REG4:x[0-9]+]], #16, #16
+ %tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0
+; CHECK: cbz
+ br i1 %tobool16, label %return, label %if.then17
+
+; OPT-LABEL: if.then17
+if.then17: ; preds = %if.end13
+; OPT: lshr
+; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
+; So neither of them should be in the assemble code.
+; CHECK-NOT: and
+; CHECK-NOT: ubfm
+ %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
+ %arrayidx21 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
+ %2 = load i8* %arrayidx21, align 1
+ %conv22 = zext i8 %2 to i32
+ %add23 = add nsw i32 %conv22, 32
+ br label %return
+
+return: ; preds = %if.end13, %if.then17, %if.then7, %if.then
+; CHECK: ret
+ %retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ]
+ ret i32 %retval.0
+}
+
+; Make sure we do not assert if the immediate in the "and" is wider than i64.
+; PR19503.
+; OPT-LABEL: @fct20
+; OPT: lshr
+; OPT-NOT: lshr
+; OPT: ret
+; CHECK-LABEL: fct20:
+; CHECK: ret
+define i80 @fct20(i128 %a, i128 %b) {
+entry:
+ %shr = lshr i128 %a, 18
+ %conv = trunc i128 %shr to i80
+ %tobool = icmp eq i128 %b, 0
+ br i1 %tobool, label %then, label %end
+then:
+ %and = and i128 %shr, 483673642326615442599424
+ %conv2 = trunc i128 %and to i80
+ br label %end
+end:
+ %conv3 = phi i80 [%conv, %entry], [%conv2, %then]
+ ret i80 %conv3
+}
+
+; Check if we can still catch UBFX when "AND" is used by SHL.
+; CHECK-LABEL: fct21:
+; CHECK: ubfx
+@arr = external global [8 x [64 x i64]]
+define i64 @fct21(i64 %x) {
+entry:
+ %shr = lshr i64 %x, 4
+ %and = and i64 %shr, 15
+ %arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
+ %0 = load i64* %arrayidx, align 8
+ ret i64 %0
+}
+
+define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
+; CHECK-LABEL: test_ignored_rightbits:
+
+ %positioned_field = shl i32 %in, 3
+ %positioned_masked_field = and i32 %positioned_field, 120
+ %masked_dst = and i32 %dst, 7
+ %insertion = or i32 %masked_dst, %positioned_masked_field
+; CHECK: {{bfm|bfi|bfxil}}
+
+ %shl16 = shl i32 %insertion, 8
+ %or18 = or i32 %shl16, %insertion
+ %conv19 = trunc i32 %or18 to i16
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #8, #7
+
+ ret i16 %conv19
+}
diff --git a/test/CodeGen/ARM64/blockaddress.ll b/test/CodeGen/AArch64/arm64-blockaddress.ll
index ac4f19e..ac4f19e 100644
--- a/test/CodeGen/ARM64/blockaddress.ll
+++ b/test/CodeGen/AArch64/arm64-blockaddress.ll
diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll
new file mode 100644
index 0000000..c109263
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; Check that a vector built up with only one non-zero lane is
+; initialized intelligently.
+define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind {
+; CHECK-LABEL: one_lane:
+; CHECK: dup.16b v[[REG:[0-9]+]], wzr
+; CHECK-NEXT: ins.b v[[REG]][0], w1
+; v and q are aliases, and str is preferred over st.16b when possible
+; rdar://11246289
+; CHECK: str q[[REG]], [x0]
+; CHECK: ret
+ %conv = trunc i32 %skip0 to i8
+ %vset_lane = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv, i32 0
+ %tmp = bitcast i32* %out_int to <4 x i32>*
+ %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32>
+ store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16
+ ret void
+}
+
+; Check that building a vector from floats doesn't insert an unnecessary
+; copy for lane zero.
+define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
+; CHECK-LABEL: foo:
+; CHECK-NOT: ins.s v0[0], v0[0]
+; CHECK: ins.s v0[1], v1[0]
+; CHECK: ins.s v0[2], v2[0]
+; CHECK: ins.s v0[3], v3[0]
+; CHECK: ret
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float %b, i32 1
+ %3 = insertelement <4 x float> %2, float %c, i32 2
+ %4 = insertelement <4 x float> %3, float %d, i32 3
+ ret <4 x float> %4
+}
diff --git a/test/CodeGen/ARM64/call-tailcalls.ll b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
index 487c1d9..487c1d9 100644
--- a/test/CodeGen/ARM64/call-tailcalls.ll
+++ b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
diff --git a/test/CodeGen/AArch64/arm64-cast-opt.ll b/test/CodeGen/AArch64/arm64-cast-opt.ll
new file mode 100644
index 0000000..65a871d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-cast-opt.ll
@@ -0,0 +1,31 @@
+; RUN: llc -O3 -march=arm64 -mtriple arm64-apple-ios5.0.0 < %s | FileCheck %s
+; <rdar://problem/15992732>
+; Zero truncation is not necessary when the values are already properly
+; extended.
+
+@block = common global i8* null, align 8
+
+define zeroext i8 @foo(i32 %i1, i32 %i2) {
+; CHECK-LABEL: foo:
+; CHECK: cset
+; CHECK-NOT: and
+entry:
+ %idxprom = sext i32 %i1 to i64
+ %0 = load i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
+ %1 = load i8* %arrayidx, align 1
+ %idxprom1 = sext i32 %i2 to i64
+ %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
+ %2 = load i8* %arrayidx2, align 1
+ %cmp = icmp eq i8 %1, %2
+ br i1 %cmp, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %cmp7 = icmp ugt i8 %1, %2
+ %conv9 = zext i1 %cmp7 to i8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i8 [ %conv9, %if.then ], [ 1, %entry ]
+ ret i8 %retval.0
+}
diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
new file mode 100644
index 0000000..664a26c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -0,0 +1,190 @@
+; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp | FileCheck %s
+target triple = "arm64-apple-ios7.0.0"
+
+@channelColumns = external global i64
+@channelTracks = external global i64
+@mazeRoute = external hidden unnamed_addr global i8*, align 8
+@TOP = external global i64*
+@BOT = external global i64*
+@netsAssign = external global i64*
+
+; Function from yacr2/maze.c
+; The branch at the end of %if.then is driven by %cmp5 and %cmp6.
+; Isel converts the and i1 into two branches, and arm64-ccmp should not convert
+; it back again. %cmp6 has much higher latency than %cmp5.
+; CHECK: Maze1
+; CHECK: %if.then
+; CHECK: cmp x{{[0-9]+}}, #2
+; CHECK-NEXT: b.cc
+; CHECK: %if.then
+; CHECK: cmp x{{[0-9]+}}, #2
+; CHECK-NEXT: b.cc
+define i32 @Maze1() nounwind ssp {
+entry:
+ %0 = load i64* @channelColumns, align 8, !tbaa !0
+ %cmp90 = icmp eq i64 %0, 0
+ br i1 %cmp90, label %for.end, label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %1 = phi i64 [ %0, %entry ], [ %37, %for.inc ]
+ %i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ]
+ %numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
+ %2 = load i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx = getelementptr inbounds i8* %2, i64 %i.092
+ %3 = load i8* %arrayidx, align 1, !tbaa !1
+ %tobool = icmp eq i8 %3, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %4 = load i64** @TOP, align 8, !tbaa !3
+ %arrayidx1 = getelementptr inbounds i64* %4, i64 %i.092
+ %5 = load i64* %arrayidx1, align 8, !tbaa !0
+ %6 = load i64** @netsAssign, align 8, !tbaa !3
+ %arrayidx2 = getelementptr inbounds i64* %6, i64 %5
+ %7 = load i64* %arrayidx2, align 8, !tbaa !0
+ %8 = load i64** @BOT, align 8, !tbaa !3
+ %arrayidx3 = getelementptr inbounds i64* %8, i64 %i.092
+ %9 = load i64* %arrayidx3, align 8, !tbaa !0
+ %arrayidx4 = getelementptr inbounds i64* %6, i64 %9
+ %10 = load i64* %arrayidx4, align 8, !tbaa !0
+ %cmp5 = icmp ugt i64 %i.092, 1
+ %cmp6 = icmp ugt i64 %10, 1
+ %or.cond = and i1 %cmp5, %cmp6
+ br i1 %or.cond, label %land.lhs.true7, label %if.else
+
+land.lhs.true7: ; preds = %if.then
+ %11 = load i64* @channelTracks, align 8, !tbaa !0
+ %add = add i64 %11, 1
+ %call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64 %10, i64 0, i64 %7, i32 -1, i32 -1)
+ %tobool8 = icmp eq i32 %call, 0
+ br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label %if.then9
+
+land.lhs.true7.if.else_crit_edge: ; preds = %land.lhs.true7
+ %.pre = load i64* @channelColumns, align 8, !tbaa !0
+ br label %if.else
+
+if.then9: ; preds = %land.lhs.true7
+ %12 = load i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx10 = getelementptr inbounds i8* %12, i64 %i.092
+ store i8 0, i8* %arrayidx10, align 1, !tbaa !1
+ %13 = load i64** @TOP, align 8, !tbaa !3
+ %arrayidx11 = getelementptr inbounds i64* %13, i64 %i.092
+ %14 = load i64* %arrayidx11, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %14)
+ %15 = load i64** @BOT, align 8, !tbaa !3
+ %arrayidx12 = getelementptr inbounds i64* %15, i64 %i.092
+ %16 = load i64* %arrayidx12, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %16)
+ br label %for.inc
+
+if.else: ; preds = %land.lhs.true7.if.else_crit_edge, %if.then
+ %17 = phi i64 [ %.pre, %land.lhs.true7.if.else_crit_edge ], [ %1, %if.then ]
+ %cmp13 = icmp ult i64 %i.092, %17
+ %or.cond89 = and i1 %cmp13, %cmp6
+ br i1 %or.cond89, label %land.lhs.true16, label %if.else24
+
+land.lhs.true16: ; preds = %if.else
+ %18 = load i64* @channelTracks, align 8, !tbaa !0
+ %add17 = add i64 %18, 1
+ %call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64 %10, i64 0, i64 %7, i32 1, i32 -1)
+ %tobool19 = icmp eq i32 %call18, 0
+ br i1 %tobool19, label %if.else24, label %if.then20
+
+if.then20: ; preds = %land.lhs.true16
+ %19 = load i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx21 = getelementptr inbounds i8* %19, i64 %i.092
+ store i8 0, i8* %arrayidx21, align 1, !tbaa !1
+ %20 = load i64** @TOP, align 8, !tbaa !3
+ %arrayidx22 = getelementptr inbounds i64* %20, i64 %i.092
+ %21 = load i64* %arrayidx22, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %21)
+ %22 = load i64** @BOT, align 8, !tbaa !3
+ %arrayidx23 = getelementptr inbounds i64* %22, i64 %i.092
+ %23 = load i64* %arrayidx23, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %23)
+ br label %for.inc
+
+if.else24: ; preds = %land.lhs.true16, %if.else
+ br i1 %cmp5, label %land.lhs.true26, label %if.else36
+
+land.lhs.true26: ; preds = %if.else24
+ %24 = load i64* @channelTracks, align 8, !tbaa !0
+ %cmp27 = icmp ult i64 %7, %24
+ br i1 %cmp27, label %land.lhs.true28, label %if.else36
+
+land.lhs.true28: ; preds = %land.lhs.true26
+ %add29 = add i64 %24, 1
+ %call30 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add29, i64 %10, i32 -1, i32 1)
+ %tobool31 = icmp eq i32 %call30, 0
+ br i1 %tobool31, label %if.else36, label %if.then32
+
+if.then32: ; preds = %land.lhs.true28
+ %25 = load i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx33 = getelementptr inbounds i8* %25, i64 %i.092
+ store i8 0, i8* %arrayidx33, align 1, !tbaa !1
+ %26 = load i64** @TOP, align 8, !tbaa !3
+ %arrayidx34 = getelementptr inbounds i64* %26, i64 %i.092
+ %27 = load i64* %arrayidx34, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %27)
+ %28 = load i64** @BOT, align 8, !tbaa !3
+ %arrayidx35 = getelementptr inbounds i64* %28, i64 %i.092
+ %29 = load i64* %arrayidx35, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %29)
+ br label %for.inc
+
+if.else36: ; preds = %land.lhs.true28, %land.lhs.true26, %if.else24
+ %30 = load i64* @channelColumns, align 8, !tbaa !0
+ %cmp37 = icmp ult i64 %i.092, %30
+ br i1 %cmp37, label %land.lhs.true38, label %if.else48
+
+land.lhs.true38: ; preds = %if.else36
+ %31 = load i64* @channelTracks, align 8, !tbaa !0
+ %cmp39 = icmp ult i64 %7, %31
+ br i1 %cmp39, label %land.lhs.true40, label %if.else48
+
+land.lhs.true40: ; preds = %land.lhs.true38
+ %add41 = add i64 %31, 1
+ %call42 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add41, i64 %10, i32 1, i32 1)
+ %tobool43 = icmp eq i32 %call42, 0
+ br i1 %tobool43, label %if.else48, label %if.then44
+
+if.then44: ; preds = %land.lhs.true40
+ %32 = load i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx45 = getelementptr inbounds i8* %32, i64 %i.092
+ store i8 0, i8* %arrayidx45, align 1, !tbaa !1
+ %33 = load i64** @TOP, align 8, !tbaa !3
+ %arrayidx46 = getelementptr inbounds i64* %33, i64 %i.092
+ %34 = load i64* %arrayidx46, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %34)
+ %35 = load i64** @BOT, align 8, !tbaa !3
+ %arrayidx47 = getelementptr inbounds i64* %35, i64 %i.092
+ %36 = load i64* %arrayidx47, align 8, !tbaa !0
+ tail call fastcc void @CleanNet(i64 %36)
+ br label %for.inc
+
+if.else48: ; preds = %land.lhs.true40, %land.lhs.true38, %if.else36
+ %inc = add nsw i32 %numLeft.091, 1
+ br label %for.inc
+
+for.inc: ; preds = %if.else48, %if.then44, %if.then32, %if.then20, %if.then9, %for.body
+ %numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091, %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [ %inc, %if.else48 ], [ %numLeft.091, %for.body ]
+ %inc53 = add i64 %i.092, 1
+ %37 = load i64* @channelColumns, align 8, !tbaa !0
+ %cmp = icmp ugt i64 %inc53, %37
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ %numLeft.0.lcssa = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
+ ret i32 %numLeft.0.lcssa
+}
+
+; Materializable
+declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32) nounwind ssp
+
+; Materializable
+declare hidden fastcc void @CleanNet(i64) nounwind ssp
+
+!0 = metadata !{metadata !"long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
new file mode 100644
index 0000000..63965f9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -0,0 +1,289 @@
+; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -aarch64-ccmp -aarch64-stress-ccmp | FileCheck %s
+target triple = "arm64-apple-ios"
+
+; CHECK: single_same
+; CHECK: cmp w0, #5
+; CHECK-NEXT: ccmp w1, #17, #4, ne
+; CHECK-NEXT: b.ne
+; CHECK: %if.then
+; CHECK: bl _foo
+; CHECK: %if.end
+define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 5
+ %cmp1 = icmp eq i32 %b, 17
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
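For context on the checked sequence: ccmp w1, #17, #4, ne performs the second compare only when the first one failed (ne); otherwise it sets the flags directly to the immediate nzcv value #4 (Z set, i.e. "equal"). The final b.ne is therefore taken exactly when both compares failed. A C sketch of the source shape that produces this (@foo is the external call from the test):

extern int foo(void);

/* a == 5 || b == 17 compiles to cmp + ccmp + one branch instead of two
 * separate compare-and-branch sequences. */
int single_same_c(int a, int b) {
    if (a == 5 || b == 17)
        foo();
    return 7;
}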
+; Different condition codes for the two compares.
+; CHECK: single_different
+; CHECK: cmp w0, #6
+; CHECK-NEXT: ccmp w1, #17, #0, ge
+; CHECK-NEXT: b.eq
+; CHECK: %if.then
+; CHECK: bl _foo
+; CHECK: %if.end
+define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp sle i32 %a, 5
+ %cmp1 = icmp ne i32 %b, 17
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
+; Second block clobbers the flags, can't convert (easily).
+; CHECK: single_flagclobber
+; CHECK: cmp
+; CHECK: b.eq
+; CHECK: cmp
+; CHECK: b.gt
+define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 5
+ br i1 %cmp, label %if.then, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %cmp1 = icmp slt i32 %b, 7
+ %mul = shl nsw i32 %b, 1
+ %add = add nsw i32 %b, 1
+ %cond = select i1 %cmp1, i32 %mul, i32 %add
+ %cmp2 = icmp slt i32 %cond, 17
+ br i1 %cmp2, label %if.then, label %if.end
+
+if.then: ; preds = %lor.lhs.false, %entry
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end: ; preds = %if.then, %lor.lhs.false
+ ret i32 7
+}
+
+; Second block clobbers the flags and ends with a tbz terminator.
+; CHECK: single_flagclobber_tbz
+; CHECK: cmp
+; CHECK: b.eq
+; CHECK: cmp
+; CHECK: tbz
+define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 5
+ br i1 %cmp, label %if.then, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %cmp1 = icmp slt i32 %b, 7
+ %mul = shl nsw i32 %b, 1
+ %add = add nsw i32 %b, 1
+ %cond = select i1 %cmp1, i32 %mul, i32 %add
+ %and = and i32 %cond, 8
+ %cmp2 = icmp ne i32 %and, 0
+ br i1 %cmp2, label %if.then, label %if.end
+
+if.then: ; preds = %lor.lhs.false, %entry
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end: ; preds = %if.then, %lor.lhs.false
+ ret i32 7
+}
+
+; Speculatively execute division by zero.
+; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
+; safe to speculate.
+; CHECK: speculate_division
+; CHECK-NOT: cmp
+; CHECK: sdiv
+; CHECK: cmp
+; CHECK-NEXT: ccmp
+define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:
+ %div = sdiv i32 %b, %a
+ %cmp1 = icmp slt i32 %div, 17
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
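As the comment above says, AArch64 sdiv/udiv return 0 for a zero divisor instead of trapping, so the division can be hoisted above its guard and the two conditions folded into a ccmp chain. The C source shape (a sketch):

extern int foo(void);

/* b / a is guarded by a > 0 in the source, but since sdiv cannot trap the
 * backend may execute it speculatively before the guard is known. */
int speculate_division_c(int a, int b) {
    if (a > 0 && b / a < 17)
        foo();
    return 7;
}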
+; Floating point compare.
+; CHECK: single_fcmp
+; CHECK: cmp
+; CHECK-NOT: b.
+; CHECK: fccmp {{.*}}, #8, ge
+; CHECK: b.lt
+define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:
+ %conv = sitofp i32 %a to float
+ %div = fdiv float %b, %conv
+ %cmp1 = fcmp oge float %div, 1.700000e+01
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
+; Chain multiple compares.
+; CHECK: multi_different
+; CHECK: cmp
+; CHECK: ccmp
+; CHECK: ccmp
+; CHECK: b.
+define void @multi_different(i32 %a, i32 %b, i32 %c) nounwind ssp {
+entry:
+ %cmp = icmp sgt i32 %a, %b
+ br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:
+ %div = sdiv i32 %b, %a
+ %cmp1 = icmp eq i32 %div, 5
+ %cmp4 = icmp sgt i32 %div, %c
+ %or.cond = and i1 %cmp1, %cmp4
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; Convert a cbz in the head block.
+; CHECK: cbz_head
+; CHECK: cmp w0, #0
+; CHECK: ccmp
+define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp ne i32 %b, 17
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
+; Check that the immediate operand is in range. The ccmp instruction encodes a
+; smaller range of immediates than subs/adds.
+; The ccmp immediates must be in the range 0-31.
+; CHECK: immediate_range
+; CHECK-NOT: ccmp
+define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 5
+ %cmp1 = icmp eq i32 %b, 32
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
+; Convert a cbz in the second block.
+; CHECK: cbz_second
+; CHECK: cmp w0, #0
+; CHECK: ccmp w1, #0, #0, ne
+; CHECK: b.eq
+define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp ne i32 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+
+; Convert a cbnz in the second block.
+; CHECK: cbnz_second
+; CHECK: cmp w0, #0
+; CHECK: ccmp w1, #0, #4, ne
+; CHECK: b.ne
+define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp eq i32 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ %call = tail call i32 @foo() nounwind
+ br label %if.end
+
+if.end:
+ ret i32 7
+}
+declare i32 @foo()
+
+%str1 = type { %str2 }
+%str2 = type { [24 x i8], i8*, i32, %str1*, i32, [4 x i8], %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, i8*, i8, i8*, %str1*, i8* }
+
+; Test case distilled from 126.gcc.
+; The phi in sw.bb.i.i gets multiple operands for the %entry predecessor.
+; CHECK: build_modify_expr
+define void @build_modify_expr() nounwind ssp {
+entry:
+ switch i32 undef, label %sw.bb.i.i [
+ i32 69, label %if.end85
+ i32 70, label %if.end85
+ i32 71, label %if.end85
+ i32 72, label %if.end85
+ i32 73, label %if.end85
+ i32 105, label %if.end85
+ i32 106, label %if.end85
+ ]
+
+if.end85:
+ ret void
+
+sw.bb.i.i:
+ %ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ]
+ %operands.i.i = getelementptr inbounds %str1* %ref.tr.i.i, i64 0, i32 0, i32 2
+ %arrayidx.i.i = bitcast i32* %operands.i.i to %str1**
+ %0 = load %str1** %arrayidx.i.i, align 8
+ %code1.i.i.phi.trans.insert = getelementptr inbounds %str1* %0, i64 0, i32 0, i32 0, i64 16
+ br label %sw.bb.i.i
+}
diff --git a/test/CodeGen/AArch64/arm64-clrsb.ll b/test/CodeGen/AArch64/arm64-clrsb.ll
new file mode 100644
index 0000000..042e52e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) #0
+declare i64 @llvm.ctlz.i64(i64, i1) #1
+
+; Function Attrs: nounwind ssp
+define i32 @clrsb32(i32 %x) #2 {
+entry:
+ %shr = ashr i32 %x, 31
+ %xor = xor i32 %shr, %x
+ %mul = shl i32 %xor, 1
+ %add = or i32 %mul, 1
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 false)
+
+ ret i32 %0
+; CHECK-LABEL: clrsb32
+; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]]
+}
+
+; Function Attrs: nounwind ssp
+define i64 @clrsb64(i64 %x) #3 {
+entry:
+ %shr = ashr i64 %x, 63
+ %xor = xor i64 %shr, %x
+ %mul = shl nsw i64 %xor, 1
+ %add = or i64 %mul, 1
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 false)
+
+ ret i64 %0
+; CHECK-LABEL: clrsb64
+; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]]
+}
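Both functions hand-expand "count leading redundant sign bits": x ^ (x >> 31) folds the sign bits away, and or-ing in a low 1 after the shift lets ctlz return the clrsb count directly. A C sketch of the builtin form and the manual expansion (GCC/Clang builtins; assumes the usual arithmetic right shift for signed types):

/* AArch64 lowers this to a single cls instruction. */
int clrsb32_builtin(int x) {
    return __builtin_clrsb(x);
}

/* Manual expansion matching the IR above: clz((u << 1) | 1) with
 * u = x ^ (x >> 31). For x == 0 this yields 31, as expected. */
int clrsb32_manual(int x) {
    unsigned u = (unsigned)(x ^ (x >> 31));
    return __builtin_clz((u << 1) | 1);
}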
diff --git a/test/CodeGen/AArch64/arm64-coalesce-ext.ll b/test/CodeGen/AArch64/arm64-coalesce-ext.ll
new file mode 100644
index 0000000..9420bf3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-coalesce-ext.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=arm64 -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; Check that the peephole optimizer knows about sext and zext instructions.
+; CHECK: test1sext
+define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+ %C = add i64 %A, %B
+ ; CHECK: add x[[SUM:[0-9]+]], x0, x1
+ %D = trunc i64 %C to i32
+ %E = shl i64 %C, 32
+ %F = ashr i64 %E, 32
+ ; CHECK: sxtw x[[EXT:[0-9]+]], w[[SUM]]
+ store volatile i64 %F, i64 *%P2
+ ; CHECK: str x[[EXT]]
+ store volatile i32 %D, i32* %P
+ ; Reuse low bits of extended register, don't extend live range of SUM.
+ ; CHECK: str w[[SUM]]
+ ret i32 %D
+}
diff --git a/test/CodeGen/ARM64/code-model-large-abs.ll b/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
index 264da2d..264da2d 100644
--- a/test/CodeGen/ARM64/code-model-large-abs.ll
+++ b/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
new file mode 100644
index 0000000..81cee38
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
+; Check that the LOH analysis does not crash when the analyzed chain
+; contains instructions that are filtered out.
+;
+; Before the fix for <rdar://problem/16041712>, these cases were removed
+; from the main container. Now, the deterministic container does not allow
+; removing arbitrary values, so we have to live with garbage values.
+; <rdar://problem/16041712>
+
+%"class.H4ISP::H4ISPDevice" = type { i32 (%"class.H4ISP::H4ISPDevice"*, i32, i8*, i8*)*, i8*, i32*, %"class.H4ISP::H4ISPCameraManager"* }
+
+%"class.H4ISP::H4ISPCameraManager" = type opaque
+
+declare i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"*)
+
+@pH4ISPDevice = hidden global %"class.H4ISP::H4ISPDevice"* null, align 8
+
+; CHECK-LABEL: _foo:
+; CHECK: ret
+; CHECK-NOT: .loh AdrpLdrGotLdr
+define void @foo() {
+entry:
+ br label %if.then83
+if.then83: ; preds = %if.end81
+ %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+ %call84 = call i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"* %tmp) #19
+ tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"()
+ %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+ tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"()
+ %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
+ %tmp3 = load %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
+ %tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null
+ br i1 %tobool.i269, label %if.then83, label %end
+end:
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
new file mode 100644
index 0000000..d7bc00e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+; Test case for <rdar://problem/15942912>.
+; AdrpAddStr cannot be used when the store uses the same
+; register as address and value. Indeed, the related
+; optimization, if applied, may completely remove the definition
+; or at least provide a wrong one (with the offset folded
+; into the definition).
+
+%struct.anon = type { i32*, i32** }
+
+@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8
+
+; CHECK-LABEL: _pptp_wan_init
+; CHECK: ret
+; CHECK-NOT: AdrpAddStr
+define i32 @pptp_wan_init() {
+entry:
+ store i32* null, i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), align 8
+ store i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), i32*** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 1), align 8
+ ret i32 0
+}
+
+
diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll
new file mode 100644
index 0000000..6d73daa
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF
+
+; CHECK-ELF-NOT: .loh
+; CHECK-ELF-NOT: AdrpAdrp
+; CHECK-ELF-NOT: AdrpAdd
+; CHECK-ELF-NOT: AdrpLdrGot
+
+@a = internal unnamed_addr global i32 0, align 4
+@b = external global i32
+
+; Function Attrs: noinline nounwind ssp
+define void @foo(i32 %t) {
+entry:
+ %tmp = load i32* @a, align 4
+ %add = add nsw i32 %tmp, %t
+ store i32 %add, i32* @a, align 4
+ ret void
+}
+
+; Function Attrs: nounwind ssp
+; Testcase for <rdar://problem/15438605>: AdrpAdrp reuse is valid only when the first adrp
+; dominates the second.
+; The first adrp comes from the loading of 'a' and the second the loading of 'b'.
+; 'a' is loaded in if.then, 'b' in if.end4; if.then does not dominate if.end4.
+; CHECK-LABEL: _test
+; CHECK: ret
+; CHECK-NOT: .loh AdrpAdrp
+define i32 @test(i32 %t) {
+entry:
+ %cmp = icmp sgt i32 %t, 5
+ br i1 %cmp, label %if.then, label %if.end4
+
+if.then: ; preds = %entry
+ %tmp = load i32* @a, align 4
+ %add = add nsw i32 %tmp, %t
+ %cmp1 = icmp sgt i32 %add, 12
+ br i1 %cmp1, label %if.then2, label %if.end4
+
+if.then2: ; preds = %if.then
+ tail call void @foo(i32 %add)
+ %tmp1 = load i32* @a, align 4
+ br label %if.end4
+
+if.end4: ; preds = %if.then2, %if.then, %entry
+ %t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t, %entry ]
+ %tmp2 = load i32* @b, align 4
+ %add5 = add nsw i32 %tmp2, %t.addr.0
+ tail call void @foo(i32 %add5)
+ %tmp3 = load i32* @b, align 4
+ %add6 = add nsw i32 %tmp3, %t.addr.0
+ ret i32 %add6
+}
diff --git a/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll b/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
new file mode 100644
index 0000000..f65b116
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s
+
+; The DAG combiner previously decided to use a vector load/store for this
+; struct copy. This probably shouldn't happen without NEON, but the most
+; important thing is that it compiles.
+
+define void @store_combine() nounwind {
+ %src = alloca { double, double }, align 8
+ %dst = alloca { double, double }, align 8
+
+ %src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
+ %src.real = load double* %src.realp
+ %src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
+ %src.imag = load double* %src.imagp
+
+ %dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
+ store double %src.real, double* %dst.realp
+ store double %src.imag, double* %dst.imagp
+ ret void
+}
diff --git a/test/CodeGen/ARM64/complex-ret.ll b/test/CodeGen/AArch64/arm64-complex-ret.ll
index 93d50a5..93d50a5 100644
--- a/test/CodeGen/ARM64/complex-ret.ll
+++ b/test/CodeGen/AArch64/arm64-complex-ret.ll
diff --git a/test/CodeGen/AArch64/arm64-const-addr.ll b/test/CodeGen/AArch64/arm64-const-addr.ll
new file mode 100644
index 0000000..c55a922
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-const-addr.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=arm64-darwin-unknown < %s | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+
+; Test if the constant base address gets materialized only once.
+define i32 @test1() nounwind {
+; CHECK-LABEL: test1
+; CHECK: movz w8, #0x40f, lsl #16
+; CHECK-NEXT: movk w8, #0xc000
+; CHECK-NEXT: ldp w9, w10, [x8, #4]
+; CHECK: ldr w8, [x8, #12]
+ %at = inttoptr i64 68141056 to %T*
+ %o1 = getelementptr %T* %at, i32 0, i32 1
+ %t1 = load i32* %o1
+ %o2 = getelementptr %T* %at, i32 0, i32 2
+ %t2 = load i32* %o2
+ %a1 = add i32 %t1, %t2
+ %o3 = getelementptr %T* %at, i32 0, i32 3
+ %t3 = load i32* %o3
+ %a2 = add i32 %a1, %t3
+ ret i32 %a2
+}
+
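On the constants the test expects: movz w8, #0x40f, lsl #16 followed by movk w8, #0xc000 builds 0x040FC000 == 68141056, the inttoptr base; the three fields are then loaded at small immediate offsets from that single base register. A C sketch of the source shape (struct layout mirroring %T):

#include <stdint.h>

struct T { int32_t a, b, c, d; };

int32_t sum_fields(void) {
    /* 68141056 == 0x040FC000, materialized once with movz + movk. */
    struct T *at = (struct T *)(uintptr_t)68141056;
    return at->b + at->c + at->d;   /* one base register, three offsets */
}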
diff --git a/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll
new file mode 100644
index 0000000..d862b1e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v2f64-v2i32.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; CHECK: fptosi_1
+; CHECK: fcvtzs.2d
+; CHECK: xtn.2s
+; CHECK: ret
+define void @fptosi_1() nounwind noinline ssp {
+entry:
+ %0 = fptosi <2 x double> undef to <2 x i32>
+ store <2 x i32> %0, <2 x i32>* undef, align 8
+ ret void
+}
+
+; CHECK: fptoui_1
+; CHECK: fcvtzu.2d
+; CHECK: xtn.2s
+; CHECK: ret
+define void @fptoui_1() nounwind noinline ssp {
+entry:
+ %0 = fptoui <2 x double> undef to <2 x i32>
+ store <2 x i32> %0, <2 x i32>* undef, align 8
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll
new file mode 100644
index 0000000..daaf1e0
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-convert-v2i32-v2f64.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: f1:
+; CHECK: sshll.2d v0, v0, #0
+; CHECK-NEXT: scvtf.2d v0, v0
+; CHECK-NEXT: ret
+ %conv = sitofp <2 x i32> %v to <2 x double>
+ ret <2 x double> %conv
+}
+define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
+; CHECK-LABEL: f2:
+; CHECK: ushll.2d v0, v0, #0
+; CHECK-NEXT: ucvtf.2d v0, v0
+; CHECK-NEXT: ret
+ %conv = uitofp <2 x i32> %v to <2 x double>
+ ret <2 x double> %conv
+}
+
+; CHECK: autogen_SD19655
+; CHECK: scvtf
+; CHECK: ret
+define void @autogen_SD19655() {
+ %T = load <2 x i64>* undef
+ %F = sitofp <2 x i64> undef to <2 x float>
+ store <2 x float> %F, <2 x float>* undef
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-copy-tuple.ll b/test/CodeGen/AArch64/arm64-copy-tuple.ll
new file mode 100644
index 0000000..1803787
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-copy-tuple.ll
@@ -0,0 +1,146 @@
+; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
+
+; The main purpose of this test is to find out whether copyPhysReg can deal with
+; the memmove-like situation arising in tuples, where an early copy can clobber
+; the value needed by a later one if the tuples overlap.
+
+; We use dummy inline asm to force LLVM to generate a COPY between the registers
+; we want by clobbering all the others.
+
+define void @test_D1D2_from_D0D1(i8* %addr) #0 {
+; CHECK-LABEL: test_D1D2_from_D0D1:
+; CHECK: mov.8b v2, v1
+; CHECK: mov.8b v1, v0
+entry:
+ %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
+ %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
+ %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
+ %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
+ tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+ ret void
+}
+
+define void @test_D0D1_from_D1D2(i8* %addr) #0 {
+; CHECK-LABEL: test_D0D1_from_D1D2:
+; CHECK: mov.8b v0, v1
+; CHECK: mov.8b v1, v2
+entry:
+ %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
+ %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
+ %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
+ %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
+ tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+ ret void
+}
+
+define void @test_D0D1_from_D31D0(i8* %addr) #0 {
+; CHECK-LABEL: test_D0D1_from_D31D0:
+; CHECK: mov.8b v1, v0
+; CHECK: mov.8b v0, v31
+entry:
+ %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
+ %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
+ %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
+ %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
+ tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+ ret void
+}
+
+define void @test_D31D0_from_D0D1(i8* %addr) #0 {
+; CHECK-LABEL: test_D31D0_from_D0D1:
+; CHECK: mov.8b v31, v0
+; CHECK: mov.8b v0, v1
+entry:
+ %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
+ %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
+ %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
+ %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
+ tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
+ tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
+ ret void
+}
+
+define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 {
+; CHECK-LABEL: test_D2D3D4_from_D0D1D2:
+; CHECK: mov.8b v4, v2
+; CHECK: mov.8b v3, v1
+; CHECK: mov.8b v2, v0
+entry:
+ %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
+ %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
+ %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0
+ %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1
+ %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2
+
+ tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr)
+ ret void
+}
+
+define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 {
+; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3:
+; CHECK: mov.16b v0, v1
+; CHECK: mov.16b v1, v2
+; CHECK: mov.16b v2, v3
+entry:
+ %addr_v16i8 = bitcast i8* %addr to <16 x i8>*
+ %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8)
+ %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
+ %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
+ %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
+ tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr)
+ ret void
+}
+
+define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 {
+; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1:
+; CHECK: mov.16b v4, v1
+; CHECK: mov.16b v3, v0
+; CHECK: mov.16b v2, v31
+; CHECK: mov.16b v1, v30
+ %addr_v16i8 = bitcast i8* %addr to <16 x i8>*
+ %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8)
+ %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
+ %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
+ %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
+ %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3
+
+ tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"()
+ tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr)
+
+ tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr)
+ ret void
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>*)
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>*)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>*)
+
+declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
+declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
+declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
diff --git a/test/CodeGen/AArch64/arm64-crc32.ll b/test/CodeGen/AArch64/arm64-crc32.ll
new file mode 100644
index 0000000..d3099e6
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-crc32.ll
@@ -0,0 +1,71 @@
+; RUN: llc -march=arm64 -mattr=+crc -o - %s | FileCheck %s
+
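+; The byte and halfword variants take their data operand zero-extended to
+; i32; since crc32b/crc32h only read the low byte/halfword of the source
+; register, the zext should fold away (no explicit uxt expected).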
+define i32 @test_crc32b(i32 %cur, i8 %next) {
+; CHECK-LABEL: test_crc32b:
+; CHECK: crc32b w0, w0, w1
+ %bits = zext i8 %next to i32
+ %val = call i32 @llvm.aarch64.crc32b(i32 %cur, i32 %bits)
+ ret i32 %val
+}
+
+define i32 @test_crc32h(i32 %cur, i16 %next) {
+; CHECK-LABEL: test_crc32h:
+; CHECK: crc32h w0, w0, w1
+ %bits = zext i16 %next to i32
+ %val = call i32 @llvm.aarch64.crc32h(i32 %cur, i32 %bits)
+ ret i32 %val
+}
+
+define i32 @test_crc32w(i32 %cur, i32 %next) {
+; CHECK-LABEL: test_crc32w:
+; CHECK: crc32w w0, w0, w1
+ %val = call i32 @llvm.aarch64.crc32w(i32 %cur, i32 %next)
+ ret i32 %val
+}
+
+define i32 @test_crc32x(i32 %cur, i64 %next) {
+; CHECK-LABEL: test_crc32x:
+; CHECK: crc32x w0, w0, x1
+ %val = call i32 @llvm.aarch64.crc32x(i32 %cur, i64 %next)
+ ret i32 %val
+}
+
+define i32 @test_crc32cb(i32 %cur, i8 %next) {
+; CHECK-LABEL: test_crc32cb:
+; CHECK: crc32cb w0, w0, w1
+ %bits = zext i8 %next to i32
+ %val = call i32 @llvm.aarch64.crc32cb(i32 %cur, i32 %bits)
+ ret i32 %val
+}
+
+define i32 @test_crc32ch(i32 %cur, i16 %next) {
+; CHECK-LABEL: test_crc32ch:
+; CHECK: crc32ch w0, w0, w1
+ %bits = zext i16 %next to i32
+ %val = call i32 @llvm.aarch64.crc32ch(i32 %cur, i32 %bits)
+ ret i32 %val
+}
+
+define i32 @test_crc32cw(i32 %cur, i32 %next) {
+; CHECK-LABEL: test_crc32cw:
+; CHECK: crc32cw w0, w0, w1
+ %val = call i32 @llvm.aarch64.crc32cw(i32 %cur, i32 %next)
+ ret i32 %val
+}
+
+define i32 @test_crc32cx(i32 %cur, i64 %next) {
+; CHECK-LABEL: test_crc32cx:
+; CHECK: crc32cx w0, w0, x1
+ %val = call i32 @llvm.aarch64.crc32cx(i32 %cur, i64 %next)
+ ret i32 %val
+}
+
+declare i32 @llvm.aarch64.crc32b(i32, i32)
+declare i32 @llvm.aarch64.crc32h(i32, i32)
+declare i32 @llvm.aarch64.crc32w(i32, i32)
+declare i32 @llvm.aarch64.crc32x(i32, i64)
+
+declare i32 @llvm.aarch64.crc32cb(i32, i32)
+declare i32 @llvm.aarch64.crc32ch(i32, i32)
+declare i32 @llvm.aarch64.crc32cw(i32, i32)
+declare i32 @llvm.aarch64.crc32cx(i32, i64)
diff --git a/test/CodeGen/AArch64/arm64-crypto.ll b/test/CodeGen/AArch64/arm64-crypto.ll
new file mode 100644
index 0000000..2908b33
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-crypto.ll
@@ -0,0 +1,135 @@
+; RUN: llc -march=arm64 -mattr=crypto -aarch64-neon-syntax=apple -o - %s | FileCheck %s
+
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %data)
+declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %data)
+
+define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: test_aese:
+; CHECK: aese.16b v0, v1
+ %res = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: test_aesd:
+; CHECK: aesd.16b v0, v1
+ %res = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_aesmc(<16 x i8> %data) {
+; CHECK-LABEL: test_aesmc:
+; CHECK: aesmc.16b v0, v0
+ %res = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %data)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_aesimc(<16 x i8> %data) {
+; CHECK-LABEL: test_aesimc:
+; CHECK: aesimc.16b v0, v0
+ %res = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %data)
+ ret <16 x i8> %res
+}
+
+declare <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+declare <4 x i32> @llvm.aarch64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+declare <4 x i32> @llvm.aarch64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+declare i32 @llvm.aarch64.crypto.sha1h(i32 %hash_e)
+declare <4 x i32> @llvm.aarch64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11)
+declare <4 x i32> @llvm.aarch64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15)
+
+define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_sha1c:
+; CHECK: fmov [[HASH_E:s[0-9]+]], w0
+; CHECK: sha1c.4s q0, [[HASH_E]], v1
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+ ret <4 x i32> %res
+}
+
+; <rdar://problem/14742333> Incomplete removal of unnecessary FMOV instructions in intrinsic SHA1
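+; The first sha1c already leaves its result in a SIMD register, so lane 0
+; should feed the second sha1c directly without a GPR round-trip (i.e. no
+; second fmov).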
+define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_sha1c_in_a_row:
+; CHECK: fmov [[HASH_E:s[0-9]+]], w0
+; CHECK: sha1c.4s q[[SHA1RES:[0-9]+]], [[HASH_E]], v1
+; CHECK-NOT: fmov
+; CHECK: sha1c.4s q0, s[[SHA1RES]], v1
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+ %extract = extractelement <4 x i32> %res, i32 0
+ %res2 = call <4 x i32> @llvm.aarch64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk)
+ ret <4 x i32> %res2
+}
+
+define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_sha1p:
+; CHECK: fmov [[HASH_E:s[0-9]+]], w0
+; CHECK: sha1p.4s q0, [[HASH_E]], v1
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
+; CHECK-LABEL: test_sha1m:
+; CHECK: fmov [[HASH_E:s[0-9]+]], w0
+; CHECK: sha1m.4s q0, [[HASH_E]], v1
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
+ ret <4 x i32> %res
+}
+
+define i32 @test_sha1h(i32 %hash_e) {
+; CHECK-LABEL: test_sha1h:
+; CHECK: fmov [[HASH_E:s[0-9]+]], w0
+; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]]
+; CHECK: fmov w0, [[RES]]
+ %res = call i32 @llvm.aarch64.crypto.sha1h(i32 %hash_e)
+ ret i32 %res
+}
+
+define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) {
+; CHECK-LABEL: test_sha1su0:
+; CHECK: sha1su0.4s v0, v1, v2
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) {
+; CHECK-LABEL: test_sha1su1:
+; CHECK: sha1su1.4s v0, v1
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.aarch64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
+declare <4 x i32> @llvm.aarch64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
+declare <4 x i32> @llvm.aarch64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
+declare <4 x i32> @llvm.aarch64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
+
+define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) {
+; CHECK-LABEL: test_sha256h:
+; CHECK: sha256h.4s q0, q1, v2
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) {
+; CHECK-LABEL: test_sha256h2:
+; CHECK: sha256h2.4s q0, q1, v2
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) {
+; CHECK-LABEL: test_sha256su0:
+; CHECK: sha256su0.4s v0, v1
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) {
+; CHECK-LABEL: test_sha256su1:
+; CHECK: sha256su1.4s v0, v1, v2
+ %res = call <4 x i32> @llvm.aarch64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
+ ret <4 x i32> %res
+}
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
new file mode 100644
index 0000000..bb14c89
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -0,0 +1,59 @@
+; RUN: llc -O3 < %s | FileCheck %s
+target triple = "arm64-apple-ios"
+
+; rdar://12462006
+; CSE between "icmp reg reg" and "sub reg reg".
+; Both can be in the same basic block or in different basic blocks.
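+; The expected lowering (a sketch; register assignment may differ) is a
+; single flag-setting subtract serving both uses:
+;   subs w8, w9, w2   ; difference in w8, NZCV set for the compare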
+define i8* @t1(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: subs
+; CHECK-NOT: cmp
+; CHECK-NOT: sub
+; CHECK: b.ge
+; CHECK: sub
+; CHECK: sub
+; CHECK-NOT: sub
+; CHECK: ret
+ %0 = load i32* %offset, align 4
+ %cmp = icmp slt i32 %0, %size
+ %s = sub nsw i32 %0, %size
+ br i1 %cmp, label %return, label %if.end
+
+if.end:
+ %sub = sub nsw i32 %0, %size
+ %s2 = sub nsw i32 %s, %size
+ %s3 = sub nsw i32 %sub, %s2
+ store i32 %s3, i32* %offset, align 4
+ %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+ br label %return
+
+return:
+ %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+ ret i8* %retval.0
+}
+
+; CSE between "icmp reg imm" and "sub reg imm".
+define i8* @t2(i8* %base, i32* nocapture %offset) nounwind {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: subs
+; CHECK-NOT: cmp
+; CHECK-NOT: sub
+; CHECK: b.lt
+; CHECK-NOT: sub
+; CHECK: ret
+ %0 = load i32* %offset, align 4
+ %cmp = icmp slt i32 %0, 1
+ br i1 %cmp, label %return, label %if.end
+
+if.end:
+ %sub = sub nsw i32 %0, 1
+ store i32 %sub, i32* %offset, align 4
+ %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+ br label %return
+
+return:
+ %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+ ret i8* %retval.0
+}
diff --git a/test/CodeGen/AArch64/arm64-csel.ll b/test/CodeGen/AArch64/arm64-csel.ll
new file mode 100644
index 0000000..98eba30
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-csel.ll
@@ -0,0 +1,230 @@
+; RUN: llc -O3 < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "arm64-unknown-unknown"
+
+; CHECK-LABEL: foo1
+; CHECK: cinc w{{[0-9]+}}, w{{[0-9]+}}, ne
+define i32 @foo1(i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+ %not.tobool = icmp ne i32 %c, 0
+ %add = zext i1 %not.tobool to i32
+ %b.add = add i32 %c, %b
+ %add1 = add i32 %b.add, %add
+ ret i32 %add1
+}
+
+; CHECK-LABEL: foo2
+; CHECK: cneg w{{[0-9]+}}, w{{[0-9]+}}, ne
+define i32 @foo2(i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+ %mul = sub i32 0, %b
+ %tobool = icmp eq i32 %c, 0
+ %b.mul = select i1 %tobool, i32 %b, i32 %mul
+ %add = add nsw i32 %b.mul, %c
+ ret i32 %add
+}
+
+; CHECK-LABEL: foo3
+; CHECK: cinv w{{[0-9]+}}, w{{[0-9]+}}, ne
+define i32 @foo3(i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+ %not.tobool = icmp ne i32 %c, 0
+ %xor = sext i1 %not.tobool to i32
+ %b.xor = xor i32 %xor, %b
+ %add = add nsw i32 %b.xor, %c
+ ret i32 %add
+}
+
+; rdar://11632325
+define i32 @foo4(i32 %a) nounwind ssp {
+; CHECK-LABEL: foo4
+; CHECK: cneg
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %a, -1
+ %neg = sub nsw i32 0, %a
+ %cond = select i1 %cmp, i32 %a, i32 %neg
+ ret i32 %cond
+}
+
+define i32 @foo5(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK-LABEL: foo5
+; CHECK: subs
+; CHECK-NEXT: cneg
+; CHECK-NEXT: ret
+ %sub = sub nsw i32 %a, %b
+ %cmp = icmp sgt i32 %sub, -1
+ %sub3 = sub nsw i32 0, %sub
+ %cond = select i1 %cmp, i32 %sub, i32 %sub3
+ ret i32 %cond
+}
+
+; Make sure we can handle a branch instruction in optimizeCompare.
+define i32 @foo6(i32 %a, i32 %b) nounwind ssp {
+; CHECK-LABEL: foo6
+; CHECK: b
+ %sub = sub nsw i32 %a, %b
+ %cmp = icmp sgt i32 %sub, 0
+ br i1 %cmp, label %l.if, label %l.else
+
+l.if:
+ ret i32 1
+
+l.else:
+ ret i32 %sub
+}
+
+; If NZCV is used multiple times and the V flag is used, we don't remove the cmp.
+define i32 @foo7(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK-LABEL: foo7:
+; CHECK: sub
+; CHECK-NEXT: adds
+; CHECK-NEXT: csneg
+; CHECK-NEXT: b
+ %sub = sub nsw i32 %a, %b
+ %cmp = icmp sgt i32 %sub, -1
+ %sub3 = sub nsw i32 0, %sub
+ %cond = select i1 %cmp, i32 %sub, i32 %sub3
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cmp2 = icmp slt i32 %sub, -1
+ %sel = select i1 %cmp2, i32 %cond, i32 %a
+ ret i32 %sel
+
+if.else:
+ ret i32 %cond
+}
+
+define i32 @foo8(i32 %v, i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: foo8:
+; CHECK: cmp w0, #0
+; CHECK: csinv w0, w1, w2, ne
+ %tobool = icmp eq i32 %v, 0
+ %neg = xor i32 -1, %b
+ %cond = select i1 %tobool, i32 %neg, i32 %a
+ ret i32 %cond
+}
+
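+; select(4, -5) lowers to a conditional invert, since ~4 == -5: materialize
+; 4, then cinv it when the condition is false.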
+define i32 @foo9(i32 %v) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo9:
+; CHECK: cmp w0, #0
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
+; CHECK: cinv w0, w[[REG]], eq
+ %tobool = icmp ne i32 %v, 0
+ %cond = select i1 %tobool, i32 4, i32 -5
+ ret i32 %cond
+}
+
+define i64 @foo10(i64 %v) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo10:
+; CHECK: cmp x0, #0
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
+; CHECK: cinv x0, x[[REG]], eq
+ %tobool = icmp ne i64 %v, 0
+ %cond = select i1 %tobool, i64 4, i64 -5
+ ret i64 %cond
+}
+
+define i32 @foo11(i32 %v) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo11:
+; CHECK: cmp w0, #0
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
+; CHECK: cneg w0, w[[REG]], eq
+ %tobool = icmp ne i32 %v, 0
+ %cond = select i1 %tobool, i32 4, i32 -4
+ ret i32 %cond
+}
+
+define i64 @foo12(i64 %v) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo12:
+; CHECK: cmp x0, #0
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
+; CHECK: cneg x0, x[[REG]], eq
+ %tobool = icmp ne i64 %v, 0
+ %cond = select i1 %tobool, i64 4, i64 -4
+ ret i64 %cond
+}
+
+define i32 @foo13(i32 %v, i32 %a, i32 %b) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo13:
+; CHECK: cmp w0, #0
+; CHECK: csneg w0, w1, w2, ne
+ %tobool = icmp eq i32 %v, 0
+ %sub = sub i32 0, %b
+ %cond = select i1 %tobool, i32 %sub, i32 %a
+ ret i32 %cond
+}
+
+define i64 @foo14(i64 %v, i64 %a, i64 %b) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo14:
+; CHECK: cmp x0, #0
+; CHECK: csneg x0, x1, x2, ne
+ %tobool = icmp eq i64 %v, 0
+ %sub = sub i64 0, %b
+ %cond = select i1 %tobool, i64 %sub, i64 %a
+ ret i64 %cond
+}
+
+define i32 @foo15(i32 %a, i32 %b) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo15:
+; CHECK: cmp w0, w1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
+; CHECK: cinc w0, w[[REG]], gt
+ %cmp = icmp sgt i32 %a, %b
+ %. = select i1 %cmp, i32 2, i32 1
+ ret i32 %.
+}
+
+define i32 @foo16(i32 %a, i32 %b) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo16:
+; CHECK: cmp w0, w1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
+; CHECK: cinc w0, w[[REG]], le
+ %cmp = icmp sgt i32 %a, %b
+ %. = select i1 %cmp, i32 1, i32 2
+ ret i32 %.
+}
+
+define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo17:
+; CHECK: cmp x0, x1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
+; CHECK: cinc x0, x[[REG]], gt
+ %cmp = icmp sgt i64 %a, %b
+ %. = select i1 %cmp, i64 2, i64 1
+ ret i64 %.
+}
+
+define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp {
+entry:
+; CHECK-LABEL: foo18:
+; CHECK: cmp x0, x1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
+; CHECK: cinc x0, x[[REG]], le
+ %cmp = icmp sgt i64 %a, %b
+ %. = select i1 %cmp, i64 1, i64 2
+ ret i64 %.
+}
+
+define i64 @foo19(i64 %a, i64 %b, i64 %c) {
+entry:
+; CHECK-LABEL: foo19:
+; CHECK: cinc x0, x2
+; CHECK-NOT: add
+ %cmp = icmp ult i64 %a, %b
+ %inc = zext i1 %cmp to i64
+ %inc.c = add i64 %inc, %c
+ ret i64 %inc.c
+}
diff --git a/test/CodeGen/AArch64/arm64-cvt.ll b/test/CodeGen/AArch64/arm64-cvt.ll
new file mode 100644
index 0000000..420a8bc
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-cvt.ll
@@ -0,0 +1,401 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;
+; Floating-point scalar convert to signed integer (to nearest with ties to away)
+;
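+; (Function names encode the registers involved: 1w1s = i32 from float,
+; 1x1s = i64 from float, 1w1d = i32 from double, 1x1d = i64 from double.)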
+define i32 @fcvtas_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtas_1w1s:
+;CHECK: fcvtas w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtas_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtas_1x1s:
+;CHECK: fcvtas x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtas_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtas_1w1d:
+;CHECK: fcvtas w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtas_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtas_1x1d:
+;CHECK: fcvtas x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtas.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtas.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtas.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtas.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to unsigned integer (to nearest with ties to away)
+;
+define i32 @fcvtau_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtau_1w1s:
+;CHECK: fcvtau w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtau_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtau_1x1s:
+;CHECK: fcvtau x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtau_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtau_1w1d:
+;CHECK: fcvtau w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtau_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtau_1x1d:
+;CHECK: fcvtau x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtau.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtau.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtau.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtau.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to signed integer (toward -Inf)
+;
+define i32 @fcvtms_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtms_1w1s:
+;CHECK: fcvtms w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtms_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtms_1x1s:
+;CHECK: fcvtms x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtms_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtms_1w1d:
+;CHECK: fcvtms w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtms_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtms_1x1d:
+;CHECK: fcvtms x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtms.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtms.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtms.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtms.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to unsigned integer (toward -Inf)
+;
+define i32 @fcvtmu_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtmu_1w1s:
+;CHECK: fcvtmu w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtmu_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtmu_1x1s:
+;CHECK: fcvtmu x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtmu_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtmu_1w1d:
+;CHECK: fcvtmu w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtmu_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtmu_1x1d:
+;CHECK: fcvtmu x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to signed integer (to nearest with ties to even)
+;
+define i32 @fcvtns_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtns_1w1s:
+;CHECK: fcvtns w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtns_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtns_1x1s:
+;CHECK: fcvtns x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtns_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtns_1w1d:
+;CHECK: fcvtns w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtns_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtns_1x1d:
+;CHECK: fcvtns x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtns.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtns.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtns.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtns.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to unsigned integer (to nearest with ties to even)
+;
+define i32 @fcvtnu_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtnu_1w1s:
+;CHECK: fcvtnu w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtnu_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtnu_1x1s:
+;CHECK: fcvtnu x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtnu_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtnu_1w1d:
+;CHECK: fcvtnu w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtnu_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtnu_1x1d:
+;CHECK: fcvtnu x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to signed integer (toward +Inf)
+;
+define i32 @fcvtps_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtps_1w1s:
+;CHECK: fcvtps w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtps_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtps_1x1s:
+;CHECK: fcvtps x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtps_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtps_1w1d:
+;CHECK: fcvtps w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtps_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtps_1x1d:
+;CHECK: fcvtps x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtps.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtps.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtps.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtps.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to unsigned integer (toward +Inf)
+;
+define i32 @fcvtpu_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtpu_1w1s:
+;CHECK: fcvtpu w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtpu_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtpu_1x1s:
+;CHECK: fcvtpu x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtpu_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtpu_1w1d:
+;CHECK: fcvtpu w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtpu_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtpu_1x1d:
+;CHECK: fcvtpu x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to signed integer (toward zero)
+;
+define i32 @fcvtzs_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtzs_1w1s:
+;CHECK: fcvtzs w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtzs_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtzs_1x1s:
+;CHECK: fcvtzs x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtzs_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtzs_1w1d:
+;CHECK: fcvtzs w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtzs_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtzs_1x1d:
+;CHECK: fcvtzs x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double) nounwind readnone
+
+;
+; Floating-point scalar convert to unsigned integer (toward zero)
+;
+define i32 @fcvtzu_1w1s(float %A) nounwind {
+;CHECK-LABEL: fcvtzu_1w1s:
+;CHECK: fcvtzu w0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtzu_1x1s(float %A) nounwind {
+;CHECK-LABEL: fcvtzu_1x1s:
+;CHECK: fcvtzu x0, s0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A)
+ ret i64 %tmp3
+}
+
+define i32 @fcvtzu_1w1d(double %A) nounwind {
+;CHECK-LABEL: fcvtzu_1w1d:
+;CHECK: fcvtzu w0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %A)
+ ret i32 %tmp3
+}
+
+define i64 @fcvtzu_1x1d(double %A) nounwind {
+;CHECK-LABEL: fcvtzu_1x1d:
+;CHECK: fcvtzu x0, d0
+;CHECK-NEXT: ret
+ %tmp3 = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %A)
+ ret i64 %tmp3
+}
+
+declare i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float) nounwind readnone
+declare i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double) nounwind readnone
+declare i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double) nounwind readnone
diff --git a/test/CodeGen/ARM64/dagcombiner-convergence.ll b/test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll
index a45e313..a45e313 100644
--- a/test/CodeGen/ARM64/dagcombiner-convergence.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-convergence.ll
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
new file mode 100644
index 0000000..2cf0135
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mcpu=cyclone < %s | FileCheck %s
+
+target datalayout = "e-i64:64-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+%"struct.SU" = type { i32, %"struct.SU"*, i32*, i32, i32, %"struct.BO", i32, [5 x i8] }
+%"struct.BO" = type { %"struct.RE" }
+
+%"struct.RE" = type { i32, i32, i32, i32 }
+
+; This is a read-modify-write of some bit-fields combined into an i48. It gets
+; legalized into i32 and i16 accesses. Only a single store of zero to the low
+; i32 part should be live.
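+; Roughly, after legalization (a sketch; value names illustrative):
+;   store i32 0, i32* %lo    ; low half: the only live store
+; The high i16 half is stored back unchanged, so its accesses fold away.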
+
+; CHECK-LABEL: test:
+; CHECK-NOT: ldr
+; CHECK: str wzr
+; CHECK-NOT: str
+define void @test(%"struct.SU"* nocapture %su) {
+entry:
+ %r1 = getelementptr inbounds %"struct.SU"* %su, i64 1, i32 5
+ %r2 = bitcast %"struct.BO"* %r1 to i48*
+ %r3 = load i48* %r2, align 8
+ %r4 = and i48 %r3, -4294967296
+ %r5 = or i48 0, %r4
+ store i48 %r5, i48* %r2, align 8
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
new file mode 100644
index 0000000..2e4b658
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O3 < %s | FileCheck %s
+; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
+; Test case for a DAG combiner bug where we combined an indexed load
+; with an extension (sext, zext, or anyext) into a regular extended load,
+; i.e., dropping the updated (indexed) pointer value.
+; <rdar://problem/16389332>
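+; For reference, an indexed load fuses the pointer update with the access,
+; e.g. (illustrative): ldr w8, [x0], #8 loads and then advances x0 by 8.
+; Combining it into a plain extending load would lose that write-back.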
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+%class.A = type { i64, i64 }
+%class.C = type { i64 }
+
+; CHECK-LABEL: XX:
+; CHECK: ldr
+define void @XX(%class.A* %K) {
+entry:
+ br i1 undef, label %if.then, label %lor.rhs.i
+
+lor.rhs.i: ; preds = %entry
+ %tmp = load i32* undef, align 4
+ %y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1
+ %tmp1 = load i64* %y.i.i.i, align 8
+ %U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32
+ %div11.i = sdiv i32 %U.sroa.3.8.extract.trunc.i, 17
+ %add12.i = add nsw i32 0, %div11.i
+ %U.sroa.3.12.extract.shift.i = lshr i64 %tmp1, 32
+ %U.sroa.3.12.extract.trunc.i = trunc i64 %U.sroa.3.12.extract.shift.i to i32
+ %div15.i = sdiv i32 %U.sroa.3.12.extract.trunc.i, 13
+ %add16.i = add nsw i32 %add12.i, %div15.i
+ %rem.i.i = srem i32 %add16.i, %tmp
+ %idxprom = sext i32 %rem.i.i to i64
+ %arrayidx = getelementptr inbounds %class.C** undef, i64 %idxprom
+ %tobool533 = icmp eq %class.C* undef, null
+ br i1 %tobool533, label %while.end, label %while.body
+
+if.then: ; preds = %entry
+ unreachable
+
+while.body: ; preds = %lor.rhs.i
+ unreachable
+
+while.end: ; preds = %lor.rhs.i
+ %tmp3 = load %class.C** %arrayidx, align 8
+ unreachable
+}
diff --git a/test/CodeGen/ARM64/dagcombiner-load-slicing.ll b/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
index 0679014..0679014 100644
--- a/test/CodeGen/ARM64/dagcombiner-load-slicing.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
diff --git a/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll b/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
new file mode 100644
index 0000000..9bb4b71
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-dead-def-frame-index.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=arm64 < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
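+; The 4096 x i32 alloca makes the frame more than 16 KiB, so addressing the
+; i8 object takes a shifted add (#4, lsl #12 == 16384) plus a small
+; remainder (exact offsets are allocation-dependent).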
+; Function Attrs: nounwind ssp uwtable
+define i32 @test1() #0 {
+ %tmp1 = alloca i8
+ %tmp2 = alloca i32, i32 4096
+ %tmp3 = icmp eq i8* %tmp1, null
+ %tmp4 = zext i1 %tmp3 to i32
+
+ ret i32 %tmp4
+
+ ; CHECK-LABEL: test1
+ ; CHECK: adds [[TEMP:[a-z0-9]+]], sp, #4, lsl #12
+ ; CHECK: adds [[TEMP]], [[TEMP]], #15
+}
diff --git a/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll b/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
new file mode 100644
index 0000000..1bbcf50
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple="arm64-apple-ios" < %s | FileCheck %s
+;
+; Check that the dead register definition pass considers implicit defs.
+; When rematerializing through truncates, the coalescer may produce instructions
+; with dead defs, but live implicit-defs of subregs:
+; E.g. %X1<def, dead> = MOVi64imm 2, %W1<imp-def>; %X1:GPR64, %W1:GPR32
+; These instructions are live, and their definitions should not be rewritten.
+;
+; <rdar://problem/16492408>
+
+define void @testcase() {
+; CHECK: testcase:
+; CHECK-NOT: orr xzr, xzr, #0x2
+
+bb1:
+ %tmp1 = tail call float @ceilf(float 2.000000e+00)
+ %tmp2 = fptoui float %tmp1 to i64
+ br i1 undef, label %bb2, label %bb3
+
+bb2:
+ tail call void @foo()
+ br label %bb3
+
+bb3:
+ %tmp3 = trunc i64 %tmp2 to i32
+ tail call void @bar(i32 %tmp3)
+ ret void
+}
+
+declare void @foo()
+declare void @bar(i32)
+declare float @ceilf(float) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-dup.ll b/test/CodeGen/AArch64/arm64-dup.ll
new file mode 100644
index 0000000..0c56b46
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-dup.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
+define <8 x i8> @v_dup8(i8 %A) nounwind {
+;CHECK-LABEL: v_dup8:
+;CHECK: dup.8b
+ %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
+ ret <8 x i8> %tmp8
+}
+
+define <4 x i16> @v_dup16(i16 %A) nounwind {
+;CHECK-LABEL: v_dup16:
+;CHECK: dup.4h
+ %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_dup32(i32 %A) nounwind {
+;CHECK-LABEL: v_dup32:
+;CHECK: dup.2s
+ %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @v_dupfloat(float %A) nounwind {
+;CHECK-LABEL: v_dupfloat:
+;CHECK: dup.2s
+ %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
+ %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+;CHECK-LABEL: v_dupQ8:
+;CHECK: dup.16b
+ %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
+ ret <16 x i8> %tmp16
+}
+
+define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+;CHECK-LABEL: v_dupQ16:
+;CHECK: dup.8h
+ %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
+ ret <8 x i16> %tmp8
+}
+
+define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+;CHECK-LABEL: v_dupQ32:
+;CHECK: dup.4s
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
+ ret <4 x i32> %tmp4
+}
+
+define <4 x float> @v_dupQfloat(float %A) nounwind {
+;CHECK-LABEL: v_dupQfloat:
+;CHECK: dup.4s
+ %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
+ %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
+ %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
+ %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
+ ret <4 x float> %tmp4
+}
+
+; Check to make sure it works with shuffles, too.
+
+define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
+;CHECK-LABEL: v_shuffledup8:
+;CHECK: dup.8b
+ %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
+;CHECK-LABEL: v_shuffledup16:
+;CHECK: dup.4h
+ %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
+;CHECK-LABEL: v_shuffledup32:
+;CHECK: dup.2s
+ %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @v_shuffledupfloat(float %A) nounwind {
+;CHECK-LABEL: v_shuffledupfloat:
+;CHECK: dup.2s
+ %tmp1 = insertelement <2 x float> undef, float %A, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
+;CHECK-LABEL: v_shuffledupQ8:
+;CHECK: dup.16b
+ %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
+;CHECK-LABEL: v_shuffledupQ16:
+;CHECK: dup.8h
+ %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
+;CHECK-LABEL: v_shuffledupQ32:
+;CHECK: dup.4s
+ %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
+;CHECK-LABEL: v_shuffledupQfloat:
+;CHECK: dup.4s
+ %tmp1 = insertelement <4 x float> undef, float %A, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: vduplane8:
+;CHECK: dup.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: vduplane16:
+;CHECK: dup.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: vduplane32:
+;CHECK: dup.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK-LABEL: vduplanefloat:
+;CHECK: dup.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: vduplaneQ8:
+;CHECK: dup.16b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: vduplaneQ16:
+;CHECK: dup.8h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: vduplaneQ32:
+;CHECK: dup.4s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK-LABEL: vduplaneQfloat:
+;CHECK: dup.4s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
+}
+
+define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+;CHECK-LABEL: foo:
+;CHECK: dup.2d
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+;CHECK-LABEL: bar:
+;CHECK: dup.2d
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x i64> %0
+}
+
+define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+;CHECK-LABEL: baz:
+;CHECK: dup.2d
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %0
+}
+
+define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+;CHECK-LABEL: qux:
+;CHECK: dup.2d
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %0
+}
+
+define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: f:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ins.s v0[1], w1
+; CHECK-NEXT: ret
+ %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
+ %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
+ ret <2 x i32> %vecinit1
+}
+
+define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: g:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ins.s v0[1], w1
+; CHECK-NEXT: ins.s v0[2], w1
+; CHECK-NEXT: ins.s v0[3], w0
+; CHECK-NEXT: ret
+ %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3
+ ret <4 x i32> %vecinit3
+}
+
+define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
+; CHECK-LABEL: h:
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ins.d v0[1], x1
+; CHECK-NEXT: ret
+ %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
+ %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
+ ret <2 x i64> %vecinit1
+}
+
+; We used to spot this as a BUILD_VECTOR implementable by dup, but assumed that
+; the single value needed was of the same type as the vector. This is false if
+; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16>
+; BUILD_VECTOR will have an i32 as its source). In that case, the operation is
+; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
+;
+; *However*, it is a dup vD.4h, vN.h[2*idx].
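+; (Reinterpreting the v4i32 source as v8i16, i32 lane 3 keeps its low i16
+; half in h-lane 6 on little-endian, which is where the doubled index below
+; comes from.)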
+define <4 x i16> @test_build_illegal(<4 x i32> %in) {
+; CHECK-LABEL: test_build_illegal:
+; CHECK: dup.4h v0, v0[6]
+ %val = extractelement <4 x i32> %in, i32 3
+ %smallval = trunc i32 %val to i16
+  %vec = insertelement <4 x i16> undef, i16 %smallval, i32 3
+
+ ret <4 x i16> %vec
+}
+
+; We used to inherit a v4i16 from SelectionDAGBuilder that had already been
+; through extract_subvector. We then added a DUPLANE on top of that, preventing
+; the formation of an indexed-by-7 MLS.
+define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
+; CHECK-LABEL: test_high_splat:
+; CHECK: mls.4h v0, v1, v2[7]
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <4 x i16> %shuffle, %b
+ %sub = sub <4 x i16> %a, %mul
+ ret <4 x i16> %sub
+}
diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
new file mode 100644
index 0000000..17d783a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -0,0 +1,423 @@
+; RUN: llc < %s -stress-early-ifcvt | FileCheck %s
+target triple = "arm64-apple-macosx"
+
+; CHECK: mm2
+define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ br label %do.body
+
+; CHECK: do.body
+; Loop body has no branches before the backedge.
+; CHECK-NOT: LBB
+do.body:
+ %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
+ %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
+ %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
+ %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
+ %0 = load i32* %p.addr.0, align 4
+ %cmp = icmp sgt i32 %0, %max.0
+ br i1 %cmp, label %do.cond, label %if.else
+
+if.else:
+ %cmp1 = icmp slt i32 %0, %min.0
+ %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
+ br label %do.cond
+
+do.cond:
+ %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
+ %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
+; CHECK: cbnz
+ %dec = add i32 %n.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end:
+ %sub = sub nsw i32 %max.1, %min.1
+ ret i32 %sub
+}
+
+; CHECK-LABEL: fold_inc_true_32:
+; CHECK: {{subs.*wzr,|cmp}} w2, #1
+; CHECK-NEXT: csinc w0, w1, w0, eq
+; CHECK-NEXT: ret
+define i32 @fold_inc_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 1
+ %inc = add nsw i32 %x, 1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %y, %eq_bb ], [ %inc, %entry ]
+ ret i32 %cond
+}
+
+; CHECK-LABEL: fold_inc_true_64:
+; CHECK: {{subs.*xzr,|cmp}} x2, #1
+; CHECK-NEXT: csinc x0, x1, x0, eq
+; CHECK-NEXT: ret
+define i64 @fold_inc_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 1
+ %inc = add nsw i64 %x, 1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %y, %eq_bb ], [ %inc, %entry ]
+ ret i64 %cond
+}
+
+; CHECK-LABEL: fold_inc_false_32:
+; CHECK: {{subs.*wzr,|cmp}} w2, #1
+; CHECK-NEXT: csinc w0, w1, w0, ne
+; CHECK-NEXT: ret
+define i32 @fold_inc_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 1
+ %inc = add nsw i32 %x, 1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %inc, %eq_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK-LABEL: fold_inc_false_64:
+; CHECK: {{subs.*xzr,|cmp}} x2, #1
+; CHECK-NEXT: csinc x0, x1, x0, ne
+; CHECK-NEXT: ret
+define i64 @fold_inc_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 1
+ %inc = add nsw i64 %x, 1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %inc, %eq_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; CHECK-LABEL: fold_inv_true_32:
+; CHECK: {{subs.*wzr,|cmp}} w2, #1
+; CHECK-NEXT: csinv w0, w1, w0, eq
+; CHECK-NEXT: ret
+define i32 @fold_inv_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 1
+ %inv = xor i32 %x, -1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %y, %eq_bb ], [ %inv, %entry ]
+ ret i32 %cond
+}
+
+; CHECK-LABEL: fold_inv_true_64:
+; CHECK: {{subs.*xzr,|cmp}} x2, #1
+; CHECK-NEXT: csinv x0, x1, x0, eq
+; CHECK-NEXT: ret
+define i64 @fold_inv_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 1
+ %inv = xor i64 %x, -1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %y, %eq_bb ], [ %inv, %entry ]
+ ret i64 %cond
+}
+
+; CHECK-LABEL: fold_inv_false_32:
+; CHECK: {{subs.*wzr,|cmp}} w2, #1
+; CHECK-NEXT: csinv w0, w1, w0, ne
+; CHECK-NEXT: ret
+define i32 @fold_inv_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 1
+ %inv = xor i32 %x, -1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %inv, %eq_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK-LABEL: fold_inv_false_64:
+; CHECK: {{subs.*xzr,|cmp}} x2, #1
+; CHECK-NEXT: csinv x0, x1, x0, ne
+; CHECK-NEXT: ret
+define i64 @fold_inv_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 1
+ %inv = xor i64 %x, -1
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %inv, %eq_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; CHECK-LABEL: fold_neg_true_32:
+; CHECK: {{subs.*wzr,|cmp}} w2, #1
+; CHECK-NEXT: csneg w0, w1, w0, eq
+; CHECK-NEXT: ret
+define i32 @fold_neg_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 1
+ %neg = sub nsw i32 0, %x
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %y, %eq_bb ], [ %neg, %entry ]
+ ret i32 %cond
+}
+
+; CHECK-LABEL: fold_neg_true_64:
+; CHECK: {{subs.*xzr,|cmp}} x2, #1
+; CHECK-NEXT: csneg x0, x1, x0, eq
+; CHECK-NEXT: ret
+define i64 @fold_neg_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 1
+ %neg = sub nsw i64 0, %x
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %y, %eq_bb ], [ %neg, %entry ]
+ ret i64 %cond
+}
+
+; CHECK-LABEL: fold_neg_false_32:
+; CHECK: {{subs.*wzr,|cmp}} w2, #1
+; CHECK-NEXT: csneg w0, w1, w0, ne
+; CHECK-NEXT: ret
+define i32 @fold_neg_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 1
+ %neg = sub nsw i32 0, %x
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %neg, %eq_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK-LABEL: fold_neg_false_64:
+; CHECK: {{subs.*xzr,|cmp}} x2, #1
+; CHECK-NEXT: csneg x0, x1, x0, ne
+; CHECK-NEXT: ret
+define i64 @fold_neg_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 1
+ %neg = sub nsw i64 0, %x
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %neg, %eq_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; CHECK: cbnz_32
+; CHECK: {{subs.*wzr,|cmp}} w2, #0
+; CHECK-NEXT: csel w0, w1, w0, ne
+; CHECK-NEXT: ret
+define i32 @cbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i32 %c, 0
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK: cbnz_64
+; CHECK: {{subs.*xzr,|cmp}} x2, #0
+; CHECK-NEXT: csel x0, x1, x0, ne
+; CHECK-NEXT: ret
+define i64 @cbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp eq i64 %c, 0
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; CHECK: cbz_32
+; CHECK: {{subs.*wzr,|cmp}} w2, #0
+; CHECK-NEXT: csel w0, w1, w0, eq
+; CHECK-NEXT: ret
+define i32 @cbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %tobool = icmp ne i32 %c, 0
+ br i1 %tobool, label %ne_bb, label %done
+
+ne_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK: cbz_64
+; CHECK: {{subs.*xzr,|cmp}} x2, #0
+; CHECK-NEXT: csel x0, x1, x0, eq
+; CHECK-NEXT: ret
+define i64 @cbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %tobool = icmp ne i64 %c, 0
+ br i1 %tobool, label %ne_bb, label %done
+
+ne_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; CHECK: tbnz_32
+; CHECK: {{ands.*xzr,|tst}} w2, #0x80
+; CHECK-NEXT: csel w0, w1, w0, ne
+; CHECK-NEXT: ret
+define i32 @tbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %mask = and i32 %c, 128
+ %tobool = icmp eq i32 %mask, 0
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK: tbnz_64
+; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000
+; CHECK-NEXT: csel x0, x1, x0, ne
+; CHECK-NEXT: ret
+define i64 @tbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %mask = and i64 %c, 9223372036854775808
+ %tobool = icmp eq i64 %mask, 0
+ br i1 %tobool, label %eq_bb, label %done
+
+eq_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; CHECK: tbz_32
+; CHECK: {{ands.*xzr,|tst}} w2, #0x80
+; CHECK-NEXT: csel w0, w1, w0, eq
+; CHECK-NEXT: ret
+define i32 @tbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
+entry:
+ %mask = and i32 %c, 128
+ %tobool = icmp ne i32 %mask, 0
+ br i1 %tobool, label %ne_bb, label %done
+
+ne_bb:
+ br label %done
+
+done:
+ %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ]
+ ret i32 %cond
+}
+
+; CHECK: tbz_64
+; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000
+; CHECK-NEXT: csel x0, x1, x0, eq
+; CHECK-NEXT: ret
+define i64 @tbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
+entry:
+ %mask = and i64 %c, 9223372036854775808
+ %tobool = icmp ne i64 %mask, 0
+ br i1 %tobool, label %ne_bb, label %done
+
+ne_bb:
+ br label %done
+
+done:
+ %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ]
+ ret i64 %cond
+}
+
+; This function from 175.vpr folds an ADDWri into a CSINC.
+; Remember to clear the kill flag on the ADDWri.
+define i32 @get_ytrack_to_xtracks() nounwind ssp {
+entry:
+ br label %for.body
+
+for.body:
+ %x0 = load i32* undef, align 4
+ br i1 undef, label %if.then.i146, label %is_sbox.exit155
+
+if.then.i146:
+ %add8.i143 = add nsw i32 0, %x0
+ %rem.i144 = srem i32 %add8.i143, %x0
+ %add9.i145 = add i32 %rem.i144, 1
+ br label %is_sbox.exit155
+
+is_sbox.exit155: ; preds = %if.then.i146, %for.body
+ %seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef, %for.body ]
+ %idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64
+ %arrayidx18.i154 = getelementptr inbounds i32* null, i64 %idxprom15.i152
+ %x1 = load i32* %arrayidx18.i154, align 4
+ br i1 undef, label %for.body51, label %for.body
+
+for.body51: ; preds = %is_sbox.exit155
+ call fastcc void @get_switch_type(i32 %x1, i32 undef, i16 signext undef, i16 signext undef, i16* undef)
+ unreachable
+}
+declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, i16* nocapture) nounwind ssp
diff --git a/test/CodeGen/ARM64/elf-calls.ll b/test/CodeGen/AArch64/arm64-elf-calls.ll
index 8c40203..8c40203 100644
--- a/test/CodeGen/ARM64/elf-calls.ll
+++ b/test/CodeGen/AArch64/arm64-elf-calls.ll
diff --git a/test/CodeGen/ARM64/elf-constpool.ll b/test/CodeGen/AArch64/arm64-elf-constpool.ll
index 95d3343..95d3343 100644
--- a/test/CodeGen/ARM64/elf-constpool.ll
+++ b/test/CodeGen/AArch64/arm64-elf-constpool.ll
diff --git a/test/CodeGen/AArch64/arm64-elf-globals.ll b/test/CodeGen/AArch64/arm64-elf-globals.ll
new file mode 100644
index 0000000..4ed44e7
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-elf-globals.ll
@@ -0,0 +1,120 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s -mcpu=cyclone | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST
+; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-PIC
+; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC
+
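+; Non-PIC code addresses a global with an adrp page plus a :lo12: page
+; offset; PIC instead loads the address from the GOT via :got:/:got_lo12:.
+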
+@var8 = external global i8, align 1
+@var16 = external global i16, align 2
+@var32 = external global i32, align 4
+@var64 = external global i64, align 8
+
+define i8 @test_i8(i8 %new) {
+ %val = load i8* @var8, align 1
+ store i8 %new, i8* @var8
+ ret i8 %val
+; CHECK-LABEL: test_i8:
+; CHECK: adrp x[[HIREG:[0-9]+]], var8
+; CHECK: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
+; CHECK: strb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
+
+; CHECK-PIC-LABEL: test_i8:
+; CHECK-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
+; CHECK-PIC: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
+; CHECK-PIC: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]]
+
+; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var8
+; CHECK-FAST: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
+
+; CHECK-FAST-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
+; CHECK-FAST-PIC: ldr x[[VARADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
+; CHECK-FAST-PIC: ldr {{w[0-9]+}}, [x[[VARADDR]]]
+}
+
+define i16 @test_i16(i16 %new) {
+ %val = load i16* @var16, align 2
+ store i16 %new, i16* @var16
+ ret i16 %val
+; CHECK-LABEL: test_i16:
+; CHECK: adrp x[[HIREG:[0-9]+]], var16
+; CHECK: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
+; CHECK: strh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
+
+; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var16
+; CHECK-FAST: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
+}
+
+define i32 @test_i32(i32 %new) {
+ %val = load i32* @var32, align 4
+ store i32 %new, i32* @var32
+ ret i32 %val
+; CHECK-LABEL: test_i32:
+; CHECK: adrp x[[HIREG:[0-9]+]], var32
+; CHECK: ldr {{w[0-9]+}}, [x[[HIREG]], :lo12:var32]
+; CHECK: str {{w[0-9]+}}, [x[[HIREG]], :lo12:var32]
+
+; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var32
+; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var32
+}
+
+define i64 @test_i64(i64 %new) {
+ %val = load i64* @var64, align 8
+ store i64 %new, i64* @var64
+ ret i64 %val
+; CHECK-LABEL: test_i64:
+; CHECK: adrp x[[HIREG:[0-9]+]], var64
+; CHECK: ldr {{x[0-9]+}}, [x[[HIREG]], :lo12:var64]
+; CHECK: str {{x[0-9]+}}, [x[[HIREG]], :lo12:var64]
+
+; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var64
+; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var64
+}
+
+define i64* @test_addr() {
+ ret i64* @var64
+; CHECK-LABEL: test_addr:
+; CHECK: adrp [[HIREG:x[0-9]+]], var64
+; CHECK: add x0, [[HIREG]], :lo12:var64
+
+; CHECK-FAST: adrp [[HIREG:x[0-9]+]], var64
+; CHECK-FAST: add x0, [[HIREG]], :lo12:var64
+}
+
+@hiddenvar = hidden global i32 0, align 4
+@protectedvar = protected global i32 0, align 4
+
+define i32 @test_vis() {
+ %lhs = load i32* @hiddenvar, align 4
+ %rhs = load i32* @protectedvar, align 4
+ %ret = add i32 %lhs, %rhs
+ ret i32 %ret
+; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar
+; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:hiddenvar]
+; CHECK-PIC: adrp {{x[0-9]+}}, protectedvar
+; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:protectedvar]
+}
+
+@var_default = external global [2 x i32]
+
+define i32 @test_default_align() {
+ %addr = getelementptr [2 x i32]* @var_default, i32 0, i32 0
+ %val = load i32* %addr
+ ret i32 %val
+; CHECK-LABEL: test_default_align:
+; CHECK: adrp x[[HIREG:[0-9]+]], var_default
+; CHECK: ldr w0, [x[[HIREG]], :lo12:var_default]
+}
+
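+; var_default is only 4-byte aligned, so the 8-byte load cannot fold the
+; :lo12: offset into the scaled load and needs an explicit add first.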
+define i64 @test_default_unaligned() {
+ %addr = bitcast [2 x i32]* @var_default to i64*
+ %val = load i64* %addr
+ ret i64 %val
+; CHECK-LABEL: test_default_unaligned:
+; CHECK: adrp [[HIREG:x[0-9]+]], var_default
+; CHECK: add x[[ADDR:[0-9]+]], [[HIREG]], :lo12:var_default
+; CHECK: ldr x0, [x[[ADDR]]]
+}
diff --git a/test/CodeGen/AArch64/arm64-ext.ll b/test/CodeGen/AArch64/arm64-ext.ll
new file mode 100644
index 0000000..67860de
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ext.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
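+; ext extracts from the byte-wise concatenation of its sources, so the
+; immediate is the starting lane index times the element size in bytes
+; (lane 3 of <4 x i16> gives #6 in test_vextd16 below).
+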
+define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextd:
+;CHECK: {{ext.8b.*#3}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextRd:
+;CHECK: {{ext.8b.*#5}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextq:
+;CHECK: {{ext.16b.*3}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextRq:
+;CHECK: {{ext.16b.*7}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: test_vextd16:
+;CHECK: {{ext.8b.*#6}}
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i16> %tmp3
+}
+
+define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: test_vextq32:
+;CHECK: {{ext.16b.*12}}
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %tmp3
+}
+
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextd_undef:
+;CHECK: {{ext.8b.*}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @test_vextd_undef2(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextd_undef2:
+;CHECK: {{ext.8b.*#6}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: test_vextRq_undef:
+;CHECK: {{ext.16b.*#7}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @test_vextRq_undef2(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: test_vextRq_undef2:
+;CHECK: {{ext.16b.*#10}}
+ %tmp1 = load <8 x i16>* %A
+ %vext = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x i16> %vext;
+}
+
+; Tests for the ReconstructShuffle function. Indices have to be carefully
+; chosen to reach the lowering phase as a BUILD_VECTOR.
+
+; One vector needs a vext; the other can be handled by extract_subvector.
+; Also checks that interleaving of the sources is handled correctly.
+; Essence: a vext is used on %A, and something saner than a stack load/store for the final result.
+define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: test_interleaved:
+;CHECK: ext.8b
+;CHECK: zip1.4h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
+ ret <4 x i16> %tmp3
+}
+
+; An undef in the shuffle list should still be optimizable
+define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: test_undef:
+;CHECK: zip1.4h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
+ ret <4 x i16> %tmp3
+}
diff --git a/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
new file mode 100644
index 0000000..048fdb0
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-extend-int-to-fp.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
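+; The i16 lanes are first widened to i32 (ushll/sshll #0) before the
+; vector int-to-fp conversion.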
+define <4 x float> @foo(<4 x i16> %a) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: ushll.4s v0, v0, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: ret
+ %vcvt.i = uitofp <4 x i16> %a to <4 x float>
+ ret <4 x float> %vcvt.i
+}
+
+define <4 x float> @bar(<4 x i16> %a) nounwind {
+; CHECK-LABEL: bar:
+; CHECK: sshll.4s v0, v0, #0
+; CHECK-NEXT: scvtf.4s v0, v0
+; CHECK-NEXT: ret
+ %vcvt.i = sitofp <4 x i16> %a to <4 x float>
+ ret <4 x float> %vcvt.i
+}
diff --git a/test/CodeGen/AArch64/arm64-extend.ll b/test/CodeGen/AArch64/arm64-extend.ll
new file mode 100644
index 0000000..afcaca2
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-extend.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
+@array = external global [0 x i32]
+
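+; The sext of the index and the 4-byte element scaling fold into the
+; load's "w0, sxtw #2" addressing mode; ldrsw extends the result to i64.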
+define i64 @foo(i32 %i) {
+; CHECK: foo
+; CHECK: adrp x[[REG:[0-9]+]], _array@GOTPAGE
+; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _array@GOTPAGEOFF]
+; CHECK: ldrsw x0, [x[[REG1]], w0, sxtw #2]
+; CHECK: ret
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds [0 x i32]* @array, i64 0, i64 %idxprom
+ %tmp1 = load i32* %arrayidx, align 4
+ %conv = sext i32 %tmp1 to i64
+ ret i64 %conv
+}
diff --git a/test/CodeGen/ARM64/extern-weak.ll b/test/CodeGen/AArch64/arm64-extern-weak.ll
index a239403..a239403 100644
--- a/test/CodeGen/ARM64/extern-weak.ll
+++ b/test/CodeGen/AArch64/arm64-extern-weak.ll
diff --git a/test/CodeGen/ARM64/extload-knownzero.ll b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
index 14e5fd3..14e5fd3 100644
--- a/test/CodeGen/ARM64/extload-knownzero.ll
+++ b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
diff --git a/test/CodeGen/AArch64/arm64-extract.ll b/test/CodeGen/AArch64/arm64-extract.ll
new file mode 100644
index 0000000..0198466
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-extract.ll
@@ -0,0 +1,61 @@
+; RUN: llc -aarch64-extr-generation=true -verify-machineinstrs < %s \
+; RUN: -march=arm64 | FileCheck %s
+
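+; A shift-left by N or'd with a shift-right by (width - N) of the same value
+; is a rotate: 19 + 45 == 64 and 9 + 23 == 32 below.
+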
+define i64 @ror_i64(i64 %in) {
+; CHECK-LABEL: ror_i64:
+ %left = shl i64 %in, 19
+ %right = lshr i64 %in, 45
+ %val5 = or i64 %left, %right
+; CHECK: ror {{x[0-9]+}}, x0, #45
+ ret i64 %val5
+}
+
+define i32 @ror_i32(i32 %in) {
+; CHECK-LABEL: ror_i32:
+ %left = shl i32 %in, 9
+ %right = lshr i32 %in, 23
+ %val5 = or i32 %left, %right
+; CHECK: ror {{w[0-9]+}}, w0, #23
+ ret i32 %val5
+}
+
+define i32 @extr_i32(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: extr_i32:
+ %left = shl i32 %lhs, 6
+ %right = lshr i32 %rhs, 26
+ %val = or i32 %left, %right
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than w0 and w1.
+; CHECK: extr {{w[0-9]+}}, w0, w1, #26
+
+ ret i32 %val
+}
+
+define i64 @extr_i64(i64 %lhs, i64 %rhs) {
+; CHECK-LABEL: extr_i64:
+ %right = lshr i64 %rhs, 40
+ %left = shl i64 %lhs, 24
+ %val = or i64 %right, %left
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than x0 and x1.
+; CHECK: extr {{x[0-9]+}}, x0, x1, #40
+
+ ret i64 %val
+}
+
+; Regression test: a bad experimental pattern crept into git which optimised
+; this pattern to a single EXTR.
+define i32 @extr_regress(i32 %a, i32 %b) {
+; CHECK-LABEL: extr_regress:
+
+ %sh1 = shl i32 %a, 14
+ %sh2 = lshr i32 %b, 14
+ %val = or i32 %sh2, %sh1
+; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
+
+ ret i32 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/arm64-extract_subvector.ll b/test/CodeGen/AArch64/arm64-extract_subvector.ll
new file mode 100644
index 0000000..8b15a64
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-extract_subvector.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
+; Extracting the upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.
+
+define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
+; CHECK: v8i8
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %ret
+}
+
+define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
+; CHECK-LABEL: v4i16:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %ret
+}
+
+define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
+; CHECK-LABEL: v2i32:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+ ret <2 x i32> %ret
+}
+
+define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
+; CHECK-LABEL: v1i64:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
+ ret <1 x i64> %ret
+}
+
+define <2 x float> @v2f32(<4 x float> %a) nounwind {
+; CHECK-LABEL: v2f32:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %ret
+}
+
+define <1 x double> @v1f64(<2 x double> %a) nounwind {
+; CHECK-LABEL: v1f64:
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: ret
+ %ret = shufflevector <2 x double> %a, <2 x double> %a, <1 x i32> <i32 1>
+ ret <1 x double> %ret
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
new file mode 100644
index 0000000..ebd847e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+@sortlist = common global [5001 x i32] zeroinitializer, align 16
+@sortlist2 = common global [5001 x i64] zeroinitializer, align 16
+
+; Load an address with an offset larger than an LDR immediate can handle (here 5000 * 4 = 0x4e20 and 5000 * 8 = 0x9c40 bytes).
+define i32 @foo() nounwind {
+entry:
+; CHECK: @foo
+; CHECK: adrp x[[REG:[0-9]+]], _sortlist@GOTPAGE
+; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist@GOTPAGEOFF]
+; CHECK: movz x[[REG2:[0-9]+]], #0x4e20
+; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
+; CHECK: ldr w0, [x[[REG3]]]
+; CHECK: ret
+ %0 = load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4
+ ret i32 %0
+}
+
+define i64 @foo2() nounwind {
+entry:
+; CHECK: @foo2
+; CHECK: adrp x[[REG:[0-9]+]], _sortlist2@GOTPAGE
+; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist2@GOTPAGEOFF]
+; CHECK: movz x[[REG2:[0-9]+]], #0x9c40
+; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
+; CHECK: ldr x0, [x[[REG3]]]
+; CHECK: ret
+ %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
+ ret i64 %0
+}
+
+; Load an address with a ridiculously large offset.
+; rdar://12505553
+@pd2 = common global i8* null, align 8
+
+define signext i8 @foo3() nounwind ssp {
+entry:
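+; 12345678901234 is 0x0b3a_73ce_2ff2, built piecewise with movz/movk below.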
+; CHECK: @foo3
+; CHECK: movz x[[REG:[0-9]+]], #0xb3a, lsl #32
+; CHECK: movk x[[REG]], #0x73ce, lsl #16
+; CHECK: movk x[[REG]], #0x2ff2
+ %0 = load i8** @pd2, align 8
+ %arrayidx = getelementptr inbounds i8* %0, i64 12345678901234
+ %1 = load i8* %arrayidx, align 1
+ ret i8 %1
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
new file mode 100644
index 0000000..1706e9e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
@@ -0,0 +1,26 @@
+; This test should cause TargetMaterializeAlloca to be invoked.
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+%struct.S1Ty = type { i64 }
+%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty }
+
+define void @takeS1(%struct.S1Ty* %V) nounwind {
+entry:
+ %V.addr = alloca %struct.S1Ty*, align 8
+ store %struct.S1Ty* %V, %struct.S1Ty** %V.addr, align 8
+ ret void
+}
+
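+; Field 1 of S2Ty sits at byte offset 8, hence the #0x8 materialized below.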
+define void @main() nounwind {
+entry:
+; CHECK: main
+; CHECK: mov x29, sp
+; CHECK: mov x[[REG:[0-9]+]], sp
+; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
+; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
+ %E = alloca %struct.S2Ty, align 4
+ %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
+ call void @takeS1(%struct.S1Ty* %B)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
new file mode 100644
index 0000000..37a8295
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -0,0 +1,156 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -mcpu=cyclone | FileCheck %s
+
+define void @branch1() nounwind uwtable ssp {
+ %x = alloca i32, align 4
+ store i32 0, i32* %x, align 4
+ %1 = load i32* %x, align 4
+ %2 = icmp ne i32 %1, 0
+ br i1 %2, label %3, label %4
+
+; <label>:3 ; preds = %0
+ br label %4
+
+; <label>:4 ; preds = %3, %0
+ ret void
+}
+
+define void @branch2() nounwind uwtable ssp {
+ %1 = alloca i32, align 4
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %z = alloca i32, align 4
+ store i32 0, i32* %1
+ store i32 1, i32* %y, align 4
+ store i32 1, i32* %x, align 4
+ store i32 0, i32* %z, align 4
+ %2 = load i32* %x, align 4
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %4, label %5
+
+; <label>:4 ; preds = %0
+ store i32 0, i32* %1
+ br label %14
+
+; <label>:5 ; preds = %0
+ %6 = load i32* %y, align 4
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %8, label %13
+
+; <label>:8 ; preds = %5
+ %9 = load i32* %z, align 4
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %11, label %12
+
+; <label>:11 ; preds = %8
+ store i32 1, i32* %1
+ br label %14
+
+; <label>:12 ; preds = %8
+ store i32 0, i32* %1
+ br label %14
+
+; <label>:13 ; preds = %5
+ br label %14
+
+; <label>:14 ; preds = %4, %11, %12, %13
+ %15 = load i32* %1
+ ret void
+}
+
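+; Branches on constant conditions fold to a single unconditional b.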
+define void @true_() nounwind uwtable ssp {
+; CHECK: @true_
+; CHECK: b LBB2_1
+ br i1 true, label %1, label %2
+
+; <label>:1
+; CHECK: LBB2_1
+ br label %2
+
+; <label>:2
+ ret void
+}
+
+define void @false_() nounwind uwtable ssp {
+; CHECK: @false_
+; CHECK: b LBB3_2
+ br i1 false, label %1, label %2
+
+; <label>:1
+ br label %2
+
+; <label>:2
+; CHECK: LBB3_2
+ ret void
+}
+
+define zeroext i8 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) {
+entry:
+ %a.addr = alloca i8, align 1
+ %b.addr = alloca i16, align 2
+ %c.addr = alloca i32, align 4
+ %d.addr = alloca i64, align 8
+ store i8 %a, i8* %a.addr, align 1
+ store i16 %b, i16* %b.addr, align 2
+ store i32 %c, i32* %c.addr, align 4
+ store i64 %d, i64* %d.addr, align 8
+ %0 = load i16* %b.addr, align 2
+; CHECK: and w0, w0, #0x1
+; CHECK: subs w0, w0, #0
+; CHECK: b.eq LBB4_2
+ %conv = trunc i16 %0 to i1
+ br i1 %conv, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void @foo1()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %1 = load i32* %c.addr, align 4
+; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
+; CHECK: subs w{{[0-9]+}}, w[[REG]], #0
+; CHECK: b.eq LBB4_4
+ %conv1 = trunc i32 %1 to i1
+ br i1 %conv1, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.end
+ call void @foo1()
+ br label %if.end4
+
+if.end4: ; preds = %if.then3, %if.end
+ %2 = load i64* %d.addr, align 8
+; CHECK: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
+; CHECK: b.eq LBB4_6
+ %conv5 = trunc i64 %2 to i1
+ br i1 %conv5, label %if.then7, label %if.end8
+
+if.then7: ; preds = %if.end4
+ call void @foo1()
+ br label %if.end8
+
+if.end8: ; preds = %if.then7, %if.end4
+ %3 = load i8* %a.addr, align 1
+ ret i8 %3
+}
+
+declare void @foo1()
+
+; rdar://15174028
+define i32 @trunc64(i64 %foo) nounwind {
+; CHECK: trunc64
+; CHECK: orr [[REG:x[0-9]+]], xzr, #0x1
+; CHECK: and [[REG2:x[0-9]+]], x0, [[REG]]
+; CHECK: mov x[[REG3:[0-9]+]], [[REG2]]
+; CHECK: and [[REG4:w[0-9]+]], w[[REG3]], #0x1
+; CHECK: subs {{w[0-9]+}}, [[REG4]], #0
+; CHECK: b.eq LBB5_2
+ %a = and i64 %foo, 1
+ %b = trunc i64 %a to i1
+ br i1 %b, label %if.then, label %if.else
+
+if.then:
+ ret i32 1
+
+if.else:
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
new file mode 100644
index 0000000..8d756ae
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64_be-linux-gnu | FileCheck %s --check-prefix=CHECK-BE
+
+define void @call0() nounwind {
+entry:
+ ret void
+}
+
+define void @foo0() nounwind {
+entry:
+; CHECK: foo0
+; CHECK: bl _call0
+ call void @call0()
+ ret void
+}
+
+define i32 @call1(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %tmp = load i32* %a.addr, align 4
+ ret i32 %tmp
+}
+
+define i32 @foo1(i32 %a) nounwind {
+entry:
+; CHECK: foo1
+; CHECK: stur w0, [x29, #-4]
+; CHECK-NEXT: ldur w0, [x29, #-4]
+; CHECK-NEXT: bl _call1
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %tmp = load i32* %a.addr, align 4
+ %call = call i32 @call1(i32 %tmp)
+ ret i32 %call
+}
+
+define i32 @sext_(i8 %a, i16 %b) nounwind {
+entry:
+; CHECK: @sext_
+; CHECK: sxtb w0, w0
+; CHECK: sxth w1, w1
+; CHECK: bl _foo_sext_
+ call void @foo_sext_(i8 signext %a, i16 signext %b)
+ ret i32 0
+}
+
+declare void @foo_sext_(i8 %a, i16 %b)
+
+define i32 @zext_(i8 %a, i16 %b) nounwind {
+entry:
+; CHECK: @zext_
+; CHECK: uxtb w0, w0
+; CHECK: uxth w1, w1
+ call void @foo_zext_(i8 zeroext %a, i16 zeroext %b)
+ ret i32 0
+}
+
+declare void @foo_zext_(i8 %a, i16 %b)
+
+define i32 @t1(i32 %argc, i8** nocapture %argv) {
+entry:
+; CHECK: @t1
+; The first eight i8 arguments go in w0-w7; the ninth is passed on the stack as an i8.
+; CHECK: strb w{{[0-9]+}}, [sp]
+; CHECK-NEXT: bl _bar
+ %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70, i8 zeroext 28, i8 zeroext 39, i8 zeroext -41)
+ ret i32 0
+}
+
+declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
+
+; Test materialization of integer constants. The target-independent selector handles this.
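+; The zeroext/signext attributes oblige the caller to extend sub-word
+; values, hence the uxth/sxtb/and #0x1 sequences checked below.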
+define i32 @t2() {
+entry:
+; CHECK: @t2
+; CHECK: movz x0, #0
+; CHECK: orr w1, wzr, #0xfffffff8
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x3ff
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x2
+; CHECK: movz w[[REG3:[0-9]+]], #0
+; CHECK: orr w[[REG4:[0-9]+]], wzr, #0x1
+; CHECK: uxth w2, w[[REG]]
+; CHECK: sxtb w3, w[[REG2]]
+; CHECK: and w4, w[[REG3]], #0x1
+; CHECK: and w5, w[[REG4]], #0x1
+; CHECK: bl _func2
+ %call = call i32 @func2(i64 zeroext 0, i32 signext -8, i16 zeroext 1023, i8 signext -254, i1 zeroext 0, i1 zeroext 1)
+ ret i32 0
+}
+
+declare i32 @func2(i64 zeroext, i32 signext, i16 zeroext, i8 signext, i1 zeroext, i1 zeroext)
+
+declare void @callee_b0f(i8 %bp10, i8 %bp11, i8 %bp12, i8 %bp13, i8 %bp14, i8 %bp15, i8 %bp17, i8 %bp18, i8 %bp19)
+define void @caller_b1f() {
+entry:
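+ ; On big-endian targets the ninth i8 argument sits in the top byte of its stack slot, at [sp, #7].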
+ ; CHECK-BE: strb w{{.*}}, [sp, #7]
+ call void @callee_b0f(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 42)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
new file mode 100644
index 0000000..c5417de
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -0,0 +1,444 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -mcpu=cyclone | FileCheck %s
+
+;; Test various conversions.
+define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
+entry:
+; CHECK: trunc_
+; CHECK: sub sp, sp, #16
+; CHECK: strb w0, [sp, #15]
+; CHECK: strh w1, [sp, #12]
+; CHECK: str w2, [sp, #8]
+; CHECK: str x3, [sp]
+; CHECK: ldr x3, [sp]
+; CHECK: mov x0, x3
+; CHECK: str w0, [sp, #8]
+; CHECK: ldr w0, [sp, #8]
+; CHECK: strh w0, [sp, #12]
+; CHECK: ldrh w0, [sp, #12]
+; CHECK: strb w0, [sp, #15]
+; CHECK: ldrb w0, [sp, #15]
+; CHECK: uxtb w0, w0
+; CHECK: add sp, sp, #16
+; CHECK: ret
+ %a.addr = alloca i8, align 1
+ %b.addr = alloca i16, align 2
+ %c.addr = alloca i32, align 4
+ %d.addr = alloca i64, align 8
+ store i8 %a, i8* %a.addr, align 1
+ store i16 %b, i16* %b.addr, align 2
+ store i32 %c, i32* %c.addr, align 4
+ store i64 %d, i64* %d.addr, align 8
+ %tmp = load i64* %d.addr, align 8
+ %conv = trunc i64 %tmp to i32
+ store i32 %conv, i32* %c.addr, align 4
+ %tmp1 = load i32* %c.addr, align 4
+ %conv2 = trunc i32 %tmp1 to i16
+ store i16 %conv2, i16* %b.addr, align 2
+ %tmp3 = load i16* %b.addr, align 2
+ %conv4 = trunc i16 %tmp3 to i8
+ store i8 %conv4, i8* %a.addr, align 1
+ %tmp5 = load i8* %a.addr, align 1
+ %conv6 = zext i8 %tmp5 to i32
+ ret i32 %conv6
+}
+
+define i64 @zext_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
+entry:
+; CHECK: zext_
+; CHECK: sub sp, sp, #16
+; CHECK: strb w0, [sp, #15]
+; CHECK: strh w1, [sp, #12]
+; CHECK: str w2, [sp, #8]
+; CHECK: str x3, [sp]
+; CHECK: ldrb w0, [sp, #15]
+; CHECK: uxtb w0, w0
+; CHECK: strh w0, [sp, #12]
+; CHECK: ldrh w0, [sp, #12]
+; CHECK: uxth w0, w0
+; CHECK: str w0, [sp, #8]
+; CHECK: ldr w0, [sp, #8]
+; CHECK: mov x3, x0
+; CHECK: ubfx x3, x3, #0, #32
+; CHECK: str x3, [sp]
+; CHECK: ldr x0, [sp]
+; CHECK: ret
+ %a.addr = alloca i8, align 1
+ %b.addr = alloca i16, align 2
+ %c.addr = alloca i32, align 4
+ %d.addr = alloca i64, align 8
+ store i8 %a, i8* %a.addr, align 1
+ store i16 %b, i16* %b.addr, align 2
+ store i32 %c, i32* %c.addr, align 4
+ store i64 %d, i64* %d.addr, align 8
+ %tmp = load i8* %a.addr, align 1
+ %conv = zext i8 %tmp to i16
+ store i16 %conv, i16* %b.addr, align 2
+ %tmp1 = load i16* %b.addr, align 2
+ %conv2 = zext i16 %tmp1 to i32
+ store i32 %conv2, i32* %c.addr, align 4
+ %tmp3 = load i32* %c.addr, align 4
+ %conv4 = zext i32 %tmp3 to i64
+ store i64 %conv4, i64* %d.addr, align 8
+ %tmp5 = load i64* %d.addr, align 8
+ ret i64 %tmp5
+}
+
+define i32 @zext_i1_i32(i1 zeroext %a) nounwind ssp {
+entry:
+; CHECK: @zext_i1_i32
+; CHECK: and w0, w0, #0x1
+ %conv = zext i1 %a to i32
+ ret i32 %conv;
+}
+
+define i64 @zext_i1_i64(i1 zeroext %a) nounwind ssp {
+entry:
+; CHECK: @zext_i1_i64
+; CHECK: and w0, w0, #0x1
+ %conv = zext i1 %a to i64
+ ret i64 %conv;
+}
+
+define i64 @sext_(i8 signext %a, i16 signext %b, i32 %c, i64 %d) nounwind ssp {
+entry:
+; CHECK: sext_
+; CHECK: sub sp, sp, #16
+; CHECK: strb w0, [sp, #15]
+; CHECK: strh w1, [sp, #12]
+; CHECK: str w2, [sp, #8]
+; CHECK: str x3, [sp]
+; CHECK: ldrb w0, [sp, #15]
+; CHECK: sxtb w0, w0
+; CHECK: strh w0, [sp, #12]
+; CHECK: ldrh w0, [sp, #12]
+; CHECK: sxth w0, w0
+; CHECK: str w0, [sp, #8]
+; CHECK: ldr w0, [sp, #8]
+; CHECK: mov x3, x0
+; CHECK: sxtw x3, w3
+; CHECK: str x3, [sp]
+; CHECK: ldr x0, [sp]
+; CHECK: ret
+ %a.addr = alloca i8, align 1
+ %b.addr = alloca i16, align 2
+ %c.addr = alloca i32, align 4
+ %d.addr = alloca i64, align 8
+ store i8 %a, i8* %a.addr, align 1
+ store i16 %b, i16* %b.addr, align 2
+ store i32 %c, i32* %c.addr, align 4
+ store i64 %d, i64* %d.addr, align 8
+ %tmp = load i8* %a.addr, align 1
+ %conv = sext i8 %tmp to i16
+ store i16 %conv, i16* %b.addr, align 2
+ %tmp1 = load i16* %b.addr, align 2
+ %conv2 = sext i16 %tmp1 to i32
+ store i32 %conv2, i32* %c.addr, align 4
+ %tmp3 = load i32* %c.addr, align 4
+ %conv4 = sext i32 %tmp3 to i64
+ store i64 %conv4, i64* %d.addr, align 8
+ %tmp5 = load i64* %d.addr, align 8
+ ret i64 %tmp5
+}
+
+; Test sext i8 to i64
+
+define zeroext i64 @sext_i8_i64(i8 zeroext %in) {
+; CHECK-LABEL: sext_i8_i64:
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: sxtb x0, w[[TMP]]
+ %big = sext i8 %in to i64
+ ret i64 %big
+}
+
+define zeroext i64 @sext_i16_i64(i16 zeroext %in) {
+; CHECK-LABEL: sext_i16_i64:
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: sxth x0, w[[TMP]]
+ %big = sext i16 %in to i64
+ ret i64 %big
+}
+
+; Test sext i1 to i32
+define i32 @sext_i1_i32(i1 signext %a) nounwind ssp {
+entry:
+; CHECK: sext_i1_i32
+; CHECK: sbfx w0, w0, #0, #1
+ %conv = sext i1 %a to i32
+ ret i32 %conv
+}
+
+; Test sext i1 to i16
+define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
+entry:
+; CHECK: sext_i1_i16
+; CHECK: sbfx w0, w0, #0, #1
+ %conv = sext i1 %a to i16
+ ret i16 %conv
+}
+
+; Test sext i1 to i8
+define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
+entry:
+; CHECK: sext_i1_i8
+; CHECK: sbfx w0, w0, #0, #1
+ %conv = sext i1 %a to i8
+ ret i8 %conv
+}
+
+; Test fpext
+define double @fpext_(float %a) nounwind ssp {
+entry:
+; CHECK: fpext_
+; CHECK: fcvt d0, s0
+ %conv = fpext float %a to double
+ ret double %conv
+}
+
+; Test fptrunc
+define float @fptrunc_(double %a) nounwind ssp {
+entry:
+; CHECK: fptrunc_
+; CHECK: fcvt s0, d0
+ %conv = fptrunc double %a to float
+ ret float %conv
+}
+
+; Test fptosi
+define i32 @fptosi_ws(float %a) nounwind ssp {
+entry:
+; CHECK: fptosi_ws
+; CHECK: fcvtzs w0, s0
+ %conv = fptosi float %a to i32
+ ret i32 %conv
+}
+
+; Test fptosi
+define i32 @fptosi_wd(double %a) nounwind ssp {
+entry:
+; CHECK: fptosi_wd
+; CHECK: fcvtzs w0, d0
+ %conv = fptosi double %a to i32
+ ret i32 %conv
+}
+
+; Test fptoui
+define i32 @fptoui_ws(float %a) nounwind ssp {
+entry:
+; CHECK: fptoui_ws
+; CHECK: fcvtzu w0, s0
+ %conv = fptoui float %a to i32
+ ret i32 %conv
+}
+
+; Test fptoui
+define i32 @fptoui_wd(double %a) nounwind ssp {
+entry:
+; CHECK: fptoui_wd
+; CHECK: fcvtzu w0, d0
+ %conv = fptoui double %a to i32
+ ret i32 %conv
+}
+
+; Test sitofp
+define float @sitofp_sw_i1(i1 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_sw_i1
+; CHECK: sbfx w0, w0, #0, #1
+; CHECK: scvtf s0, w0
+ %conv = sitofp i1 %a to float
+ ret float %conv
+}
+
+; Test sitofp
+define float @sitofp_sw_i8(i8 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_sw_i8
+; CHECK: sxtb w0, w0
+; CHECK: scvtf s0, w0
+ %conv = sitofp i8 %a to float
+ ret float %conv
+}
+
+; Test sitofp
+define float @sitofp_sw_i16(i16 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_sw_i16
+; CHECK: sxth w0, w0
+; CHECK: scvtf s0, w0
+ %conv = sitofp i16 %a to float
+ ret float %conv
+}
+
+; Test sitofp
+define float @sitofp_sw(i32 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_sw
+; CHECK: scvtf s0, w0
+ %conv = sitofp i32 %a to float
+ ret float %conv
+}
+
+; Test sitofp
+define float @sitofp_sx(i64 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_sx
+; CHECK: scvtf s0, x0
+ %conv = sitofp i64 %a to float
+ ret float %conv
+}
+
+; Test sitofp
+define double @sitofp_dw(i32 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_dw
+; CHECK: scvtf d0, w0
+ %conv = sitofp i32 %a to double
+ ret double %conv
+}
+
+; Test sitofp
+define double @sitofp_dx(i64 %a) nounwind ssp {
+entry:
+; CHECK: sitofp_dx
+; CHECK: scvtf d0, x0
+ %conv = sitofp i64 %a to double
+ ret double %conv
+}
+
+; Test uitofp
+define float @uitofp_sw_i1(i1 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_sw_i1
+; CHECK: and w0, w0, #0x1
+; CHECK: ucvtf s0, w0
+ %conv = uitofp i1 %a to float
+ ret float %conv
+}
+
+; Test uitofp
+define float @uitofp_sw_i8(i8 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_sw_i8
+; CHECK: uxtb w0, w0
+; CHECK: ucvtf s0, w0
+ %conv = uitofp i8 %a to float
+ ret float %conv
+}
+
+; Test uitofp
+define float @uitofp_sw_i16(i16 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_sw_i16
+; CHECK: uxth w0, w0
+; CHECK: ucvtf s0, w0
+ %conv = uitofp i16 %a to float
+ ret float %conv
+}
+
+; Test uitofp
+define float @uitofp_sw(i32 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_sw
+; CHECK: ucvtf s0, w0
+ %conv = uitofp i32 %a to float
+ ret float %conv
+}
+
+; Test uitofp
+define float @uitofp_sx(i64 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_sx
+; CHECK: ucvtf s0, x0
+ %conv = uitofp i64 %a to float
+ ret float %conv
+}
+
+; Test uitofp
+define double @uitofp_dw(i32 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_dw
+; CHECK: ucvtf d0, w0
+ %conv = uitofp i32 %a to double
+ ret double %conv
+}
+
+; Test uitofp
+define double @uitofp_dx(i64 %a) nounwind ssp {
+entry:
+; CHECK: uitofp_dx
+; CHECK: ucvtf d0, x0
+ %conv = uitofp i64 %a to double
+ ret double %conv
+}
+
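+; Truncating i64 to i32 needs no instruction: the result is simply the low
+; 32 bits, so only a register copy is emitted.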
+define i32 @i64_trunc_i32(i64 %a) nounwind ssp {
+entry:
+; CHECK: i64_trunc_i32
+; CHECK: mov x1, x0
+ %conv = trunc i64 %a to i32
+ ret i32 %conv
+}
+
+define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
+entry:
+; CHECK: i64_trunc_i16
+; CHECK: mov x[[REG:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xffff
+; CHECK: uxth w0, [[REG2]]
+ %conv = trunc i64 %a to i16
+ ret i16 %conv
+}
+
+define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
+entry:
+; CHECK: i64_trunc_i8
+; CHECK: mov x[[REG:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xff
+; CHECK: uxtb w0, [[REG2]]
+ %conv = trunc i64 %a to i8
+ ret i8 %conv
+}
+
+define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
+entry:
+; CHECK: i64_trunc_i1
+; CHECK: mov x[[REG:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0x1
+; CHECK: and w0, [[REG2]], #0x1
+ %conv = trunc i64 %a to i1
+ ret i1 %conv
+}
+
+; rdar://15101939
+define void @stack_trunc() nounwind {
+; CHECK: stack_trunc
+; CHECK: sub sp, sp, #16
+; CHECK: ldr [[REG:x[0-9]+]], [sp]
+; CHECK: mov x[[REG2:[0-9]+]], [[REG]]
+; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0xff
+; CHECK: strb [[REG3]], [sp, #15]
+; CHECK: add sp, sp, #16
+ %a = alloca i8, align 1
+ %b = alloca i64, align 8
+ %c = load i64* %b, align 8
+ %d = trunc i64 %c to i8
+ store i8 %d, i8* %a, align 1
+ ret void
+}
+
+define zeroext i64 @zext_i8_i64(i8 zeroext %in) {
+; CHECK-LABEL: zext_i8_i64:
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: ubfx x0, x[[TMP]], #0, #8
+ %big = zext i8 %in to i64
+ ret i64 %big
+}
+define zeroext i64 @zext_i16_i64(i16 zeroext %in) {
+; CHECK-LABEL: zext_i16_i64:
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: ubfx x0, x[[TMP]], #0, #16
+ %big = zext i16 %in to i64
+ ret i64 %big
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
new file mode 100644
index 0000000..f030596
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin | FileCheck %s
+
+define zeroext i1 @fcmp_float1(float %a) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_float1
+; CHECK: fcmp s0, #0.0
+; CHECK: cset w{{[0-9]+}}, ne
+ %cmp = fcmp une float %a, 0.000000e+00
+ ret i1 %cmp
+}
+
+define zeroext i1 @fcmp_float2(float %a, float %b) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_float2
+; CHECK: fcmp s0, s1
+; CHECK: cset w{{[0-9]+}}, ne
+ %cmp = fcmp une float %a, %b
+ ret i1 %cmp
+}
+
+define zeroext i1 @fcmp_double1(double %a) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_double1
+; CHECK: fcmp d0, #0.0
+; CHECK: cset w{{[0-9]+}}, ne
+ %cmp = fcmp une double %a, 0.000000e+00
+ ret i1 %cmp
+}
+
+define zeroext i1 @fcmp_double2(double %a, double %b) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_double2
+; CHECK: fcmp d0, d1
+; CHECK: cset w{{[0-9]+}}, ne
+ %cmp = fcmp une double %a, %b
+ ret i1 %cmp
+}
+
+; Check each fcmp condition
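+; Ordered predicates map directly (oeq->eq, ogt->gt, oge->ge, olt->mi,
+; ole->ls, ord->vc); unordered ones use the NaN-inclusive codes (uno->vs,
+; ugt->hi, uge->pl, ult->lt, ule->le, une->ne).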
+define float @fcmp_oeq(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_oeq
+; CHECK: fcmp s0, s1
+; CHECK: cset w{{[0-9]+}}, eq
+ %cmp = fcmp oeq float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ogt(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ogt
+; CHECK: fcmp s0, s1
+; CHECK: cset w{{[0-9]+}}, gt
+ %cmp = fcmp ogt float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_oge(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_oge
+; CHECK: fcmp s0, s1
+; CHECK: cset w{{[0-9]+}}, ge
+ %cmp = fcmp oge float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_olt(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_olt
+; CHECK: fcmp s0, s1
+; CHECK: cset w{{[0-9]+}}, mi
+ %cmp = fcmp olt float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ole(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ole
+; CHECK: fcmp s0, s1
+; CHECK: cset w{{[0-9]+}}, ls
+ %cmp = fcmp ole float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ord(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ord
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, vc
+ %cmp = fcmp ord float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_uno(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_uno
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, vs
+ %cmp = fcmp uno float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ugt(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ugt
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, hi
+ %cmp = fcmp ugt float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_uge(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_uge
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, pl
+ %cmp = fcmp uge float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ult(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ult
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, lt
+ %cmp = fcmp ult float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ule(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ule
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, le
+ %cmp = fcmp ule float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_une(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_une
+; CHECK: fcmp s0, s1
+; CHECK: cset {{w[0-9]+}}, ne
+ %cmp = fcmp une float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
new file mode 100644
index 0000000..dc4d895
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+; Test load/store of global value from global offset table.
+@seed = common global i64 0, align 8
+
+define void @Initrand() nounwind {
+entry:
+; CHECK: @Initrand
+; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
+; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
+; CHECK: str x{{[0-9]+}}, [x[[REG2]]]
+ store i64 74755, i64* @seed, align 8
+ ret void
+}
+
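+; The constants below: 0x51d == 1309, 0x3619 == 13849, and the and-mask
+; 0xffff == 65535, matching the IR.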
+define i32 @Rand() nounwind {
+entry:
+; CHECK: @Rand
+; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
+; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
+; CHECK: movz x[[REG3:[0-9]+]], #0x51d
+; CHECK: ldr x[[REG4:[0-9]+]], [x[[REG2]]]
+; CHECK: mul x[[REG5:[0-9]+]], x[[REG4]], x[[REG3]]
+; CHECK: movz x[[REG6:[0-9]+]], #0x3619
+; CHECK: add x[[REG7:[0-9]+]], x[[REG5]], x[[REG6]]
+; CHECK: orr x[[REG8:[0-9]+]], xzr, #0xffff
+; CHECK: and x[[REG9:[0-9]+]], x[[REG7]], x[[REG8]]
+; CHECK: str x[[REG9]], [x[[REG]]]
+; CHECK: ldr x{{[0-9]+}}, [x[[REG]]]
+ %0 = load i64* @seed, align 8
+ %mul = mul nsw i64 %0, 1309
+ %add = add nsw i64 %mul, 13849
+ %and = and i64 %add, 65535
+ store i64 %and, i64* @seed, align 8
+ %1 = load i64* @seed, align 8
+ %conv = trunc i64 %1 to i32
+ ret i32 %conv
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
new file mode 100644
index 0000000..971be5c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
@@ -0,0 +1,216 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+define i32 @icmp_eq_imm(i32 %a) nounwind ssp {
+entry:
+; CHECK: icmp_eq_imm
+; CHECK: cmp w0, #31
+; CHECK: cset w0, eq
+ %cmp = icmp eq i32 %a, 31
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_eq_neg_imm(i32 %a) nounwind ssp {
+entry:
+; CHECK: icmp_eq_neg_imm
+; CHECK: cmn w0, #7
+; CHECK: cset w0, eq
+ %cmp = icmp eq i32 %a, -7
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_eq(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_eq
+; CHECK: cmp w0, w1
+; CHECK: cset w0, eq
+ %cmp = icmp eq i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_ne(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_ne
+; CHECK: cmp w0, w1
+; CHECK: cset w0, ne
+ %cmp = icmp ne i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_ugt(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_ugt
+; CHECK: cmp w0, w1
+; CHECK: cset w0, hi
+ %cmp = icmp ugt i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_uge(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_uge
+; CHECK: cmp w0, w1
+; CHECK: cset w0, hs
+ %cmp = icmp uge i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_ult(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_ult
+; CHECK: cmp w0, w1
+; CHECK: cset w0, lo
+ %cmp = icmp ult i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_ule(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_ule
+; CHECK: cmp w0, w1
+; CHECK: cset w0, ls
+ %cmp = icmp ule i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_sgt(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_sgt
+; CHECK: cmp w0, w1
+; CHECK: cset w0, gt
+ %cmp = icmp sgt i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_sge(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_sge
+; CHECK: cmp w0, w1
+; CHECK: cset w0, ge
+ %cmp = icmp sge i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_slt(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_slt
+; CHECK: cmp w0, w1
+; CHECK: cset w0, lt
+ %cmp = icmp slt i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_sle(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: icmp_sle
+; CHECK: cmp w0, w1
+; CHECK: cset w0, le
+ %cmp = icmp sle i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp_i64(i64 %a, i64 %b) nounwind ssp {
+entry:
+; CHECK: icmp_i64
+; CHECK: cmp x0, x1
+; CHECK: cset w{{[0-9]+}}, le
+ %cmp = icmp sle i64 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
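+; Sub-word compares extend both operands to 32 bits first: sign extension
+; for signed predicates (and for eq/ne here), zero extension for unsigned ones.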
+define zeroext i1 @icmp_eq_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; CHECK: icmp_eq_i16
+; CHECK: sxth w0, w0
+; CHECK: sxth w1, w1
+; CHECK: cmp w0, w1
+; CHECK: cset w0, eq
+ %cmp = icmp eq i16 %a, %b
+ ret i1 %cmp
+}
+
+define zeroext i1 @icmp_eq_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; CHECK: icmp_eq_i8
+; CHECK: sxtb w0, w0
+; CHECK: sxtb w1, w1
+; CHECK: cmp w0, w1
+; CHECK: cset w0, eq
+ %cmp = icmp eq i8 %a, %b
+ ret i1 %cmp
+}
+
+define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
+entry:
+; CHECK: icmp_i16_unsigned
+; CHECK: uxth w0, w0
+; CHECK: uxth w1, w1
+; CHECK: cmp w0, w1
+; CHECK: cset w0, lo
+ %cmp = icmp ult i16 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
+entry:
+; CHECK: @icmp_i8_signed
+; CHECK: sxtb w0, w0
+; CHECK: sxtb w1, w1
+; CHECK: cmp w0, w1
+; CHECK: cset w0, gt
+ %cmp = icmp sgt i8 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+
+define i32 @icmp_i16_signed_const(i16 %a) nounwind {
+entry:
+; CHECK: icmp_i16_signed_const
+; CHECK: sxth w0, w0
+; CHECK: cmn w0, #233
+; CHECK: cset w0, lt
+; CHECK: and w0, w0, #0x1
+ %cmp = icmp slt i16 %a, -233
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define i32 @icmp_i8_signed_const(i8 %a) nounwind {
+entry:
+; CHECK: icmp_i8_signed_const
+; CHECK: sxtb w0, w0
+; CHECK: cmp w0, #124
+; CHECK: cset w0, gt
+; CHECK: and w0, w0, #0x1
+ %cmp = icmp sgt i8 %a, 124
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define i32 @icmp_i1_unsigned_const(i1 %a) nounwind {
+entry:
+; CHECK: icmp_i1_unsigned_const
+; CHECK: and w0, w0, #0x1
+; CHECK: cmp w0, #0
+; CHECK: cset w0, lo
+; CHECK: and w0, w0, #0x1
+ %cmp = icmp ult i1 %a, 0
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
diff --git a/test/CodeGen/ARM64/fast-isel-indirectbr.ll b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
index 70335ac..70335ac 100644
--- a/test/CodeGen/ARM64/fast-isel-indirectbr.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
new file mode 100644
index 0000000..a3d5f6c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios | FileCheck %s --check-prefix=ARM64
+
+@message = global [80 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 16
+@temp = common global [80 x i8] zeroinitializer, align 16
+
+define void @t1() {
+; ARM64-LABEL: t1
+; ARM64: adrp x8, _message@PAGE
+; ARM64: add x0, x8, _message@PAGEOFF
+; ARM64: movz w9, #0
+; ARM64: movz x2, #0x50
+; ARM64: uxtb w1, w9
+; ARM64: bl _memset
+ call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define void @t2() {
+; ARM64-LABEL: t2
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x8, _message@PAGE
+; ARM64: add x1, x8, _message@PAGEOFF
+; ARM64: movz x2, #0x50
+; ARM64: bl _memcpy
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+
+define void @t3() {
+; ARM64-LABEL: t3
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x8, _message@PAGE
+; ARM64: add x1, x8, _message@PAGEOFF
+; ARM64: movz x2, #0x14
+; ARM64: bl _memmove
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+
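+; Short memcpys are expanded inline; the chunk width tracks the alignment:
+; 8-byte loads/stores for align 16 and 8 (t4/t5), 4-byte for align 4 (t6),
+; 2-byte for align 2 (t7) and single bytes for align 1 (t8).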
+define void @t4() {
+; ARM64-LABEL: t4
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x9, _message@PAGE
+; ARM64: add x9, x9, _message@PAGEOFF
+; ARM64: ldr x10, [x9]
+; ARM64: str x10, [x8]
+; ARM64: ldr x10, [x9, #8]
+; ARM64: str x10, [x8, #8]
+; ARM64: ldrb w11, [x9, #16]
+; ARM64: strb w11, [x8, #16]
+; ARM64: ret
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false)
+ ret void
+}
+
+define void @t5() {
+; ARM64-LABEL: t5
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x9, _message@PAGE
+; ARM64: add x9, x9, _message@PAGEOFF
+; ARM64: ldr x10, [x9]
+; ARM64: str x10, [x8]
+; ARM64: ldr x10, [x9, #8]
+; ARM64: str x10, [x8, #8]
+; ARM64: ldrb w11, [x9, #16]
+; ARM64: strb w11, [x8, #16]
+; ARM64: ret
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false)
+ ret void
+}
+
+define void @t6() {
+; ARM64-LABEL: t6
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x9, _message@PAGE
+; ARM64: add x9, x9, _message@PAGEOFF
+; ARM64: ldr w10, [x9]
+; ARM64: str w10, [x8]
+; ARM64: ldr w10, [x9, #4]
+; ARM64: str w10, [x8, #4]
+; ARM64: ldrb w10, [x9, #8]
+; ARM64: strb w10, [x8, #8]
+; ARM64: ret
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false)
+ ret void
+}
+
+define void @t7() {
+; ARM64-LABEL: t7
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x9, _message@PAGE
+; ARM64: add x9, x9, _message@PAGEOFF
+; ARM64: ldrh w10, [x9]
+; ARM64: strh w10, [x8]
+; ARM64: ldrh w10, [x9, #2]
+; ARM64: strh w10, [x8, #2]
+; ARM64: ldrh w10, [x9, #4]
+; ARM64: strh w10, [x8, #4]
+; ARM64: ldrb w10, [x9, #6]
+; ARM64: strb w10, [x8, #6]
+; ARM64: ret
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false)
+ ret void
+}
+
+define void @t8() {
+; ARM64-LABEL: t8
+; ARM64: adrp x8, _temp@GOTPAGE
+; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
+; ARM64: adrp x9, _message@PAGE
+; ARM64: add x9, x9, _message@PAGEOFF
+; ARM64: ldrb w10, [x9]
+; ARM64: strb w10, [x8]
+; ARM64: ldrb w10, [x9, #1]
+; ARM64: strb w10, [x8, #1]
+; ARM64: ldrb w10, [x9, #2]
+; ARM64: strb w10, [x8, #2]
+; ARM64: ldrb w10, [x9, #3]
+; ARM64: strb w10, [x8, #3]
+; ARM64: ret
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
new file mode 100644
index 0000000..ffac131
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+; Materialize using fmov
+define void @float_(float* %value) {
+; CHECK: @float_
+; CHECK: fmov s0, #1.25000000
+ store float 1.250000e+00, float* %value, align 4
+ ret void
+}
+
+define void @double_(double* %value) {
+; CHECK: @double_
+; CHECK: fmov d0, #1.25000000
+ store double 1.250000e+00, double* %value, align 8
+ ret void
+}
+
+; Materialize from constant pool
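+; (these pi approximations are not encodable as 8-bit fmov immediates)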
+define float @float_cp() {
+; CHECK: @float_cp
+ ret float 0x400921FB60000000
+}
+
+define double @double_cp() {
+; CHECK: @double_cp
+ ret double 0x400921FB54442D18
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
new file mode 100644
index 0000000..483d179
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=arm64-apple-ios -O0 %s -o - | FileCheck %s
+
+; Fast-isel can't do vector conversions yet, but it was emitting some highly
+; suspect UCVTFUWDri MachineInstrs.
+define <4 x float> @test_uitofp(<4 x i32> %in) {
+; CHECK-LABEL: test_uitofp:
+; CHECK: ucvtf.4s v0, v0
+
+ %res = uitofp <4 x i32> %in to <4 x float>
+ ret <4 x float> %res
+}
+
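+; For 2 x i32 -> 2 x double the input is first widened with sshll so the
+; scvtf operates on 2 x i64 lanes.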
+define <2 x double> @test_sitofp(<2 x i32> %in) {
+; CHECK-LABEL: test_sitofp:
+; CHECK: sshll.2d [[EXT:v[0-9]+]], v0, #0
+; CHECK: scvtf.2d v0, [[EXT]]
+
+ %res = sitofp <2 x i32> %in to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x i32> @test_fptoui(<2 x float> %in) {
+; CHECK-LABEL: test_fptoui:
+; CHECK: fcvtzu.2s v0, v0
+
+ %res = fptoui <2 x float> %in to <2 x i32>
+ ret <2 x i32> %res
+}
+
+define <2 x i64> @test_fptosi(<2 x double> %in) {
+; CHECK-LABEL: test_fptosi:
+; CHECK: fcvtzs.2d v0, v0
+
+ %res = fptosi <2 x double> %in to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define fp128 @uitofp_i32_fp128(i32 %a) {
+entry:
+; CHECK-LABEL: uitofp_i32_fp128
+; CHECK: bl ___floatunsitf
+ %conv = uitofp i32 %a to fp128
+ ret fp128 %conv
+}
+
+define fp128 @uitofp_i64_fp128(i64 %a) {
+entry:
+; CHECK-LABEL: uitofp_i64_fp128
+; CHECK: bl ___floatunditf
+ %conv = uitofp i64 %a to fp128
+ ret fp128 %conv
+}
+
+define i32 @uitofp_fp128_i32(fp128 %a) {
+entry:
+; CHECK-LABEL: uitofp_fp128_i32
+; CHECK: ___fixunstfsi
+ %conv = fptoui fp128 %a to i32
+ ret i32 %conv
+}
+
+define i64 @uitofp_fp128_i64(fp128 %a) {
+entry:
+; CHECK-LABEL: uitofp_fp128_i64
+; CHECK: ___fixunstfdi
+ %conv = fptoui fp128 %a to i64
+ ret i64 %conv
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
new file mode 100644
index 0000000..d5bdbaa
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t
+; RUN: FileCheck %s < %t --check-prefix=CHECK-SSA
+; REQUIRES: asserts
+
+; CHECK-SSA-LABEL: Machine code for function t1
+
+; CHECK-SSA: [[QUOTREG:%vreg[0-9]+]]<def> = SDIVWr
+; CHECK-SSA-NOT: [[QUOTREG]]<def> =
+; CHECK-SSA: {{%vreg[0-9]+}}<def> = MSUBWrrr [[QUOTREG]]
+
+; CHECK-SSA-LABEL: Machine code for function t2
+
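+; AArch64 has no integer remainder instruction: a % b is computed as a
+; division followed by an msub (a - (a / b) * b).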
+define i32 @t1(i32 %a, i32 %b) {
+; CHECK: @t1
+; CHECK: sdiv [[TMP:w[0-9]+]], w0, w1
+; CHECK: msub w0, [[TMP]], w1, w0
+ %1 = srem i32 %a, %b
+ ret i32 %1
+}
+
+define i64 @t2(i64 %a, i64 %b) {
+; CHECK: @t2
+; CHECK: sdiv [[TMP:x[0-9]+]], x0, x1
+; CHECK: msub x0, [[TMP]], x1, x0
+ %1 = srem i64 %a, %b
+ ret i64 %1
+}
+
+define i32 @t3(i32 %a, i32 %b) {
+; CHECK: @t3
+; CHECK: udiv [[TMP:w[0-9]+]], w0, w1
+; CHECK: msub w0, [[TMP]], w1, w0
+ %1 = urem i32 %a, %b
+ ret i32 %1
+}
+
+define i64 @t4(i64 %a, i64 %b) {
+; CHECK: @t4
+; CHECK: udiv [[TMP:x[0-9]+]], x0, x1
+; CHECK: msub x0, [[TMP]], x1, x0
+ %1 = urem i64 %a, %b
+ ret i64 %1
+}
diff --git a/test/CodeGen/ARM64/fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
index d91fd28..d91fd28 100644
--- a/test/CodeGen/ARM64/fast-isel-ret.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
diff --git a/test/CodeGen/ARM64/fast-isel-select.ll b/test/CodeGen/AArch64/arm64-fast-isel-select.ll
index 1cc207f..1cc207f 100644
--- a/test/CodeGen/ARM64/fast-isel-select.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-select.ll
diff --git a/test/CodeGen/AArch64/arm64-fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll
new file mode 100644
index 0000000..0194b3a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+
+define void @t0(i32 %a) nounwind {
+entry:
+; CHECK: t0
+; CHECK: str {{w[0-9]+}}, [sp, #12]
+; CHECK-NEXT: ldr [[REGISTER:w[0-9]+]], [sp, #12]
+; CHECK-NEXT: str [[REGISTER]], [sp, #12]
+; CHECK: ret
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr
+ %tmp = load i32* %a.addr
+ store i32 %tmp, i32* %a.addr
+ ret void
+}
+
+define void @t1(i64 %a) nounwind {
+; CHECK: t1
+; CHECK: str {{x[0-9]+}}, [sp, #8]
+; CHECK-NEXT: ldr [[REGISTER:x[0-9]+]], [sp, #8]
+; CHECK-NEXT: str [[REGISTER]], [sp, #8]
+; CHECK: ret
+ %a.addr = alloca i64, align 4
+ store i64 %a, i64* %a.addr
+ %tmp = load i64* %a.addr
+ store i64 %tmp, i64* %a.addr
+ ret void
+}
+
+define zeroext i1 @i1(i1 %a) nounwind {
+entry:
+; CHECK: @i1
+; CHECK: and w0, w0, #0x1
+; CHECK: strb w0, [sp, #15]
+; CHECK: ldrb w0, [sp, #15]
+; CHECK: and w0, w0, #0x1
+; CHECK: and w0, w0, #0x1
+; CHECK: add sp, sp, #16
+; CHECK: ret
+ %a.addr = alloca i1, align 1
+ store i1 %a, i1* %a.addr, align 1
+ %0 = load i1* %a.addr, align 1
+ ret i1 %0
+}
+
+define i32 @t2(i32 *%ptr) nounwind {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: ldur w0, [x0, #-4]
+; CHECK: ret
+ %0 = getelementptr i32 *%ptr, i32 -1
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+define i32 @t3(i32 *%ptr) nounwind {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: ldur w0, [x0, #-256]
+; CHECK: ret
+ %0 = getelementptr i32 *%ptr, i32 -64
+ %1 = load i32* %0, align 4
+ ret i32 %1
+}
+
+define void @t4(i32 *%ptr) nounwind {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: movz w8, #0
+; CHECK: stur w8, [x0, #-4]
+; CHECK: ret
+ %0 = getelementptr i32 *%ptr, i32 -1
+ store i32 0, i32* %0, align 4
+ ret void
+}
+
+define void @t5(i32 *%ptr) nounwind {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: movz w8, #0
+; CHECK: stur w8, [x0, #-256]
+; CHECK: ret
+ %0 = getelementptr i32 *%ptr, i32 -64
+ store i32 0, i32* %0, align 4
+ ret void
+}
+
+define void @t6() nounwind {
+; CHECK: t6
+; CHECK: brk #0x1
+ tail call void @llvm.trap()
+ ret void
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM64/fastcc-tailcall.ll b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
index 8a744c5..8a744c5 100644
--- a/test/CodeGen/ARM64/fastcc-tailcall.ll
+++ b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
diff --git a/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll b/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
index af9fe05..af9fe05 100644
--- a/test/CodeGen/ARM64/fastisel-gep-promote-before-add.ll
+++ b/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
diff --git a/test/CodeGen/AArch64/arm64-fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
new file mode 100644
index 0000000..41027d4
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll
@@ -0,0 +1,204 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
+; rdar://10263824
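+;
+; fcmp accepts a #0.0 immediate, so the compares against zero below need no
+; register or literal pool to hold the constant.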
+
+define i1 @fcmp_float1(float %a) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_float1
+; CHECK: fcmp s0, #0.0
+; CHECK: cset w0, ne
+ %cmp = fcmp une float %a, 0.000000e+00
+ ret i1 %cmp
+}
+
+define i1 @fcmp_float2(float %a, float %b) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_float2
+; CHECK: fcmp s0, s1
+; CHECK: cset w0, ne
+ %cmp = fcmp une float %a, %b
+ ret i1 %cmp
+}
+
+define i1 @fcmp_double1(double %a) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_double1
+; CHECK: fcmp d0, #0.0
+; CHECK: cset w0, ne
+ %cmp = fcmp une double %a, 0.000000e+00
+ ret i1 %cmp
+}
+
+define i1 @fcmp_double2(double %a, double %b) nounwind ssp {
+entry:
+; CHECK-LABEL: @fcmp_double2
+; CHECK: fcmp d0, d1
+; CHECK: cset w0, ne
+ %cmp = fcmp une double %a, %b
+ ret i1 %cmp
+}
+
+; Check each fcmp condition
+define float @fcmp_oeq(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_oeq
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], eq
+
+ %cmp = fcmp oeq float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ogt(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ogt
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], gt
+
+ %cmp = fcmp ogt float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_oge(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_oge
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ge
+
+ %cmp = fcmp oge float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_olt(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_olt
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], mi
+
+ %cmp = fcmp olt float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ole(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ole
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ls
+
+ %cmp = fcmp ole float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ord(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ord
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vc
+ %cmp = fcmp ord float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_uno(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_uno
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vs
+ %cmp = fcmp uno float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ugt(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ugt
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], hi
+ %cmp = fcmp ugt float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_uge(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_uge
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], pl
+ %cmp = fcmp uge float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ult(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ult
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], lt
+ %cmp = fcmp ult float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_ule(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ule
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], le
+ %cmp = fcmp ule float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+define float @fcmp_une(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_une
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ne
+ %cmp = fcmp une float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+; Possible opportunity for improvement. See comment in
+; AArch64TargetLowering::LowerSETCC()
+define float @fcmp_one(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_one
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], mi
+; CHECK: fcsel s0, s[[ONE]], [[TMP]], gt
+ %cmp = fcmp one float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
+
+; Possible opportunity for improvement. See comment in
+; AArch64TargetLowering::LowerSETCC()
+define float @fcmp_ueq(float %a, float %b) nounwind ssp {
+; CHECK-LABEL: @fcmp_ueq
+; CHECK: fcmp s0, s1
+; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0
+; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0
+; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], eq
+; CHECK: fcsel s0, s[[ONE]], [[TMP]], vs
+ %cmp = fcmp ueq float %a, %b
+ %conv = uitofp i1 %cmp to float
+ ret float %conv
+}
diff --git a/test/CodeGen/AArch64/arm64-fcopysign.ll b/test/CodeGen/AArch64/arm64-fcopysign.ll
new file mode 100644
index 0000000..66241df
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fcopysign.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+
+; rdar://9332258
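+;
+; copysign lowers to a bitwise select: movi/fneg tricks build a sign-bit-only
+; mask, then "bit.16b v0, v1, v2" copies v1's bits into v0 wherever the mask
+; v2 is set, i.e. exactly the sign bit.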
+
+define float @test1(float %x, float %y) nounwind {
+entry:
+; CHECK-LABEL: test1:
+; CHECK: movi.4s v2, #0x80, lsl #24
+; CHECK: bit.16b v0, v1, v2
+ %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
+ ret float %0
+}
+
+define double @test2(double %x, double %y) nounwind {
+entry:
+; CHECK-LABEL: test2:
+; CHECK: movi.2d v2, #0
+; CHECK: fneg.2d v2, v2
+; CHECK: bit.16b v0, v1, v2
+ %0 = tail call double @copysign(double %x, double %y) nounwind readnone
+ ret double %0
+}
+
+; rdar://9545768
+define double @test3(double %a, float %b, float %c) nounwind {
+; CHECK-LABEL: test3:
+; CHECK: fcvt d1, s1
+; CHECK: fneg.2d v2, v{{[0-9]+}}
+; CHECK: bit.16b v0, v1, v2
+ %tmp1 = fadd float %b, %c
+ %tmp2 = fpext float %tmp1 to double
+ %tmp = tail call double @copysign( double %a, double %tmp2 ) nounwind readnone
+ ret double %tmp
+}
+
+define float @test4() nounwind {
+entry:
+; CHECK-LABEL: test4:
+; CHECK: fcvt s0, d0
+; CHECK: movi.4s v[[CONST:[0-9]+]], #0x80, lsl #24
+; CHECK: bit.16b v{{[0-9]+}}, v0, v[[CONST]]
+ %0 = tail call double (...)* @bar() nounwind
+ %1 = fptrunc double %0 to float
+ %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
+ %3 = fadd float %1, %2
+ ret float %3
+}
+
+declare double @bar(...)
+declare double @copysign(double, double) nounwind readnone
+declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
new file mode 100644
index 0000000..e51c38b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; DAGCombine to transform a conversion of an extract_vector_elt to an
+; extract_vector_elt of a conversion, which saves a round trip of copies
+; of the value to a GPR and back to an FPR.
+; rdar://11855286
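+;
+; Roughly: instead of "umov to a GPR, then scalar scvtf #9", the combine
+; converts the whole vector and then extracts the lane (register names
+; illustrative):
+;   scvtf.2d v1, v0, #9
+;   ins.d    v0[0], v1[1]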
+define double @foo0(<2 x i64> %a) nounwind {
+; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9
+; CHECK-NEXT: ins.d v0[0], [[REG]][1]
+ %vecext = extractelement <2 x i64> %a, i32 1
+ %fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
+ ret double %fcvt_n
+}
+
+declare double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-fmadd.ll b/test/CodeGen/AArch64/arm64-fmadd.ll
new file mode 100644
index 0000000..c791900
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fmadd.ll
@@ -0,0 +1,92 @@
+; RUN: llc -march=arm64 < %s | FileCheck %s
+
+define float @fma32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fma32:
+; CHECK: fmadd s0, s0, s1, s2
+ %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c)
+ ret float %0
+}
+
+define float @fnma32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fnma32:
+; CHECK: fnmadd s0, s0, s1, s2
+ %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c)
+ %mul = fmul float %0, -1.000000e+00
+ ret float %mul
+}
+
+define float @fms32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fms32:
+; CHECK: fmsub s0, s0, s1, s2
+ %mul = fmul float %b, -1.000000e+00
+ %0 = tail call float @llvm.fma.f32(float %a, float %mul, float %c)
+ ret float %0
+}
+
+define float @fms32_com(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fms32_com:
+; CHECK: fmsub s0, s1, s0, s2
+ %mul = fmul float %b, -1.000000e+00
+ %0 = tail call float @llvm.fma.f32(float %mul, float %a, float %c)
+ ret float %0
+}
+
+define float @fnms32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fnms32:
+; CHECK: fnmsub s0, s0, s1, s2
+ %mul = fmul float %c, -1.000000e+00
+ %0 = tail call float @llvm.fma.f32(float %a, float %b, float %mul)
+ ret float %0
+}
+
+define double @fma64(double %a, double %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fma64:
+; CHECK: fmadd d0, d0, d1, d2
+entry:
+ %0 = tail call double @llvm.fma.f64(double %a, double %b, double %c)
+ ret double %0
+}
+
+define double @fnma64(double %a, double %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fnma64:
+; CHECK: fnmadd d0, d0, d1, d2
+entry:
+ %0 = tail call double @llvm.fma.f64(double %a, double %b, double %c)
+ %mul = fmul double %0, -1.000000e+00
+ ret double %mul
+}
+
+define double @fms64(double %a, double %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fms64:
+; CHECK: fmsub d0, d0, d1, d2
+entry:
+ %mul = fmul double %b, -1.000000e+00
+ %0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)
+ ret double %0
+}
+
+define double @fms64_com(double %a, double %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fms64_com:
+; CHECK: fmsub d0, d1, d0, d2
+entry:
+ %mul = fmul double %b, -1.000000e+00
+ %0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c)
+ ret double %0
+}
+
+define double @fnms64(double %a, double %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fnms64:
+; CHECK: fnmsub d0, d0, d1, d2
+entry:
+ %mul = fmul double %c, -1.000000e+00
+ %0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)
+ ret double %0
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll
new file mode 100644
index 0000000..94b7454
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fmax.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
+
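+; Without NaNs, "select (fcmp olt %in, 0), 0, ext(%in)" is exactly
+; max(%in, 0), so these selects can match fmax/fmin even though one operand
+; goes through an fpext.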
+define double @test_direct(float %in) #1 {
+; CHECK-LABEL: test_direct:
+ %cmp = fcmp olt float %in, 0.000000e+00
+ %longer = fpext float %in to double
+ %val = select i1 %cmp, double 0.000000e+00, double %longer
+ ret double %val
+
+; CHECK: fmax
+}
+
+define double @test_cross(float %in) #1 {
+; CHECK-LABEL: test_cross:
+ %cmp = fcmp olt float %in, 0.000000e+00
+ %longer = fpext float %in to double
+ %val = select i1 %cmp, double %longer, double 0.000000e+00
+ ret double %val
+
+; CHECK: fmin
+}
+
+; This isn't a min or a max, but passes the first condition for swapping the
+; results. Make sure they're put back before we resort to the normal fcsel.
+define float @test_cross_fail(float %lhs, float %rhs) {
+; CHECK-LABEL: test_cross_fail:
+ %tst = fcmp une float %lhs, %rhs
+ %res = select i1 %tst, float %rhs, float %lhs
+ ret float %res
+
+ ; The register allocator would have to decide to be deliberately obtuse before
+  ; other registers were used.
+; CHECK: fcsel s0, s1, s0, ne
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/arm64-fminv.ll b/test/CodeGen/AArch64/arm64-fminv.ll
new file mode 100644
index 0000000..f4c9735
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fminv.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
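+; Two-element reductions use the pairwise form (fminp/fmaxp producing sN/dN);
+; only the four-element cases need the across-lanes fminv/fmaxv.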
+define float @test_fminv_v2f32(<2 x float> %in) {
+; CHECK: test_fminv_v2f32:
+; CHECK: fminp s0, v0.2s
+ %min = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %in)
+ ret float %min
+}
+
+define float @test_fminv_v4f32(<4 x float> %in) {
+; CHECK: test_fminv_v4f32:
+; CHECK: fminv s0, v0.4s
+ %min = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %in)
+ ret float %min
+}
+
+define double @test_fminv_v2f64(<2 x double> %in) {
+; CHECK: test_fminv_v2f64:
+; CHECK: fminp d0, v0.2d
+ %min = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %in)
+ ret double %min
+}
+
+declare float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxv_v2f32(<2 x float> %in) {
+; CHECK: test_fmaxv_v2f32:
+; CHECK: fmaxp s0, v0.2s
+ %max = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %in)
+ ret float %max
+}
+
+define float @test_fmaxv_v4f32(<4 x float> %in) {
+; CHECK: test_fmaxv_v4f32:
+; CHECK: fmaxv s0, v0.4s
+ %max = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %in)
+ ret float %max
+}
+
+define double @test_fmaxv_v2f64(<2 x double> %in) {
+; CHECK: test_fmaxv_v2f64:
+; CHECK: fmaxp d0, v0.2d
+ %max = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %in)
+ ret double %max
+}
+
+declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double>)
+
+define float @test_fminnmv_v2f32(<2 x float> %in) {
+; CHECK: test_fminnmv_v2f32:
+; CHECK: fminnmp s0, v0.2s
+ %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %in)
+ ret float %minnm
+}
+
+define float @test_fminnmv_v4f32(<4 x float> %in) {
+; CHECK: test_fminnmv_v4f32:
+; CHECK: fminnmv s0, v0.4s
+ %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %in)
+ ret float %minnm
+}
+
+define double @test_fminnmv_v2f64(<2 x double> %in) {
+; CHECK: test_fminnmv_v2f64:
+; CHECK: fminnmp d0, v0.2d
+ %minnm = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+ ret double %minnm
+}
+
+declare float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxnmv_v2f32(<2 x float> %in) {
+; CHECK: test_fmaxnmv_v2f32:
+; CHECK: fmaxnmp s0, v0.2s
+ %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
+ ret float %maxnm
+}
+
+define float @test_fmaxnmv_v4f32(<4 x float> %in) {
+; CHECK: test_fmaxnmv_v4f32:
+; CHECK: fmaxnmv s0, v0.4s
+ %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
+ ret float %maxnm
+}
+
+define double @test_fmaxnmv_v2f64(<2 x double> %in) {
+; CHECK: test_fmaxnmv_v2f64:
+; CHECK: fmaxnmp d0, v0.2d
+ %maxnm = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+ ret double %maxnm
+}
+
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>)
diff --git a/test/CodeGen/AArch64/arm64-fmuladd.ll b/test/CodeGen/AArch64/arm64-fmuladd.ll
new file mode 100644
index 0000000..6c5eeca
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fmuladd.ll
@@ -0,0 +1,88 @@
+; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define float @test_f32(float* %A, float* %B, float* %C) nounwind {
+;CHECK-LABEL: test_f32:
+;CHECK: fmadd
+;CHECK-NOT: fmadd
+ %tmp1 = load float* %A
+ %tmp2 = load float* %B
+ %tmp3 = load float* %C
+ %tmp4 = call float @llvm.fmuladd.f32(float %tmp1, float %tmp2, float %tmp3)
+ ret float %tmp4
+}
+
+define <2 x float> @test_v2f32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK-LABEL: test_v2f32:
+;CHECK: fmla.2s
+;CHECK-NOT: fmla.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = load <2 x float>* %C
+ %tmp4 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
+ ret <2 x float> %tmp4
+}
+
+define <4 x float> @test_v4f32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK-LABEL: test_v4f32:
+;CHECK: fmla.4s
+;CHECK-NOT: fmla.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = load <4 x float>* %C
+ %tmp4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
+ ret <4 x float> %tmp4
+}
+
+define <8 x float> @test_v8f32(<8 x float>* %A, <8 x float>* %B, <8 x float>* %C) nounwind {
+;CHECK-LABEL: test_v8f32:
+;CHECK: fmla.4s
+;CHECK: fmla.4s
+;CHECK-NOT: fmla.4s
+ %tmp1 = load <8 x float>* %A
+ %tmp2 = load <8 x float>* %B
+ %tmp3 = load <8 x float>* %C
+ %tmp4 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %tmp1, <8 x float> %tmp2, <8 x float> %tmp3)
+ ret <8 x float> %tmp4
+}
+
+define double @test_f64(double* %A, double* %B, double* %C) nounwind {
+;CHECK-LABEL: test_f64:
+;CHECK: fmadd
+;CHECK-NOT: fmadd
+ %tmp1 = load double* %A
+ %tmp2 = load double* %B
+ %tmp3 = load double* %C
+ %tmp4 = call double @llvm.fmuladd.f64(double %tmp1, double %tmp2, double %tmp3)
+ ret double %tmp4
+}
+
+define <2 x double> @test_v2f64(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
+;CHECK-LABEL: test_v2f64:
+;CHECK: fmla.2d
+;CHECK-NOT: fmla.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = load <2 x double>* %C
+ %tmp4 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
+ ret <2 x double> %tmp4
+}
+
+define <4 x double> @test_v4f64(<4 x double>* %A, <4 x double>* %B, <4 x double>* %C) nounwind {
+;CHECK-LABEL: test_v4f64:
+;CHECK: fmla.2d
+;CHECK: fmla.2d
+;CHECK-NOT: fmla.2d
+ %tmp1 = load <4 x double>* %A
+ %tmp2 = load <4 x double>* %B
+ %tmp3 = load <4 x double>* %C
+ %tmp4 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %tmp1, <4 x double> %tmp2, <4 x double> %tmp3)
+ ret <4 x double> %tmp4
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
diff --git a/test/CodeGen/ARM64/fold-address.ll b/test/CodeGen/AArch64/arm64-fold-address.ll
index 96cc3e9..96cc3e9 100644
--- a/test/CodeGen/ARM64/fold-address.ll
+++ b/test/CodeGen/AArch64/arm64-fold-address.ll
diff --git a/test/CodeGen/AArch64/arm64-fold-lsl.ll b/test/CodeGen/AArch64/arm64-fold-lsl.ll
new file mode 100644
index 0000000..ec65e46
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fold-lsl.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+;
+; <rdar://problem/14486451>
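+;
+; Sketch of the fold for load_halfword below: "(%xor72 >> 9) & 255" becomes a
+; single ubfx, and the scaled index then folds into the addressing mode as
+; "lsl #1/#2/#3" to match the element size, avoiding a separate add.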
+
+%struct.a = type [256 x i16]
+%struct.b = type [256 x i32]
+%struct.c = type [256 x i64]
+
+define i16 @load_halfword(%struct.a* %ctx, i32 %xor72) nounwind {
+; CHECK-LABEL: load_halfword:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: ldrh w0, [x0, [[REG]], lsl #1]
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64 %idxprom83
+ %result = load i16* %arrayidx86, align 2
+ ret i16 %result
+}
+
+define i32 @load_word(%struct.b* %ctx, i32 %xor72) nounwind {
+; CHECK-LABEL: load_word:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: ldr w0, [x0, [[REG]], lsl #2]
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64 %idxprom83
+ %result = load i32* %arrayidx86, align 4
+ ret i32 %result
+}
+
+define i64 @load_doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
+; CHECK-LABEL: load_doubleword:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: ldr x0, [x0, [[REG]], lsl #3]
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64 %idxprom83
+ %result = load i64* %arrayidx86, align 8
+ ret i64 %result
+}
+
+define void @store_halfword(%struct.a* %ctx, i32 %xor72, i16 %val) nounwind {
+; CHECK-LABEL: store_halfword:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: strh w2, [x0, [[REG]], lsl #1]
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64 %idxprom83
+ store i16 %val, i16* %arrayidx86, align 8
+ ret void
+}
+
+define void @store_word(%struct.b* %ctx, i32 %xor72, i32 %val) nounwind {
+; CHECK-LABEL: store_word:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: str w2, [x0, [[REG]], lsl #2]
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64 %idxprom83
+ store i32 %val, i32* %arrayidx86, align 8
+ ret void
+}
+
+define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val) nounwind {
+; CHECK-LABEL: store_doubleword:
+; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
+; CHECK: str x2, [x0, [[REG]], lsl #3]
+ %shr81 = lshr i32 %xor72, 9
+ %conv82 = zext i32 %shr81 to i64
+ %idxprom83 = and i64 %conv82, 255
+ %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64 %idxprom83
+ store i64 %val, i64* %arrayidx86, align 8
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fp-contract-zero.ll b/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
new file mode 100644
index 0000000..f982cbb
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=arm64 -fp-contract=fast -o - %s | FileCheck %s
+
+
+; Make sure we don't try to fold an fneg into +0.0, creating an illegal constant
+; -0.0. It's also good, though not essential, that we don't resort to a litpool.
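+; Roughly: folding the fneg from the fsub into the +0.0 operand would produce
+; -0.0, which cannot come from "fmov dN, xzr" (that only materializes +0.0)
+; and would need a literal pool or extra instructions instead.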
+define double @test_fms_fold(double %a, double %b) {
+; CHECK-LABEL: test_fms_fold:
+; CHECK: fmov {{d[0-9]+}}, xzr
+; CHECK: ret
+ %mul = fmul double %a, 0.000000e+00
+ %mul1 = fmul double %b, 0.000000e+00
+ %sub = fsub double %mul, %mul1
+ ret double %sub
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM64/fp-imm.ll b/test/CodeGen/AArch64/arm64-fp-imm.ll
index 6e271e0..6e271e0 100644
--- a/test/CodeGen/ARM64/fp-imm.ll
+++ b/test/CodeGen/AArch64/arm64-fp-imm.ll
diff --git a/test/CodeGen/ARM64/fp.ll b/test/CodeGen/AArch64/arm64-fp.ll
index 08b1b67..08b1b67 100644
--- a/test/CodeGen/ARM64/fp.ll
+++ b/test/CodeGen/AArch64/arm64-fp.ll
diff --git a/test/CodeGen/ARM64/fp128-folding.ll b/test/CodeGen/AArch64/arm64-fp128-folding.ll
index 6a7d203..6a7d203 100644
--- a/test/CodeGen/ARM64/fp128-folding.ll
+++ b/test/CodeGen/AArch64/arm64-fp128-folding.ll
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
new file mode 100644
index 0000000..57bbb93
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -0,0 +1,273 @@
+; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone < %s | FileCheck %s
+
+@lhs = global fp128 zeroinitializer, align 16
+@rhs = global fp128 zeroinitializer, align 16
+
+define fp128 @test_add() {
+; CHECK-LABEL: test_add:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+ %val = fadd fp128 %lhs, %rhs
+; CHECK: bl __addtf3
+ ret fp128 %val
+}
+
+define fp128 @test_sub() {
+; CHECK-LABEL: test_sub:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+ %val = fsub fp128 %lhs, %rhs
+; CHECK: bl __subtf3
+ ret fp128 %val
+}
+
+define fp128 @test_mul() {
+; CHECK-LABEL: test_mul:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+ %val = fmul fp128 %lhs, %rhs
+; CHECK: bl __multf3
+ ret fp128 %val
+}
+
+define fp128 @test_div() {
+; CHECK-LABEL: test_div:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+ %val = fdiv fp128 %lhs, %rhs
+; CHECK: bl __divtf3
+ ret fp128 %val
+}
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fptosi() {
+; CHECK-LABEL: test_fptosi:
+ %val = load fp128* @lhs, align 16
+
+ %val32 = fptosi fp128 %val to i32
+ store i32 %val32, i32* @var32
+; CHECK: bl __fixtfsi
+
+ %val64 = fptosi fp128 %val to i64
+ store i64 %val64, i64* @var64
+; CHECK: bl __fixtfdi
+
+ ret void
+}
+
+define void @test_fptoui() {
+; CHECK-LABEL: test_fptoui:
+ %val = load fp128* @lhs, align 16
+
+ %val32 = fptoui fp128 %val to i32
+ store i32 %val32, i32* @var32
+; CHECK: bl __fixunstfsi
+
+ %val64 = fptoui fp128 %val to i64
+ store i64 %val64, i64* @var64
+; CHECK: bl __fixunstfdi
+
+ ret void
+}
+
+define void @test_sitofp() {
+; CHECK-LABEL: test_sitofp:
+
+ %src32 = load i32* @var32
+ %val32 = sitofp i32 %src32 to fp128
+ store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatsitf
+
+ %src64 = load i64* @var64
+ %val64 = sitofp i64 %src64 to fp128
+ store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatditf
+
+ ret void
+}
+
+define void @test_uitofp() {
+; CHECK-LABEL: test_uitofp:
+
+ %src32 = load i32* @var32
+ %val32 = uitofp i32 %src32 to fp128
+ store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatunsitf
+
+ %src64 = load i64* @var64
+ %val64 = uitofp i64 %src64 to fp128
+ store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatunditf
+
+ ret void
+}
+
+define i1 @test_setcc1() {
+; CHECK-LABEL: test_setcc1:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+; Technically, everything after the call to __letf2 is redundant, but we'll let
+; LLVM have its fun for now.
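+; (fp128 compares are libcalls: __letf2 returns an int that relates to zero
+; the same way %lhs relates to %rhs, so "ole" becomes "__letf2(lhs, rhs) <= 0".)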
+ %val = fcmp ole fp128 %lhs, %rhs
+; CHECK: bl __letf2
+; CHECK: cmp w0, #0
+; CHECK: cset w0, le
+
+ ret i1 %val
+; CHECK: ret
+}
+
+define i1 @test_setcc2() {
+; CHECK-LABEL: test_setcc2:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+ %val = fcmp ugt fp128 %lhs, %rhs
+; CHECK: bl __gttf2
+; CHECK: cmp w0, #0
+; CHECK: cset [[GT:w[0-9]+]], gt
+
+; CHECK: bl __unordtf2
+; CHECK: cmp w0, #0
+; CHECK: cset [[UNORDERED:w[0-9]+]], ne
+; CHECK: orr w0, [[UNORDERED]], [[GT]]
+
+ ret i1 %val
+; CHECK: ret
+}
+
+define i32 @test_br_cc() {
+; CHECK-LABEL: test_br_cc:
+
+ %lhs = load fp128* @lhs, align 16
+ %rhs = load fp128* @rhs, align 16
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
+
+  ; olt == !uge, which is what LLVM unfortunately "optimizes" this to.
+ %cond = fcmp olt fp128 %lhs, %rhs
+; CHECK: bl __getf2
+; CHECK: cmp w0, #0
+; CHECK: cset [[OGE:w[0-9]+]], ge
+
+; CHECK: bl __unordtf2
+; CHECK: cmp w0, #0
+; CHECK: cset [[UNORDERED:w[0-9]+]], ne
+
+; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
+; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
+ br i1 %cond, label %iftrue, label %iffalse
+
+iftrue:
+ ret i32 42
+; CHECK-NEXT: BB#
+; CHECK-NEXT: movz w0, #0x2a
+; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
+
+iffalse:
+ ret i32 29
+; CHECK: [[RET29]]:
+; CHECK-NEXT: movz w0, #0x1d
+; CHECK-NEXT: [[REALRET]]:
+; CHECK: ret
+}
+
+define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
+; CHECK-LABEL: test_select:
+
+ %val = select i1 %cond, fp128 %lhs, fp128 %rhs
+ store fp128 %val, fp128* @lhs, align 16
+; CHECK: tst w0, #0x1
+; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#
+; CHECK-NEXT: mov v[[VAL:[0-9]+]].16b, v0.16b
+; CHECK-NEXT: [[IFFALSE]]:
+; CHECK: str q[[VAL]], [{{x[0-9]+}}, :lo12:lhs]
+ ret void
+; CHECK: ret
+}
+
+@varfloat = global float 0.0, align 4
+@vardouble = global double 0.0, align 8
+
+define void @test_round() {
+; CHECK-LABEL: test_round:
+
+ %val = load fp128* @lhs, align 16
+
+ %float = fptrunc fp128 %val to float
+ store float %float, float* @varfloat, align 4
+; CHECK: bl __trunctfsf2
+; CHECK: str s0, [{{x[0-9]+}}, :lo12:varfloat]
+
+ %double = fptrunc fp128 %val to double
+ store double %double, double* @vardouble, align 8
+; CHECK: bl __trunctfdf2
+; CHECK: str d0, [{{x[0-9]+}}, :lo12:vardouble]
+
+ ret void
+}
+
+define void @test_extend() {
+; CHECK-LABEL: test_extend:
+
+ %val = load fp128* @lhs, align 16
+
+ %float = load float* @varfloat
+ %fromfloat = fpext float %float to fp128
+ store volatile fp128 %fromfloat, fp128* @lhs, align 16
+; CHECK: bl __extendsftf2
+; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
+
+ %double = load double* @vardouble
+ %fromdouble = fpext double %double to fp128
+ store volatile fp128 %fromdouble, fp128* @lhs, align 16
+; CHECK: bl __extenddftf2
+; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
+
+ ret void
+; CHECK: ret
+}
+
+define fp128 @test_neg(fp128 %in) {
+; CHECK: [[MINUS0:.LCPI[0-9]+_0]]:
+; Make sure the weird hex constant below *is* -0.0
+; CHECK-NEXT: fp128 -0
+
+; CHECK-LABEL: test_neg:
+
+  ; Could in principle be optimized to an fneg, which we can't select; this
+  ; makes sure that doesn't happen.
+ %ret = fsub fp128 0xL00000000000000008000000000000000, %in
+; CHECK: mov v1.16b, v0.16b
+; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[MINUS0]]]
+; CHECK: bl __subtf3
+
+ ret fp128 %ret
+; CHECK: ret
+}
diff --git a/test/CodeGen/ARM64/frame-index.ll b/test/CodeGen/AArch64/arm64-frame-index.ll
index 4a91ff3..4a91ff3 100644
--- a/test/CodeGen/ARM64/frame-index.ll
+++ b/test/CodeGen/AArch64/arm64-frame-index.ll
diff --git a/test/CodeGen/AArch64/arm64-frameaddr.ll b/test/CodeGen/AArch64/arm64-frameaddr.ll
new file mode 100644
index 0000000..469078c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-frameaddr.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+define i8* @t() nounwind {
+entry:
+; CHECK-LABEL: t:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; CHECK: mov x0, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM64/global-address.ll b/test/CodeGen/AArch64/arm64-global-address.ll
index 005f414..005f414 100644
--- a/test/CodeGen/ARM64/global-address.ll
+++ b/test/CodeGen/AArch64/arm64-global-address.ll
diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll
new file mode 100644
index 0000000..a6346fb
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-hello.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX
+
+; CHECK-LABEL: main:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: stur wzr, [x29, #-4]
+; CHECK: adrp x0, L_.str@PAGE
+; CHECK: add x0, x0, L_.str@PAGEOFF
+; CHECK-NEXT: bl _puts
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+
+; CHECK-LINUX-LABEL: main:
+; CHECK-LINUX: stp x29, x30, [sp, #-16]!
+; CHECK-LINUX-NEXT: mov x29, sp
+; CHECK-LINUX-NEXT: sub sp, sp, #16
+; CHECK-LINUX-NEXT: stur wzr, [x29, #-4]
+; CHECK-LINUX: adrp x0, .L.str
+; CHECK-LINUX: add x0, x0, :lo12:.L.str
+; CHECK-LINUX-NEXT: bl puts
+; CHECK-LINUX-NEXT: mov sp, x29
+; CHECK-LINUX-NEXT: ldp x29, x30, [sp], #16
+; CHECK-LINUX-NEXT: ret
+
+@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
+
+define i32 @main() nounwind ssp {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
+ ret i32 %call
+}
+
+declare i32 @puts(i8*)
diff --git a/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
new file mode 100644
index 0000000..ba759e3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define i32 @foo(<4 x i16>* %__a) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: umov.h w{{[0-9]+}}, v{{[0-9]+}}[0]
+ %tmp18 = load <4 x i16>* %__a, align 8
+ %vget_lane = extractelement <4 x i16> %tmp18, i32 0
+ %conv = zext i16 %vget_lane to i32
+ %mul = mul nsw i32 3, %conv
+ ret i32 %mul
+}
+
diff --git a/test/CodeGen/AArch64/arm64-icmp-opt.ll b/test/CodeGen/AArch64/arm64-icmp-opt.ll
new file mode 100644
index 0000000..7b12ed7
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-icmp-opt.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+; Optimize (x > -1) to (x >= 0) etc.
+; Optimize (cmp (add / sub), 0): eliminate the subs used to update flag
+; for comparison only
+; rdar://10233472
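+;
+; Sketch of the first rewrite for t1 below: "icmp sgt i64 %a, -1" would need
+; a movn to materialize -1, so it is canonicalized to "icmp sge i64 %a, 0",
+; i.e. "cmp x0, #0" followed by "cset w0, ge".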
+
+define i32 @t1(i64 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: t1:
+; CHECK-NOT: movn
+; CHECK: cmp x0, #0
+; CHECK: cset w0, ge
+ %cmp = icmp sgt i64 %a, -1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
diff --git a/test/CodeGen/ARM64/illegal-float-ops.ll b/test/CodeGen/AArch64/arm64-illegal-float-ops.ll
index 9a35fe5..9a35fe5 100644
--- a/test/CodeGen/ARM64/illegal-float-ops.ll
+++ b/test/CodeGen/AArch64/arm64-illegal-float-ops.ll
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll
new file mode 100644
index 0000000..e501c6e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -0,0 +1,351 @@
+; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s
+
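+; The store*/truncst* tests check post-indexed stores: "str x1, [x0], #8"
+; stores to [x0] and then advances x0 by 8, folding the pointer increment
+; into the store.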
+define void @store64(i64** nocapture %out, i64 %index, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: store64:
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8
+; CHECK: ret
+ %tmp = load i64** %out, align 8
+ %incdec.ptr = getelementptr inbounds i64* %tmp, i64 1
+ store i64 %spacing, i64* %tmp, align 4
+ store i64* %incdec.ptr, i64** %out, align 8
+ ret void
+}
+
+define void @store32(i32** nocapture %out, i32 %index, i32 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: store32:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #4
+; CHECK: ret
+ %tmp = load i32** %out, align 8
+ %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
+ store i32 %spacing, i32* %tmp, align 4
+ store i32* %incdec.ptr, i32** %out, align 8
+ ret void
+}
+
+define void @store16(i16** nocapture %out, i16 %index, i16 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: store16:
+; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #2
+; CHECK: ret
+ %tmp = load i16** %out, align 8
+ %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
+ store i16 %spacing, i16* %tmp, align 4
+ store i16* %incdec.ptr, i16** %out, align 8
+ ret void
+}
+
+define void @store8(i8** nocapture %out, i8 %index, i8 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: store8:
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #1
+; CHECK: ret
+ %tmp = load i8** %out, align 8
+ %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
+ store i8 %spacing, i8* %tmp, align 4
+ store i8* %incdec.ptr, i8** %out, align 8
+ ret void
+}
+
+define void @truncst64to32(i32** nocapture %out, i32 %index, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: truncst64to32:
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #4
+; CHECK: ret
+ %tmp = load i32** %out, align 8
+ %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
+ %trunc = trunc i64 %spacing to i32
+ store i32 %trunc, i32* %tmp, align 4
+ store i32* %incdec.ptr, i32** %out, align 8
+ ret void
+}
+
+define void @truncst64to16(i16** nocapture %out, i16 %index, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: truncst64to16:
+; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #2
+; CHECK: ret
+ %tmp = load i16** %out, align 8
+ %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
+ %trunc = trunc i64 %spacing to i16
+ store i16 %trunc, i16* %tmp, align 4
+ store i16* %incdec.ptr, i16** %out, align 8
+ ret void
+}
+
+define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: truncst64to8:
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #1
+; CHECK: ret
+ %tmp = load i8** %out, align 8
+ %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
+ %trunc = trunc i64 %spacing to i8
+ store i8 %trunc, i8* %tmp, align 4
+ store i8* %incdec.ptr, i8** %out, align 8
+ ret void
+}
+
+
+define void @storef32(float** nocapture %out, float %index, float %spacing) nounwind noinline ssp {
+; CHECK-LABEL: storef32:
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #4
+; CHECK: ret
+ %tmp = load float** %out, align 8
+ %incdec.ptr = getelementptr inbounds float* %tmp, i64 1
+ store float %spacing, float* %tmp, align 4
+ store float* %incdec.ptr, float** %out, align 8
+ ret void
+}
+
+define void @storef64(double** nocapture %out, double %index, double %spacing) nounwind noinline ssp {
+; CHECK-LABEL: storef64:
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #8
+; CHECK: ret
+ %tmp = load double** %out, align 8
+ %incdec.ptr = getelementptr inbounds double* %tmp, i64 1
+ store double %spacing, double* %tmp, align 4
+ store double* %incdec.ptr, double** %out, align 8
+ ret void
+}
+
+define double * @pref64(double** nocapture %out, double %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pref64:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: str d0, [x0, #32]!
+; CHECK-NEXT: ret
+ %tmp = load double** %out, align 8
+ %ptr = getelementptr inbounds double* %tmp, i64 4
+ store double %spacing, double* %ptr, align 4
+ ret double *%ptr
+}
+
+define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pref32:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: str s0, [x0, #12]!
+; CHECK-NEXT: ret
+ %tmp = load float** %out, align 8
+ %ptr = getelementptr inbounds float* %tmp, i64 3
+ store float %spacing, float* %ptr, align 4
+ ret float *%ptr
+}
+
+define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pre64:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: str x1, [x0, #16]!
+; CHECK-NEXT: ret
+ %tmp = load i64** %out, align 8
+ %ptr = getelementptr inbounds i64* %tmp, i64 2
+ store i64 %spacing, i64* %ptr, align 4
+ ret i64 *%ptr
+}
+
+define i32 * @pre32(i32** nocapture %out, i32 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pre32:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: str w1, [x0, #8]!
+; CHECK-NEXT: ret
+ %tmp = load i32** %out, align 8
+ %ptr = getelementptr inbounds i32* %tmp, i64 2
+ store i32 %spacing, i32* %ptr, align 4
+ ret i32 *%ptr
+}
+
+define i16 * @pre16(i16** nocapture %out, i16 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pre16:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: strh w1, [x0, #4]!
+; CHECK-NEXT: ret
+ %tmp = load i16** %out, align 8
+ %ptr = getelementptr inbounds i16* %tmp, i64 2
+ store i16 %spacing, i16* %ptr, align 4
+ ret i16 *%ptr
+}
+
+define i8 * @pre8(i8** nocapture %out, i8 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pre8:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: strb w1, [x0, #2]!
+; CHECK-NEXT: ret
+ %tmp = load i8** %out, align 8
+ %ptr = getelementptr inbounds i8* %tmp, i64 2
+ store i8 %spacing, i8* %ptr, align 4
+ ret i8 *%ptr
+}
+
+define i32 * @pretrunc64to32(i32** nocapture %out, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pretrunc64to32:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: str w1, [x0, #8]!
+; CHECK-NEXT: ret
+ %tmp = load i32** %out, align 8
+ %ptr = getelementptr inbounds i32* %tmp, i64 2
+ %trunc = trunc i64 %spacing to i32
+ store i32 %trunc, i32* %ptr, align 4
+ ret i32 *%ptr
+}
+
+define i16 * @pretrunc64to16(i16** nocapture %out, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pretrunc64to16:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: strh w1, [x0, #4]!
+; CHECK-NEXT: ret
+ %tmp = load i16** %out, align 8
+ %ptr = getelementptr inbounds i16* %tmp, i64 2
+ %trunc = trunc i64 %spacing to i16
+ store i16 %trunc, i16* %ptr, align 4
+ ret i16 *%ptr
+}
+
+define i8 * @pretrunc64to8(i8** nocapture %out, i64 %spacing) nounwind noinline ssp {
+; CHECK-LABEL: pretrunc64to8:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: strb w1, [x0, #2]!
+; CHECK-NEXT: ret
+ %tmp = load i8** %out, align 8
+ %ptr = getelementptr inbounds i8* %tmp, i64 2
+ %trunc = trunc i64 %spacing to i8
+ store i8 %trunc, i8* %ptr, align 4
+ ret i8 *%ptr
+}
+
+;-----
+; Pre-indexed loads
+;-----
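+; Pre-indexed means the base updates before the access: "ldr d0, [x0, #8]!"
+; first adds 8 to x0, then loads from the new address, so the advanced
+; pointer can be returned with no separate add.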
+define double* @preidxf64(double* %src, double* %out) {
+; CHECK-LABEL: preidxf64:
+; CHECK: ldr d0, [x0, #8]!
+; CHECK: str d0, [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds double* %src, i64 1
+ %tmp = load double* %ptr, align 4
+ store double %tmp, double* %out, align 4
+ ret double* %ptr
+}
+
+define float* @preidxf32(float* %src, float* %out) {
+; CHECK-LABEL: preidxf32:
+; CHECK: ldr s0, [x0, #4]!
+; CHECK: str s0, [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds float* %src, i64 1
+ %tmp = load float* %ptr, align 4
+ store float %tmp, float* %out, align 4
+ ret float* %ptr
+}
+
+define i64* @preidx64(i64* %src, i64* %out) {
+; CHECK-LABEL: preidx64:
+; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]!
+; CHECK: str x[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i64* %src, i64 1
+ %tmp = load i64* %ptr, align 4
+ store i64 %tmp, i64* %out, align 4
+ ret i64* %ptr
+}
+
+define i32* @preidx32(i32* %src, i32* %out) {
+; CHECK: ldr w[[REG:[0-9]+]], [x0, #4]!
+; CHECK: str w[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i32* %src, i64 1
+ %tmp = load i32* %ptr, align 4
+ store i32 %tmp, i32* %out, align 4
+ ret i32* %ptr
+}
+
+define i16* @preidx16zext32(i16* %src, i32* %out) {
+; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]!
+; CHECK: str w[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i16* %src, i64 1
+ %tmp = load i16* %ptr, align 4
+ %ext = zext i16 %tmp to i32
+ store i32 %ext, i32* %out, align 4
+ ret i16* %ptr
+}
+
+define i16* @preidx16zext64(i16* %src, i64* %out) {
+; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]!
+; CHECK: str x[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i16* %src, i64 1
+ %tmp = load i16* %ptr, align 4
+ %ext = zext i16 %tmp to i64
+ store i64 %ext, i64* %out, align 4
+ ret i16* %ptr
+}
+
+define i8* @preidx8zext32(i8* %src, i32* %out) {
+; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]!
+; CHECK: str w[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i8* %src, i64 1
+ %tmp = load i8* %ptr, align 4
+ %ext = zext i8 %tmp to i32
+ store i32 %ext, i32* %out, align 4
+ ret i8* %ptr
+}
+
+define i8* @preidx8zext64(i8* %src, i64* %out) {
+; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]!
+; CHECK: str x[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i8* %src, i64 1
+ %tmp = load i8* %ptr, align 4
+ %ext = zext i8 %tmp to i64
+ store i64 %ext, i64* %out, align 4
+ ret i8* %ptr
+}
+
+define i32* @preidx32sext64(i32* %src, i64* %out) {
+; CHECK: ldrsw x[[REG:[0-9]+]], [x0, #4]!
+; CHECK: str x[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i32* %src, i64 1
+ %tmp = load i32* %ptr, align 4
+ %ext = sext i32 %tmp to i64
+ store i64 %ext, i64* %out, align 8
+ ret i32* %ptr
+}
+
+define i16* @preidx16sext32(i16* %src, i32* %out) {
+; CHECK: ldrsh w[[REG:[0-9]+]], [x0, #2]!
+; CHECK: str w[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i16* %src, i64 1
+ %tmp = load i16* %ptr, align 4
+ %ext = sext i16 %tmp to i32
+ store i32 %ext, i32* %out, align 4
+ ret i16* %ptr
+}
+
+define i16* @preidx16sext64(i16* %src, i64* %out) {
+; CHECK: ldrsh x[[REG:[0-9]+]], [x0, #2]!
+; CHECK: str x[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i16* %src, i64 1
+ %tmp = load i16* %ptr, align 4
+ %ext = sext i16 %tmp to i64
+ store i64 %ext, i64* %out, align 4
+ ret i16* %ptr
+}
+
+define i8* @preidx8sext32(i8* %src, i32* %out) {
+; CHECK: ldrsb w[[REG:[0-9]+]], [x0, #1]!
+; CHECK: str w[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i8* %src, i64 1
+ %tmp = load i8* %ptr, align 4
+ %ext = sext i8 %tmp to i32
+ store i32 %ext, i32* %out, align 4
+ ret i8* %ptr
+}
+
+define i8* @preidx8sext64(i8* %src, i64* %out) {
+; CHECK: ldrsb x[[REG:[0-9]+]], [x0, #1]!
+; CHECK: str x[[REG]], [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds i8* %src, i64 1
+ %tmp = load i8* %ptr, align 4
+ %ext = sext i8 %tmp to i64
+ store i64 %ext, i64* %out, align 4
+ ret i8* %ptr
+}
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
new file mode 100644
index 0000000..c118f10
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s
+
+; This used to assert with "Overran sorted position" in AssignTopologicalOrder
+; due to a cycle created in performPostLD1Combine.
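+; Roughly: the combine rewrote a load into a post-increment LD1 whose new
+; operands depended (transitively) on the node itself, forming a cycle; the
+; test only needs llc to run to completion, hence no FileCheck.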
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
+; Function Attrs: nounwind ssp
+define void @f(double* %P1) #0 {
+entry:
+ %arrayidx4 = getelementptr inbounds double* %P1, i64 1
+ %0 = load double* %arrayidx4, align 8, !tbaa !1
+ %1 = load double* %P1, align 8, !tbaa !1
+ %2 = insertelement <2 x double> undef, double %0, i32 0
+ %3 = insertelement <2 x double> %2, double %1, i32 1
+ %4 = fsub <2 x double> zeroinitializer, %3
+ %5 = fmul <2 x double> undef, %4
+ %6 = extractelement <2 x double> %5, i32 0
+ %cmp168 = fcmp olt double %6, undef
+ br i1 %cmp168, label %if.then172, label %return
+
+if.then172: ; preds = %cond.end90
+ %7 = tail call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 false)
+ br label %return
+
+return: ; preds = %if.then172, %cond.end90, %entry
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"double", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
new file mode 100644
index 0000000..9ee4063
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -0,0 +1,6174 @@
+; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
+
+@ptr = global i8* null
+
+define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
+; CHECK-LABEL: test_v8i8_pre_load:
+; CHECK: ldr d0, [x0, #40]!
+ %newaddr = getelementptr <8 x i8>* %addr, i32 5
+ %val = load <8 x i8>* %newaddr, align 8
+ store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
+ ret <8 x i8> %val
+}
+
+define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
+; CHECK-LABEL: test_v8i8_post_load:
+; CHECK: ldr d0, [x0], #40
+ %newaddr = getelementptr <8 x i8>* %addr, i32 5
+ %val = load <8 x i8>* %addr, align 8
+ store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
+ ret <8 x i8> %val
+}
+
+define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
+; CHECK-LABEL: test_v8i8_pre_store:
+; CHECK: str d0, [x0, #40]!
+ %newaddr = getelementptr <8 x i8>* %addr, i32 5
+ store <8 x i8> %in, <8 x i8>* %newaddr, align 8
+ store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
+ ret void
+}
+
+define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
+; CHECK-LABEL: test_v8i8_post_store:
+; CHECK: str d0, [x0], #40
+ %newaddr = getelementptr <8 x i8>* %addr, i32 5
+ store <8 x i8> %in, <8 x i8>* %addr, align 8
+ store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
+ ret void
+}
+
+define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
+; CHECK-LABEL: test_v4i16_pre_load:
+; CHECK: ldr d0, [x0, #40]!
+ %newaddr = getelementptr <4 x i16>* %addr, i32 5
+ %val = load <4 x i16>* %newaddr, align 8
+ store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
+ ret <4 x i16> %val
+}
+
+define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
+; CHECK-LABEL: test_v4i16_post_load:
+; CHECK: ldr d0, [x0], #40
+ %newaddr = getelementptr <4 x i16>* %addr, i32 5
+ %val = load <4 x i16>* %addr, align 8
+ store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
+ ret <4 x i16> %val
+}
+
+define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
+; CHECK-LABEL: test_v4i16_pre_store:
+; CHECK: str d0, [x0, #40]!
+ %newaddr = getelementptr <4 x i16>* %addr, i32 5
+ store <4 x i16> %in, <4 x i16>* %newaddr, align 8
+ store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
+ ret void
+}
+
+define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
+; CHECK-LABEL: test_v4i16_post_store:
+; CHECK: str d0, [x0], #40
+ %newaddr = getelementptr <4 x i16>* %addr, i32 5
+ store <4 x i16> %in, <4 x i16>* %addr, align 8
+ store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
+ ret void
+}
+
+define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
+; CHECK-LABEL: test_v2i32_pre_load:
+; CHECK: ldr d0, [x0, #40]!
+ %newaddr = getelementptr <2 x i32>* %addr, i32 5
+ %val = load <2 x i32>* %newaddr, align 8
+ store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
+ ret <2 x i32> %val
+}
+
+define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
+; CHECK-LABEL: test_v2i32_post_load:
+; CHECK: ldr d0, [x0], #40
+ %newaddr = getelementptr <2 x i32>* %addr, i32 5
+ %val = load <2 x i32>* %addr, align 8
+ store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
+ ret <2 x i32> %val
+}
+
+define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
+; CHECK-LABEL: test_v2i32_pre_store:
+; CHECK: str d0, [x0, #40]!
+ %newaddr = getelementptr <2 x i32>* %addr, i32 5
+ store <2 x i32> %in, <2 x i32>* %newaddr, align 8
+ store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
+ ret void
+}
+
+define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
+; CHECK-LABEL: test_v2i32_post_store:
+; CHECK: str d0, [x0], #40
+ %newaddr = getelementptr <2 x i32>* %addr, i32 5
+ store <2 x i32> %in, <2 x i32>* %addr, align 8
+ store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
+ ret void
+}
+
+define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
+; CHECK-LABEL: test_v2f32_pre_load:
+; CHECK: ldr d0, [x0, #40]!
+ %newaddr = getelementptr <2 x float>* %addr, i32 5
+ %val = load <2 x float>* %newaddr, align 8
+ store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
+ ret <2 x float> %val
+}
+
+define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
+; CHECK-LABEL: test_v2f32_post_load:
+; CHECK: ldr d0, [x0], #40
+ %newaddr = getelementptr <2 x float>* %addr, i32 5
+ %val = load <2 x float>* %addr, align 8
+ store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
+ ret <2 x float> %val
+}
+
+define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
+; CHECK-LABEL: test_v2f32_pre_store:
+; CHECK: str d0, [x0, #40]!
+ %newaddr = getelementptr <2 x float>* %addr, i32 5
+ store <2 x float> %in, <2 x float>* %newaddr, align 8
+ store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
+ ret void
+}
+
+define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
+; CHECK-LABEL: test_v2f32_post_store:
+; CHECK: str d0, [x0], #40
+ %newaddr = getelementptr <2 x float>* %addr, i32 5
+ store <2 x float> %in, <2 x float>* %addr, align 8
+ store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
+ ret void
+}
+
+define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
+; CHECK-LABEL: test_v1i64_pre_load:
+; CHECK: ldr d0, [x0, #40]!
+ %newaddr = getelementptr <1 x i64>* %addr, i32 5
+ %val = load <1 x i64>* %newaddr, align 8
+ store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
+ ret <1 x i64> %val
+}
+
+define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
+; CHECK-LABEL: test_v1i64_post_load:
+; CHECK: ldr d0, [x0], #40
+ %newaddr = getelementptr <1 x i64>* %addr, i32 5
+ %val = load <1 x i64>* %addr, align 8
+ store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
+ ret <1 x i64> %val
+}
+
+define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
+; CHECK-LABEL: test_v1i64_pre_store:
+; CHECK: str d0, [x0, #40]!
+ %newaddr = getelementptr <1 x i64>* %addr, i32 5
+ store <1 x i64> %in, <1 x i64>* %newaddr, align 8
+ store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
+ ret void
+}
+
+define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
+; CHECK-LABEL: test_v1i64_post_store:
+; CHECK: str d0, [x0], #40
+ %newaddr = getelementptr <1 x i64>* %addr, i32 5
+ store <1 x i64> %in, <1 x i64>* %addr, align 8
+ store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
+ ret void
+}
+
+define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
+; CHECK-LABEL: test_v16i8_pre_load:
+; CHECK: ldr q0, [x0, #80]!
+ %newaddr = getelementptr <16 x i8>* %addr, i32 5
+ %val = load <16 x i8>* %newaddr, align 8
+ store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
+ ret <16 x i8> %val
+}
+
+define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
+; CHECK-LABEL: test_v16i8_post_load:
+; CHECK: ldr q0, [x0], #80
+ %newaddr = getelementptr <16 x i8>* %addr, i32 5
+ %val = load <16 x i8>* %addr, align 8
+ store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
+ ret <16 x i8> %val
+}
+
+define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
+; CHECK-LABEL: test_v16i8_pre_store:
+; CHECK: str q0, [x0, #80]!
+ %newaddr = getelementptr <16 x i8>* %addr, i32 5
+ store <16 x i8> %in, <16 x i8>* %newaddr, align 8
+ store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
+ ret void
+}
+
+define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
+; CHECK-LABEL: test_v16i8_post_store:
+; CHECK: str q0, [x0], #80
+ %newaddr = getelementptr <16 x i8>* %addr, i32 5
+ store <16 x i8> %in, <16 x i8>* %addr, align 8
+ store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
+ ret void
+}
+
+define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
+; CHECK-LABEL: test_v8i16_pre_load:
+; CHECK: ldr q0, [x0, #80]!
+ %newaddr = getelementptr <8 x i16>* %addr, i32 5
+ %val = load <8 x i16>* %newaddr, align 8
+ store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
+ ret <8 x i16> %val
+}
+
+define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
+; CHECK-LABEL: test_v8i16_post_load:
+; CHECK: ldr q0, [x0], #80
+ %newaddr = getelementptr <8 x i16>* %addr, i32 5
+ %val = load <8 x i16>* %addr, align 8
+ store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
+ ret <8 x i16> %val
+}
+
+define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
+; CHECK-LABEL: test_v8i16_pre_store:
+; CHECK: str q0, [x0, #80]!
+ %newaddr = getelementptr <8 x i16>* %addr, i32 5
+ store <8 x i16> %in, <8 x i16>* %newaddr, align 8
+ store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
+ ret void
+}
+
+define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
+; CHECK-LABEL: test_v8i16_post_store:
+; CHECK: str q0, [x0], #80
+ %newaddr = getelementptr <8 x i16>* %addr, i32 5
+ store <8 x i16> %in, <8 x i16>* %addr, align 8
+ store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
+ ret void
+}
+
+define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
+; CHECK-LABEL: test_v4i32_pre_load:
+; CHECK: ldr q0, [x0, #80]!
+ %newaddr = getelementptr <4 x i32>* %addr, i32 5
+ %val = load <4 x i32>* %newaddr, align 8
+ store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
+ ret <4 x i32> %val
+}
+
+define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
+; CHECK-LABEL: test_v4i32_post_load:
+; CHECK: ldr q0, [x0], #80
+ %newaddr = getelementptr <4 x i32>* %addr, i32 5
+ %val = load <4 x i32>* %addr, align 8
+ store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
+ ret <4 x i32> %val
+}
+
+define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
+; CHECK-LABEL: test_v4i32_pre_store:
+; CHECK: str q0, [x0, #80]!
+ %newaddr = getelementptr <4 x i32>* %addr, i32 5
+ store <4 x i32> %in, <4 x i32>* %newaddr, align 8
+ store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
+ ret void
+}
+
+define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
+; CHECK-LABEL: test_v4i32_post_store:
+; CHECK: str q0, [x0], #80
+ %newaddr = getelementptr <4 x i32>* %addr, i32 5
+ store <4 x i32> %in, <4 x i32>* %addr, align 8
+ store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
+ ret void
+}
+
+define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
+; CHECK-LABEL: test_v4f32_pre_load:
+; CHECK: ldr q0, [x0, #80]!
+ %newaddr = getelementptr <4 x float>* %addr, i32 5
+ %val = load <4 x float>* %newaddr, align 8
+ store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
+ ret <4 x float> %val
+}
+
+define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
+; CHECK-LABEL: test_v4f32_post_load:
+; CHECK: ldr q0, [x0], #80
+ %newaddr = getelementptr <4 x float>* %addr, i32 5
+ %val = load <4 x float>* %addr, align 8
+ store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
+ ret <4 x float> %val
+}
+
+define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
+; CHECK-LABEL: test_v4f32_pre_store:
+; CHECK: str q0, [x0, #80]!
+ %newaddr = getelementptr <4 x float>* %addr, i32 5
+ store <4 x float> %in, <4 x float>* %newaddr, align 8
+ store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
+ ret void
+}
+
+define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
+; CHECK-LABEL: test_v4f32_post_store:
+; CHECK: str q0, [x0], #80
+ %newaddr = getelementptr <4 x float>* %addr, i32 5
+ store <4 x float> %in, <4 x float>* %addr, align 8
+ store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
+ ret void
+}
+
+define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
+; CHECK-LABEL: test_v2i64_pre_load:
+; CHECK: ldr q0, [x0, #80]!
+ %newaddr = getelementptr <2 x i64>* %addr, i32 5
+ %val = load <2 x i64>* %newaddr, align 8
+ store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
+ ret <2 x i64> %val
+}
+
+define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
+; CHECK-LABEL: test_v2i64_post_load:
+; CHECK: ldr q0, [x0], #80
+ %newaddr = getelementptr <2 x i64>* %addr, i32 5
+ %val = load <2 x i64>* %addr, align 8
+ store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
+ ret <2 x i64> %val
+}
+
+define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
+; CHECK-LABEL: test_v2i64_pre_store:
+; CHECK: str q0, [x0, #80]!
+ %newaddr = getelementptr <2 x i64>* %addr, i32 5
+ store <2 x i64> %in, <2 x i64>* %newaddr, align 8
+ store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
+ ret void
+}
+
+define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
+; CHECK-LABEL: test_v2i64_post_store:
+; CHECK: str q0, [x0], #80
+ %newaddr = getelementptr <2 x i64>* %addr, i32 5
+ store <2 x i64> %in, <2 x i64>* %addr, align 8
+ store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
+ ret void
+}
+
+define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
+; CHECK-LABEL: test_v2f64_pre_load:
+; CHECK: ldr q0, [x0, #80]!
+ %newaddr = getelementptr <2 x double>* %addr, i32 5
+ %val = load <2 x double>* %newaddr, align 8
+ store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
+ ret <2 x double> %val
+}
+
+define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
+; CHECK-LABEL: test_v2f64_post_load:
+; CHECK: ldr q0, [x0], #80
+ %newaddr = getelementptr <2 x double>* %addr, i32 5
+ %val = load <2 x double>* %addr, align 8
+ store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
+ ret <2 x double> %val
+}
+
+define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
+; CHECK-LABEL: test_v2f64_pre_store:
+; CHECK: str q0, [x0, #80]!
+ %newaddr = getelementptr <2 x double>* %addr, i32 5
+ store <2 x double> %in, <2 x double>* %newaddr, align 8
+ store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
+ ret void
+}
+
+define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
+; CHECK-LABEL: test_v2f64_post_store:
+; CHECK: str q0, [x0], #80
+ %newaddr = getelementptr <2 x double>* %addr, i32 5
+ store <2 x double> %in, <2 x double>* %addr, align 8
+ store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
+ ret void
+}
+
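+; Post-indexed st1 lane stores from 128-bit (Q-register) vectors: storing one
+; extracted lane and then bumping the pointer, either by the element size
+; (immediate form) or by a register amount, should select the post-indexed
+; st1 lane form.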
+define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
+; CHECK: st1.b { v0 }[3], [x0], #1
+ %elt = extractelement <16 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 1
+ ret i8* %newaddr
+}
+
+define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
+; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <16 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 2
+ ret i8* %newaddr
+}
+
+define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
+; CHECK: st1.h { v0 }[3], [x0], #2
+ %elt = extractelement <8 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 1
+ ret i16* %newaddr
+}
+
+define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
+; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <8 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 2
+ ret i16* %newaddr
+}
+
+define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[3], [x0], #4
+ %elt = extractelement <4 x i32> %in, i32 3
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 1
+ ret i32* %newaddr
+}
+
+define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <4 x i32> %in, i32 3
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 2
+ ret i32* %newaddr
+}
+
+define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[3], [x0], #4
+ %elt = extractelement <4 x float> %in, i32 3
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 1
+ ret float* %newaddr
+}
+
+define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <4 x float> %in, i32 3
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 2
+ ret float* %newaddr
+}
+
+define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
+; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
+; CHECK: st1.d { v0 }[1], [x0], #8
+ %elt = extractelement <2 x i64> %in, i64 1
+ store i64 %elt, i64* %addr
+
+ %newaddr = getelementptr i64* %addr, i64 1
+ ret i64* %newaddr
+}
+
+define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
+; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
+; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x i64> %in, i64 1
+ store i64 %elt, i64* %addr
+
+ %newaddr = getelementptr i64* %addr, i64 2
+ ret i64* %newaddr
+}
+
+define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
+; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
+; CHECK: st1.d { v0 }[1], [x0], #8
+ %elt = extractelement <2 x double> %in, i32 1
+ store double %elt, double* %addr
+
+ %newaddr = getelementptr double* %addr, i32 1
+ ret double* %newaddr
+}
+
+define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
+; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
+; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x double> %in, i32 1
+ store double %elt, double* %addr
+
+ %newaddr = getelementptr double* %addr, i32 2
+ ret double* %newaddr
+}
+
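+; The same post-indexed st1 lane patterns for 64-bit (D-register) vectors.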
+define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
+; CHECK: st1.b { v0 }[3], [x0], #1
+ %elt = extractelement <8 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 1
+ ret i8* %newaddr
+}
+
+define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
+; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
+; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <8 x i8> %in, i32 3
+ store i8 %elt, i8* %addr
+
+ %newaddr = getelementptr i8* %addr, i32 2
+ ret i8* %newaddr
+}
+
+define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
+; CHECK: st1.h { v0 }[3], [x0], #2
+ %elt = extractelement <4 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 1
+ ret i16* %newaddr
+}
+
+define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
+; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
+; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
+ %elt = extractelement <4 x i16> %in, i32 3
+ store i16 %elt, i16* %addr
+
+ %newaddr = getelementptr i16* %addr, i32 2
+ ret i16* %newaddr
+}
+
+define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[1], [x0], #4
+ %elt = extractelement <2 x i32> %in, i32 1
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 1
+ ret i32* %newaddr
+}
+
+define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
+; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x i32> %in, i32 1
+ store i32 %elt, i32* %addr
+
+ %newaddr = getelementptr i32* %addr, i32 2
+ ret i32* %newaddr
+}
+
+define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
+; CHECK: st1.s { v0 }[1], [x0], #4
+ %elt = extractelement <2 x float> %in, i32 1
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 1
+ ret float* %newaddr
+}
+
+define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
+; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
+; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
+; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
+ %elt = extractelement <2 x float> %in, i32 1
+ store float %elt, float* %addr
+
+ %newaddr = getelementptr float* %addr, i32 2
+ ret float* %newaddr
+}
+
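+; Post-indexed ld2: when the ld2 intrinsic's base pointer, advanced by an
+; immediate of twice the vector size or by a register amount, is stored back
+; through %ptr, the load should select the post-indexed ld2 form. Note that
+; <1 x i64> is checked against the two-register ld1.1d form instead.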
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v16i8_post_imm_ld2:
+;CHECK: ld2.16b { v0, v1 }, [x0], #32
+ %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v16i8_post_reg_ld2:
+;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
+
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v8i8_post_imm_ld2:
+;CHECK: ld2.8b { v0, v1 }, [x0], #16
+ %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 16
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld2
+}
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i8_post_reg_ld2:
+;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld2
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
+
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v8i16_post_imm_ld2:
+;CHECK: ld2.8h { v0, v1 }, [x0], #32
+ %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld2
+}
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i16_post_reg_ld2:
+;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld2
+}
+
+declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)
+
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v4i16_post_imm_ld2:
+;CHECK: ld2.4h { v0, v1 }, [x0], #16
+ %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 8
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld2
+}
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i16_post_reg_ld2:
+;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld2
+}
+
+declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)
+
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v4i32_post_imm_ld2:
+;CHECK: ld2.4s { v0, v1 }, [x0], #32
+ %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld2
+}
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i32_post_reg_ld2:
+;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld2
+}
+
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
+
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v2i32_post_imm_ld2:
+;CHECK: ld2.2s { v0, v1 }, [x0], #16
+ %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld2
+}
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i32_post_reg_ld2:
+;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld2
+}
+
+declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
+
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v2i64_post_imm_ld2:
+;CHECK: ld2.2d { v0, v1 }, [x0], #32
+ %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld2
+}
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i64_post_reg_ld2:
+;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld2
+}
+
+declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)
+
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v1i64_post_imm_ld2:
+;CHECK: ld1.1d { v0, v1 }, [x0], #16
+ %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 2
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld2
+}
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1i64_post_reg_ld2:
+;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld2
+}
+
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)
+
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v4f32_post_imm_ld2:
+;CHECK: ld2.4s { v0, v1 }, [x0], #32
+ %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld2
+}
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4f32_post_reg_ld2:
+;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld2
+}
+
+declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)
+
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v2f32_post_imm_ld2:
+;CHECK: ld2.2s { v0, v1 }, [x0], #16
+ %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld2
+}
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f32_post_reg_ld2:
+;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld2
+}
+
+declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
+
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v2f64_post_imm_ld2:
+;CHECK: ld2.2d { v0, v1 }, [x0], #32
+ %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld2
+}
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f64_post_reg_ld2:
+;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld2
+}
+
+declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)
+
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v1f64_post_imm_ld2:
+;CHECK: ld1.1d { v0, v1 }, [x0], #16
+ %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 2
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld2
+}
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1f64_post_reg_ld2:
+;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld2
+}
+
+declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)
+
+
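+; Post-indexed ld3: as above, with a three-register list and an immediate of
+; three times the vector size. <1 x i64> again maps to ld1.1d.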
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v16i8_post_imm_ld3:
+;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
+ %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 48
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v16i8_post_reg_ld3:
+;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*)
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v8i8_post_imm_ld3:
+;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
+ %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 24
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i8_post_reg_ld3:
+;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v8i16_post_imm_ld3:
+;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
+ %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 24
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i16_post_reg_ld3:
+;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*)
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v4i16_post_imm_ld3:
+;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
+ %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 12
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i16_post_reg_ld3:
+;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v4i32_post_imm_ld3:
+;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
+ %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 12
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i32_post_reg_ld3:
+;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*)
+
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v2i32_post_imm_ld3:
+;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
+ %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 6
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i32_post_reg_ld3:
+;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*)
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v2i64_post_imm_ld3:
+;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
+ %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 6
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i64_post_reg_ld3:
+;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*)
+
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v1i64_post_imm_ld3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+ %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 3
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1i64_post_reg_ld3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*)
+
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v4f32_post_imm_ld3:
+;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
+ %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 12
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld3
+}
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4f32_post_reg_ld3:
+;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld3
+}
+
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
+
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v2f32_post_imm_ld3:
+;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
+ %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 6
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld3
+}
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f32_post_reg_ld3:
+;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld3
+}
+
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)
+
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v2f64_post_imm_ld3:
+;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
+ %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 6
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld3
+}
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f64_post_reg_ld3:
+;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld3
+}
+
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)
+
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v1f64_post_imm_ld3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+ %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 3
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld3
+}
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1f64_post_reg_ld3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld3
+}
+
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)
+
+
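+; Post-indexed ld4: four-register list, immediate of four times the vector
+; size. <1 x i64> again maps to ld1.1d.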
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v16i8_post_imm_ld4:
+;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
+ %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 64
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v16i8_post_reg_ld4:
+;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v8i8_post_imm_ld4:
+;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i8_post_reg_ld4:
+;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v8i16_post_imm_ld4:
+;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
+ %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 32
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i16_post_reg_ld4:
+;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v4i16_post_imm_ld4:
+;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i16_post_reg_ld4:
+;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v4i32_post_imm_ld4:
+;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
+ %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 16
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i32_post_reg_ld4:
+;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)
+
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v2i32_post_imm_ld4:
+;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i32_post_reg_ld4:
+;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v2i64_post_imm_ld4:
+;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
+ %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 8
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i64_post_reg_ld4:
+;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)
+
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v1i64_post_imm_ld4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1i64_post_reg_ld4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)
+
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v4f32_post_imm_ld4:
+;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
+ %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 16
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
+}
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4f32_post_reg_ld4:
+;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
+}
+
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)
+
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v2f32_post_imm_ld4:
+;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
+}
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f32_post_reg_ld4:
+;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
+}
+
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)
+
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v2f64_post_imm_ld4:
+;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
+ %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 8
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
+}
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f64_post_reg_ld4:
+;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
+}
+
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)
+
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v1f64_post_imm_ld4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
+}
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1f64_post_reg_ld4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
+}
+
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
+
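+; Post-indexed ld1x2: two consecutive whole vectors loaded by a single ld1,
+; with the same immediate and register write-back patterns as ld2.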
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
+;CHECK: ld1.16b { v0, v1 }, [x0], #32
+ %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld1x2
+}
+
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
+;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld1x2
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
+
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
+;CHECK: ld1.8b { v0, v1 }, [x0], #16
+ %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 16
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld1x2
+}
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
+;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld1x2
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
+
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
+;CHECK: ld1.8h { v0, v1 }, [x0], #32
+ %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld1x2
+}
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
+;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld1x2
+}
+
+declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
+
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
+;CHECK: ld1.4h { v0, v1 }, [x0], #16
+ %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 8
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld1x2
+}
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
+;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld1x2
+}
+
+declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
+
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
+;CHECK: ld1.4s { v0, v1 }, [x0], #32
+ %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld1x2
+}
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
+;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld1x2
+}
+
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
+
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
+;CHECK: ld1.2s { v0, v1 }, [x0], #16
+ %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld1x2
+}
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
+;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld1x2
+}
+
+declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
+
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
+;CHECK: ld1.2d { v0, v1 }, [x0], #32
+ %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld1x2
+}
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
+;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld1x2
+}
+
+declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
+
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
+;CHECK: ld1.1d { v0, v1 }, [x0], #16
+ %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 2
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld1x2
+}
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
+;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld1x2
+}
+
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
+
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
+;CHECK: ld1.4s { v0, v1 }, [x0], #32
+ %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld1x2
+}
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
+;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld1x2
+}
+
+declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)
+
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
+;CHECK: ld1.2s { v0, v1 }, [x0], #16
+ %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld1x2
+}
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
+;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld1x2
+}
+
+declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)
+
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
+;CHECK: ld1.2d { v0, v1 }, [x0], #32
+ %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld1x2
+}
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
+;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld1x2
+}
+
+declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*)
+
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
+;CHECK: ld1.1d { v0, v1 }, [x0], #16
+ %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 2
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld1x2
+}
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
+;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld1x2
+}
+
+declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*)
+
+
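+; ld1x3 loads three consecutive full vectors through a single base pointer.
+; Each pair of tests below checks that the post-indexed form is selected:
+; the immediate variant must step the base by the total bytes loaded
+; (three vector widths), and the register variant by the %inc operand.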
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
+;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
+ %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 48
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
+;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*)
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
+;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
+ %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 24
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
+;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
+;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
+ %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 24
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
+;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*)
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
+;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
+ %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 12
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
+;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*)
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
+;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
+ %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 12
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
+;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*)
+
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
+;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
+ %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 6
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
+;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*)
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
+;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
+ %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 6
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
+;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*)
+
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+ %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 3
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*)
+
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
+;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
+ %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 12
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
+}
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
+;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
+}
+
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*)
+
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
+;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
+ %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 6
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
+}
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
+;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
+}
+
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*)
+
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
+;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
+ %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 6
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
+}
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
+;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
+}
+
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*)
+
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
+ %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 3
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
+}
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
+;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
+}
+
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*)
+
+
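+; ld1x4 is the four-vector analogue of ld1x2/ld1x3: the immediate
+; post-increment equals four full vector widths (e.g. #64 for .16b).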
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
+;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
+ %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 64
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
+;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*)
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
+;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
+;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
+ %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
+;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*)
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
+;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
+ %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 32
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
+;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*)
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
+;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
+;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
+ %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
+;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*)
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
+;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
+ %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 16
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
+;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*)
+
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
+;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
+;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
+ %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
+;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*)
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
+;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
+ %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 8
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
+;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*)
+
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
+;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+ %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*)
+
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
+;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
+ %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 16
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
+}
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
+;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
+}
+
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*)
+
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
+;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
+;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
+ %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
+}
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
+;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
+}
+
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*)
+
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
+;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
+ %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 8
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
+}
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
+;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
+}
+
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*)
+
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
+;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
+ %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
+}
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
+;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
+;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
+}
+
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*)
+
+
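+; ld2r is a load-and-replicate: it reads two single elements and splats
+; them across every lane of two vectors, so the post-increment covers only
+; two element widths (#2 for bytes, #4 for halves, #8 for words, #16 for
+; doublewords), not two full vectors.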
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_ld2r:
+;CHECK: ld2r.16b { v0, v1 }, [x0], #2
+ %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_ld2r:
+;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
+
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_ld2r:
+;CHECK: ld2r.8b { v0, v1 }, [x0], #2
+ %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld2
+}
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_ld2r:
+;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld2
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
+
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_ld2r:
+;CHECK: ld2r.8h { v0, v1 }, [x0], #4
+ %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 2
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld2
+}
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_ld2r:
+;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld2
+}
+
+declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
+
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_ld2r:
+;CHECK: ld2r.4h { v0, v1 }, [x0], #4
+ %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 2
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld2
+}
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_ld2r:
+;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld2
+}
+
+declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
+
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_ld2r:
+;CHECK: ld2r.4s { v0, v1 }, [x0], #8
+ %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 2
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld2
+}
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_ld2r:
+;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld2
+}
+
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_ld2r:
+;CHECK: ld2r.2s { v0, v1 }, [x0], #8
+ %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 2
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld2
+}
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_ld2r:
+;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld2
+}
+
+declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
+
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_ld2r:
+;CHECK: ld2r.2d { v0, v1 }, [x0], #16
+ %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 2
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld2
+}
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_ld2r:
+;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld2
+}
+
+declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_ld2r:
+;CHECK: ld2r.1d { v0, v1 }, [x0], #16
+ %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 2
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld2
+}
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_ld2r:
+;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld2
+}
+
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
+
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_ld2r:
+;CHECK: ld2r.4s { v0, v1 }, [x0], #8
+ %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 2
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld2
+}
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_ld2r:
+;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld2
+}
+
+declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_ld2r:
+;CHECK: ld2r.2s { v0, v1 }, [x0], #8
+ %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 2
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld2
+}
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_ld2r:
+;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld2
+}
+
+declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly
+
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_ld2r:
+;CHECK: ld2r.2d { v0, v1 }, [x0], #16
+ %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 2
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld2
+}
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_ld2r:
+;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld2
+}
+
+declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_ld2r:
+;CHECK: ld2r.1d { v0, v1 }, [x0], #16
+ %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 2
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld2
+}
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_ld2r:
+;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld2
+}
+
+declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly
+
+
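+; ld3r replicates three elements across three vectors, so the immediate
+; step is three element widths (#3/#6/#12/#24 for b/h/s/d respectively).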
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_ld3r:
+;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
+ %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 3
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_ld3r:
+;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_ld3r:
+;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
+ %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 3
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_ld3r:
+;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_ld3r:
+;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
+ %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 3
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_ld3r:
+;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_ld3r:
+;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
+ %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 3
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_ld3r:
+;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_ld3r:
+;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
+ %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 3
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_ld3r:
+;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_ld3r:
+;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
+ %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 3
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_ld3r:
+;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_ld3r:
+;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
+ %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 3
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_ld3r:
+;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_ld3r:
+;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
+ %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 3
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_ld3r:
+;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
+
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_ld3r:
+;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
+ %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 3
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld3
+}
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_ld3r:
+;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld3
+}
+
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_ld3r:
+;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
+ %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 3
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld3
+}
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_ld3r:
+;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld3
+}
+
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly
+
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_ld3r:
+;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
+ %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 3
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld3
+}
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_ld3r:
+;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld3
+}
+
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_ld3r:
+;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
+ %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 3
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld3
+}
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_ld3r:
+;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld3
+}
+
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly
+
+
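+; ld4r replicates four elements across four vectors; the immediate step is
+; four element widths (#4/#8/#16/#32 for b/h/s/d).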
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_ld4r:
+;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
+ %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 4
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_ld4r:
+;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_ld4r:
+;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
+ %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 4
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_ld4r:
+;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_ld4r:
+;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
+ %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 4
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_ld4r:
+;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_ld4r:
+;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
+ %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i32 4
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_ld4r:
+;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_ld4r:
+;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
+ %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_ld4r:
+;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_ld4r:
+;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
+ %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_ld4r:
+;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_ld4r:
+;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_ld4r:
+;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_ld4r:
+;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_ld4r:
+;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
+
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_ld4r:
+;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
+ %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
+}
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_ld4r:
+;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
+}
+
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_ld4r:
+;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
+ %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
+}
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_ld4r:
+;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
+}
+
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly
+
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_ld4r:
+;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
+}
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_ld4r:
+;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
+}
+
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_ld4r:
+;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
+ %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
+}
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_ld4r:
+;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
+}
+
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly
+
+
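+; LD2 single-lane (ld2lane) post-increment tests. The immediate form advances
+; the base pointer by 2 x element size (the bytes one lane consumes across the
+; two registers); the register form advances it by a variable amount held in
+; an x register.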
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
+;CHECK: ld2.b { v0, v1 }[0], [x0], #2
+ %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
+;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
+
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
+;CHECK: ld2.b { v0, v1 }[0], [x0], #2
+ %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld2
+}
+
+define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
+;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8> } %ld2
+}
+
+declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
+
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
+;CHECK: ld2.h { v0, v1 }[0], [x0], #4
+ %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 2
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld2
+}
+
+define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
+;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16> } %ld2
+}
+
+declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
+
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
+;CHECK: ld2.h { v0, v1 }[0], [x0], #4
+ %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 2
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld2
+}
+
+define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
+;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16> } %ld2
+}
+
+declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
+
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+ %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 2
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld2
+}
+
+define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32> } %ld2
+}
+
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
+
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+ %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 2
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld2
+}
+
+define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32> } %ld2
+}
+
+declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
+
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+ %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i32 2
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld2
+}
+
+define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64> } %ld2
+}
+
+declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
+
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+ %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i32 2
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld2
+}
+
+define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64> } %ld2
+}
+
+declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
+
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+ %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 2
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld2
+}
+
+define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float> } %ld2
+}
+
+declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly
+
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], #8
+ %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 2
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld2
+}
+
+define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
+;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float> } %ld2
+}
+
+declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly
+
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+ %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i32 2
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld2
+}
+
+define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double> } %ld2
+}
+
+declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly
+
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], #16
+ %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i32 2
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld2
+}
+
+define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
+;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double> } %ld2
+}
+
+declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly
+
+
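+; LD3 single-lane (ld3lane) post-increment tests: the immediate advance is
+; 3 x element size (#3 for .b, #6 for .h, #12 for .s, #24 for .d); the
+; register form uses a variable amount.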
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
+;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
+ %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 3
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
+;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
+;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
+ %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 3
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
+;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
+;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
+ %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 3
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
+;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
+;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
+ %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 3
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
+;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+ %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 3
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
+
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+ %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 3
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+ %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i32 3
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
+
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+ %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i32 3
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
+
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+ %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 3
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld3
+}
+
+define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float> } %ld3
+}
+
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
+
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
+ %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 3
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld3
+}
+
+define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
+;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float> } %ld3
+}
+
+declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
+
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+ %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i32 3
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld3
+}
+
+define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double> } %ld3
+}
+
+declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
+
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
+ %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i32 3
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld3
+}
+
+define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
+;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double> } %ld3
+}
+
+declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
+
+
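+; LD4 single-lane (ld4lane) post-increment tests: the immediate advance is
+; 4 x element size; the register form uses a variable amount.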
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
+;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
+ %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 4
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
+}
+
+define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
+;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
+}
+
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
+
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
+;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
+ %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 4
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
+}
+
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
+;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ store i8* %tmp, i8** %ptr
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
+}
+
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
+
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
+;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
+ %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 4
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
+}
+
+define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
+;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
+}
+
+declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
+
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
+;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
+ %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 4
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
+}
+
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
+;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ store i16* %tmp, i16** %ptr
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
+}
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
+
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+ %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
+}
+
+define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
+}
+
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
+
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+ %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
+}
+
+define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ store i32* %tmp, i32** %ptr
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
+}
+
+declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
+
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+ %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
+}
+
+define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
+}
+
+declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
+
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+ %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i32 4
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
+}
+
+define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ store i64* %tmp, i64** %ptr
+ ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
+}
+
+declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
+
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+ %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
+}
+
+define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
+}
+
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
+
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
+ %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
+}
+
+define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
+;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ store float* %tmp, float** %ptr
+ ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
+}
+
+declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
+
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+ %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
+}
+
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
+}
+
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
+
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
+ %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i32 4
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
+}
+
+define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
+;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ store double* %tmp, double** %ptr
+ ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
+}
+
+declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
+
+
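+; ST2 post-increment tests. The immediate advance covers the whole structure
+; (2 x register size: #32 for Q registers, #16 for D registers). The
+; <1 x i64>/<1 x double> cases are checked as st1.1d with two registers, since
+; a single-element interleave is just a contiguous store.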
+define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st2:
+;CHECK: st2.16b { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st2:
+;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
+
+
+define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st2:
+;CHECK: st2.8b { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i32 16
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st2:
+;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
+
+
+define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st2:
+;CHECK: st2.8h { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st2:
+;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
+
+
+define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st2:
+;CHECK: st2.4h { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i32 8
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st2:
+;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
+
+
+define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st2:
+;CHECK: st2.4s { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st2:
+;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
+
+
+define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st2:
+;CHECK: st2.2s { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st2:
+;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
+
+
+define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st2:
+;CHECK: st2.2d { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 4
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st2:
+;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
+
+
+define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st2:
+;CHECK: st1.1d { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 2
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st2:
+;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
+
+
+define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st2:
+;CHECK: st2.4s { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st2:
+;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
+
+
+define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st2:
+;CHECK: st2.2s { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st2:
+;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
+
+
+define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st2:
+;CHECK: st2.2d { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 4
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st2:
+;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
+
+
+define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st2:
+;CHECK: st1.1d { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 2
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st2:
+;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
+
+
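+; ST3 post-increment tests: the immediate advance is 3 x register size
+; (#48 for Q registers, #24 for D registers).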
+define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st3:
+;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i32 48
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st3:
+;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
+
+
+define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st3:
+;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i32 24
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st3:
+;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
+
+
+define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st3:
+;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i32 24
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st3:
+;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
+
+
+define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st3:
+;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i32 12
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st3:
+;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
+
+
+define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st3:
+;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i32 12
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st3:
+;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
+
+
+define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st3:
+;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i32 6
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st3:
+;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
+
+
+define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st3:
+;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 6
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st3:
+;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
+
+
+define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 3
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
+
+
+define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st3:
+;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i32 12
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st3:
+;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
+
+
+define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st3:
+;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i32 6
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st3:
+;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
+
+
+define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st3:
+;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 6
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st3:
+;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
+
+
+define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 3
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
+
+
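+; st4 post-increment stores: the immediate offset is 4 x vector size (#64 for
+; 128-bit vectors, #32 for 64-bit vectors); the <1 x i64> and <1 x double>
+; variants select a four-register st1.1d instead.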
+define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st4:
+;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i32 64
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st4:
+;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
+
+
+define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st4:
+;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st4:
+;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
+
+
+define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st4:
+;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i32 32
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st4:
+;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
+
+
+define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st4:
+;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st4:
+;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)
+
+
+define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st4:
+;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i32 16
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st4:
+;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)
+
+
+define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st4:
+;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st4:
+;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
+
+
+define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st4:
+;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 8
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st4:
+;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)
+
+
+define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 4
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)
+
+
+define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st4:
+;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i32 16
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st4:
+;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
+
+
+define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st4:
+;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st4:
+;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
+
+
+define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st4:
+;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 8
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st4:
+;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)
+
+
+define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 4
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
+
+
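+; st1x2 stores two consecutive registers with a single st1; the post-increment
+; rules mirror the st2 cases (immediate = 2 x vector size).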
+define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st1x2:
+;CHECK: st1.16b { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st1x2:
+;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
+
+
+define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st1x2:
+;CHECK: st1.8b { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i32 16
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st1x2:
+;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
+
+
+define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st1x2:
+;CHECK: st1.8h { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st1x2:
+;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
+
+
+define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st1x2:
+;CHECK: st1.4h { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i32 8
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st1x2:
+;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
+
+
+define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st1x2:
+;CHECK: st1.4s { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st1x2:
+;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
+
+
+define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st1x2:
+;CHECK: st1.2s { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st1x2:
+;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
+
+
+define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st1x2:
+;CHECK: st1.2d { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 4
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st1x2:
+;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
+
+
+define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st1x2:
+;CHECK: st1.1d { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 2
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st1x2:
+;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
+
+
+define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st1x2:
+;CHECK: st1.4s { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st1x2:
+;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
+
+
+define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st1x2:
+;CHECK: st1.2s { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st1x2:
+;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
+
+
+define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st1x2:
+;CHECK: st1.2d { v0, v1 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 4
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st1x2:
+;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
+
+
+define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st1x2:
+;CHECK: st1.1d { v0, v1 }, [x0], #16
+ call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 2
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st1x2:
+;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
+
+
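+; st1x3 stores three consecutive registers with a single st1, post-incremented
+; by 3 x vector size or by the register operand.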
+define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st1x3:
+;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i32 48
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st1x3:
+;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
+
+
+define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st1x3:
+;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i32 24
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st1x3:
+;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
+
+
+define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st1x3:
+;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i32 24
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st1x3:
+;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
+
+
+define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st1x3:
+;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i32 12
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st1x3:
+;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
+
+
+define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st1x3:
+;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i32 12
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st1x3:
+;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
+
+
+define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st1x3:
+;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i32 6
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st1x3:
+;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
+
+
+define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st1x3:
+;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 6
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st1x3:
+;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
+
+
+define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st1x3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 3
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st1x3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
+
+
+define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st1x3:
+;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i32 12
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st1x3:
+;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
+
+
+define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st1x3:
+;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i32 6
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st1x3:
+;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
+
+
+define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st1x3:
+;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
+ call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 6
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st1x3:
+;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
+
+
+define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st1x3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
+ call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 3
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st1x3:
+;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
+
+
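+; st1x4 stores four consecutive registers with a single st1, post-incremented
+; by 4 x vector size or by the register operand.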
+define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st1x4:
+;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i32 64
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st1x4:
+;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
+
+
+define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st1x4:
+;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st1x4:
+;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
+
+
+define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st1x4:
+;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i32 32
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st1x4:
+;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
+
+
+define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st1x4:
+;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i32 16
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st1x4:
+;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)
+
+
+define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st1x4:
+;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i32 16
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st1x4:
+;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)
+
+
+define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st1x4:
+;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i32 8
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st1x4:
+;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
+
+
+define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st1x4:
+;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 8
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st1x4:
+;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)
+
+
+define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st1x4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 4
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st1x4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)
+
+
+define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st1x4:
+;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i32 16
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st1x4:
+;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
+
+
+define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st1x4:
+;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i32 8
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st1x4:
+;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
+
+
+define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st1x4:
+;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
+ call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 8
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st1x4:
+;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)
+
+
+define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st1x4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
+ call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 4
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st1x4:
+;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
+
+
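+; Note: the st2lanelane tests below carry no CHECK lines; they only exercise
+; instruction selection for the intrinsic without asserting any output.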
+define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
+ call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
+ call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*)
+
+
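+; st2lane post-increment stores: the immediate equals two lane elements
+; (#2 for .b, #4 for .h, #8 for .s, #16 for .d).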
+define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st2lane:
+;CHECK: st2.b { v0, v1 }[0], [x0], #2
+ call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st2lane:
+;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)
+
+
+define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st2lane:
+;CHECK: st2.b { v0, v1 }[0], [x0], #2
+ call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 2
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st2lane:
+;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)
+
+
+define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st2lane:
+;CHECK: st2.h { v0, v1 }[0], [x0], #4
+ call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 2
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st2lane:
+;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)
+
+
+define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st2lane:
+;CHECK: st2.h { v0, v1 }[0], [x0], #4
+ call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 2
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st2lane:
+;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*)
+
+
+define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], #8
+ call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 2
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
+
+
+define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], #8
+ call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 2
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*)
+
+
+define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 2
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)
+
+
+define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 2
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
+
+
+define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], #8
+ call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 2
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)
+
+
+define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], #8
+ call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 2
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st2lane:
+;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*)
+
+
+define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 2
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*)
+
+
+define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 2
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st2lane:
+;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*)
+
+
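+; For stN-lane post-increment stores, the immediate write-back must be
+; N * element-size (st3.b advances by #3, st3.d by #24, st4.d by #32);
+; the register form accepts an arbitrary increment.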
+define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st3lane:
+;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
+ call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 3
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st3lane:
+;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
+
+
+define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st3lane:
+;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
+ call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 3
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st3lane:
+;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
+
+
+define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st3lane:
+;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
+ call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 3
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st3lane:
+;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
+
+
+define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st3lane:
+;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
+ call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 3
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st3lane:
+;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
+
+
+define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+ call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 3
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
+
+
+define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+ call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 3
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
+
+
+define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+ call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 3
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
+
+
+define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+ call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 3
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
+
+
+define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+ call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 3
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)
+
+
+define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
+ call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 3
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st3lane:
+;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*)
+
+
+define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+ call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 3
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*)
+
+
+define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
+ call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 3
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st3lane:
+;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*)
+
+
+define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
+;CHECK-LABEL: test_v16i8_post_imm_st4lane:
+;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
+ call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 4
+ ret i8* %tmp
+}
+
+define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v16i8_post_reg_st4lane:
+;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
+
+
+define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
+;CHECK-LABEL: test_v8i8_post_imm_st4lane:
+;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
+ call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i32 4
+ ret i8* %tmp
+}
+
+define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i8_post_reg_st4lane:
+;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
+ %tmp = getelementptr i8* %A, i64 %inc
+ ret i8* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
+
+
+define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
+;CHECK-LABEL: test_v8i16_post_imm_st4lane:
+;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
+ call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 4
+ ret i16* %tmp
+}
+
+define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v8i16_post_reg_st4lane:
+;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
+
+
+define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
+;CHECK-LABEL: test_v4i16_post_imm_st4lane:
+;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
+ call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i32 4
+ ret i16* %tmp
+}
+
+define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i16_post_reg_st4lane:
+;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
+ %tmp = getelementptr i16* %A, i64 %inc
+ ret i16* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
+
+
+define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
+;CHECK-LABEL: test_v4i32_post_imm_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ ret i32* %tmp
+}
+
+define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4i32_post_reg_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
+
+
+define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
+;CHECK-LABEL: test_v2i32_post_imm_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i32 4
+ ret i32* %tmp
+}
+
+define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i32_post_reg_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
+ %tmp = getelementptr i32* %A, i64 %inc
+ ret i32* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
+
+
+define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
+;CHECK-LABEL: test_v2i64_post_imm_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+ call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 4
+ ret i64* %tmp
+}
+
+define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2i64_post_reg_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
+
+
+define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
+;CHECK-LABEL: test_v1i64_post_imm_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+ call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 4
+ ret i64* %tmp
+}
+
+define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1i64_post_reg_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
+ %tmp = getelementptr i64* %A, i64 %inc
+ ret i64* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
+
+
+define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
+;CHECK-LABEL: test_v4f32_post_imm_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ ret float* %tmp
+}
+
+define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v4f32_post_reg_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)
+
+
+define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
+;CHECK-LABEL: test_v2f32_post_imm_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
+ call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i32 4
+ ret float* %tmp
+}
+
+define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f32_post_reg_st4lane:
+;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
+ %tmp = getelementptr float* %A, i64 %inc
+ ret float* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*)
+
+
+define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
+;CHECK-LABEL: test_v2f64_post_imm_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+ call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 4
+ ret double* %tmp
+}
+
+define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v2f64_post_reg_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*)
+
+
+define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
+;CHECK-LABEL: test_v1f64_post_imm_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
+ call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 4
+ ret double* %tmp
+}
+
+define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
+;CHECK-LABEL: test_v1f64_post_reg_st4lane:
+;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
+ call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
+ %tmp = getelementptr double* %A, i64 %inc
+ ret double* %tmp
+}
+
+declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*)
+
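+; ld1r loads one element and replicates it into every lane. These tests check
+; that the pointer update feeding the stored-back pointer is folded into the
+; load as a post-increment: an immediate equal to the element size, or a
+; register.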
+define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
+; CHECK-LABEL: test_v16i8_post_imm_ld1r:
+; CHECK: ld1r.16b { v0 }, [x0], #1
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
+ %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
+ %tmp18 = getelementptr i8* %bar, i64 1
+ store i8* %tmp18, i8** %ptr
+ ret <16 x i8> %tmp17
+}
+
+define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v16i8_post_reg_ld1r:
+; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
+ %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
+ %tmp18 = getelementptr i8* %bar, i64 %inc
+ store i8* %tmp18, i8** %ptr
+ ret <16 x i8> %tmp17
+}
+
+define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
+; CHECK-LABEL: test_v8i8_post_imm_ld1r:
+; CHECK: ld1r.8b { v0 }, [x0], #1
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
+ %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
+ %tmp10 = getelementptr i8* %bar, i64 1
+ store i8* %tmp10, i8** %ptr
+ ret <8 x i8> %tmp9
+}
+
+define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v8i8_post_reg_ld1r:
+; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
+ %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
+ %tmp10 = getelementptr i8* %bar, i64 %inc
+ store i8* %tmp10, i8** %ptr
+ ret <8 x i8> %tmp9
+}
+
+define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
+; CHECK-LABEL: test_v8i16_post_imm_ld1r:
+; CHECK: ld1r.8h { v0 }, [x0], #2
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
+ %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
+ %tmp10 = getelementptr i16* %bar, i64 1
+ store i16* %tmp10, i16** %ptr
+ ret <8 x i16> %tmp9
+}
+
+define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v8i16_post_reg_ld1r:
+; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
+ %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
+ %tmp10 = getelementptr i16* %bar, i64 %inc
+ store i16* %tmp10, i16** %ptr
+ ret <8 x i16> %tmp9
+}
+
+define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
+; CHECK-LABEL: test_v4i16_post_imm_ld1r:
+; CHECK: ld1r.4h { v0 }, [x0], #2
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
+ %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
+ %tmp6 = getelementptr i16* %bar, i64 1
+ store i16* %tmp6, i16** %ptr
+ ret <4 x i16> %tmp5
+}
+
+define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v4i16_post_reg_ld1r:
+; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
+ %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
+ %tmp6 = getelementptr i16* %bar, i64 %inc
+ store i16* %tmp6, i16** %ptr
+ ret <4 x i16> %tmp5
+}
+
+define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
+; CHECK-LABEL: test_v4i32_post_imm_ld1r:
+; CHECK: ld1r.4s { v0 }, [x0], #4
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
+ %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
+ %tmp6 = getelementptr i32* %bar, i64 1
+ store i32* %tmp6, i32** %ptr
+ ret <4 x i32> %tmp5
+}
+
+define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v4i32_post_reg_ld1r:
+; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
+ %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
+ %tmp6 = getelementptr i32* %bar, i64 %inc
+ store i32* %tmp6, i32** %ptr
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
+; CHECK-LABEL: test_v2i32_post_imm_ld1r:
+; CHECK: ld1r.2s { v0 }, [x0], #4
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
+ %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
+ %tmp4 = getelementptr i32* %bar, i64 1
+ store i32* %tmp4, i32** %ptr
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v2i32_post_reg_ld1r:
+; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
+ %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
+ %tmp4 = getelementptr i32* %bar, i64 %inc
+ store i32* %tmp4, i32** %ptr
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
+; CHECK-LABEL: test_v2i64_post_imm_ld1r:
+; CHECK: ld1r.2d { v0 }, [x0], #8
+ %tmp1 = load i64* %bar
+ %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
+ %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
+ %tmp4 = getelementptr i64* %bar, i64 1
+ store i64* %tmp4, i64** %ptr
+ ret <2 x i64> %tmp3
+}
+
+define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v2i64_post_reg_ld1r:
+; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load i64* %bar
+ %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
+ %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
+ %tmp4 = getelementptr i64* %bar, i64 %inc
+ store i64* %tmp4, i64** %ptr
+ ret <2 x i64> %tmp3
+}
+
+define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
+; CHECK-LABEL: test_v4f32_post_imm_ld1r:
+; CHECK: ld1r.4s { v0 }, [x0], #4
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
+ %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
+ %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
+ %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
+ %tmp6 = getelementptr float* %bar, i64 1
+ store float* %tmp6, float** %ptr
+ ret <4 x float> %tmp5
+}
+
+define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v4f32_post_reg_ld1r:
+; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
+ %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
+ %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
+ %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
+ %tmp6 = getelementptr float* %bar, i64 %inc
+ store float* %tmp6, float** %ptr
+ ret <4 x float> %tmp5
+}
+
+define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
+; CHECK-LABEL: test_v2f32_post_imm_ld1r:
+; CHECK: ld1r.2s { v0 }, [x0], #4
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
+ %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
+ %tmp4 = getelementptr float* %bar, i64 1
+ store float* %tmp4, float** %ptr
+ ret <2 x float> %tmp3
+}
+
+define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v2f32_post_reg_ld1r:
+; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
+ %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
+ %tmp4 = getelementptr float* %bar, i64 %inc
+ store float* %tmp4, float** %ptr
+ ret <2 x float> %tmp3
+}
+
+define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
+; CHECK-LABEL: test_v2f64_post_imm_ld1r:
+; CHECK: ld1r.2d { v0 }, [x0], #8
+ %tmp1 = load double* %bar
+ %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
+ %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
+ %tmp4 = getelementptr double* %bar, i64 1
+ store double* %tmp4, double** %ptr
+ ret <2 x double> %tmp3
+}
+
+define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
+; CHECK-LABEL: test_v2f64_post_reg_ld1r:
+; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
+ %tmp1 = load double* %bar
+ %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
+ %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
+ %tmp4 = getelementptr double* %bar, i64 %inc
+ store double* %tmp4, double** %ptr
+ ret <2 x double> %tmp3
+}
+
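+; ld1 (single lane) loads one element into a lane of an existing vector; the
+; pointer update should again fold into the instruction's post-increment form.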
+define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
+; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
+; CHECK: ld1.b { v0 }[1], [x0], #1
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
+ %tmp3 = getelementptr i8* %bar, i64 1
+ store i8* %tmp3, i8** %ptr
+ ret <16 x i8> %tmp2
+}
+
+define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
+; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
+; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
+ %tmp3 = getelementptr i8* %bar, i64 %inc
+ store i8* %tmp3, i8** %ptr
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
+; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
+; CHECK: ld1.b { v0 }[1], [x0], #1
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
+ %tmp3 = getelementptr i8* %bar, i64 1
+ store i8* %tmp3, i8** %ptr
+ ret <8 x i8> %tmp2
+}
+
+define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
+; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
+; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
+ %tmp3 = getelementptr i8* %bar, i64 %inc
+ store i8* %tmp3, i8** %ptr
+ ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
+; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
+; CHECK: ld1.h { v0 }[1], [x0], #2
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16* %bar, i64 1
+ store i16* %tmp3, i16** %ptr
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
+; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
+; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16* %bar, i64 %inc
+ store i16* %tmp3, i16** %ptr
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
+; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
+; CHECK: ld1.h { v0 }[1], [x0], #2
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16* %bar, i64 1
+ store i16* %tmp3, i16** %ptr
+ ret <4 x i16> %tmp2
+}
+
+define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
+; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
+; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16* %bar, i64 %inc
+ store i16* %tmp3, i16** %ptr
+ ret <4 x i16> %tmp2
+}
+
+define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
+; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], #4
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
+ %tmp3 = getelementptr i32* %bar, i64 1
+ store i32* %tmp3, i32** %ptr
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
+; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
+ %tmp3 = getelementptr i32* %bar, i64 %inc
+ store i32* %tmp3, i32** %ptr
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
+; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], #4
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
+ %tmp3 = getelementptr i32* %bar, i64 1
+ store i32* %tmp3, i32** %ptr
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
+; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
+ %tmp3 = getelementptr i32* %bar, i64 %inc
+ store i32* %tmp3, i32** %ptr
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
+; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
+; CHECK: ld1.d { v0 }[1], [x0], #8
+ %tmp1 = load i64* %bar
+ %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
+ %tmp3 = getelementptr i64* %bar, i64 1
+ store i64* %tmp3, i64** %ptr
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
+; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
+; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i64* %bar
+ %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
+ %tmp3 = getelementptr i64* %bar, i64 %inc
+ store i64* %tmp3, i64** %ptr
+ ret <2 x i64> %tmp2
+}
+
+define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
+; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], #4
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
+ %tmp3 = getelementptr float* %bar, i64 1
+ store float* %tmp3, float** %ptr
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
+; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
+ %tmp3 = getelementptr float* %bar, i64 %inc
+ store float* %tmp3, float** %ptr
+ ret <4 x float> %tmp2
+}
+
+define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
+; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], #4
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
+ %tmp3 = getelementptr float* %bar, i64 1
+ store float* %tmp3, float** %ptr
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
+; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
+; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
+ %tmp3 = getelementptr float* %bar, i64 %inc
+ store float* %tmp3, float** %ptr
+ ret <2 x float> %tmp2
+}
+
+define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
+; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
+; CHECK: ld1.d { v0 }[1], [x0], #8
+ %tmp1 = load double* %bar
+ %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
+ %tmp3 = getelementptr double* %bar, i64 1
+ store double* %tmp3, double** %ptr
+ ret <2 x double> %tmp2
+}
+
+define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
+; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
+; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load double* %bar
+ %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
+ %tmp3 = getelementptr double* %bar, i64 %inc
+ store double* %tmp3, double** %ptr
+ ret <2 x double> %tmp2
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM64/inline-asm-error-I.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
index a7aaf9e..a7aaf9e 100644
--- a/test/CodeGen/ARM64/inline-asm-error-I.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-I.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-J.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
index 077e1b8..077e1b8 100644
--- a/test/CodeGen/ARM64/inline-asm-error-J.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-J.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-K.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
index 2a7f961..2a7f961 100644
--- a/test/CodeGen/ARM64/inline-asm-error-K.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-K.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-L.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
index 1701943..1701943 100644
--- a/test/CodeGen/ARM64/inline-asm-error-L.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-L.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-M.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
index 952bf60..952bf60 100644
--- a/test/CodeGen/ARM64/inline-asm-error-M.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-M.ll
diff --git a/test/CodeGen/ARM64/inline-asm-error-N.ll b/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
index b4a199f..b4a199f 100644
--- a/test/CodeGen/ARM64/inline-asm-error-N.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-error-N.ll
diff --git a/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll b/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
index 6bfce8f..6bfce8f 100644
--- a/test/CodeGen/ARM64/inline-asm-zero-reg-error.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm-zero-reg-error.ll
diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
new file mode 100644
index 0000000..d76cca3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -0,0 +1,230 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s
+
+; rdar://9167275
+
+define i32 @t1() nounwind ssp {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: mov {{w[0-9]+}}, 7
+ %0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind
+ ret i32 %0
+}
+
+define i64 @t2() nounwind ssp {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: mov {{x[0-9]+}}, 7
+ %0 = tail call i64 asm "mov $0, 7", "=r"() nounwind
+ ret i64 %0
+}
+
+define i64 @t3() nounwind ssp {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: mov {{w[0-9]+}}, 7
+ %0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind
+ ret i64 %0
+}
+
+; rdar://9281206
+
+define void @t4(i64 %op) nounwind {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: mov x0, {{x[0-9]+}}; svc #0
+ %0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;", "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind
+ ret void
+}
+
+; rdar://9394290
+
+define float @t5(float %x) nounwind {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ %0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) nounwind
+ ret float %0
+}
+
+; rdar://9553599
+
+define zeroext i8 @t6(i8* %src) nounwind {
+entry:
+; CHECK-LABEL: t6:
+; CHECK: ldtrb {{w[0-9]+}}, [{{x[0-9]+}}]
+ %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind
+ ret i8 %0
+}
+
+define void @t7(i8* %f, i32 %g) nounwind {
+entry:
+ %f.addr = alloca i8*, align 8
+ store i8* %f, i8** %f.addr, align 8
+ ; CHECK-LABEL: t7:
+ ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}]
+ call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
+ ret void
+}
+
+; rdar://10258229
+; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize 'v'
+; registers.
+define void @t8() nounwind ssp {
+entry:
+; CHECK-LABEL: t8:
+; CHECK: stp {{d[0-9]+}}, {{d[0-9]+}}, [sp, #-16]
+ tail call void asm sideeffect "nop", "~{v8}"() nounwind
+ ret void
+}
+
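+; Immediate constraints: 'I' accepts add/sub immediates (0-4095, optionally
+; shifted left by 12), 'J' their negative counterparts, 'K'/'L' 32- and 64-bit
+; logical immediates, and 'M'/'N' 32- and 64-bit constants materializable by
+; the mov-wide instructions.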
+define i32 @constraint_I(i32 %i, i32 %j) nounwind {
+entry:
+ ; CHECK-LABEL: constraint_I:
+ %0 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 16773120) nounwind
+ ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #16773120
+ %1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 4096) nounwind
+ ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4096
+ ret i32 %1
+}
+
+define i32 @constraint_J(i32 %i, i32 %j) nounwind {
+entry:
+ ; CHECK-LABEL: constraint_J:
+ %0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -16773120) nounwind
+ ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4278194176
+ %1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -1) nounwind
+ ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4294967295
+ ret i32 %1
+}
+
+define i32 @constraint_KL(i32 %i, i32 %j) nounwind {
+entry:
+ ; CHECK-LABEL: constraint_KL:
+ %0 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,K"(i32 %i, i32 255) nounwind
+ ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #255
+ %1 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,L"(i32 %i, i64 16711680) nounwind
+ ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #16711680
+ ret i32 %1
+}
+
+define i32 @constraint_MN(i32 %i, i32 %j) nounwind {
+entry:
+ ; CHECK-LABEL: constraint_MN:
+ %0 = tail call i32 asm sideeffect "movk ${0:w}, $1", "=r,M"(i32 65535) nounwind
+ ; CHECK: movk {{w[0-9]+}}, #65535
+ %1 = tail call i32 asm sideeffect "movz ${0:w}, $1", "=r,N"(i64 0) nounwind
+ ; CHECK: movz {{w[0-9]+}}, #0
+ ret i32 %1
+}
+
+define void @t9() nounwind {
+entry:
+ ; CHECK-LABEL: t9:
+ %data = alloca <2 x double>, align 16
+ %0 = load <2 x double>* %data, align 16
+ call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind
+ ; CHECK: mov.2d v4, {{v[0-9]+}}
+ ret void
+}
+
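+; The q/d/s/h/b operand modifiers print the same FP/SIMD register at quad,
+; double, single, half, and byte width.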
+define void @t10() nounwind {
+entry:
+ ; CHECK-LABEL: t10:
+ %data = alloca <2 x float>, align 8
+ %a = alloca [2 x float], align 4
+ %arraydecay = getelementptr inbounds [2 x float]* %a, i32 0, i32 0
+ %0 = load <2 x float>* %data, align 8
+ call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
+ ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
+ call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
+ ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
+ call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
+ ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}]
+ call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
+ ; CHECK: ldr {{h[0-9]+}}, [{{x[0-9]+}}]
+ call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
+ ; CHECK: ldr {{b[0-9]+}}, [{{x[0-9]+}}]
+ ret void
+}
+
+define void @t11() nounwind {
+entry:
+ ; CHECK-LABEL: t11:
+ %a = alloca i32, align 4
+ %0 = load i32* %a, align 4
+ call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind
+ ; CHECK: mov xzr, {{x[0-9]+}}
+ %1 = load i32* %a, align 4
+ call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind
+ ; CHECK: mov wzr, {{w[0-9]+}}
+ ret void
+}
+
+define void @t12() nounwind {
+entry:
+ ; CHECK-LABEL: t12:
+ %data = alloca <4 x float>, align 16
+ %0 = load <4 x float>* %data, align 16
+ call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind
+  ; CHECK: mov.2d v4, {{v([0-9]|1[0-5])}}
+ ret void
+}
+
+define void @t13() nounwind {
+entry:
+ ; CHECK-LABEL: t13:
+ tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 1311673391471656960) nounwind
+ ; CHECK: mov x4, #1311673391471656960
+ tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 -4662) nounwind
+ ; CHECK: mov x4, #-4662
+ tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 4660) nounwind
+ ; CHECK: mov x4, #4660
+ call void asm sideeffect "mov x4, $0\0A", "N"(i64 -71777214294589696) nounwind
+ ; CHECK: mov x4, #-71777214294589696
+ ret void
+}
+
+define void @t14() nounwind {
+entry:
+ ; CHECK-LABEL: t14:
+ tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 305397760) nounwind
+ ; CHECK: mov w4, #305397760
+ tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 -4662) nounwind
+ ; CHECK: mov w4, #4294962634
+ tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 4660) nounwind
+ ; CHECK: mov w4, #4660
+ call void asm sideeffect "mov w4, $0\0A", "M"(i32 -16711936) nounwind
+ ; CHECK: mov w4, #4278255360
+ ret void
+}
+
+define void @t15() nounwind {
+entry:
+ %0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind
+ ; CHECK: fmov {{x[0-9]+}}, d8
+ ret void
+}
+
+; rdar://problem/14285178
+
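+; The 'z' constraint allows wzr/xzr to be used for a zero operand; with "zr",
+; a non-zero value falls back to an ordinary GPR.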
+define void @test_zero_reg(i32* %addr) {
+; CHECK-LABEL: test_zero_reg:
+
+ tail call void asm sideeffect "USE($0)", "z"(i32 0) nounwind
+; CHECK: USE(xzr)
+
+ tail call void asm sideeffect "USE(${0:w})", "zr"(i32 0)
+; CHECK: USE(wzr)
+
+ tail call void asm sideeffect "USE(${0:w})", "zr"(i32 1)
+; CHECK: orr [[VAL1:w[0-9]+]], wzr, #0x1
+; CHECK: USE([[VAL1]])
+
+ tail call void asm sideeffect "USE($0), USE($1)", "z,z"(i32 0, i32 0) nounwind
+; CHECK: USE(xzr), USE(xzr)
+
+ tail call void asm sideeffect "USE($0), USE(${1:w})", "z,z"(i32 0, i32 0) nounwind
+; CHECK: USE(xzr), USE(wzr)
+
+ ret void
+}
diff --git a/test/CodeGen/ARM64/join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll
index e99168b..e99168b 100644
--- a/test/CodeGen/ARM64/join-reserved.ll
+++ b/test/CodeGen/AArch64/arm64-join-reserved.ll
diff --git a/test/CodeGen/ARM64/jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll
index 4635cfe..4635cfe 100644
--- a/test/CodeGen/ARM64/jumptable.ll
+++ b/test/CodeGen/AArch64/arm64-jumptable.ll
diff --git a/test/CodeGen/AArch64/arm64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll
new file mode 100644
index 0000000..5a53da6
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-large-frame.ll
@@ -0,0 +1,69 @@
+; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+declare void @use_addr(i8*)
+
+@addr = global i8* null
+
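+; An add/sub immediate is 12 bits, optionally shifted left by 12, so a frame
+; this large must be allocated and freed in #4095, lsl #12 sized chunks.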
+define void @test_bigframe() {
+; CHECK-LABEL: test_bigframe:
+; CHECK: .cfi_startproc
+
+ %var1 = alloca i8, i32 20000000
+ %var2 = alloca i8, i32 16
+ %var3 = alloca i8, i32 20000000
+
+; CHECK: sub sp, sp, #4095, lsl #12
+; CHECK: sub sp, sp, #4095, lsl #12
+; CHECK: sub sp, sp, #1575, lsl #12
+; CHECK: sub sp, sp, #2576
+; CHECK: .cfi_def_cfa_offset 40000032
+
+
+; CHECK: add [[TMP:x[0-9]+]], sp, #4095, lsl #12
+; CHECK: add [[TMP1:x[0-9]+]], [[TMP]], #787, lsl #12
+; CHECK: add {{x[0-9]+}}, [[TMP1]], #3344
+ store volatile i8* %var1, i8** @addr
+
+ %var1plus2 = getelementptr i8* %var1, i32 2
+ store volatile i8* %var1plus2, i8** @addr
+
+; CHECK: add [[TMP:x[0-9]+]], sp, #4095, lsl #12
+; CHECK: add [[TMP1:x[0-9]+]], [[TMP]], #787, lsl #12
+; CHECK: add {{x[0-9]+}}, [[TMP1]], #3328
+ store volatile i8* %var2, i8** @addr
+
+ %var2plus2 = getelementptr i8* %var2, i32 2
+ store volatile i8* %var2plus2, i8** @addr
+
+ store volatile i8* %var3, i8** @addr
+
+ %var3plus2 = getelementptr i8* %var3, i32 2
+ store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: add sp, sp, #4095, lsl #12
+; CHECK: add sp, sp, #4095, lsl #12
+; CHECK: add sp, sp, #1575, lsl #12
+; CHECK: add sp, sp, #2576
+; CHECK: .cfi_endproc
+ ret void
+}
+
+define void @test_mediumframe() {
+; CHECK-LABEL: test_mediumframe:
+ %var1 = alloca i8, i32 1000000
+ %var2 = alloca i8, i32 16
+ %var3 = alloca i8, i32 1000000
+; CHECK: sub sp, sp, #488, lsl #12
+; CHECK-NEXT: sub sp, sp, #1168
+
+ store volatile i8* %var1, i8** @addr
+; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #244, lsl #12
+; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #592
+
+; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #244, lsl #12
+; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #576
+
+ store volatile i8* %var2, i8** @addr
+; CHECK: add sp, sp, #488, lsl #12
+; CHECK: add sp, sp, #1168
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-ld1.ll b/test/CodeGen/AArch64/arm64-ld1.ll
new file mode 100644
index 0000000..72d808c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ld1.ll
@@ -0,0 +1,1345 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+
+define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
+; CHECK-LABEL: ld2_8b
+; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
+; and from the function argument, also defined by the ABI (i.e., x0).
+; CHECK: ld2.8b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x2_t %tmp2
+}
+
+define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
+; CHECK-LABEL: ld3_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x3_t %tmp2
+}
+
+define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
+; CHECK-LABEL: ld4_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x4_t %tmp2
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
+%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
+
+define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
+; CHECK-LABEL: ld2_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2.16b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x16x2_t %tmp2
+}
+
+define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
+; CHECK-LABEL: ld3_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x16x3_t %tmp2
+}
+
+define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
+; CHECK-LABEL: ld4_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x16x4_t %tmp2
+}
+
+declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly
+
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+
+define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
+; CHECK-LABEL: ld2_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2.4h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x4x2_t %tmp2
+}
+
+define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
+; CHECK-LABEL: ld3_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x4x3_t %tmp2
+}
+
+define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
+; CHECK-LABEL: ld4_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x4x4_t %tmp2
+}
+
+declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+
+define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
+; CHECK-LABEL: ld2_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2.8h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x8x2_t %tmp2
+}
+
+define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
+; CHECK-LABEL: ld3_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x8x3_t %tmp2
+}
+
+define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
+; CHECK-LABEL: ld4_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x8x4_t %tmp2
+}
+
+declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly
+
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
+; CHECK-LABEL: ld2_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2.2s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x2x2_t %tmp2
+}
+
+define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
+; CHECK-LABEL: ld3_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x2x3_t %tmp2
+}
+
+define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
+; CHECK-LABEL: ld4_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x2x4_t %tmp2
+}
+
+declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly
+
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+
+define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
+; CHECK-LABEL: ld2_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2.4s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x4x2_t %tmp2
+}
+
+define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
+; CHECK-LABEL: ld3_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x4x3_t %tmp2
+}
+
+define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
+; CHECK-LABEL: ld4_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x4x4_t %tmp2
+}
+
+declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly
+
+%struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
+%struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
+%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
+
+define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
+; CHECK-LABEL: ld2_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2.2d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x2x2_t %tmp2
+}
+
+define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
+; CHECK-LABEL: ld3_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x2x3_t %tmp2
+}
+
+define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
+; CHECK-LABEL: ld4_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x2x4_t %tmp2
+}
+
+declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly
+
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+
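+; There is no ld2/ld3/ld4 with a .1d arrangement, so the 1-element cases
+; below are expected to lower to multi-register ld1.1d loads instead.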
+define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
+; CHECK-LABEL: ld2_1di64
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x1x2_t %tmp2
+}
+
+define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
+; CHECK-LABEL: ld3_1di64
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x1x3_t %tmp2
+}
+
+define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
+; CHECK-LABEL: ld4_1di64
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x1x4_t %tmp2
+}
+
+
+declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly
+
+%struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
+%struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
+%struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }
+
+
+define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
+; CHECK-LABEL: ld2_1df64
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
+ ret %struct.__neon_float64x1x2_t %tmp2
+}
+
+define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
+; CHECK-LABEL: ld3_1df64
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
+ ret %struct.__neon_float64x1x3_t %tmp2
+}
+
+define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
+; CHECK-LABEL: ld4_1df64
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
+ ret %struct.__neon_float64x1x4_t %tmp2
+}
+
+declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
+declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
+declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly
+
+
+define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2lane_16b
+; CHECK: ld2.b { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
+ ret %struct.__neon_int8x16x2_t %tmp2
+}
+
+define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3lane_16b
+; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
+ ret %struct.__neon_int8x16x3_t %tmp2
+}
+
+define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4lane_16b
+; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
+ ret %struct.__neon_int8x16x4_t %tmp2
+}
+
+declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
+declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
+declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
+
+define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2lane_8h
+; CHECK: ld2.h { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
+ ret %struct.__neon_int16x8x2_t %tmp2
+}
+
+define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3lane_8h
+; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
+ ret %struct.__neon_int16x8x3_t %tmp2
+}
+
+define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4lane_8h
+; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
+ ret %struct.__neon_int16x8x4_t %tmp2
+}
+
+declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
+
+define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2lane_4s
+; CHECK: ld2.s { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
+ ret %struct.__neon_int32x4x2_t %tmp2
+}
+
+define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3lane_4s
+; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
+ ret %struct.__neon_int32x4x3_t %tmp2
+}
+
+define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4lane_4s
+; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
+ ret %struct.__neon_int32x4x4_t %tmp2
+}
+
+declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
+
+define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2lane_2d
+; CHECK: ld2.d { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
+ ret %struct.__neon_int64x2x2_t %tmp2
+}
+
+define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3lane_2d
+; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
+ ret %struct.__neon_int64x2x3_t %tmp2
+}
+
+define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4lane_2d
+; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
+ ret %struct.__neon_int64x2x4_t %tmp2
+}
+
+declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
+declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
+declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
+
+define <8 x i8> @ld1r_8b(i8* %bar) {
+; CHECK: ld1r_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.8b { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
+ %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
+ %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
+ %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
+ %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
+ %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
+ %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
+ %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
+ ret <8 x i8> %tmp9
+}
+
+define <16 x i8> @ld1r_16b(i8* %bar) {
+; CHECK: ld1r_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.16b { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
+ %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
+ %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
+ %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
+ %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
+ %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
+ %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
+ %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
+ %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
+ %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
+ %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
+ %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
+ %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
+ %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
+ %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
+ %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
+ ret <16 x i8> %tmp17
+}
+
+define <4 x i16> @ld1r_4h(i16* %bar) {
+; CHECK: ld1r_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.4h { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
+ %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
+ %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
+ %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
+ ret <4 x i16> %tmp5
+}
+
+define <8 x i16> @ld1r_8h(i16* %bar) {
+; CHECK: ld1r_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.8h { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
+ %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
+ %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
+ %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
+ %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
+ %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
+ %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
+ %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
+ ret <8 x i16> %tmp9
+}
+
+define <2 x i32> @ld1r_2s(i32* %bar) {
+; CHECK: ld1r_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.2s { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
+ %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @ld1r_4s(i32* %bar) {
+; CHECK: ld1r_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.4s { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
+ %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
+ %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @ld1r_2d(i64* %bar) {
+; CHECK: ld1r_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.2d { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i64* %bar
+ %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
+ %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
+ ret <2 x i64> %tmp3
+}
+
+define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
+; CHECK: ld2r_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.8b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x2_t %tmp2
+}
+
+define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
+; CHECK: ld3r_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x3_t %tmp2
+}
+
+define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
+; CHECK: ld4r_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x8x4_t %tmp2
+}
+
+declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
+
+define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
+; CHECK: ld2r_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.16b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x16x2_t %tmp2
+}
+
+define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
+; CHECK: ld3r_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x16x3_t %tmp2
+}
+
+define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
+; CHECK: ld4r_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
+ ret %struct.__neon_int8x16x4_t %tmp2
+}
+
+declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
+
+define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
+; CHECK: ld2r_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.4h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x4x2_t %tmp2
+}
+
+define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
+; CHECK: ld3r_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x4x3_t %tmp2
+}
+
+define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
+; CHECK: ld4r_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x4x4_t %tmp2
+}
+
+declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
+
+define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
+; CHECK: ld2r_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.8h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x8x2_t %tmp2
+}
+
+define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
+; CHECK: ld3r_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x8x3_t %tmp2
+}
+
+define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
+; CHECK: ld4r_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
+ ret %struct.__neon_int16x8x4_t %tmp2
+}
+
+declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
+
+define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
+; CHECK: ld2r_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.2s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x2x2_t %tmp2
+}
+
+define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
+; CHECK: ld3r_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x2x3_t %tmp2
+}
+
+define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
+; CHECK: ld4r_2s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x2x4_t %tmp2
+}
+
+declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
+
+define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
+; CHECK: ld2r_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.4s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x4x2_t %tmp2
+}
+
+define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
+; CHECK: ld3r_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x4x3_t %tmp2
+}
+
+define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
+; CHECK: ld4r_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
+ ret %struct.__neon_int32x4x4_t %tmp2
+}
+
+declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
+
+define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
+; CHECK: ld2r_1d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.1d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x1x2_t %tmp2
+}
+
+define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
+; CHECK: ld3r_1d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x1x3_t %tmp2
+}
+
+define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
+; CHECK: ld4r_1d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x1x4_t %tmp2
+}
+
+declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
+
+define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
+; CHECK: ld2r_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld2r.2d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x2x2_t %tmp2
+}
+
+define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
+; CHECK: ld3r_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x2x3_t %tmp2
+}
+
+define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
+; CHECK: ld4r_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
+ %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
+ ret %struct.__neon_int64x2x4_t %tmp2
+}
+
+declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
+
+define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
+; CHECK-LABEL: ld1_16b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.b { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
+; CHECK-LABEL: ld1_8h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.h { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
+; CHECK-LABEL: ld1_4s
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
+; CHECK-LABEL: ld1_4s_float:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
+ ret <4 x float> %tmp2
+}
+
+define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
+; CHECK-LABEL: ld1_2d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.d { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i64* %bar
+ %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
+ ret <2 x i64> %tmp2
+}
+
+define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
+; CHECK-LABEL: ld1_2d_double:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.d { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load double* %bar
+ %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
+ ret <2 x double> %tmp2
+}
+
+define <1 x i64> @ld1_1d(<1 x i64>* %p) {
+; CHECK-LABEL: ld1_1d
+; Make sure we are using the operands defined by the ABI
+; CHECK: ldr [[REG:d[0-9]+]], [x0]
+; CHECK-NEXT: ret
+ %tmp = load <1 x i64>* %p, align 8
+ ret <1 x i64> %tmp
+}
+
+define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
+; CHECK-LABEL: ld1_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.b { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
+; CHECK-LABEL: ld1_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.h { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
+; CHECK-LABEL: ld1_2s:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
+; CHECK-LABEL: ld1_2s_float:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT: ret
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
+ ret <2 x float> %tmp2
+}
+
+
+; Test case for rdar://13098923: vld1_dup_u32 doesn't generate ld1r.2s
+define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
+entry:
+; CHECK: ld1r_2s_from_dup
+; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
+; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
+; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
+; CHECK-NEXT: str d[[RESREGNUM]], [x2]
+; CHECK-NEXT: ret
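+; The two broadcast loads become ld1r.2s; the zext/sub pair below is expected
+; to fold into the single widening usubl.8h checked above.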
+ %tmp = bitcast i8* %a to i32*
+ %tmp1 = load i32* %tmp, align 4
+ %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
+ %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
+ %tmp4 = bitcast i8* %b to i32*
+ %tmp5 = load i32* %tmp4, align 4
+ %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
+ %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
+ %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
+ %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
+ %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
+ %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
+ %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
+ %tmp10 = bitcast i16* %diff to <4 x i16>*
+ store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
+ ret void
+}
+
+; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
+define <4 x float> @ld1r_4s_float(float* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_4s_float
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.4s { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp = load float* %x, align 4
+ %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
+ %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
+ %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
+ %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
+ ret <4 x float> %tmp4
+}
+
+define <2 x float> @ld1r_2s_float(float* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_2s_float
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.2s { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp = load float* %x, align 4
+ %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
+ %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
+ ret <2 x float> %tmp2
+}
+
+define <2 x double> @ld1r_2d_double(double* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_2d_double
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.2d { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp = load double* %x, align 4
+ %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
+ %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
+ ret <2 x double> %tmp2
+}
+
+define <1 x double> @ld1r_1d_double(double* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_1d_double
+; Make sure we are using the operands defined by the ABI
+; CHECK: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %tmp = load double* %x, align 4
+ %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
+ ret <1 x double> %tmp1
+}
+
+define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_4s_float_shuff
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.4s { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp = load float* %x, align 4
+ %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
+ %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %lane
+}
+
+define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_2s_float_shuff
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.2s { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp = load float* %x, align 4
+ %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
+ %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %lane
+}
+
+define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_2d_double_shuff
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1r.2d { v0 }, [x0]
+; CHECK-NEXT: ret
+ %tmp = load double* %x, align 4
+ %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
+ %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %lane
+}
+
+define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
+entry:
+; CHECK-LABEL: ld1r_1d_double_shuff
+; Make sure we are using the operands defined by the ABI
+; CHECK: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %tmp = load double* %x, align 4
+ %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
+ %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
+ ret <1 x double> %lane
+}
+
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly
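+; Unlike ld2/ld3/ld4, the ld1xN intrinsics load consecutive, non-interleaved
+; vectors, so they should select multi-register ld1 instructions.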
+
+define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
+; CHECK-LABEL: ld1_x2_v8i8:
+; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr)
+ ret %struct.__neon_int8x8x2_t %val
+}
+
+define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
+; CHECK-LABEL: ld1_x2_v4i16:
+; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr)
+ ret %struct.__neon_int16x4x2_t %val
+}
+
+define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
+; CHECK-LABEL: ld1_x2_v2i32:
+; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr)
+ ret %struct.__neon_int32x2x2_t %val
+}
+
+define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
+; CHECK-LABEL: ld1_x2_v2f32:
+; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr)
+ ret %struct.__neon_float32x2x2_t %val
+}
+
+define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
+; CHECK-LABEL: ld1_x2_v1i64:
+; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr)
+ ret %struct.__neon_int64x1x2_t %val
+}
+
+define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
+; CHECK-LABEL: ld1_x2_v1f64:
+; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr)
+ ret %struct.__neon_float64x1x2_t %val
+}
+
+
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+%struct.__neon_float64x2x2_t = type { <2 x double>, <2 x double> }
+%struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> }
+%struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> }
+
+declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
+declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly
+
+define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
+; CHECK-LABEL: ld1_x2_v16i8:
+; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr)
+ ret %struct.__neon_int8x16x2_t %val
+}
+
+define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
+; CHECK-LABEL: ld1_x2_v8i16:
+; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr)
+ ret %struct.__neon_int16x8x2_t %val
+}
+
+define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
+; CHECK-LABEL: ld1_x2_v4i32:
+; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr)
+ ret %struct.__neon_int32x4x2_t %val
+}
+
+define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
+; CHECK-LABEL: ld1_x2_v4f32:
+; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr)
+ ret %struct.__neon_float32x4x2_t %val
+}
+
+define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
+; CHECK-LABEL: ld1_x2_v2i64:
+; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr)
+ ret %struct.__neon_int64x2x2_t %val
+}
+
+define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
+; CHECK-LABEL: ld1_x2_v2f64:
+; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr)
+ ret %struct.__neon_float64x2x2_t %val
+}
+
+declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly
+
+define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
+; CHECK-LABEL: ld1_x3_v8i8:
+; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr)
+ ret %struct.__neon_int8x8x3_t %val
+}
+
+define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
+; CHECK-LABEL: ld1_x3_v4i16:
+; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr)
+ ret %struct.__neon_int16x4x3_t %val
+}
+
+define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
+; CHECK-LABEL: ld1_x3_v2i32:
+; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr)
+ ret %struct.__neon_int32x2x3_t %val
+}
+
+define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
+; CHECK-LABEL: ld1_x3_v2f32:
+; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr)
+ ret %struct.__neon_float32x2x3_t %val
+}
+
+define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
+; CHECK-LABEL: ld1_x3_v1i64:
+; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr)
+ ret %struct.__neon_int64x1x3_t %val
+}
+
+define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
+; CHECK-LABEL: ld1_x3_v1f64:
+; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr)
+ ret %struct.__neon_float64x1x3_t %val
+}
+
+declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
+declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly
+
+define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
+; CHECK-LABEL: ld1_x3_v16i8:
+; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr)
+ ret %struct.__neon_int8x16x3_t %val
+}
+
+define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
+; CHECK-LABEL: ld1_x3_v8i16:
+; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr)
+ ret %struct.__neon_int16x8x3_t %val
+}
+
+define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
+; CHECK-LABEL: ld1_x3_v4i32:
+; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr)
+ ret %struct.__neon_int32x4x3_t %val
+}
+
+define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
+; CHECK-LABEL: ld1_x3_v4f32:
+; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr)
+ ret %struct.__neon_float32x4x3_t %val
+}
+
+define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
+; CHECK-LABEL: ld1_x3_v2i64:
+; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr)
+ ret %struct.__neon_int64x2x3_t %val
+}
+
+define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
+; CHECK-LABEL: ld1_x3_v2f64:
+; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr)
+ ret %struct.__neon_float64x2x3_t %val
+}
+
+declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly
+
+define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
+; CHECK-LABEL: ld1_x4_v8i8:
+; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr)
+ ret %struct.__neon_int8x8x4_t %val
+}
+
+define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
+; CHECK-LABEL: ld1_x4_v4i16:
+; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr)
+ ret %struct.__neon_int16x4x4_t %val
+}
+
+define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
+; CHECK-LABEL: ld1_x4_v2i32:
+; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr)
+ ret %struct.__neon_int32x2x4_t %val
+}
+
+define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
+; CHECK-LABEL: ld1_x4_v2f32:
+; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr)
+ ret %struct.__neon_float32x2x4_t %val
+}
+
+define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
+; CHECK-LABEL: ld1_x4_v1i64:
+; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr)
+ ret %struct.__neon_int64x1x4_t %val
+}
+
+define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
+; CHECK-LABEL: ld1_x4_v1f64:
+; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr)
+ ret %struct.__neon_float64x1x4_t %val
+}
+
+declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
+declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
+declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly
+
+define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
+; CHECK-LABEL: ld1_x4_v16i8:
+; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr)
+ ret %struct.__neon_int8x16x4_t %val
+}
+
+define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
+; CHECK-LABEL: ld1_x4_v8i16:
+; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr)
+ ret %struct.__neon_int16x8x4_t %val
+}
+
+define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
+; CHECK-LABEL: ld1_x4_v4i32:
+; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr)
+ ret %struct.__neon_int32x4x4_t %val
+}
+
+define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
+; CHECK-LABEL: ld1_x4_v4f32:
+; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr)
+ ret %struct.__neon_float32x4x4_t %val
+}
+
+define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
+; CHECK-LABEL: ld1_x4_v2i64:
+; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr)
+ ret %struct.__neon_int64x2x4_t %val
+}
+
+define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
+; CHECK-LABEL: ld1_x4_v2f64:
+; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr)
+ ret %struct.__neon_float64x2x4_t %val
+}
diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll
new file mode 100644
index 0000000..5a98626
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ldp.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
+; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
+
+; CHECK: ldp_int
+; CHECK: ldp
+define i32 @ldp_int(i32* %p) nounwind {
+ %tmp = load i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32* %p, i64 1
+ %tmp1 = load i32* %add.ptr, align 4
+ %add = add nsw i32 %tmp1, %tmp
+ ret i32 %add
+}
+
+; CHECK: ldp_long
+; CHECK: ldp
+define i64 @ldp_long(i64* %p) nounwind {
+ %tmp = load i64* %p, align 8
+ %add.ptr = getelementptr inbounds i64* %p, i64 1
+ %tmp1 = load i64* %add.ptr, align 8
+ %add = add nsw i64 %tmp1, %tmp
+ ret i64 %add
+}
+
+; CHECK: ldp_float
+; CHECK: ldp
+define float @ldp_float(float* %p) nounwind {
+ %tmp = load float* %p, align 4
+ %add.ptr = getelementptr inbounds float* %p, i64 1
+ %tmp1 = load float* %add.ptr, align 4
+ %add = fadd float %tmp, %tmp1
+ ret float %add
+}
+
+; CHECK: ldp_double
+; CHECK: ldp
+define double @ldp_double(double* %p) nounwind {
+ %tmp = load double* %p, align 8
+ %add.ptr = getelementptr inbounds double* %p, i64 1
+ %tmp1 = load double* %add.ptr, align 8
+ %add = fadd double %tmp, %tmp1
+ ret double %add
+}
+
+; Test the load/store optimizer: combine ldurs into an ldp, if appropriate.
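+; (Background note: ldur is the unscaled-immediate load form with a signed
+; 9-bit byte offset in [-256, 255]; the negative offsets below are not
+; representable in ldr's scaled unsigned immediate, so ldur is what the
+; selector emits before pairing.)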
+define i32 @ldur_int(i32* %a) nounwind {
+; LDUR_CHK: ldur_int
+; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
+; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32* %a, i32 -1
+ %tmp1 = load i32* %p1, align 2
+ %p2 = getelementptr inbounds i32* %a, i32 -2
+ %tmp2 = load i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i64 @ldur_long(i64* %a) nounwind ssp {
+; LDUR_CHK: ldur_long
+; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i64* %a, i64 -1
+ %tmp1 = load i64* %p1, align 2
+ %p2 = getelementptr inbounds i64* %a, i64 -2
+ %tmp2 = load i64* %p2, align 2
+ %tmp3 = add i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
+
+define float @ldur_float(float* %a) {
+; LDUR_CHK: ldur_float
+; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
+; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds float* %a, i64 -1
+ %tmp1 = load float* %p1, align 2
+ %p2 = getelementptr inbounds float* %a, i64 -2
+ %tmp2 = load float* %p2, align 2
+ %tmp3 = fadd float %tmp1, %tmp2
+ ret float %tmp3
+}
+
+define double @ldur_double(double* %a) {
+; LDUR_CHK: ldur_double
+; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
+; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds double* %a, i64 -1
+ %tmp1 = load double* %p1, align 2
+ %p2 = getelementptr inbounds double* %a, i64 -2
+ %tmp2 = load double* %p2, align 2
+ %tmp3 = fadd double %tmp1, %tmp2
+ ret double %tmp3
+}
+
+; Now check some boundary conditions
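+; The interesting edge is byte offset #-256, the bottom of ldur's signed
+; 9-bit immediate range: the pair at offsets {-256, -248} still forms an ldp,
+; while the pair at {-264, -256} does not.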
+define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyIn
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i64* %a, i64 -31
+ %tmp1 = load i64* %p1, align 2
+ %p2 = getelementptr inbounds i64* %a, i64 -32
+ %tmp2 = load i64* %p2, align 2
+ %tmp3 = add i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
+
+define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyOut
+; LDUR_CHK-NOT: ldp
+; Don't be fragile about which loads or manipulations of the base register
+; are used---just check that there isn't an ldp before the add
+; LDUR_CHK: add
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i64* %a, i64 -32
+ %tmp1 = load i64* %p1, align 2
+ %p2 = getelementptr inbounds i64* %a, i64 -33
+ %tmp2 = load i64* %p2, align 2
+ %tmp3 = add i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
+
+define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
+; LDUR_CHK: pairUpNotAligned
+; LDUR_CHK-NOT: ldp
+; LDUR_CHK: ldur
+; LDUR_CHK-NEXT: ldur
+; LDUR_CHK-NEXT: add
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i64* %a, i64 -18
+ %bp1 = bitcast i64* %p1 to i8*
+ %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
+ %dp1 = bitcast i8* %bp1p1 to i64*
+ %tmp1 = load i64* %dp1, align 1
+
+ %p2 = getelementptr inbounds i64* %a, i64 -17
+ %bp2 = bitcast i64* %p2 to i8*
+ %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
+ %dp2 = bitcast i8* %bp2p1 to i64*
+ %tmp2 = load i64* %dp2, align 1
+
+ %tmp3 = add i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
diff --git a/test/CodeGen/ARM64/ldur.ll b/test/CodeGen/AArch64/arm64-ldur.ll
index 2848c06..2848c06 100644
--- a/test/CodeGen/ARM64/ldur.ll
+++ b/test/CodeGen/AArch64/arm64-ldur.ll
diff --git a/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
new file mode 100644
index 0000000..9093df2
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -0,0 +1,270 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+
+%0 = type { i64, i64 }
+
+define i128 @f0(i8* %p) nounwind readonly {
+; CHECK-LABEL: f0:
+; CHECK: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+entry:
+ %ldrexd = tail call %0 @llvm.aarch64.ldxp(i8* %p)
+ %0 = extractvalue %0 %ldrexd, 1
+ %1 = extractvalue %0 %ldrexd, 0
+ %2 = zext i64 %0 to i128
+ %3 = zext i64 %1 to i128
+ %shl = shl nuw i128 %2, 64
+ %4 = or i128 %shl, %3
+ ret i128 %4
+}
+
+define i32 @f1(i8* %ptr, i128 %val) nounwind {
+; CHECK-LABEL: f1:
+; CHECK: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+entry:
+ %tmp4 = trunc i128 %val to i64
+ %tmp6 = lshr i128 %val, 64
+ %tmp7 = trunc i128 %tmp6 to i64
+ %strexd = tail call i32 @llvm.aarch64.stxp(i64 %tmp4, i64 %tmp7, i8* %ptr)
+ ret i32 %strexd
+}
+
+declare %0 @llvm.aarch64.ldxp(i8*) nounwind
+declare i32 @llvm.aarch64.stxp(i64, i64, i8*) nounwind
+
+@var = global i64 0, align 8
+
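+; ldxrb/ldxrh/ldxr already zero-extend into the full 64-bit result register,
+; so the trunc/zext round-trips below should fold away, as the CHECK-NOTs on
+; uxt*/and verify.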
+define void @test_load_i8(i8* %addr) {
+; CHECK-LABEL: test_load_i8:
+; CHECK: ldxrb w[[LOADVAL:[0-9]+]], [x0]
+; CHECK-NOT: uxtb
+; CHECK-NOT: and
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldxr.p0i8(i8* %addr)
+ %shortval = trunc i64 %val to i8
+ %extval = zext i8 %shortval to i64
+ store i64 %extval, i64* @var, align 8
+ ret void
+}
+
+define void @test_load_i16(i16* %addr) {
+; CHECK-LABEL: test_load_i16:
+; CHECK: ldxrh w[[LOADVAL:[0-9]+]], [x0]
+; CHECK-NOT: uxth
+; CHECK-NOT: and
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldxr.p0i16(i16* %addr)
+ %shortval = trunc i64 %val to i16
+ %extval = zext i16 %shortval to i64
+ store i64 %extval, i64* @var, align 8
+ ret void
+}
+
+define void @test_load_i32(i32* %addr) {
+; CHECK-LABEL: test_load_i32:
+; CHECK: ldxr w[[LOADVAL:[0-9]+]], [x0]
+; CHECK-NOT: uxtw
+; CHECK-NOT: and
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr)
+ %shortval = trunc i64 %val to i32
+ %extval = zext i32 %shortval to i64
+ store i64 %extval, i64* @var, align 8
+ ret void
+}
+
+define void @test_load_i64(i64* %addr) {
+; CHECK-LABEL: test_load_i64:
+; CHECK: ldxr x[[LOADVAL:[0-9]+]], [x0]
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldxr.p0i64(i64* %addr)
+ store i64 %val, i64* @var, align 8
+ ret void
+}
+
+
+declare i64 @llvm.aarch64.ldxr.p0i8(i8*) nounwind
+declare i64 @llvm.aarch64.ldxr.p0i16(i16*) nounwind
+declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind
+declare i64 @llvm.aarch64.ldxr.p0i64(i64*) nounwind
+
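+; Likewise, stxr* only reads the low bits of the value register, so the zext
+; feeding each call should not materialize as a uxt*/and.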
+define i32 @test_store_i8(i32, i8 %val, i8* %addr) {
+; CHECK-LABEL: test_store_i8:
+; CHECK-NOT: uxtb
+; CHECK-NOT: and
+; CHECK: stxrb w0, w1, [x2]
+ %extval = zext i8 %val to i64
+ %res = call i32 @llvm.aarch64.stxr.p0i8(i64 %extval, i8* %addr)
+ ret i32 %res
+}
+
+define i32 @test_store_i16(i32, i16 %val, i16* %addr) {
+; CHECK-LABEL: test_store_i16:
+; CHECK-NOT: uxth
+; CHECK-NOT: and
+; CHECK: stxrh w0, w1, [x2]
+ %extval = zext i16 %val to i64
+ %res = call i32 @llvm.aarch64.stxr.p0i16(i64 %extval, i16* %addr)
+ ret i32 %res
+}
+
+define i32 @test_store_i32(i32, i32 %val, i32* %addr) {
+; CHECK-LABEL: test_store_i32:
+; CHECK-NOT: uxtw
+; CHECK-NOT: and
+; CHECK: stxr w0, w1, [x2]
+ %extval = zext i32 %val to i64
+ %res = call i32 @llvm.aarch64.stxr.p0i32(i64 %extval, i32* %addr)
+ ret i32 %res
+}
+
+define i32 @test_store_i64(i32, i64 %val, i64* %addr) {
+; CHECK-LABEL: test_store_i64:
+; CHECK: stxr w0, x1, [x2]
+ %res = call i32 @llvm.aarch64.stxr.p0i64(i64 %val, i64* %addr)
+ ret i32 %res
+}
+
+declare i32 @llvm.aarch64.stxr.p0i8(i64, i8*) nounwind
+declare i32 @llvm.aarch64.stxr.p0i16(i64, i16*) nounwind
+declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*) nounwind
+declare i32 @llvm.aarch64.stxr.p0i64(i64, i64*) nounwind
+
+; CHECK-LABEL: test_clear:
+; CHECK: clrex
+define void @test_clear() {
+ call void @llvm.aarch64.clrex()
+ ret void
+}
+
+declare void @llvm.aarch64.clrex() nounwind
+
+define i128 @test_load_acquire_i128(i8* %p) nounwind readonly {
+; CHECK-LABEL: test_load_acquire_i128:
+; CHECK: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+entry:
+ %ldrexd = tail call %0 @llvm.aarch64.ldaxp(i8* %p)
+ %0 = extractvalue %0 %ldrexd, 1
+ %1 = extractvalue %0 %ldrexd, 0
+ %2 = zext i64 %0 to i128
+ %3 = zext i64 %1 to i128
+ %shl = shl nuw i128 %2, 64
+ %4 = or i128 %shl, %3
+ ret i128 %4
+}
+
+define i32 @test_store_release_i128(i8* %ptr, i128 %val) nounwind {
+; CHECK-LABEL: test_store_release_i128:
+; CHECK: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+entry:
+ %tmp4 = trunc i128 %val to i64
+ %tmp6 = lshr i128 %val, 64
+ %tmp7 = trunc i128 %tmp6 to i64
+ %strexd = tail call i32 @llvm.aarch64.stlxp(i64 %tmp4, i64 %tmp7, i8* %ptr)
+ ret i32 %strexd
+}
+
+declare %0 @llvm.aarch64.ldaxp(i8*) nounwind
+declare i32 @llvm.aarch64.stlxp(i64, i64, i8*) nounwind
+
+define void @test_load_acquire_i8(i8* %addr) {
+; CHECK-LABEL: test_load_acquire_i8:
+; CHECK: ldaxrb w[[LOADVAL:[0-9]+]], [x0]
+; CHECK-NOT: uxtb
+; CHECK-NOT: and
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldaxr.p0i8(i8* %addr)
+ %shortval = trunc i64 %val to i8
+ %extval = zext i8 %shortval to i64
+ store i64 %extval, i64* @var, align 8
+ ret void
+}
+
+define void @test_load_acquire_i16(i16* %addr) {
+; CHECK-LABEL: test_load_acquire_i16:
+; CHECK: ldaxrh w[[LOADVAL:[0-9]+]], [x0]
+; CHECK-NOT: uxth
+; CHECK-NOT: and
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldaxr.p0i16(i16* %addr)
+ %shortval = trunc i64 %val to i16
+ %extval = zext i16 %shortval to i64
+ store i64 %extval, i64* @var, align 8
+ ret void
+}
+
+define void @test_load_acquire_i32(i32* %addr) {
+; CHECK-LABEL: test_load_acquire_i32:
+; CHECK: ldaxr w[[LOADVAL:[0-9]+]], [x0]
+; CHECK-NOT: uxtw
+; CHECK-NOT: and
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldaxr.p0i32(i32* %addr)
+ %shortval = trunc i64 %val to i32
+ %extval = zext i32 %shortval to i64
+ store i64 %extval, i64* @var, align 8
+ ret void
+}
+
+define void @test_load_acquire_i64(i64* %addr) {
+; CHECK-LABEL: test_load_acquire_i64:
+; CHECK: ldaxr x[[LOADVAL:[0-9]+]], [x0]
+; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
+
+ %val = call i64 @llvm.aarch64.ldaxr.p0i64(i64* %addr)
+ store i64 %val, i64* @var, align 8
+ ret void
+}
+
+
+declare i64 @llvm.aarch64.ldaxr.p0i8(i8*) nounwind
+declare i64 @llvm.aarch64.ldaxr.p0i16(i16*) nounwind
+declare i64 @llvm.aarch64.ldaxr.p0i32(i32*) nounwind
+declare i64 @llvm.aarch64.ldaxr.p0i64(i64*) nounwind
+
+define i32 @test_store_release_i8(i32, i8 %val, i8* %addr) {
+; CHECK-LABEL: test_store_release_i8:
+; CHECK-NOT: uxtb
+; CHECK-NOT: and
+; CHECK: stlxrb w0, w1, [x2]
+ %extval = zext i8 %val to i64
+ %res = call i32 @llvm.aarch64.stlxr.p0i8(i64 %extval, i8* %addr)
+ ret i32 %res
+}
+
+define i32 @test_store_release_i16(i32, i16 %val, i16* %addr) {
+; CHECK-LABEL: test_store_release_i16:
+; CHECK-NOT: uxth
+; CHECK-NOT: and
+; CHECK: stlxrh w0, w1, [x2]
+ %extval = zext i16 %val to i64
+ %res = call i32 @llvm.aarch64.stlxr.p0i16(i64 %extval, i16* %addr)
+ ret i32 %res
+}
+
+define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) {
+; CHECK-LABEL: test_store_release_i32:
+; CHECK-NOT: uxtw
+; CHECK-NOT: and
+; CHECK: stlxr w0, w1, [x2]
+ %extval = zext i32 %val to i64
+ %res = call i32 @llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr)
+ ret i32 %res
+}
+
+define i32 @test_store_release_i64(i32, i64 %val, i64* %addr) {
+; CHECK-LABEL: test_store_release_i64:
+; CHECK: stlxr w0, x1, [x2]
+ %res = call i32 @llvm.aarch64.stlxr.p0i64(i64 %val, i64* %addr)
+ ret i32 %res
+}
+
+declare i32 @llvm.aarch64.stlxr.p0i8(i64, i8*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i16(i64, i16*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i32(i64, i32*) nounwind
+declare i32 @llvm.aarch64.stlxr.p0i64(i64, i64*) nounwind
diff --git a/test/CodeGen/ARM64/leaf.ll b/test/CodeGen/AArch64/arm64-leaf.ll
index d3b2031..d3b2031 100644
--- a/test/CodeGen/ARM64/leaf.ll
+++ b/test/CodeGen/AArch64/arm64-leaf.ll
diff --git a/test/CodeGen/AArch64/arm64-long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll
new file mode 100644
index 0000000..d5baf16
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-long-shift.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+
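+; i128 shifts are expanded in terms of 64-bit operations: for shl, the high
+; half is (hi << s) | (lo >> (64 - s)) when s < 64, and lo << (s - 64)
+; otherwise; the cmp/csel pairs below select between the two cases without
+; branching. lshr and ashr mirror the same structure for the low half.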
+define i128 @shl(i128 %r, i128 %s) nounwind readnone {
+; CHECK-LABEL: shl:
+; CHECK: lsl [[XREG_0:x[0-9]+]], x1, x2
+; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
+; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
+; CHECK-NEXT: lsr [[XREG_3:x[0-9]+]], x0, [[XREG_2]]
+; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]]
+; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64
+; CHECK-NEXT: lsl [[XREG_5:x[0-9]+]], x0, [[XREG_4]]
+; CHECK-NEXT: cmp [[XREG_4]], #0
+; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge
+; CHECK-NEXT: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
+; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge
+; CHECK-NEXT: ret
+
+ %shl = shl i128 %r, %s
+ ret i128 %shl
+}
+
+define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
+; CHECK-LABEL: ashr:
+; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
+; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
+; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
+; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
+; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
+; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
+; CHECK-NEXT: asr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
+; CHECK-NEXT: cmp [[XREG_5]], #0
+; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
+; CHECK-NEXT: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
+; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
+; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
+; CHECK-NEXT: ret
+
+ %shr = ashr i128 %r, %s
+ ret i128 %shr
+}
+
+define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
+; CHECK-LABEL: lshr:
+; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
+; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
+; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
+; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
+; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
+; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
+; CHECK-NEXT: lsr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
+; CHECK-NEXT: cmp [[XREG_5]], #0
+; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
+; CHECK-NEXT: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
+; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge
+; CHECK-NEXT: ret
+
+ %shr = lshr i128 %r, %s
+ ret i128 %shr
+}
diff --git a/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
new file mode 100644
index 0000000..f921a59
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+
+%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+
+@src = external global %struct.x
+@dst = external global %struct.x
+
+@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 1
+@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM, SOME STRING BLAH\00", align 1
+@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM, SOME\00", align 1
+@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR \00", align 1
+@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
+@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1
+@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
+
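+; A memcpy of small, known length is expanded inline as the widest covering
+; loads and stores; e.g. the 11-byte copy in t0 below becomes 8-, 2-, and
+; 1-byte accesses.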
+define i32 @t0() {
+entry:
+; CHECK-LABEL: t0:
+; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10]
+; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10]
+; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8]
+; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
+; CHECK: ldr [[REG2:x[0-9]+]],
+; CHECK: str [[REG2]],
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+ ret i32 0
+}
+
+define void @t1(i8* nocapture %C) nounwind {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15]
+; CHECK: stur [[DEST]], [x0, #15]
+; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
+; CHECK: str [[DEST]], [x0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+ ret void
+}
+
+define void @t2(i8* nocapture %C) nounwind {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: movz [[REG3:w[0-9]+]]
+; CHECK: movk [[REG3]],
+; CHECK: str [[REG3]], [x0, #32]
+; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
+; CHECK: stp [[DEST1]], [[DEST2]], [x0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+ ret void
+}
+
+define void @t3(i8* nocapture %C) nounwind {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16]
+; CHECK: str [[REG4]], [x0, #16]
+; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
+; CHECK: str [[DEST]], [x0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+ ret void
+}
+
+define void @t4(i8* nocapture %C) nounwind {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x20
+; CHECK: strh [[REG5]], [x0, #16]
+; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
+; CHECK: str [[REG6]], [x0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+ ret void
+}
+
+define void @t5(i8* nocapture %C) nounwind {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: strb wzr, [x0, #6]
+; CHECK: movz [[REG7:w[0-9]+]], #0x5453
+; CHECK: strh [[REG7]], [x0, #4]
+; CHECK: movz [[REG8:w[0-9]+]],
+; CHECK: movk [[REG8]],
+; CHECK: str [[REG8]], [x0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+ ret void
+}
+
+define void @t6() nounwind {
+entry:
+; CHECK-LABEL: t6:
+; CHECK: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
+; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
+; CHECK: ldr
+; CHECK: str
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+ ret void
+}
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+; CHECK-LABEL: t7:
+; CHECK: ldr [[REG10:q[0-9]+]], [x1]
+; CHECK: str [[REG10]], [x0]
+ %0 = bitcast %struct.Foo* %a to i8*
+ %1 = bitcast %struct.Foo* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM64/memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll
index 2e237f4..2e237f4 100644
--- a/test/CodeGen/ARM64/memset-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memset-inline.ll
diff --git a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
new file mode 100644
index 0000000..29036ca
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
@@ -0,0 +1,108 @@
+; RUN: llc %s -mtriple=arm64-apple-darwin -o - | \
+; RUN: FileCheck --check-prefix=CHECK-DARWIN --check-prefix=CHECK %s
+; RUN: llc %s -mtriple=arm64-linux-gnu -o - | \
+; RUN: FileCheck --check-prefix=CHECK-LINUX --check-prefix=CHECK %s
+; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()
+
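+; On Darwin, zero memsets longer than 256 bytes become bzero calls; the
+; arm64-linux-gnu run lines keep memset throughout.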
+; CHECK: @fct1
+; For small sizes (<= 256 bytes), we do not change memset to bzero.
+; CHECK: memset
+define void @fct1(i8* nocapture %ptr) {
+entry:
+ tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+; CHECK: @fct2
+; When the size is bigger than 256, memset is changed into bzero on Darwin;
+; Linux keeps memset.
+; CHECK-DARWIN: bzero
+; CHECK-LINUX: memset
+define void @fct2(i8* nocapture %ptr) {
+entry:
+ tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false)
+ ret void
+}
+
+; CHECK: @fct3
+; For an unknown size, change to bzero (Darwin only).
+; CHECK-DARWIN: bzero
+; CHECK-LINUX: memset
+define void @fct3(i8* nocapture %ptr, i32 %unknown) {
+entry:
+ %conv = sext i32 %unknown to i64
+ tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false)
+ ret void
+}
+
+; CHECK: @fct4
+; Size <= 256, no change.
+; CHECK: memset
+define void @fct4(i8* %ptr) {
+entry:
+ %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
+
+declare i64 @llvm.objectsize.i64(i8*, i1)
+
+; CHECK: @fct5
+; Size > 256: changed to bzero on Darwin.
+; CHECK-DARWIN: bzero
+; CHECK-LINUX: memset
+define void @fct5(i8* %ptr) {
+entry:
+ %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
+ ret void
+}
+
+; CHECK: @fct6
+; Size unknown: changed to bzero on Darwin.
+; CHECK-DARWIN: bzero
+; CHECK-LINUX: memset
+define void @fct6(i8* %ptr, i32 %unknown) {
+entry:
+ %conv = sext i32 %unknown to i64
+ %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
+ ret void
+}
+
+; The next functions check that memset is not turned into bzero
+; when the stored constant is non-zero, whatever the given size.
+
+; CHECK: @fct7
+; memset with a non-zero constant, no change.
+; CHECK: memset
+define void @fct7(i8* %ptr) {
+entry:
+ %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
+ ret void
+}
+
+; CHECK: @fct8
+; memset with a non-zero constant, no change.
+; CHECK: memset
+define void @fct8(i8* %ptr) {
+entry:
+ %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
+ ret void
+}
+
+; CHECK: @fct9
+; memset with a non-zero constant, no change.
+; CHECK: memset
+define void @fct9(i8* %ptr, i32 %unknown) {
+entry:
+ %conv = sext i32 %unknown to i64
+ %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
new file mode 100644
index 0000000..f88bd6a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -0,0 +1,124 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
+; much higher than the ADD instructions in order to hide latency. When not
+; specifying a subtarget, the MADD will remain near the end of the block.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: main
+; CHECK: *** Final schedule for BB#2 ***
+; CHECK: MADDWrrr
+; CHECK: ADDWri
+; CHECK: ********** INTERVALS **********
+@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
+@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %x = alloca [8 x i32], align 4
+ %y = alloca [8 x i32], align 4
+ %i = alloca i32, align 4
+ %xx = alloca i32, align 4
+ %yy = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = bitcast [8 x i32]* %x to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
+ %1 = bitcast [8 x i32]* %y to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
+ store i32 0, i32* %xx, align 4
+ store i32 0, i32* %yy, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %2 = load i32* %i, align 4
+ %cmp = icmp slt i32 %2, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %3 = load i32* %i, align 4
+ %idxprom = sext i32 %3 to i64
+ %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
+ %4 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %4, 1
+ store i32 %add, i32* %xx, align 4
+ %5 = load i32* %xx, align 4
+ %add1 = add nsw i32 %5, 12
+ store i32 %add1, i32* %xx, align 4
+ %6 = load i32* %xx, align 4
+ %add2 = add nsw i32 %6, 23
+ store i32 %add2, i32* %xx, align 4
+ %7 = load i32* %xx, align 4
+ %add3 = add nsw i32 %7, 34
+ store i32 %add3, i32* %xx, align 4
+ %8 = load i32* %i, align 4
+ %idxprom4 = sext i32 %8 to i64
+ %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %10 = load i32* %yy, align 4
+ %mul = mul nsw i32 %10, %9
+ store i32 %mul, i32* %yy, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %11 = load i32* %i, align 4
+ %inc = add nsw i32 %11, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %12 = load i32* %xx, align 4
+ %13 = load i32* %yy, align 4
+ %add6 = add nsw i32 %12, %13
+ ret i32 %add6
+}
+
+
+; The Cortex-A53 machine model will cause the FDIVv4f32 to be raised to
+; hide latency. Whereas normally there would only be a single FADDv4f32
+; after it, this test checks to make sure there is more than one.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: neon4xfloat:BB#0
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: FDIVv4f32
+; CHECK: FADDv4f32
+; CHECK: FADDv4f32
+; CHECK: ********** INTERVALS **********
+define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
+ %tmp1 = fadd <4 x float> %A, %B
+ %tmp2 = fadd <4 x float> %A, %tmp1
+ %tmp3 = fadd <4 x float> %A, %tmp2
+ %tmp4 = fadd <4 x float> %A, %tmp3
+ %tmp5 = fadd <4 x float> %A, %tmp4
+ %tmp6 = fadd <4 x float> %A, %tmp5
+ %tmp7 = fadd <4 x float> %A, %tmp6
+ %tmp8 = fadd <4 x float> %A, %tmp7
+ %tmp9 = fdiv <4 x float> %A, %B
+ %tmp10 = fadd <4 x float> %tmp8, %tmp9
+
+ ret <4 x float> %tmp10
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+
+; Regression Test for PR19761
+; [ARM64] Cortex-a53 schedule mode can't handle NEON post-increment load
+;
+; Nothing explicit to check other than llc not crashing.
+define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
+ %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
+ %tmp = getelementptr i8* %A, i32 32
+ store i8* %tmp, i8** %ptr
+ ret { <16 x i8>, <16 x i8> } %ld2
+}
+
+declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
new file mode 100644
index 0000000..97bfb5c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll
@@ -0,0 +1,21 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; For Cortex-A53, a shiftable operand that is not actually shifted avoids
+; the additional two cycles of latency that a shifted operand incurs, so the
+; plain ADDXrr below can be scheduled ahead of the shifted ADDXrs.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: shiftable
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: ADDXrr %vreg0, %vreg2
+; CHECK: ADDXrs %vreg0, %vreg2, 5
+; CHECK: ********** INTERVALS **********
+define i64 @shiftable(i64 %A, i64 %B) {
+ %tmp0 = sub i64 %B, 20
+ %tmp1 = shl i64 %tmp0, 5
+ %tmp2 = add i64 %A, %tmp1
+ %tmp3 = add i64 %A, %tmp0
+ %tmp4 = mul i64 %tmp2, %tmp3
+
+ ret i64 %tmp4
+}
diff --git a/test/CodeGen/AArch64/arm64-movi.ll b/test/CodeGen/AArch64/arm64-movi.ll
new file mode 100644
index 0000000..2cd368d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-movi.ll
@@ -0,0 +1,202 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+;==--------------------------------------------------------------------------==
+; Tests for MOV-immediate implemented with ORR-immediate.
+;==--------------------------------------------------------------------------==
+
+; 64-bit immed with 32-bit pattern size, rotated by 0.
+define i64 @test64_32_rot0() nounwind {
+; CHECK-LABEL: test64_32_rot0:
+; CHECK: orr x0, xzr, #0x700000007
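+; 30064771079 == 0x0000000700000007: the 32-bit pattern 0x00000007 replicated.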
+ ret i64 30064771079
+}
+
+; 64-bit immed with 32-bit pattern size, rotated by 2.
+define i64 @test64_32_rot2() nounwind {
+; CHECK-LABEL: test64_32_rot2:
+; CHECK: orr x0, xzr, #0xc0000003c0000003
+ ret i64 13835058071388291075
+}
+
+; 64-bit immed with 4-bit pattern size, rotated by 3.
+define i64 @test64_4_rot3() nounwind {
+; CHECK-LABEL: test64_4_rot3:
+; CHECK: orr x0, xzr, #0xeeeeeeeeeeeeeeee
+ ret i64 17216961135462248174
+}
+
+; 32-bit immed with 32-bit pattern size, rotated by 16.
+define i32 @test32_32_rot16() nounwind {
+; CHECK-LABEL: test32_32_rot16:
+; CHECK: orr w0, wzr, #0xff0000
+ ret i32 16711680
+}
+
+; 32-bit immed with 2-bit pattern size, rotated by 1.
+define i32 @test32_2_rot1() nounwind {
+; CHECK-LABEL: test32_2_rot1:
+; CHECK: orr w0, wzr, #0xaaaaaaaa
+ ret i32 2863311530
+}
+
+;==--------------------------------------------------------------------------==
+; Tests for MOVZ with MOVK.
+;==--------------------------------------------------------------------------==
+
+define i32 @movz() nounwind {
+; CHECK-LABEL: movz:
+; CHECK: movz w0, #0x5
+ ret i32 5
+}
+
+define i64 @movz_3movk() nounwind {
+; CHECK-LABEL: movz_3movk:
+; CHECK: movz x0, #0x5, lsl #48
+; CHECK-NEXT: movk x0, #0x1234, lsl #32
+; CHECK-NEXT: movk x0, #0xabcd, lsl #16
+; CHECK-NEXT: movk x0, #0x5678
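+; 1427392313513592 == 0x00051234abcd5678, materialized 16 bits at a time.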
+ ret i64 1427392313513592
+}
+
+define i64 @movz_movk_skip1() nounwind {
+; CHECK-LABEL: movz_movk_skip1:
+; CHECK: movz x0, #0x5, lsl #32
+; CHECK-NEXT: movk x0, #0x4321, lsl #16
+ ret i64 22601072640
+}
+
+define i64 @movz_skip1_movk() nounwind {
+; CHECK-LABEL: movz_skip1_movk:
+; CHECK: movz x0, #0x8654, lsl #32
+; CHECK-NEXT: movk x0, #0x1234
+ ret i64 147695335379508
+}
+
+;==--------------------------------------------------------------------------==
+; Tests for MOVN with MOVK.
+;==--------------------------------------------------------------------------==
+
+define i64 @movn() nounwind {
+; CHECK-LABEL: movn:
+; CHECK: movn x0, #0x29
+ ret i64 -42
+}
+
+define i64 @movn_skip1_movk() nounwind {
+; CHECK-LABEL: movn_skip1_movk:
+; CHECK: movn x0, #0x29, lsl #32
+; CHECK-NEXT: movk x0, #0x1234
+ ret i64 -176093720012
+}
+
+;==--------------------------------------------------------------------------==
+; Tests for ORR with MOVK.
+;==--------------------------------------------------------------------------==
+; rdar://14987673
+
+define i64 @orr_movk1() nounwind {
+; CHECK-LABEL: orr_movk1:
+; CHECK: orr x0, xzr, #0xffff0000ffff0
+; CHECK: movk x0, #0xdead, lsl #16
+ ret i64 72056498262245120
+}
+
+define i64 @orr_movk2() nounwind {
+; CHECK-LABEL: orr_movk2:
+; CHECK: orr x0, xzr, #0xffff0000ffff0
+; CHECK: movk x0, #0xdead, lsl #48
+ ret i64 -2400982650836746496
+}
+
+define i64 @orr_movk3() nounwind {
+; CHECK-LABEL: orr_movk3:
+; CHECK: orr x0, xzr, #0xffff0000ffff0
+; CHECK: movk x0, #0xdead, lsl #32
+ ret i64 72020953688702720
+}
+
+define i64 @orr_movk4() nounwind {
+; CHECK-LABEL: orr_movk4:
+; CHECK: orr x0, xzr, #0xffff0000ffff0
+; CHECK: movk x0, #0xdead
+ ret i64 72056494543068845
+}
+
+; rdar://14987618
+define i64 @orr_movk5() nounwind {
+; CHECK-LABEL: orr_movk5:
+; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
+; CHECK: movk x0, #0xdead, lsl #16
+ ret i64 -71777214836900096
+}
+
+define i64 @orr_movk6() nounwind {
+; CHECK-LABEL: orr_movk6:
+; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
+; CHECK: movk x0, #0xdead, lsl #16
+; CHECK: movk x0, #0xdead, lsl #48
+ ret i64 -2400982647117578496
+}
+
+define i64 @orr_movk7() nounwind {
+; CHECK-LABEL: orr_movk7:
+; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
+; CHECK: movk x0, #0xdead, lsl #48
+ ret i64 -2400982646575268096
+}
+
+define i64 @orr_movk8() nounwind {
+; CHECK-LABEL: orr_movk8:
+; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
+; CHECK: movk x0, #0xdead
+; CHECK: movk x0, #0xdead, lsl #48
+ ret i64 -2400982646575276371
+}
+
+; rdar://14987715
+define i64 @orr_movk9() nounwind {
+; CHECK-LABEL: orr_movk9:
+; CHECK: orr x0, xzr, #0xffffff000000000
+; CHECK: movk x0, #0xff00
+; CHECK: movk x0, #0xdead, lsl #16
+ ret i64 1152921439623315200
+}
+
+define i64 @orr_movk10() nounwind {
+; CHECK-LABEL: orr_movk10:
+; CHECK: orr x0, xzr, #0xfffffffffffff00
+; CHECK: movk x0, #0xdead, lsl #16
+ ret i64 1152921504047824640
+}
+
+define i64 @orr_movk11() nounwind {
+; CHECK-LABEL: orr_movk11:
+; CHECK: orr x0, xzr, #0xfff00000000000ff
+; CHECK: movk x0, #0xdead, lsl #16
+; CHECK: movk x0, #0xffff, lsl #32
+ ret i64 -4222125209747201
+}
+
+define i64 @orr_movk12() nounwind {
+; CHECK-LABEL: orr_movk12:
+; CHECK: orr x0, xzr, #0xfff00000000000ff
+; CHECK: movk x0, #0xdead, lsl #32
+ ret i64 -4258765016661761
+}
+
+define i64 @orr_movk13() nounwind {
+; CHECK-LABEL: orr_movk13:
+; CHECK: orr x0, xzr, #0xfffff000000
+; CHECK: movk x0, #0xdead
+; CHECK: movk x0, #0xdead, lsl #48
+ ret i64 -2401245434149282131
+}
+
+; rdar://13944082
+define i64 @g() nounwind {
+; CHECK-LABEL: g:
+; CHECK: movz x0, #0xffff, lsl #48
+; CHECK: movk x0, #0x2
+entry:
+ ret i64 -281474976710654
+}
diff --git a/test/CodeGen/ARM64/mul.ll b/test/CodeGen/AArch64/arm64-mul.ll
index 2e7986d..2e7986d 100644
--- a/test/CodeGen/ARM64/mul.ll
+++ b/test/CodeGen/AArch64/arm64-mul.ll
diff --git a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
new file mode 100644
index 0000000..d86d2e6
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
@@ -0,0 +1,14 @@
+; RUN: not llc < %s -mtriple=arm64-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=arm64-linux-gnueabi 2>&1 | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK: Invalid register name global variable
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"x5\00"}
diff --git a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
new file mode 100644
index 0000000..3ca14c4
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
@@ -0,0 +1,13 @@
+; RUN: not llc < %s -mtriple=arm64-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=arm64-linux-gnueabi 2>&1 | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; CHECK: Invalid register name global variable
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"notareg\00"}
diff --git a/test/CodeGen/ARM64/neg.ll b/test/CodeGen/AArch64/arm64-neg.ll
index 659ce98..659ce98 100644
--- a/test/CodeGen/ARM64/neg.ll
+++ b/test/CodeGen/AArch64/arm64-neg.ll
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem-high.ll b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
new file mode 100644
index 0000000..58df094
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
@@ -0,0 +1,341 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
+
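+; Each *_high_n test splats the scalar by hand; selection should dup it into
+; a vector register and use the second-half (*2) form of the widening
+; instruction on the top half of the 128-bit input.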
+define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_vmull_high_n_s16:
+; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
+ %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ ret <4 x i32> %vmull15.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_vmull_high_n_s32:
+; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
+ %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ ret <2 x i64> %vmull9.i.i
+}
+
+define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_vmull_high_n_u16:
+; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
+ %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ ret <4 x i32> %vmull15.i.i
+}
+
+define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_vmull_high_n_u32:
+; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
+ %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ ret <2 x i64> %vmull9.i.i
+}
+
+define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_vqdmull_high_n_s16:
+; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
+ %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ ret <4 x i32> %vqdmull15.i.i
+}
+
+define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_vqdmull_high_n_s32:
+; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
+ %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ ret <2 x i64> %vqdmull9.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlal_high_n_s16:
+; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlal_high_n_s32:
+; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlal_high_n_u16:
+; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlal_high_n_u32:
+; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vqdmlal_high_n_s16:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+ %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
+ ret <4 x i32> %vqdmlal17.i.i
+}
+
+define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vqdmlal_high_n_s32:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+ %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
+ ret <2 x i64> %vqdmlal11.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+ ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+ ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+ ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vmlsl_high_n_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+ ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
+; CHECK-LABEL: test_vqdmlsl_high_n_s16:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
+ %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
+ %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
+ %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
+ %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
+ %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
+ ret <4 x i32> %vqdmlsl17.i.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
+; CHECK-LABEL: test_vqdmlsl_high_n_s32:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
+ %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
+ %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
+ %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
+ ret <2 x i64> %vqdmlsl11.i.i
+}
+
+define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
+; CHECK-LABEL: test_vmul_n_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+ %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
+ %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
+ %mul.i = fmul <2 x float> %vecinit1.i, %a
+ ret <2 x float> %mul.i
+}
+
+define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
+; CHECK-LABEL: test_vmulq_n_f32:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+entry:
+ %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
+ %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
+ %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
+ %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
+ %mul.i = fmul <4 x float> %vecinit3.i, %a
+ ret <4 x float> %mul.i
+}
+
+define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
+; CHECK-LABEL: test_vmulq_n_f64:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+entry:
+ %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
+ %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
+ %mul.i = fmul <2 x double> %vecinit1.i, %a
+ ret <2 x double> %mul.i
+}
+
+define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
+; CHECK-LABEL: test_vfma_n_f32:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:
+ %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
+ %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
+; CHECK-LABEL: test_vfmaq_n_f32:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:
+ %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
+ %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
+ %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
+ %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
+; CHECK-LABEL: test_vfms_n_f32:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:
+ %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
+ %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
+ %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
+ %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
+ ret <2 x float> %1
+}
+
+define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
+; CHECK-LABEL: test_vfmsq_n_f32:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
+entry:
+ %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
+ %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
+ %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
+ %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
+ %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
+ %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
+ ret <4 x float> %1
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll
new file mode 100644
index 0000000..869966c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -0,0 +1,2853 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
+
+declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
+
+declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
+
+declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
+
+declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmla_lane_s16:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i16> %shuffle, %b
+ %add = add <4 x i16> %mul, %a
+ ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s16:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <8 x i16> %shuffle, %b
+ %add = add <8 x i16> %mul, %a
+ ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmla_lane_s32:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %mul = mul <2 x i32> %shuffle, %b
+ %add = add <2 x i32> %mul, %a
+ ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s32:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = mul <4 x i32> %shuffle, %b
+ %add = add <4 x i32> %mul, %a
+ ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmla_laneq_s16:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <4 x i16> %shuffle, %b
+ %add = add <4 x i16> %mul, %a
+ ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s16:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <8 x i16> %shuffle, %b
+ %add = add <8 x i16> %mul, %a
+ ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmla_laneq_s32:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %mul = mul <2 x i32> %shuffle, %b
+ %add = add <2 x i32> %mul, %a
+ ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s32:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i32> %shuffle, %b
+ %add = add <4 x i32> %mul, %a
+ ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmls_lane_s16:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i16> %shuffle, %b
+ %sub = sub <4 x i16> %a, %mul
+ ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s16:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <8 x i16> %shuffle, %b
+ %sub = sub <8 x i16> %a, %mul
+ ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmls_lane_s32:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %mul = mul <2 x i32> %shuffle, %b
+ %sub = sub <2 x i32> %a, %mul
+ ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s32:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = mul <4 x i32> %shuffle, %b
+ %sub = sub <4 x i32> %a, %mul
+ ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmls_laneq_s16:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <4 x i16> %shuffle, %b
+ %sub = sub <4 x i16> %a, %mul
+ ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s16:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <8 x i16> %shuffle, %b
+ %sub = sub <8 x i16> %a, %mul
+ ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmls_laneq_s32:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %mul = mul <2 x i32> %shuffle, %b
+ %sub = sub <2 x i32> %a, %mul
+ ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s32:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i32> %shuffle, %b
+ %sub = sub <4 x i32> %a, %mul
+ ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_s16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_s16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_s32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_s32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_u16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_u16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_u32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_u32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_s16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_s32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_u16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_u32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfma_lane_f32:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+
+define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmaq_lane_f32:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfma_laneq_f32:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f32:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfms_lane_f32:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmsq_lane_f32:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfms_laneq_f32:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f32:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmaq_lane_f64:
+; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+ ret <2 x double> %0
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f64:
+; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+ ret <2 x double> %0
+}
+
+define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmsq_lane_f64:
+; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <1 x double> <double -0.000000e+00>, %v
+ %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+ ret <2 x double> %0
+}
+
+define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f64:
+; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
+ %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+ ret <2 x double> %0
+}
+
+define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmas_laneq_f32:
+; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %extract = extractelement <4 x float> %v, i32 3
+ %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
+ ret float %0
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmsd_lane_f64:
+; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: ret
+entry:
+ %extract.rhs = extractelement <1 x double> %v, i32 0
+ %extract = fsub double -0.000000e+00, %extract.rhs
+ %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
+ ret double %0
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
+define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmss_laneq_f32:
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %extract.rhs = extractelement <4 x float> %v, i32 3
+ %extract = fsub float -0.000000e+00, %extract.rhs
+ %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
+ ret float %0
+}
+
+define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsd_laneq_f64:
+; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %extract.rhs = extractelement <2 x double> %v, i32 1
+ %extract = fsub double -0.000000e+00, %extract.rhs
+ %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
+ ret double %0
+}
+
+define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_s16:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_s32:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s16:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s32:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s16:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s32:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s16:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s32:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s16:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s32:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s16:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s32:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s16:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s32:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s16:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s32:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_u16:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_u32:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u16:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u32:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u16:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u32:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u16:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u32:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u16:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u32:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u16:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u32:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u16:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u32:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u16:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u32:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_s16:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_s32:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_u16:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_u32:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s16:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s32:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u16:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u32:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_s16:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_s32:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_u16:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_u32:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s16:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s32:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u16:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u32:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s16:
+; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+ ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s32:
+; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+ ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s16:
+; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+ ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s32:
+; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+ ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s16:
+; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+ ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s32:
+; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+ ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
+; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+ ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
+; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+ ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s16:
+; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s32:
+; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s16:
+; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s32:
+; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s16:
+; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s32:
+; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s16:
+; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s32:
+; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s16:
+; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i16> %vqdmulh2.i
+}
+
+define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s16:
+; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+ ret <8 x i16> %vqdmulh2.i
+}
+
+define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s32:
+; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i32> %vqdmulh2.i
+}
+
+define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s32:
+; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+ ret <4 x i32> %vqdmulh2.i
+}
+
+define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s16:
+; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i16> %vqrdmulh2.i
+}
+
+define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s16:
+; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+ ret <8 x i16> %vqrdmulh2.i
+}
+
+define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s32:
+; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i32> %vqrdmulh2.i
+}
+
+define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s32:
+; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+ ret <4 x i32> %vqrdmulh2.i
+}
+
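+; Floating-point multiply by element. For the <1 x double> case the lane is
+; taken with extractelement and the checks expect a plain scalar fmul.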
+define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmul_lane_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+ %mul = fmul <2 x float> %shuffle, %a
+ ret <2 x float> %mul
+}
+
+define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmul_lane_f64:
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <1 x double> %a to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to double
+ %extract = extractelement <1 x double> %v, i32 0
+ %2 = fmul double %1, %extract
+ %3 = insertelement <1 x double> undef, double %2, i32 0
+ ret <1 x double> %3
+}
+
+define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulq_lane_f32:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = fmul <4 x float> %shuffle, %a
+ ret <4 x float> %mul
+}
+
+define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmulq_lane_f64:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
+ %mul = fmul <2 x double> %shuffle, %a
+ ret <2 x double> %mul
+}
+
+define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmul_laneq_f32:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+ %mul = fmul <2 x float> %shuffle, %a
+ ret <2 x float> %mul
+}
+
+define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmul_laneq_f64:
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <1 x double> %a to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to double
+ %extract = extractelement <2 x double> %v, i32 1
+ %2 = fmul double %1, %extract
+ %3 = insertelement <1 x double> undef, double %2, i32 0
+ ret <1 x double> %3
+}
+
+define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f32:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = fmul <4 x float> %shuffle, %a
+ ret <4 x float> %mul
+}
+
+define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f64:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ %mul = fmul <2 x double> %shuffle, %a
+ ret <2 x double> %mul
+}
+
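+; fmulx by element via the llvm.aarch64.neon.fmulx intrinsic; "mulx" in the
+; checks matches the "fmulx" mnemonic as a substring.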
+define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulx_lane_f32:
+; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+ %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+ ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f32:
+; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+ ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f64:
+; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
+ %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+ ret <2 x double> %vmulx2.i
+}
+
+define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulx_laneq_f32:
+; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+ %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+ ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f32:
+; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+ ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f64:
+; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+ ret <2 x double> %vmulx2.i
+}
+
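+; From here on, the "_0" tests repeat the earlier lane patterns with lane
+; index 0 (a zeroinitializer shuffle mask), so every check expects element
+; [0]. First: mla by element.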
+define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmla_lane_s16_0:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %b
+ %add = add <4 x i16> %mul, %a
+ ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s16_0:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %b
+ %add = add <8 x i16> %mul, %a
+ ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmla_lane_s32_0:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %b
+ %add = add <2 x i32> %mul, %a
+ ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_lane_s32_0:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %b
+ %add = add <4 x i32> %mul, %a
+ ret <4 x i32> %add
+}
+
+define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmla_laneq_s16_0:
+; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %b
+ %add = add <4 x i16> %mul, %a
+ ret <4 x i16> %add
+}
+
+define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s16_0:
+; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %b
+ %add = add <8 x i16> %mul, %a
+ ret <8 x i16> %add
+}
+
+define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmla_laneq_s32_0:
+; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %b
+ %add = add <2 x i32> %mul, %a
+ ret <2 x i32> %add
+}
+
+define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlaq_laneq_s32_0:
+; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %b
+ %add = add <4 x i32> %mul, %a
+ ret <4 x i32> %add
+}
+
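+; mls by element with lane 0.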
+define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmls_lane_s16_0:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %b
+ %sub = sub <4 x i16> %a, %mul
+ ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s16_0:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %b
+ %sub = sub <8 x i16> %a, %mul
+ ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmls_lane_s32_0:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %b
+ %sub = sub <2 x i32> %a, %mul
+ ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_lane_s32_0:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %b
+ %sub = sub <4 x i32> %a, %mul
+ ret <4 x i32> %sub
+}
+
+define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmls_laneq_s16_0:
+; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %b
+ %sub = sub <4 x i16> %a, %mul
+ ret <4 x i16> %sub
+}
+
+define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s16_0:
+; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %b
+ %sub = sub <8 x i16> %a, %mul
+ ret <8 x i16> %sub
+}
+
+define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmls_laneq_s32_0:
+; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %b
+ %sub = sub <2 x i32> %a, %mul
+ ret <2 x i32> %sub
+}
+
+define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsq_laneq_s32_0:
+; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %b
+ %sub = sub <4 x i32> %a, %mul
+ ret <4 x i32> %sub
+}
+
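+; Integer mul by element with lane 0; the signed and unsigned tests use the
+; same IR and expect the same "mul" instruction.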
+define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_s16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_s16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_s32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_s32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmul_lane_u16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmulq_lane_u16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmul_lane_u32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmulq_lane_u32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_s16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_s32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_s32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
+define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmul_laneq_u16_0:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i16> %shuffle, %a
+ ret <4 x i16> %mul
+}
+
+define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u16_0:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
+ %mul = mul <8 x i16> %shuffle, %a
+ ret <8 x i16> %mul
+}
+
+define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmul_laneq_u32_0:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %a
+ ret <2 x i32> %mul
+}
+
+define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmulq_laneq_u32_0:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %mul = mul <4 x i32> %shuffle, %a
+ ret <4 x i32> %mul
+}
+
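+; fmla/fmls by element via llvm.fma with lane 0. For fmls the negation is
+; written as an fsub from -0.0 ahead of the splat, and the checks expect it
+; to fold into the fmls instruction.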
+define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfma_lane_f32_0:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmaq_lane_f32_0:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfma_laneq_f32_0:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f32_0:
+; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfms_lane_f32_0:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmsq_lane_f32_0:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfms_laneq_f32_0:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
+ ret <2 x float> %0
+}
+
+define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f32_0:
+; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+ %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
+ %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
+ ret <4 x float> %0
+}
+
+define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmaq_laneq_f64_0:
+; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+ ret <2 x double> %0
+}
+
+define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsq_laneq_f64_0:
+; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
+ %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
+ ret <2 x double> %0
+}
+
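+; Widening multiply-accumulate by element with lane 0: the checks for "mlal"
+; and "mlsl" match the smlal/umlal and smlsl/umlsl mnemonics, and the "2"
+; (high-half) variants take the first operand from an explicit high-lane
+; shuffle.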
+define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_s16_0:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_s32_0:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s16_0:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_s32_0:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s16_0:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_s32_0:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s16_0:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_s32_0:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s16_0:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_s32_0:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_lane_u16_0:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_lane_u32_0:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u16_0:
+; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_laneq_u32_0:
+; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u16_0:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_lane_u32_0:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
+; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %add = add <4 x i32> %vmull2.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
+; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %add = add <2 x i64> %vmull2.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u16_0:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_lane_u32_0:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u16_0:
+; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_laneq_u32_0:
+; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
+define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
+; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %sub = sub <4 x i32> %a, %vmull2.i
+ ret <4 x i32> %sub
+}
+
+define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
+; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
+; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %sub = sub <2 x i64> %a, %vmull2.i
+ ret <2 x i64> %sub
+}
+
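+; Widening multiply by element with lane 0: smull/umull and their high-half
+; "2" forms.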
+define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_s16_0:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_s32_0:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_lane_u16_0:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_lane_u32_0:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s16_0:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_s32_0:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u16_0:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_lane_u32_0:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_s16_0:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_s32_0:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_laneq_u16_0:
+; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_laneq_u32_0:
+; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s16_0:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_s32_0:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u16_0:
+; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vmull_high_laneq_u32_0:
+; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vmull2.i
+}
+
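+; Saturating doubling multiply-accumulate by element with lane 0: sqdmlal and
+; sqdmlsl, expressed in IR as sqdmull followed by sqadd/sqsub on the
+; accumulator.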
+define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s16_0:
+; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+ ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_lane_s32_0:
+; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+ ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
+; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+ ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
+; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+ ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
+; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
+ %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+ ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
+; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
+ %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+ ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
+; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+ ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
+; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+ ret <2 x i64> %vqdmlsl4.i
+}
+
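+; Saturating doubling widening multiply by element with lane 0: sqdmull and
+; its high-half "2" variants.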
+define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s16_0:
+; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_lane_s32_0:
+; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s16_0:
+; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_laneq_s32_0:
+; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
+; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
+; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
+define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
+; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
+; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
+; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
+ ret <2 x i64> %vqdmull2.i
+}
+
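+; Saturating doubling multiply returning the high half, by element with
+; lane 0: sqdmulh and sqrdmulh.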
+define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s16_0:
+; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i16> %vqdmulh2.i
+}
+
+define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
+; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+ %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+ ret <8 x i16> %vqdmulh2.i
+}
+
+define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulh_lane_s32_0:
+; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i32> %vqdmulh2.i
+}
+
+define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
+; CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+ ret <4 x i32> %vqdmulh2.i
+}
+
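+; sqrdmulh by-lane: same splat pattern as sqdmulh above, using the rounding
+; variant of the intrinsic.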
+define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
+ %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+ ret <4 x i16> %vqrdmulh2.i
+}
+
+define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+ %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+ ret <8 x i16> %vqrdmulh2.i
+}
+
+define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+ ret <2 x i32> %vqrdmulh2.i
+}
+
+define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
+; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+ ret <4 x i32> %vqrdmulh2.i
+}
+
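+; fmul by-lane: a plain IR fmul whose second operand is a lane splat should
+; select the indexed-element form of fmul.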
+define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmul_lane_f32_0:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
+ %mul = fmul <2 x float> %shuffle, %a
+ ret <2 x float> %mul
+}
+
+define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulq_lane_f32_0:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
+ %mul = fmul <4 x float> %shuffle, %a
+ ret <4 x float> %mul
+}
+
+define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmul_laneq_f32_0:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
+ %mul = fmul <2 x float> %shuffle, %a
+ ret <2 x float> %mul
+}
+
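+; The <1 x double> case is really a scalar multiply, so the expected code is
+; an fmul on a d register with an indexed vector operand.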
+define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmul_laneq_f64_0:
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <1 x double> %a to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to double
+ %extract = extractelement <2 x double> %v, i32 0
+ %2 = fmul double %1, %extract
+ %3 = insertelement <1 x double> undef, double %2, i32 0
+ ret <1 x double> %3
+}
+
+define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f32_0:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+ %mul = fmul <4 x float> %shuffle, %a
+ ret <4 x float> %mul
+}
+
+define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulq_laneq_f64_0:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+ %mul = fmul <2 x double> %shuffle, %a
+ ret <2 x double> %mul
+}
+
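+; fmulx by-lane: the same splat pattern feeding the fmulx intrinsic.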
+define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulx_lane_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
+ %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+ ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
+ %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+ ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
+; CHECK-LABEL: test_vmulxq_lane_f64_0:
+; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
+ %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+ ret <2 x double> %vmulx2.i
+}
+
+define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulx_laneq_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
+ %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
+ ret <2 x float> %vmulx2.i
+}
+
+define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f32_0:
+; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+ %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
+ ret <4 x float> %vmulx2.i
+}
+
+define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
+; CHECK-LABEL: test_vmulxq_laneq_f64_0:
+; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+ %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
+ ret <2 x double> %vmulx2.i
+}
+
diff --git a/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
new file mode 100644
index 0000000..cb9b36c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -0,0 +1,1829 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
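+; Declarations for the intrinsics exercised below: polynomial and saturating
+; doubling multiplies, saturating add/sub, widening multiplies, absolute
+; difference, and the rounding narrowing ops raddhn/rsubhn.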
+declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
+
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
+
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
+
+declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
+
+declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
+
+declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
+
+declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
+
+declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
+
+declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
+
+declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
+
+declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
+
+declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
+
+declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
+
+declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
+
+declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
+
+declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)
+
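+; vaddl: widening add. saddl/uaddl are matched from (s|z)ext of both 64-bit
+; inputs followed by a plain add.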
+define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddl_s8:
+; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddl_s16:
+; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddl_s32:
+; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddl_u8:
+; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddl_u16:
+; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddl_u32:
+; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %add.i
+}
+
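+; vaddl_high: the _high variants take the upper halves of 128-bit inputs,
+; written as shufflevectors of the top lanes, and select saddl2/uaddl2.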
+define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddl_high_s8:
+; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %1
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddl_high_s16:
+; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %1
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddl_high_s32:
+; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %1
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddl_high_u8:
+; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %1
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddl_high_u16:
+; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %1
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddl_high_u32:
+; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %1
+ ret <2 x i64> %add.i
+}
+
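+; vaddw: widening add where only the second operand is extended (saddw/uaddw).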
+define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddw_s8:
+; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddw_s16:
+; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddw_s32:
+; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vaddw_u8:
+; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vaddw_u16:
+; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vaddw_u32:
+; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i, %a
+ ret <2 x i64> %add.i
+}
+
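+; vaddw_high: high-half form of the above, selecting saddw2/uaddw2.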
+define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddw_high_s8:
+; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddw_high_s16:
+; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddw_high_s32:
+; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaddw_high_u8:
+; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %0, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddw_high_u16:
+; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %0, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddw_high_u32:
+; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %0, %a
+ ret <2 x i64> %add.i
+}
+
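+; The vsubl/vsubw tests below mirror the add forms with sub, selecting
+; ssubl/usubl, ssubl2/usubl2, ssubw/usubw and ssubw2/usubw2.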
+define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubl_s8:
+; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubl_s16:
+; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubl_s32:
+; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubl_u8:
+; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
+ %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubl_u16:
+; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubl_u32:
+; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
+ %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubl_high_s8:
+; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %sub.i = sub <8 x i16> %0, %1
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubl_high_s16:
+; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %sub.i = sub <4 x i32> %0, %1
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubl_high_s32:
+; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %sub.i = sub <2 x i64> %0, %1
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubl_high_u8:
+; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
+ %sub.i = sub <8 x i16> %0, %1
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubl_high_u16:
+; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
+ %sub.i = sub <4 x i32> %0, %1
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubl_high_u32:
+; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
+ %sub.i = sub <2 x i64> %0, %1
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubw_s8:
+; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %vmovl.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubw_s16:
+; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %vmovl.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubw_s32:
+; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %vmovl.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vsubw_u8:
+; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
+entry:
+ %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %vmovl.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vsubw_u16:
+; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
+entry:
+ %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %vmovl.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vsubw_u32:
+; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
+entry:
+ %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %vmovl.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubw_high_s8:
+; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %0
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubw_high_s16:
+; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %0
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubw_high_s32:
+; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %0
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsubw_high_u8:
+; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
+ %sub.i = sub <8 x i16> %a, %0
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubw_high_u16:
+; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
+ %sub.i = sub <4 x i32> %a, %0
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubw_high_u32:
+; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
+ %sub.i = sub <2 x i64> %a, %0
+ ret <2 x i64> %sub.i
+}
+
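+; vaddhn: narrowing add that keeps the high half of each sum, matched from
+; add + lshr by half the element width + trunc.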
+define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_s16:
+; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i = add <8 x i16> %a, %b
+ %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
+ ret <8 x i8> %vaddhn2.i
+}
+
+define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_s32:
+; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i = add <4 x i32> %a, %b
+ %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
+ ret <4 x i16> %vaddhn2.i
+}
+
+define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_s64:
+; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i = add <2 x i64> %a, %b
+ %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
+ %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
+ ret <2 x i32> %vaddhn2.i
+}
+
+define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_u16:
+; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i = add <8 x i16> %a, %b
+ %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
+ ret <8 x i8> %vaddhn2.i
+}
+
+define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_u32:
+; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i = add <4 x i32> %a, %b
+ %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
+ ret <4 x i16> %vaddhn2.i
+}
+
+define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_u64:
+; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i = add <2 x i64> %a, %b
+ %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
+ %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
+ ret <2 x i32> %vaddhn2.i
+}
+
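+; vaddhn_high (addhn2): the narrowed sum is concatenated onto %r; the
+; concatenation shows up as bitcasts to <1 x i64> plus a two-lane shuffle.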
+define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_high_s16:
+; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i.i = add <8 x i16> %a, %b
+ %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_high_s32:
+; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i.i = add <4 x i32> %a, %b
+ %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_high_s64:
+; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i.i = add <2 x i64> %a, %b
+ %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
+ %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vaddhn_high_u16:
+; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vaddhn.i.i = add <8 x i16> %a, %b
+ %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaddhn_high_u32:
+; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vaddhn.i.i = add <4 x i32> %a, %b
+ %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaddhn_high_u64:
+; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vaddhn.i.i = add <2 x i64> %a, %b
+ %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
+ %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
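+; vraddhn: the rounding variant has no simple IR expansion, so it is matched
+; directly from the raddhn intrinsic.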
+define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_s16:
+; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vraddhn2.i
+}
+
+define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_s32:
+; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vraddhn2.i
+}
+
+define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_s64:
+; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vraddhn2.i
+}
+
+define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_u16:
+; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vraddhn2.i
+}
+
+define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_u32:
+; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vraddhn2.i
+}
+
+define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_u64:
+; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vraddhn2.i
+}
+
+define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_high_s16:
+; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_high_s32:
+; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_high_s64:
+; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vraddhn_high_u16:
+; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vraddhn_high_u32:
+; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vraddhn_high_u64:
+; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
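+; vsubhn: subtract counterpart of vaddhn (sub + lshr + trunc -> subhn/subhn2).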
+define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_s16:
+; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i = sub <8 x i16> %a, %b
+ %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
+ ret <8 x i8> %vsubhn2.i
+}
+
+define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_s32:
+; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i = sub <4 x i32> %a, %b
+ %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
+ ret <4 x i16> %vsubhn2.i
+}
+
+define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_s64:
+; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i = sub <2 x i64> %a, %b
+ %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
+ %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
+ ret <2 x i32> %vsubhn2.i
+}
+
+define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_u16:
+; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i = sub <8 x i16> %a, %b
+ %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
+ ret <8 x i8> %vsubhn2.i
+}
+
+define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_u32:
+; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i = sub <4 x i32> %a, %b
+ %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
+ ret <4 x i16> %vsubhn2.i
+}
+
+define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_u64:
+; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i = sub <2 x i64> %a, %b
+ %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
+ %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
+ ret <2 x i32> %vsubhn2.i
+}
+
+define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_high_s16:
+; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i.i = sub <8 x i16> %a, %b
+ %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_high_s32:
+; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i.i = sub <4 x i32> %a, %b
+ %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_high_s64:
+; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i.i = sub <2 x i64> %a, %b
+ %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
+ %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsubhn_high_u16:
+; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vsubhn.i.i = sub <8 x i16> %a, %b
+ %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsubhn_high_u32:
+; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vsubhn.i.i = sub <4 x i32> %a, %b
+ %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
+ %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsubhn_high_u64:
+; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vsubhn.i.i = sub <2 x i64> %a, %b
+ %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
+ %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
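+; vrsubhn: rounding subtract-narrow, matched from the rsubhn intrinsic
+; (rsubhn/rsubhn2).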
+define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_s16:
+; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vrsubhn2.i
+}
+
+define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_s32:
+; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vrsubhn2.i
+}
+
+define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_s64:
+; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vrsubhn2.i
+}
+
+define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_u16:
+; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i8> %vrsubhn2.i
+}
+
+define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_u32:
+; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i16> %vrsubhn2.i
+}
+
+define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_u64:
+; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i32> %vrsubhn2.i
+}
+
+define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_high_s16:
+; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_high_s32:
+; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_high_s64:
+; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vrsubhn_high_u16:
+; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+ %0 = bitcast <8 x i8> %r to <1 x i64>
+ %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vrsubhn_high_u32:
+; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+ %0 = bitcast <4 x i16> %r to <1 x i64>
+ %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrsubhn_high_u64:
+; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+ %0 = bitcast <2 x i32> %r to <1 x i64>
+ %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
+ %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
+ ret <4 x i32> %2
+}
+
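+; vabdl: absolute-difference long. The widening is a zext even in the signed
+; case because the absolute difference is always non-negative.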
+define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vabdl_s8:
+; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i
+}
+
+define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vabdl_s16:
+; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i
+}
+
+define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vabdl_s32:
+; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
+ %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i
+}
+
+define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vabdl_u8:
+; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i
+}
+
+define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vabdl_u16:
+; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i
+}
+
+define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vabdl_u32:
+; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
+ %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i
+}
+
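+; vabal: absolute-difference accumulate; the widened abd result is added to
+; the accumulator %a.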
+define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vabal_s8:
+; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vabal_s16:
+; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vabal_s32:
+; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vabal_u8:
+; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vabal_u16:
+; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vabal_u32:
+; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i.i, %a
+ ret <2 x i64> %add.i
+}
+
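+; High-half abd forms: sabdl2/uabdl2 (and sabal2/uabal2 below) operate on the
+; top lanes extracted by shufflevector.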
+define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vabdl_high_s8:
+; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i.i
+}
+
+define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vabdl_high_s16:
+; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i.i
+}
+
+define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdl_high_s32:
+; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i.i
+}
+
+define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vabdl_high_u8:
+; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
+ ret <8 x i16> %vmovl.i.i.i
+}
+
+define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vabdl_high_u16:
+; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
+ ret <4 x i32> %vmovl.i.i.i
+}
+
+define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vabdl_high_u32:
+; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
+ ret <2 x i64> %vmovl.i.i.i
+}
+
+define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vabal_high_s8:
+; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
+ %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vabal_high_s16:
+; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
+ %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vabal_high_s32:
+; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
+ %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vabal_high_u8:
+; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
+ %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vabal_high_u16:
+; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vabd2.i.i.i.i = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
+ %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vabal_high_u32:
+; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vabd2.i.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
+ %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vmull_s8:
+; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i16> %vmull.i
+}
+
+define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vmull_s16:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vmull_s32:
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
+ ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vmull_u8:
+; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i16> %vmull.i
+}
+
+define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vmull_u16:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vmull_u32:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
+ ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmull_high_s8:
+; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmull_high_s16:
+; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmull_high_s32:
+; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmull_high_u8:
+; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmull_high_u16:
+; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmull_high_u32:
+; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vmlal_s8:
+; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %add.i = add <8 x i16> %vmull.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vmlal_s16:
+; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %add.i = add <4 x i32> %vmull2.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vmlal_s32:
+; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %add.i = add <2 x i64> %vmull2.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vmlal_u8:
+; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %add.i = add <8 x i16> %vmull.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vmlal_u16:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %add.i = add <4 x i32> %vmull2.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vmlal_u32:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %add.i = add <2 x i64> %vmull2.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmlal_high_s8:
+; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %add.i.i = add <8 x i16> %vmull.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmlal_high_s16:
+; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmlal_high_s32:
+; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmlal_high_u8:
+; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %add.i.i = add <8 x i16> %vmull.i.i.i, %a
+ ret <8 x i16> %add.i.i
+}
+
+define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmlal_high_u16:
+; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i.i
+}
+
+define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmlal_high_u32:
+; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i.i
+}
+
+define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vmlsl_s8:
+; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %sub.i = sub <8 x i16> %a, %vmull.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vmlsl_s16:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %sub.i = sub <4 x i32> %a, %vmull2.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vmlsl_s32:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %sub.i = sub <2 x i64> %a, %vmull2.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
+; CHECK-LABEL: test_vmlsl_u8:
+; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
+ %sub.i = sub <8 x i16> %a, %vmull.i.i
+ ret <8 x i16> %sub.i
+}
+
+define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vmlsl_u16:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %sub.i = sub <4 x i32> %a, %vmull2.i.i
+ ret <4 x i32> %sub.i
+}
+
+define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vmlsl_u32:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %sub.i = sub <2 x i64> %a, %vmull2.i.i
+ ret <2 x i64> %sub.i
+}
+
+define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmlsl_high_s8:
+; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
+ ret <8 x i16> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmlsl_high_s16:
+; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+ ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmlsl_high_s32:
+; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+ ret <2 x i64> %sub.i.i
+}
+
+define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmlsl_high_u8:
+; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
+ ret <8 x i16> %sub.i.i
+}
+
+define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmlsl_high_u16:
+; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
+ ret <4 x i32> %sub.i.i
+}
+
+define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmlsl_high_u32:
+; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
+ ret <2 x i64> %sub.i.i
+}
+
+define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vqdmull_s16:
+; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i32> %vqdmull2.i
+}
+
+define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vqdmull_s32:
+; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
+ ret <2 x i64> %vqdmull2.i
+}
+
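+; Unlike vmlal/vmlsl, the saturating-doubling variants cannot express the
+; accumulate step as a plain IR add/sub: the accumulation itself saturates,
+; so it goes through the sqadd/sqsub intrinsics and still folds to a single
+; sqdmlal/sqdmlsl.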
+define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vqdmlal_s16:
+; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
+ ret <4 x i32> %vqdmlal4.i
+}
+
+define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vqdmlal_s32:
+; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
+ ret <2 x i64> %vqdmlal4.i
+}
+
+define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
+; CHECK-LABEL: test_vqdmlsl_s16:
+; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+entry:
+ %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
+ %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
+ ret <4 x i32> %vqdmlsl4.i
+}
+
+define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: test_vqdmlsl_s32:
+; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
+ %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
+ ret <2 x i64> %vqdmlsl4.i
+}
+
+define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vqdmull_high_s16:
+; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ ret <4 x i32> %vqdmull2.i.i
+}
+
+define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vqdmull_high_s32:
+; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ ret <2 x i64> %vqdmull2.i.i
+}
+
+define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vqdmlal_high_s16:
+; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
+ ret <4 x i32> %vqdmlal4.i.i
+}
+
+define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vqdmlal_high_s32:
+; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
+ ret <2 x i64> %vqdmlal4.i.i
+}
+
+define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vqdmlsl_high_s16:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+entry:
+ %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
+ %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
+ ret <4 x i32> %vqdmlsl4.i.i
+}
+
+define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vqdmlsl_high_s32:
+; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+entry:
+ %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
+ %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
+ ret <2 x i64> %vqdmlsl4.i.i
+}
+
+define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vmull_p8:
+; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+entry:
+ %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i16> %vmull.i
+}
+
+define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmull_high_p8:
+; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+entry:
+ %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
+ ret <8 x i16> %vmull.i.i
+}
+
+define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
+; CHECK-LABEL: test_vmull_p64:
+; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
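+; pmull on the .1q form is the 64x64 -> 128-bit polynomial multiply; the
+; intrinsic returns <16 x i8>, so the i128 result is recovered by bitcast.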
+entry:
+ %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
+ %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
+ ret i128 %vmull3.i
+}
+
+define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
+; CHECK-LABEL: test_vmull_high_p64:
+; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %0 = extractelement <2 x i64> %a, i32 1
+ %1 = extractelement <2 x i64> %b, i32 1
+ %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1
+ %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
+ ret i128 %vmull3.i.i
+}
+
+declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
+
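All of the widening tests above share one shape: the data movement is written as generic IR (a shufflevector for the high half, a plain add or sub for the accumulate) around a single NEON intrinsic, and FileCheck only asserts that the backend folds the whole sequence into one long-form instruction. Distilled to a minimal standalone test in the same style (the function name and RUN line here are illustrative, not part of the patch):

; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)

define <8 x i16> @smlal2_from_generic_ir(<8 x i16> %acc, <16 x i8> %a, <16 x i8> %b) {
; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
  ; take the high halves with plain shufflevectors ...
  %hi.a = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %hi.b = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; ... widen-multiply them with the intrinsic ...
  %wide = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %hi.a, <8 x i8> %hi.b)
  ; ... and accumulate with a generic add; isel merges all of this into smlal2
  %mac = add <8 x i16> %wide, %acc
  ret <8 x i16> %mac
}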
+
diff --git a/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
new file mode 100644
index 0000000..6404ab7
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -0,0 +1,236 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_uabd_v8i8:
+ %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: uabd v0.8b, v0.8b, v1.8b
+ ret <8 x i8> %abd
+}
+
+define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_uaba_v8i8:
+ %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+ %aba = add <8 x i8> %lhs, %abd
+; CHECK: uaba v0.8b, v0.8b, v1.8b
+ ret <8 x i8> %aba
+}
+
+define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_sabd_v8i8:
+ %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: sabd v0.8b, v0.8b, v1.8b
+ ret <8 x i8> %abd
+}
+
+define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK: test_saba_v8i8:
+ %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+ %aba = add <8 x i8> %lhs, %abd
+; CHECK: saba v0.8b, v0.8b, v1.8b
+ ret <8 x i8> %aba
+}
+
+declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_uabd_v16i8:
+ %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: uabd v0.16b, v0.16b, v1.16b
+ ret <16 x i8> %abd
+}
+
+define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_uaba_v16i8:
+ %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+ %aba = add <16 x i8> %lhs, %abd
+; CHECK: uaba v0.16b, v0.16b, v1.16b
+ ret <16 x i8> %aba
+}
+
+define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_sabd_v16i8:
+ %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: sabd v0.16b, v0.16b, v1.16b
+ ret <16 x i8> %abd
+}
+
+define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_saba_v16i8:
+ %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+ %aba = add <16 x i8> %lhs, %abd
+; CHECK: saba v0.16b, v0.16b, v1.16b
+ ret <16 x i8> %aba
+}
+
+declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_uabd_v4i16:
+ %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: uabd v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %abd
+}
+
+define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_uaba_v4i16:
+ %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+ %aba = add <4 x i16> %lhs, %abd
+; CHECK: uaba v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %aba
+}
+
+define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_sabd_v4i16:
+ %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sabd v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %abd
+}
+
+define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_saba_v4i16:
+ %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+ %aba = add <4 x i16> %lhs, %abd
+; CHECK: saba v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %aba
+}
+
+declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_uabd_v8i16:
+ %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: uabd v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %abd
+}
+
+define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_uaba_v8i16:
+ %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+ %aba = add <8 x i16> %lhs, %abd
+; CHECK: uaba v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %aba
+}
+
+define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_sabd_v8i16:
+ %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sabd v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %abd
+}
+
+define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_saba_v8i16:
+ %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+ %aba = add <8 x i16> %lhs, %abd
+; CHECK: saba v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %aba
+}
+
+declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_uabd_v2i32:
+ %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: uabd v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %abd
+}
+
+define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_uaba_v2i32:
+ %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+ %aba = add <2 x i32> %lhs, %abd
+; CHECK: uaba v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %aba
+}
+
+define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_sabd_v2i32:
+ %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sabd v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %abd
+}
+
+define <2 x i32> @test_sabd_v2i32_const() {
+; CHECK: test_sabd_v2i32_const:
+; CHECK: movi d1, #0x00ffffffff0000
+; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
+ %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
+ <2 x i32> <i32 -2147483648, i32 2147450880>,
+ <2 x i32> <i32 -65536, i32 65535>)
+ ret <2 x i32> %1
+}
+
+define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_saba_v2i32:
+ %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+ %aba = add <2 x i32> %lhs, %abd
+; CHECK: saba v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %aba
+}
+
+declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_uabd_v4i32:
+ %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: uabd v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %abd
+}
+
+define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_uaba_v4i32:
+ %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+ %aba = add <4 x i32> %lhs, %abd
+; CHECK: uaba v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %aba
+}
+
+define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_sabd_v4i32:
+ %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sabd v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %abd
+}
+
+define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_saba_v4i32:
+ %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+ %aba = add <4 x i32> %lhs, %abd
+; CHECK: saba v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %aba
+}
+
+declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>)
+
+define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_fabd_v2f32:
+ %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: fabd v0.2s, v0.2s, v1.2s
+ ret <2 x float> %abd
+}
+
+declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>)
+
+define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_fabd_v4f32:
+ %abd = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: fabd v0.4s, v0.4s, v1.4s
+ ret <4 x float> %abd
+}
+
+declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>)
+
+define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_fabd_v2f64:
+ %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: fabd v0.2d, v0.2d, v1.2d
+ ret <2 x double> %abd
+}
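The uaba/saba tests in arm64-neon-aba-abd.ll never call a dedicated accumulate intrinsic, because none exists: the absolute difference comes from the abd intrinsic and the accumulate is a generic add that instruction selection folds in. A minimal sketch of that contract, with illustrative names:

; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @uaba_from_generic_add(<4 x i32> %acc, <4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: uaba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  ; absolute difference via the intrinsic, accumulate via a plain IR add
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  %aba = add <4 x i32> %acc, %abd
  ret <4 x i32> %aba
}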
diff --git a/test/CodeGen/AArch64/arm64-neon-across.ll b/test/CodeGen/AArch64/arm64-neon-across.ll
new file mode 100644
index 0000000..3a63673
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-across.ll
@@ -0,0 +1,460 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
+
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+
+declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
+
+declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)
+
+declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)
+
+declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)
+
+declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)
+
+declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)
+
+declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)
+
+define i16 @test_vaddlv_s8(<8 x i8> %a) {
+; CHECK: test_vaddlv_s8:
+; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %saddlvv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddlv_s16(<4 x i16> %a) {
+; CHECK: test_vaddlv_s16:
+; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
+ ret i32 %saddlvv.i
+}
+
+define i16 @test_vaddlv_u8(<8 x i8> %a) {
+; CHECK: test_vaddlv_u8:
+; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %uaddlvv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddlv_u16(<4 x i16> %a) {
+; CHECK: test_vaddlv_u16:
+; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
+ ret i32 %uaddlvv.i
+}
+
+define i16 @test_vaddlvq_s8(<16 x i8> %a) {
+; CHECK: test_vaddlvq_s8:
+; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %saddlvv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddlvq_s16(<8 x i16> %a) {
+; CHECK: test_vaddlvq_s16:
+; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
+ ret i32 %saddlvv.i
+}
+
+define i64 @test_vaddlvq_s32(<4 x i32> %a) {
+; CHECK: test_vaddlvq_s32:
+; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
+ ret i64 %saddlvv.i
+}
+
+define i16 @test_vaddlvq_u8(<16 x i8> %a) {
+; CHECK: test_vaddlvq_u8:
+; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %uaddlvv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddlvq_u16(<8 x i16> %a) {
+; CHECK: test_vaddlvq_u16:
+; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
+ ret i32 %uaddlvv.i
+}
+
+define i64 @test_vaddlvq_u32(<4 x i32> %a) {
+; CHECK: test_vaddlvq_u32:
+; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
+ ret i64 %uaddlvv.i
+}
+
+define i8 @test_vmaxv_s8(<8 x i8> %a) {
+; CHECK: test_vmaxv_s8:
+; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %smaxv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vmaxv_s16(<4 x i16> %a) {
+; CHECK: test_vmaxv_s16:
+; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
+ %0 = trunc i32 %smaxv.i to i16
+ ret i16 %0
+}
+
+define i8 @test_vmaxv_u8(<8 x i8> %a) {
+; CHECK: test_vmaxv_u8:
+; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %umaxv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vmaxv_u16(<4 x i16> %a) {
+; CHECK: test_vmaxv_u16:
+; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
+ %0 = trunc i32 %umaxv.i to i16
+ ret i16 %0
+}
+
+define i8 @test_vmaxvq_s8(<16 x i8> %a) {
+; CHECK: test_vmaxvq_s8:
+; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %smaxv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vmaxvq_s16(<8 x i16> %a) {
+; CHECK: test_vmaxvq_s16:
+; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
+ %0 = trunc i32 %smaxv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vmaxvq_s32(<4 x i32> %a) {
+; CHECK: test_vmaxvq_s32:
+; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
+ ret i32 %smaxv.i
+}
+
+define i8 @test_vmaxvq_u8(<16 x i8> %a) {
+; CHECK: test_vmaxvq_u8:
+; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %umaxv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vmaxvq_u16(<8 x i16> %a) {
+; CHECK: test_vmaxvq_u16:
+; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
+ %0 = trunc i32 %umaxv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vmaxvq_u32(<4 x i32> %a) {
+; CHECK: test_vmaxvq_u32:
+; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
+ ret i32 %umaxv.i
+}
+
+define i8 @test_vminv_s8(<8 x i8> %a) {
+; CHECK: test_vminv_s8:
+; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %sminv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vminv_s16(<4 x i16> %a) {
+; CHECK: test_vminv_s16:
+; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
+ %0 = trunc i32 %sminv.i to i16
+ ret i16 %0
+}
+
+define i8 @test_vminv_u8(<8 x i8> %a) {
+; CHECK: test_vminv_u8:
+; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %uminv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vminv_u16(<4 x i16> %a) {
+; CHECK: test_vminv_u16:
+; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
+ %0 = trunc i32 %uminv.i to i16
+ ret i16 %0
+}
+
+define i8 @test_vminvq_s8(<16 x i8> %a) {
+; CHECK: test_vminvq_s8:
+; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %sminv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vminvq_s16(<8 x i16> %a) {
+; CHECK: test_vminvq_s16:
+; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
+ %0 = trunc i32 %sminv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vminvq_s32(<4 x i32> %a) {
+; CHECK: test_vminvq_s32:
+; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
+ ret i32 %sminv.i
+}
+
+define i8 @test_vminvq_u8(<16 x i8> %a) {
+; CHECK: test_vminvq_u8:
+; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %uminv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vminvq_u16(<8 x i16> %a) {
+; CHECK: test_vminvq_u16:
+; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
+ %0 = trunc i32 %uminv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vminvq_u32(<4 x i32> %a) {
+; CHECK: test_vminvq_u32:
+; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
+ ret i32 %uminv.i
+}
+
+define i8 @test_vaddv_s8(<8 x i8> %a) {
+; CHECK: test_vaddv_s8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vaddv_s16(<4 x i16> %a) {
+; CHECK: test_vaddv_s16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i8 @test_vaddv_u8(<8 x i8> %a) {
+; CHECK: test_vaddv_u8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
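+; Lane-wise addition is sign-agnostic, so the unsigned vaddv tests reuse the
+; signed saddv intrinsic and still select the same addv instruction.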
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vaddv_u16(<4 x i16> %a) {
+; CHECK: test_vaddv_u16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i8 @test_vaddvq_s8(<16 x i8> %a) {
+; CHECK: test_vaddvq_s8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vaddvq_s16(<8 x i16> %a) {
+; CHECK: test_vaddvq_s16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddvq_s32(<4 x i32> %a) {
+; CHECK: test_vaddvq_s32:
+; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
+ ret i32 %vaddv.i
+}
+
+define i8 @test_vaddvq_u8(<16 x i8> %a) {
+; CHECK: test_vaddvq_u8:
+; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define i16 @test_vaddvq_u16(<8 x i16> %a) {
+; CHECK: test_vaddvq_u16:
+; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddvq_u32(<4 x i32> %a) {
+; CHECK: test_vaddvq_u32:
+; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
+ ret i32 %vaddv.i
+}
+
+define float @test_vmaxvq_f32(<4 x float> %a) {
+; CHECK: test_vmaxvq_f32:
+; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
+ ret float %0
+}
+
+define float @test_vminvq_f32(<4 x float> %a) {
+; CHECK: test_vminvq_f32:
+; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
+ ret float %0
+}
+
+define float @test_vmaxnmvq_f32(<4 x float> %a) {
+; CHECK: test_vmaxnmvq_f32:
+; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
+ ret float %0
+}
+
+define float @test_vminnmvq_f32(<4 x float> %a) {
+; CHECK: test_vminnmvq_f32:
+; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
+ ret float %0
+}
+
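Every integer reduction in arm64-neon-across.ll returns i32 (or i64 for the 4s saddlv/uaddlv) from the intrinsic; the narrower result types come from a plain trunc, and the CHECK lines pin down the width through the scalar destination register class (b/h/s/d). Reduced to a minimal example with illustrative names:

; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

define i16 @saddlv_then_narrow(<8 x i8> %v) {
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
  ; the intrinsic always yields i32; the i16 result is just a truncation
  %wide = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %v)
  %narrow = trunc i32 %wide to i16
  ret i16 %narrow
}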
diff --git a/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
new file mode 100644
index 0000000..d3dc1b8
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; CHECK: test_addp_v8i8:
+ %tmp1 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: addp v0.8b, v0.8b, v1.8b
+ ret <8 x i8> %tmp1
+}
+
+declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK: test_addp_v16i8:
+ %tmp1 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: addp v0.16b, v0.16b, v1.16b
+ ret <16 x i8> %tmp1
+}
+
+declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: test_addp_v4i16:
+ %tmp1 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: addp v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %tmp1
+}
+
+declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK: test_addp_v8i16:
+ %tmp1 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: addp v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %tmp1
+}
+
+declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK: test_addp_v2i32:
+ %tmp1 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: addp v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %tmp1
+}
+
+declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK: test_addp_v4i32:
+ %tmp1 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: addp v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %tmp1
+}
+
+declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK: test_addp_v2i64:
+ %val = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
+; CHECK: addp v0.2d, v0.2d, v1.2d
+ ret <2 x i64> %val
+}
+
+declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK: test_faddp_v2f32:
+ %val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+; CHECK: faddp v0.2s, v0.2s, v1.2s
+ ret <2 x float> %val
+}
+
+define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK: test_faddp_v4f32:
+ %val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+; CHECK: faddp v0.4s, v0.4s, v1.4s
+ ret <4 x float> %val
+}
+
+define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK: test_faddp_v2f64:
+ %val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+; CHECK: faddp v0.2d, v0.2d, v1.2d
+ ret <2 x double> %val
+}
+
+define i32 @test_vaddv.v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddv.v2i32
+; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
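+; addv has no two-lane .2s form, so the across-vector add of a <2 x i32> is
+; matched to a pairwise addp instead.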
+ %1 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
+ ret i32 %1
+}
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>)
diff --git a/test/CodeGen/AArch64/arm64-neon-add-sub.ll b/test/CodeGen/AArch64/arm64-neon-add-sub.ll
new file mode 100644
index 0000000..fbde606
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-add-sub.ll
@@ -0,0 +1,237 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -aarch64-simd-scalar | FileCheck %s
+
+define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = add <8 x i8> %A, %B
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = add <16 x i8> %A, %B
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = add <4 x i16> %A, %B
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = add <8 x i16> %A, %B
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = add <2 x i32> %A, %B
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = add <4 x i32> %A, %B
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = add <2 x i64> %A, %B
+ ret <2 x i64> %tmp3
+}
+
+define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = fadd <2 x float> %A, %B
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = fadd <4 x float> %A, %B
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = fadd <2 x double> %A, %B;
+ ret <2 x double> %tmp3
+}
+
+define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = sub <8 x i8> %A, %B;
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = sub <16 x i8> %A, %B;
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = sub <4 x i16> %A, %B;
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = sub <8 x i16> %A, %B;
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = sub <2 x i32> %A, %B;
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sub4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = sub <4 x i32> %A, %B;
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = sub <2 x i64> %A, %B;
+ ret <2 x i64> %tmp3
+}
+
+define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
+;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = fsub <2 x float> %A, %B;
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
+;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = fsub <4 x float> %A, %B;
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
+;CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = fsub <2 x double> %A, %B;
+ ret <2 x double> %tmp3
+}
+
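+; <1 x double> arithmetic is expected to select the scalar d-register
+; forms of the corresponding floating-point instructions.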
+define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vadd_f64
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fadd <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmul_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fmul <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vdiv_f64
+; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fdiv <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
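+; The RUN line passes no fp-contract option, so the separate fmul/fadd
+; and fmul/fsub pairs below must stay unfused; only the explicit
+; llvm.fma calls further down may select fmadd and fmsub.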
+define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmla_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fmul <1 x double> %b, %c
+ %2 = fadd <1 x double> %1, %a
+ ret <1 x double> %2
+}
+
+define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmls_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fmul <1 x double> %b, %c
+ %2 = fsub <1 x double> %a, %1
+ ret <1 x double> %2
+}
+
+define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfms_f64
+; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fsub <1 x double> <double -0.000000e+00>, %b
+ %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
+ ret <1 x double> %2
+}
+
+define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfma_f64
+; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vsub_f64
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fsub <1 x double> %a, %b
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vabd_f64
+; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmax_f64
+; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmin_f64
+; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmaxnm_f64
+; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vminnm_f64
+; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vabs_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vabs_f64
+; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vneg_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vneg_f64
+; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+ %1 = fsub <1 x double> <double -0.000000e+00>, %a
+ ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
diff --git a/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll b/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll
new file mode 100644
index 0000000..cba81ef
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll
@@ -0,0 +1,1209 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s
+
+define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp eq <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp eq <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = icmp eq <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = icmp eq <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = icmp eq <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = icmp eq <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = icmp eq <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
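+; There is no vector cmne instruction, so inequality is materialized
+; as cmeq followed by mvn (bitwise NOT) of the result.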
+define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT: mvn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ne <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+;CHECK-NEXT: mvn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ne <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+;CHECK-NEXT: mvn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ne <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp sgt <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp sgt <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = icmp sgt <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = icmp sgt <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = icmp sgt <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = icmp sgt <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = icmp sgt <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
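+; cmlt exists only in the compare-against-#0 form, so a register-register
+; signed less-than is selected as cmgt with the operands swapped.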
+define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
+ %tmp3 = icmp slt <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
+ %tmp3 = icmp slt <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
+ %tmp3 = icmp slt <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
+ %tmp3 = icmp slt <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+ %tmp3 = icmp slt <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+ %tmp3 = icmp slt <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LT implemented as GT, so check reversed operands.
+;CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+ %tmp3 = icmp slt <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp sge <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp sge <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = icmp sge <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = icmp sge <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = icmp sge <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = icmp sge <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = icmp sge <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
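+; Likewise, cmle exists only in the #0 form, so a register-register
+; signed less-or-equal is selected as cmge with the operands swapped.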
+define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
+ %tmp3 = icmp sle <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
+ %tmp3 = icmp sle <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
+ %tmp3 = icmp sle <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
+ %tmp3 = icmp sle <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+ %tmp3 = icmp sle <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+ %tmp3 = icmp sle <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LE implemented as GE, so check reversed operands.
+;CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+ %tmp3 = icmp sle <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ugt <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ugt <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = icmp ugt <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = icmp ugt <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = icmp ugt <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = icmp ugt <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = icmp ugt <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
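+; There is no cmlo instruction, so unsigned lower is selected as cmhi
+; with the operands swapped.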
+define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
+ %tmp3 = icmp ult <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+ %tmp3 = icmp ult <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+ %tmp3 = icmp ult <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+ %tmp3 = icmp ult <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+ %tmp3 = icmp ult <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+ %tmp3 = icmp ult <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+ %tmp3 = icmp ult <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
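+; There is no cmls instruction, so unsigned lower-or-same is selected
+; as cmhs with the operands swapped.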
+
+define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) {
+;CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp uge <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) {
+;CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp uge <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) {
+;CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = icmp uge <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) {
+;CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = icmp uge <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) {
+;CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = icmp uge <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) {
+;CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = icmp uge <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) {
+;CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = icmp uge <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+ %tmp3 = icmp ule <8 x i8> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+ %tmp3 = icmp ule <16 x i8> %A, %B;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+ %tmp3 = icmp ule <4 x i16> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+ %tmp3 = icmp ule <8 x i16> %A, %B;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+ %tmp3 = icmp ule <2 x i32> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+ %tmp3 = icmp ule <4 x i32> %A, %B;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+ %tmp3 = icmp ule <2 x i64> %A, %B;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+
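+; Comparisons against zeroinitializer are expected to select the
+; immediate #0 forms of the compare instructions where they exist.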
+define <8 x i8> @cmeqz8xi8(<8 x i8> %A) {
+;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+ %tmp3 = icmp eq <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmeqz16xi8(<16 x i8> %A) {
+;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
+ %tmp3 = icmp eq <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmeqz4xi16(<4 x i16> %A) {
+;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
+ %tmp3 = icmp eq <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmeqz8xi16(<8 x i16> %A) {
+;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
+ %tmp3 = icmp eq <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmeqz2xi32(<2 x i32> %A) {
+;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
+ %tmp3 = icmp eq <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmeqz4xi32(<4 x i32> %A) {
+;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
+ %tmp3 = icmp eq <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
+;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+ %tmp3 = icmp eq <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+
+define <8 x i8> @cmgez8xi8(<8 x i8> %A) {
+;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+ %tmp3 = icmp sge <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmgez16xi8(<16 x i8> %A) {
+;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
+ %tmp3 = icmp sge <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmgez4xi16(<4 x i16> %A) {
+;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
+ %tmp3 = icmp sge <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmgez8xi16(<8 x i16> %A) {
+;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
+ %tmp3 = icmp sge <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmgez2xi32(<2 x i32> %A) {
+;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
+ %tmp3 = icmp sge <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmgez4xi32(<4 x i32> %A) {
+;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
+ %tmp3 = icmp sge <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
+;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+ %tmp3 = icmp sge <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+
+define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
+;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+ %tmp3 = icmp sgt <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmgtz16xi8(<16 x i8> %A) {
+;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
+ %tmp3 = icmp sgt <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmgtz4xi16(<4 x i16> %A) {
+;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
+ %tmp3 = icmp sgt <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmgtz8xi16(<8 x i16> %A) {
+;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
+ %tmp3 = icmp sgt <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmgtz2xi32(<2 x i32> %A) {
+;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
+ %tmp3 = icmp sgt <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmgtz4xi32(<4 x i32> %A) {
+;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
+ %tmp3 = icmp sgt <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmgtz2xi64(<2 x i64> %A) {
+;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+ %tmp3 = icmp sgt <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmlez8xi8(<8 x i8> %A) {
+;CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+ %tmp3 = icmp sle <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmlez16xi8(<16 x i8> %A) {
+;CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
+ %tmp3 = icmp sle <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmlez4xi16(<4 x i16> %A) {
+;CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
+ %tmp3 = icmp sle <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmlez8xi16(<8 x i16> %A) {
+;CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
+ %tmp3 = icmp sle <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmlez2xi32(<2 x i32> %A) {
+;CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
+ %tmp3 = icmp sle <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmlez4xi32(<4 x i32> %A) {
+;CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
+ %tmp3 = icmp sle <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmlez2xi64(<2 x i64> %A) {
+;CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+ %tmp3 = icmp sle <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmltz8xi8(<8 x i8> %A) {
+;CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+ %tmp3 = icmp slt <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmltz16xi8(<16 x i8> %A) {
+;CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
+ %tmp3 = icmp slt <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmltz4xi16(<4 x i16> %A) {
+;CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
+ %tmp3 = icmp slt <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmltz8xi16(<8 x i16> %A) {
+;CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
+ %tmp3 = icmp slt <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmltz2xi32(<2 x i32> %A) {
+;CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
+ %tmp3 = icmp slt <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmltz4xi32(<4 x i32> %A) {
+;CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
+ %tmp3 = icmp slt <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
+;CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+ %tmp3 = icmp slt <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
+;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ne <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
+;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
+;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ne <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
+;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
+;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ne <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
+;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
+;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
+;CHECK-NEXT: mvn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = icmp ne <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
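+; cmhs and cmhi have no #0 immediate form, so the zero vector must
+; first be materialized in a register with movi.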
+define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
+ %tmp3 = icmp uge <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
+ %tmp3 = icmp uge <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
+ %tmp3 = icmp uge <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
+ %tmp3 = icmp uge <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
+ %tmp3 = icmp uge <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
+ %tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
+ %tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+
+define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
+ %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
+ %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
+ %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
+ %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
+ %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
+ %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
+ %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v[[ZERO]].8b, v0.8b
+ %tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
+ %tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
+ %tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
+ %tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
+ %tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
+ %tmp3 = icmp ule <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LS implemented as HS, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
+ %tmp3 = icmp ule <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v[[ZERO]].8b, {{v[0-9]+}}.8b
+ %tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
+ %tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
+}
+
+define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
+ %tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
+ %tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
+ %tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
+ %tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
+; Using registers other than v0 and v1 is possible, but would be odd.
+; LO implemented as HI, so check reversed operands.
+;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
+ %tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
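+; <1 x i64> comparisons against zero are expected to select the scalar
+; d-register forms of the compare-with-#0 instructions.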
+define <1 x i64> @cmeqz_v1i64(<1 x i64> %A) {
+; CHECK-LABEL: cmeqz_v1i64:
+; CHECK: cmeq d0, d0, #0
+ %tst = icmp eq <1 x i64> %A, <i64 0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @cmgez_v1i64(<1 x i64> %A) {
+; CHECK-LABEL: cmgez_v1i64:
+; CHECK: cmge d0, d0, #0
+ %tst = icmp sge <1 x i64> %A, <i64 0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @cmgtz_v1i64(<1 x i64> %A) {
+; CHECK-LABEL: cmgtz_v1i64:
+; CHECK: cmgt d0, d0, #0
+ %tst = icmp sgt <1 x i64> %A, <i64 0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @cmlez_v1i64(<1 x i64> %A) {
+; CHECK-LABEL: cmlez_v1i64:
+; CHECK: cmle d0, d0, #0
+ %tst = icmp sle <1 x i64> %A, <i64 0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @cmltz_v1i64(<1 x i64> %A) {
+; CHECK-LABEL: cmltz_v1i64:
+; CHECK: cmlt d0, d0, #0
+ %tst = icmp slt <1 x i64> %A, <i64 0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
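+; Ordered floating-point comparisons against 0.0 are expected to select
+; the fcm* compare-with-zero immediate forms.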
+define <1 x i64> @fcmeqz_v1f64(<1 x double> %A) {
+; CHECK-LABEL: fcmeqz_v1f64:
+; CHECK: fcmeq d0, d0, #0
+ %tst = fcmp oeq <1 x double> %A, <double 0.0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmgez_v1f64(<1 x double> %A) {
+; CHECK-LABEL: fcmgez_v1f64:
+; CHECK: fcmge d0, d0, #0
+ %tst = fcmp oge <1 x double> %A, <double 0.0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmgtz_v1f64(<1 x double> %A) {
+; CHECK-LABEL: fcmgtz_v1f64:
+; CHECK: fcmgt d0, d0, #0
+ %tst = fcmp ogt <1 x double> %A, <double 0.0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmlez_v1f64(<1 x double> %A) {
+; CHECK-LABEL: fcmlez_v1f64:
+; CHECK: fcmle d0, d0, #0
+ %tst = fcmp ole <1 x double> %A, <double 0.0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmltz_v1f64(<1 x double> %A) {
+; CHECK-LABEL: fcmltz_v1f64:
+; CHECK: fcmlt d0, d0, #0
+ %tst = fcmp olt <1 x double> %A, <double 0.0>
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
new file mode 100644
index 0000000..cfc2ebf
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -0,0 +1,1453 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+
+define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
+; CHECK-LABEL: ins16bw:
+; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
+ %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
+; CHECK-LABEL: ins8hw:
+; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
+ %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
+; CHECK-LABEL: ins4sw:
+; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
+ %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
+; CHECK-LABEL: ins2dw:
+; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
+ %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
+; CHECK-LABEL: ins8bw:
+; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
+ %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
+; CHECK-LABEL: ins4hw:
+; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+ %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
+; CHECK-LABEL: ins2sw:
+; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
+ %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
+; CHECK-LABEL: ins16b16:
+; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
+ %tmp3 = extractelement <16 x i8> %tmp1, i32 2
+ %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
+; CHECK-LABEL: ins8h8:
+; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+ %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
+; CHECK-LABEL: ins4s4:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+ %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+ %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
+; CHECK-LABEL: ins2d2:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+ %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
+ ret <2 x i64> %tmp4
+}
+
+define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
+; CHECK-LABEL: ins4f4:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+ %tmp3 = extractelement <4 x float> %tmp1, i32 2
+ %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
+ ret <4 x float> %tmp4
+}
+
+define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
+; CHECK-LABEL: ins2df2:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ %tmp3 = extractelement <2 x double> %tmp1, i32 0
+ %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
+ ret <2 x double> %tmp4
+}
+
+define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
+; CHECK-LABEL: ins8b16:
+; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
+ %tmp3 = extractelement <8 x i8> %tmp1, i32 2
+ %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
+; CHECK-LABEL: ins4h8:
+; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+ %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
+; CHECK-LABEL: ins2s4:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
+ %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+ %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
+; CHECK-LABEL: ins1d2:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+ %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
+ ret <2 x i64> %tmp4
+}
+
+define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
+; CHECK-LABEL: ins2f4:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
+ %tmp3 = extractelement <2 x float> %tmp1, i32 1
+ %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
+ ret <4 x float> %tmp4
+}
+
+define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
+; CHECK-LABEL: ins1f2:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+ %tmp3 = extractelement <1 x double> %tmp1, i32 0
+ %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
+ ret <2 x double> %tmp4
+}
+
+define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
+; CHECK-LABEL: ins16b8:
+; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
+ %tmp3 = extractelement <16 x i8> %tmp1, i32 2
+ %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
+; CHECK-LABEL: ins8h4:
+; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+ %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
+; CHECK-LABEL: ins4s2:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+ %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+ %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
+; CHECK-LABEL: ins2d1:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+ %tmp3 = extractelement <2 x i64> %tmp1, i32 0
+ %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
+ ret <1 x i64> %tmp4
+}
+
+define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
+; CHECK-LABEL: ins4f2:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
+ %tmp3 = extractelement <4 x float> %tmp1, i32 2
+ %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
+ ret <2 x float> %tmp4
+}
+
+define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
+; CHECK-LABEL: ins2f1:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+ %tmp3 = extractelement <2 x double> %tmp1, i32 1
+ %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
+ ret <1 x double> %tmp4
+}
+
+define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
+; CHECK-LABEL: ins8b8:
+; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
+ %tmp3 = extractelement <8 x i8> %tmp1, i32 2
+ %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
+; CHECK-LABEL: ins4h4:
+; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+ %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
+; CHECK-LABEL: ins2s2:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ %tmp3 = extractelement <2 x i32> %tmp1, i32 0
+ %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
+; CHECK-LABEL: ins1d1:
+; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
+ %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+ %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
+ ret <1 x i64> %tmp4
+}
+
+define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
+; CHECK-LABEL: ins2f2:
+; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+ %tmp3 = extractelement <2 x float> %tmp1, i32 0
+ %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
+ ret <2 x float> %tmp4
+}
+
+define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
+; CHECK-LABEL: ins1df1:
+; CHECK-NOT: ins {{v[0-9]+}}
+ %tmp3 = extractelement <1 x double> %tmp1, i32 0
+ %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
+ ret <1 x double> %tmp4
+}
+
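+; umov zero-extends a vector lane into a general-purpose register; for
+; 32-bit and 64-bit lanes the canonical spelling is plain mov (or fmov).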
+define i32 @umovw16b(<16 x i8> %tmp1) {
+; CHECK-LABEL: umovw16b:
+; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
+ %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+ %tmp4 = zext i8 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i32 @umovw8h(<8 x i16> %tmp1) {
+; CHECK-LABEL: umovw8h:
+; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+ %tmp4 = zext i16 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i32 @umovw4s(<4 x i32> %tmp1) {
+; CHECK-LABEL: umovw4s:
+; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
+ %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+ ret i32 %tmp3
+}
+
+define i64 @umovx2d(<2 x i64> %tmp1) {
+; CHECK-LABEL: umovx2d:
+; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp3 = extractelement <2 x i64> %tmp1, i32 1
+ ret i64 %tmp3
+}
+
+define i32 @umovw8b(<8 x i8> %tmp1) {
+; CHECK-LABEL: umovw8b:
+; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
+ %tmp3 = extractelement <8 x i8> %tmp1, i32 7
+ %tmp4 = zext i8 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i32 @umovw4h(<4 x i16> %tmp1) {
+; CHECK-LABEL: umovw4h:
+; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+ %tmp4 = zext i16 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i32 @umovw2s(<2 x i32> %tmp1) {
+; CHECK-LABEL: umovw2s:
+; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+ ret i32 %tmp3
+}
+
+define i64 @umovx1d(<1 x i64> %tmp1) {
+; CHECK-LABEL: umovx1d:
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ %tmp3 = extractelement <1 x i64> %tmp1, i32 0
+ ret i64 %tmp3
+}
+
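+; smov sign-extends a vector lane into a w or x general-purpose
+; register, in contrast to the zero-extending umov above.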
+define i32 @smovw16b(<16 x i8> %tmp1) {
+; CHECK-LABEL: smovw16b:
+; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
+ %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = add i32 %tmp4, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @smovw8h(<8 x i16> %tmp1) {
+; CHECK-LABEL: smovw8h:
+; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+ %tmp4 = sext i16 %tmp3 to i32
+ %tmp5 = add i32 %tmp4, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @smovx16b(<16 x i8> %tmp1) {
+; CHECK-LABEL: smovx16b:
+; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+ %tmp3 = extractelement <16 x i8> %tmp1, i32 8
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = add i32 %tmp4, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @smovx8h(<8 x i16> %tmp1) {
+; CHECK-LABEL: smovx8h:
+; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <8 x i16> %tmp1, i32 2
+ %tmp4 = sext i16 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i64 @smovx4s(<4 x i32> %tmp1) {
+; CHECK-LABEL: smovx4s:
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
+ %tmp3 = extractelement <4 x i32> %tmp1, i32 2
+ %tmp4 = sext i32 %tmp3 to i64
+ ret i64 %tmp4
+}
+
+define i32 @smovw8b(<8 x i8> %tmp1) {
+; CHECK-LABEL: smovw8b:
+; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
+ %tmp3 = extractelement <8 x i8> %tmp1, i32 4
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = add i32 %tmp4, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @smovw4h(<4 x i16> %tmp1) {
+; CHECK-LABEL: smovw4h:
+; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+ %tmp4 = sext i16 %tmp3 to i32
+ %tmp5 = add i32 %tmp4, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @smovx8b(<8 x i8> %tmp1) {
+; CHECK-LABEL: smovx8b:
+; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
+ %tmp3 = extractelement <8 x i8> %tmp1, i32 6
+ %tmp4 = sext i8 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i32 @smovx4h(<4 x i16> %tmp1) {
+; CHECK-LABEL: smovx4h:
+; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+ %tmp3 = extractelement <4 x i16> %tmp1, i32 2
+ %tmp4 = sext i16 %tmp3 to i32
+ ret i32 %tmp4
+}
+
+define i64 @smovx2s(<2 x i32> %tmp1) {
+; CHECK-LABEL: smovx2s:
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp3 = extractelement <2 x i32> %tmp1, i32 1
+ %tmp4 = sext i32 %tmp3 to i64
+ ret i64 %tmp4
+}
+
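+; A shufflevector that replaces a single lane selects the ins (element copy) form.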
+define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
+; CHECK-LABEL: test_vcopy_lane_s8:
+; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+ %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
+ ret <8 x i8> %vset_lane
+}
+
+define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
+; CHECK-LABEL: test_vcopyq_laneq_s8:
+; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
+ %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
+ ret <16 x i8> %vset_lane
+}
+
+define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
+; CHECK-LABEL: test_vcopy_lane_swap_s8:
+; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
+ %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
+ ret <8 x i8> %vset_lane
+}
+
+define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
+; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
+; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
+ %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <16 x i8> %vset_lane
+}
+
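+; Splatting a scalar into every lane selects dup from a general-purpose register.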
+define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
+; CHECK-LABEL: test_vdup_n_u8:
+; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+ %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
+ %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
+ %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
+ %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
+ %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
+ %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
+ %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
+ %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
+ ret <8 x i8> %vecinit7.i
+}
+
+define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
+; CHECK-LABEL: test_vdup_n_u16:
+; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+ %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
+ ret <4 x i16> %vecinit3.i
+}
+
+define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
+; CHECK-LABEL: test_vdup_n_u32:
+; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
+ %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
+; CHECK-LABEL: test_vdup_n_u64:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
+ ret <1 x i64> %vecinit.i
+}
+
+define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
+; CHECK-LABEL: test_vdupq_n_u8:
+; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+ %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
+ %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
+ %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
+ %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
+ %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
+ %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
+ %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
+ %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
+ %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
+ %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
+ %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
+ %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
+ %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
+ %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
+ %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
+ %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
+ ret <16 x i8> %vecinit15.i
+}
+
+define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
+; CHECK-LABEL: test_vdupq_n_u16:
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+ %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
+ %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
+ %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
+ %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
+ %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
+ %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
+ %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
+ %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
+ ret <8 x i16> %vecinit7.i
+}
+
+define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
+; CHECK-LABEL: test_vdupq_n_u32:
+; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+ %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
+ %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
+ %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
+ %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
+ ret <4 x i32> %vecinit3.i
+}
+
+define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
+; CHECK-LABEL: test_vdupq_n_u64:
+; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
+ %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
+ %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
+ ret <2 x i64> %vecinit1.i
+}
+
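+; Splat shuffles select the lane-indexed form of dup.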
+define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
+; CHECK-LABEL: test_vdup_lane_s8:
+; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+ %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i8> %shuffle
+}
+
+define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
+; CHECK-LABEL: test_vdup_lane_s16:
+; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+ %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i16> %shuffle
+}
+
+define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
+; CHECK-LABEL: test_vdup_lane_s32:
+; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+ %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %shuffle
+}
+
+define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
+; CHECK-LABEL: test_vdupq_lane_s8:
+; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+ %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
+; CHECK-LABEL: test_vdupq_lane_s16:
+; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+ %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
+; CHECK-LABEL: test_vdupq_lane_s32:
+; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+ %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %shuffle
+}
+
+define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
+; CHECK-LABEL: test_vdupq_lane_s64:
+; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+ %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %shuffle
+}
+
+define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
+; CHECK-LABEL: test_vdup_laneq_s8:
+; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
+ %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i8> %shuffle
+}
+
+define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
+; CHECK-LABEL: test_vdup_laneq_s16:
+; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
+ %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i16> %shuffle
+}
+
+define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
+; CHECK-LABEL: test_vdup_laneq_s32:
+; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+ %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %shuffle
+}
+
+define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
+; CHECK-LABEL: test_vdupq_laneq_s8:
+; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
+ %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
+; CHECK-LABEL: test_vdupq_laneq_s16:
+; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
+ %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
+; CHECK-LABEL: test_vdupq_laneq_s32:
+; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+ %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %shuffle
+}
+
+define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
+; CHECK-LABEL: test_vdupq_laneq_s64:
+; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+ %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %shuffle
+}
+
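+; Bitcasts between 64-bit vectors and i64 are plain register-class moves (fmov).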
+define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
+; CHECK-LABEL: test_bitcastv8i8toi64:
+ %res = bitcast <8 x i8> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
+; CHECK-LABEL: test_bitcastv4i16toi64:
+ %res = bitcast <4 x i16> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
+; CHECK-LABEL: test_bitcastv2i32toi64:
+ %res = bitcast <2 x i32> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
+; CHECK-LABEL: test_bitcastv2f32toi64:
+ %res = bitcast <2 x float> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
+; CHECK-LABEL: test_bitcastv1i64toi64:
+ %res = bitcast <1 x i64> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
+; CHECK-LABEL: test_bitcastv1f64toi64:
+ %res = bitcast <1 x double> %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov8i8:
+ %res = bitcast i64 %in to <8 x i8>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <8 x i8> %res
+}
+
+define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov4i16:
+ %res = bitcast i64 %in to <4 x i16>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <4 x i16> %res
+}
+
+define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov2i32:
+ %res = bitcast i64 %in to <2 x i32>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <2 x i32> %res
+}
+
+define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov2f32:
+ %res = bitcast i64 %in to <2 x float>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <2 x float> %res
+}
+
+define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov1i64:
+ %res = bitcast i64 %in to <1 x i64>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <1 x i64> %res
+}
+
+define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
+; CHECK-LABEL: test_bitcasti64tov1f64:
+ %res = bitcast i64 %in to <1 x double>
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret <1 x double> %res
+}
+
+define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
+; CHECK-LABEL: test_bitcastv8i8tov1f64:
+; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
+ %sub.i = sub <8 x i8> zeroinitializer, %a
+ %1 = bitcast <8 x i8> %sub.i to <1 x double>
+ %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
+ ret <1 x i64> %vcvt.i
+}
+
+define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
+; CHECK-LABEL: test_bitcastv4i16tov1f64:
+; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
+ %sub.i = sub <4 x i16> zeroinitializer, %a
+ %1 = bitcast <4 x i16> %sub.i to <1 x double>
+ %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
+ ret <1 x i64> %vcvt.i
+}
+
+define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
+; CHECK-LABEL: test_bitcastv2i32tov1f64:
+; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
+ %sub.i = sub <2 x i32> zeroinitializer, %a
+ %1 = bitcast <2 x i32> %sub.i to <1 x double>
+ %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
+ ret <1 x i64> %vcvt.i
+}
+
+define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
+; CHECK-LABEL: test_bitcastv1i64tov1f64:
+; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
+ %sub.i = sub <1 x i64> zeroinitializer, %a
+ %1 = bitcast <1 x i64> %sub.i to <1 x double>
+ %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
+ ret <1 x i64> %vcvt.i
+}
+
+define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
+; CHECK-LABEL: test_bitcastv2f32tov1f64:
+; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
+ %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
+ %1 = bitcast <2 x float> %sub.i to <1 x double>
+ %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
+ ret <1 x i64> %vcvt.i
+}
+
+define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
+; CHECK-LABEL: test_bitcastv1f64tov8i8:
+; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
+; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %vcvt.i = sitofp <1 x i64> %a to <1 x double>
+ %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
+ %sub.i = sub <8 x i8> zeroinitializer, %1
+ ret <8 x i8> %sub.i
+}
+
+define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
+; CHECK-LABEL: test_bitcastv1f64tov4i16:
+; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
+; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %vcvt.i = sitofp <1 x i64> %a to <1 x double>
+ %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
+ %sub.i = sub <4 x i16> zeroinitializer, %1
+ ret <4 x i16> %sub.i
+}
+
+define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
+; CHECK-LABEL: test_bitcastv1f64tov2i32:
+; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
+; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %vcvt.i = sitofp <1 x i64> %a to <1 x double>
+ %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
+ %sub.i = sub <2 x i32> zeroinitializer, %1
+ ret <2 x i32> %sub.i
+}
+
+define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
+; CHECK-LABEL: test_bitcastv1f64tov1i64:
+; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
+; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
+ %vcvt.i = sitofp <1 x i64> %a to <1 x double>
+ %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
+ %sub.i = sub <1 x i64> zeroinitializer, %1
+ ret <1 x i64> %sub.i
+}
+
+define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
+; CHECK-LABEL: test_bitcastv1f64tov2f32:
+; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
+; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %vcvt.i = sitofp <1 x i64> %a to <1 x double>
+ %1 = bitcast <1 x double> %vcvt.i to <2 x float>
+ %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
+ ret <2 x float> %sub.i
+}
+
+; Test insert element into an undef vector
+define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
+; CHECK-LABEL: scalar_to_vector.v8i8:
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ %b = insertelement <8 x i8> undef, i8 %a, i32 0
+ ret <8 x i8> %b
+}
+
+define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
+; CHECK-LABEL: scalar_to_vector.v16i8:
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ %b = insertelement <16 x i8> undef, i8 %a, i32 0
+ ret <16 x i8> %b
+}
+
+define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
+; CHECK-LABEL: scalar_to_vector.v4i16:
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ %b = insertelement <4 x i16> undef, i16 %a, i32 0
+ ret <4 x i16> %b
+}
+
+define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
+; CHECK-LABEL: scalar_to_vector.v8i16:
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ %b = insertelement <8 x i16> undef, i16 %a, i32 0
+ ret <8 x i16> %b
+}
+
+define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
+; CHECK-LABEL: scalar_to_vector.v2i32:
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ %b = insertelement <2 x i32> undef, i32 %a, i32 0
+ ret <2 x i32> %b
+}
+
+define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
+; CHECK-LABEL: scalar_to_vector.v4i32:
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ %b = insertelement <4 x i32> undef, i32 %a, i32 0
+ ret <4 x i32> %b
+}
+
+define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
+; CHECK-LABEL: scalar_to_vector.v2i64:
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ %b = insertelement <2 x i64> undef, i64 %a, i32 0
+ ret <2 x i64> %b
+}
+
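+; Splatting the single element of a <1 x iN> vector also selects dup.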
+define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
+; CHECK-LABEL: testDUP.v1i8:
+; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+ %b = extractelement <1 x i8> %a, i32 0
+ %c = insertelement <8 x i8> undef, i8 %b, i32 0
+ %d = insertelement <8 x i8> %c, i8 %b, i32 1
+ %e = insertelement <8 x i8> %d, i8 %b, i32 2
+ %f = insertelement <8 x i8> %e, i8 %b, i32 3
+ %g = insertelement <8 x i8> %f, i8 %b, i32 4
+ %h = insertelement <8 x i8> %g, i8 %b, i32 5
+ %i = insertelement <8 x i8> %h, i8 %b, i32 6
+ %j = insertelement <8 x i8> %i, i8 %b, i32 7
+ ret <8 x i8> %j
+}
+
+define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
+; CHECK-LABEL: testDUP.v1i16:
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+ %b = extractelement <1 x i16> %a, i32 0
+ %c = insertelement <8 x i16> undef, i16 %b, i32 0
+ %d = insertelement <8 x i16> %c, i16 %b, i32 1
+ %e = insertelement <8 x i16> %d, i16 %b, i32 2
+ %f = insertelement <8 x i16> %e, i16 %b, i32 3
+ %g = insertelement <8 x i16> %f, i16 %b, i32 4
+ %h = insertelement <8 x i16> %g, i16 %b, i32 5
+ %i = insertelement <8 x i16> %h, i16 %b, i32 6
+ %j = insertelement <8 x i16> %i, i16 %b, i32 7
+ ret <8 x i16> %j
+}
+
+define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
+; CHECK-LABEL: testDUP.v1i32:
+; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+ %b = extractelement <1 x i32> %a, i32 0
+ %c = insertelement <4 x i32> undef, i32 %b, i32 0
+ %d = insertelement <4 x i32> %c, i32 %b, i32 1
+ %e = insertelement <4 x i32> %d, i32 %b, i32 2
+ %f = insertelement <4 x i32> %e, i32 %b, i32 3
+ ret <4 x i32> %f
+}
+
+define <8 x i8> @getl(<16 x i8> %x) #0 {
+; CHECK-LABEL: getl:
+; CHECK: ret
+ %vecext = extractelement <16 x i8> %x, i32 0
+ %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <16 x i8> %x, i32 1
+ %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <16 x i8> %x, i32 2
+ %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <16 x i8> %x, i32 3
+ %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <16 x i8> %x, i32 4
+ %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <16 x i8> %x, i32 5
+ %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <16 x i8> %x, i32 6
+ %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <16 x i8> %x, i32 7
+ %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
+ ret <8 x i8> %vecinit14
+}
+
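+; extract + trunc + splat selects dup on the narrower lane type, with the lane
+; index rescaled (e.g. the low half of s[1] is h[2]).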
+define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
+; CHECK-LABEL: test_dup_v2i32_v4i16:
+; CHECK: dup v0.4h, v0.h[2]
+entry:
+ %x = extractelement <2 x i32> %a, i32 1
+ %vget_lane = trunc i32 %x to i16
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ ret <4 x i16> %vecinit3.i
+}
+
+define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
+; CHECK-LABEL: test_dup_v4i32_v8i16:
+; CHECK: dup v0.8h, v0.h[6]
+entry:
+ %x = extractelement <4 x i32> %a, i32 3
+ %vget_lane = trunc i32 %x to i16
+ %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
+ %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
+ %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
+ %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
+ ret <8 x i16> %vecinit7.i
+}
+
+define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
+; CHECK-LABEL: test_dup_v1i64_v4i16:
+; CHECK: dup v0.4h, v0.h[0]
+entry:
+ %x = extractelement <1 x i64> %a, i32 0
+ %vget_lane = trunc i64 %x to i16
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ ret <4 x i16> %vecinit3.i
+}
+
+define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
+; CHECK-LABEL: test_dup_v1i64_v2i32:
+; CHECK: dup v0.2s, v0.s[0]
+entry:
+ %x = extractelement <1 x i64> %a, i32 0
+ %vget_lane = trunc i64 %x to i32
+ %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
+; CHECK-LABEL: test_dup_v2i64_v8i16:
+; CHECK: dup v0.8h, v0.h[4]
+entry:
+ %x = extractelement <2 x i64> %a, i32 1
+ %vget_lane = trunc i64 %x to i16
+ %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
+ %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
+ %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
+ %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
+ ret <8 x i16> %vecinit7.i
+}
+
+define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
+; CHECK-LABEL: test_dup_v2i64_v4i32:
+; CHECK: dup v0.4s, v0.s[2]
+entry:
+ %x = extractelement <2 x i64> %a, i32 1
+ %vget_lane = trunc i64 %x to i32
+ %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
+ ret <4 x i32> %vecinit3.i
+}
+
+define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
+; CHECK-LABEL: test_dup_v4i32_v4i16:
+; CHECK: dup v0.4h, v0.h[2]
+entry:
+ %x = extractelement <4 x i32> %a, i32 1
+ %vget_lane = trunc i32 %x to i16
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ ret <4 x i16> %vecinit3.i
+}
+
+define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
+; CHECK-LABEL: test_dup_v2i64_v4i16:
+; CHECK: dup v0.4h, v0.h[0]
+entry:
+ %x = extractelement <2 x i64> %a, i32 0
+ %vget_lane = trunc i64 %x to i16
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ ret <4 x i16> %vecinit3.i
+}
+
+define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
+; CHECK-LABEL: test_dup_v2i64_v2i32:
+; CHECK: dup v0.2s, v0.s[0]
+entry:
+ %x = extractelement <2 x i64> %a, i32 0
+ %vget_lane = trunc i64 %x to i32
+ %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
+; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
+; CHECK-NEXT: ret
+entry:
+ %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
+ %1 = insertelement <1 x float> undef, float %0, i32 0
+ %2 = extractelement <1 x float> %1, i32 0
+ %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
+ ret <2 x float> %vecinit1.i
+}
+
+define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
+; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
+; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
+; CHECK-NEXT: ret
+entry:
+ %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
+ %1 = insertelement <1 x float> undef, float %0, i32 0
+ %2 = extractelement <1 x float> %1, i32 0
+ %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
+ ret <4 x float> %vecinit1.i
+}
+
+declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
+
+define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
+; CHECK-LABEL: test_concat_undef_v1i32:
+; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+entry:
+ %0 = extractelement <2 x i32> %a, i32 0
+ %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
+
+define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
+; CHECK-LABEL: test_concat_v1i32_undef:
+; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-NEXT: ret
+entry:
+ %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
+ %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
+ ret <2 x i32> %vecinit.i432
+}
+
+define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
+; CHECK-LABEL: test_concat_same_v1i32_v1i32:
+; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
+entry:
+ %0 = extractelement <2 x i32> %a, i32 0
+ %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
+ ret <2 x i32> %vecinit1.i
+}
+
+define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
+; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
+; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
+; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+entry:
+ %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
+ %d = insertelement <2 x i32> undef, i32 %c, i32 0
+ %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
+ %f = insertelement <2 x i32> undef, i32 %e, i32 0
+ %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
+ ret <2 x i32> %h
+}
+
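+; Concatenating two 64-bit halves inserts the second half with ins v.d[1]; the
+; full v2i64 concats below use zip1 .2d instead.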
+define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %vecinit30
+}
+
+define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <8 x i8> %x, i32 0
+ %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <8 x i8> %x, i32 1
+ %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <8 x i8> %x, i32 2
+ %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <8 x i8> %x, i32 3
+ %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <8 x i8> %x, i32 4
+ %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <8 x i8> %x, i32 5
+ %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <8 x i8> %x, i32 6
+ %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <8 x i8> %x, i32 7
+ %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
+ %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %vecinit30
+}
+
+define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <16 x i8> %x, i32 0
+ %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <16 x i8> %x, i32 1
+ %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <16 x i8> %x, i32 2
+ %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <16 x i8> %x, i32 3
+ %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <16 x i8> %x, i32 4
+ %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <16 x i8> %x, i32 5
+ %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <16 x i8> %x, i32 6
+ %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <16 x i8> %x, i32 7
+ %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
+ %vecext15 = extractelement <8 x i8> %y, i32 0
+ %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
+ %vecext17 = extractelement <8 x i8> %y, i32 1
+ %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
+ %vecext19 = extractelement <8 x i8> %y, i32 2
+ %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
+ %vecext21 = extractelement <8 x i8> %y, i32 3
+ %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
+ %vecext23 = extractelement <8 x i8> %y, i32 4
+ %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
+ %vecext25 = extractelement <8 x i8> %y, i32 5
+ %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
+ %vecext27 = extractelement <8 x i8> %y, i32 6
+ %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
+ %vecext29 = extractelement <8 x i8> %y, i32 7
+ %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
+ ret <16 x i8> %vecinit30
+}
+
+define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
+; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <8 x i8> %x, i32 0
+ %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
+ %vecext1 = extractelement <8 x i8> %x, i32 1
+ %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
+ %vecext3 = extractelement <8 x i8> %x, i32 2
+ %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
+ %vecext5 = extractelement <8 x i8> %x, i32 3
+ %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
+ %vecext7 = extractelement <8 x i8> %x, i32 4
+ %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
+ %vecext9 = extractelement <8 x i8> %x, i32 5
+ %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
+ %vecext11 = extractelement <8 x i8> %x, i32 6
+ %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
+ %vecext13 = extractelement <8 x i8> %x, i32 7
+ %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
+ %vecext15 = extractelement <8 x i8> %y, i32 0
+ %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
+ %vecext17 = extractelement <8 x i8> %y, i32 1
+ %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
+ %vecext19 = extractelement <8 x i8> %y, i32 2
+ %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
+ %vecext21 = extractelement <8 x i8> %y, i32 3
+ %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
+ %vecext23 = extractelement <8 x i8> %y, i32 4
+ %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
+ %vecext25 = extractelement <8 x i8> %y, i32 5
+ %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
+ %vecext27 = extractelement <8 x i8> %y, i32 6
+ %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
+ %vecext29 = extractelement <8 x i8> %y, i32 7
+ %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
+ ret <16 x i8> %vecinit30
+}
+
+define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %vecinit14
+}
+
+define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <4 x i16> %x, i32 0
+ %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
+ %vecext1 = extractelement <4 x i16> %x, i32 1
+ %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
+ %vecext3 = extractelement <4 x i16> %x, i32 2
+ %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
+ %vecext5 = extractelement <4 x i16> %x, i32 3
+ %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
+ %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %vecinit14
+}
+
+define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <8 x i16> %x, i32 0
+ %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
+ %vecext1 = extractelement <8 x i16> %x, i32 1
+ %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
+ %vecext3 = extractelement <8 x i16> %x, i32 2
+ %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
+ %vecext5 = extractelement <8 x i16> %x, i32 3
+ %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
+ %vecext7 = extractelement <4 x i16> %y, i32 0
+ %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
+ %vecext9 = extractelement <4 x i16> %y, i32 1
+ %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
+ %vecext11 = extractelement <4 x i16> %y, i32 2
+ %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
+ %vecext13 = extractelement <4 x i16> %y, i32 3
+ %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
+ ret <8 x i16> %vecinit14
+}
+
+define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
+; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <4 x i16> %x, i32 0
+ %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
+ %vecext1 = extractelement <4 x i16> %x, i32 1
+ %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
+ %vecext3 = extractelement <4 x i16> %x, i32 2
+ %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
+ %vecext5 = extractelement <4 x i16> %x, i32 3
+ %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
+ %vecext7 = extractelement <4 x i16> %y, i32 0
+ %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
+ %vecext9 = extractelement <4 x i16> %y, i32 1
+ %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
+ %vecext11 = extractelement <4 x i16> %y, i32 2
+ %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
+ %vecext13 = extractelement <4 x i16> %y, i32 3
+ %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
+ ret <8 x i16> %vecinit14
+}
+
+define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x i32> %vecinit6
+}
+
+define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <2 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <2 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x i32> %vecinit6
+}
+
+define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <4 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecext3 = extractelement <2 x i32> %y, i32 0
+ %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
+ %vecext5 = extractelement <2 x i32> %y, i32 1
+ %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
+ ret <4 x i32> %vecinit6
+}
+
+define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
+; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %vecinit6
+}
+
+define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %vecinit2
+}
+
+define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+ %vecext = extractelement <1 x i64> %x, i32 0
+ %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
+ %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %vecinit2
+}
+
+define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <2 x i64> %x, i32 0
+ %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
+ %vecext1 = extractelement <1 x i64> %y, i32 0
+ %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
+ ret <2 x i64> %vecinit2
+}
+
+define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
+; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
+; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+entry:
+ %vecext = extractelement <1 x i64> %x, i32 0
+ %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
+ %vecext1 = extractelement <1 x i64> %y, i32 0
+ %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
+ ret <2 x i64> %vecinit2
+}
+
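+; Splats of constants materialize with movi instead of dup.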
+define <4 x i16> @concat_vector_v4i16_const() {
+; CHECK-LABEL: concat_vector_v4i16_const:
+; CHECK: movi {{d[0-9]+}}, #0
+ %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i16> @concat_vector_v4i16_const_one() {
+; CHECK-LABEL: concat_vector_v4i16_const_one:
+; CHECK: movi {{v[0-9]+}}.4h, #0x1
+ %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i32> @concat_vector_v4i32_const() {
+; CHECK-LABEL: concat_vector_v4i32_const:
+; CHECK: movi {{v[0-9]+}}.2d, #0
+ %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %r
+}
+
+define <8 x i8> @concat_vector_v8i8_const() {
+; CHECK-LABEL: concat_vector_v8i8_const:
+; CHECK: movi {{d[0-9]+}}, #0
+ %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @concat_vector_v8i16_const() {
+; CHECK-LABEL: concat_vector_v8i16_const:
+; CHECK: movi {{v[0-9]+}}.2d, #0
+ %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <8 x i16> @concat_vector_v8i16_const_one() {
+; CHECK-LABEL: concat_vector_v8i16_const_one:
+; CHECK: movi {{v[0-9]+}}.8h, #0x1
+ %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <16 x i8> @concat_vector_v16i8_const() {
+; CHECK-LABEL: concat_vector_v16i8_const:
+; CHECK: movi {{v[0-9]+}}.2d, #0
+ %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %r
+}
+
+define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
+; CHECK-LABEL: concat_vector_v4i16:
+; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+ %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %r
+}
+
+define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
+; CHECK-LABEL: concat_vector_v4i32:
+; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+ %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %r
+}
+
+define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
+; CHECK-LABEL: concat_vector_v8i8:
+; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+ %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
+; CHECK-LABEL: concat_vector_v8i16:
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+ %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r
+}
+
+define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
+; CHECK-LABEL: concat_vector_v16i8:
+; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+ %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %r
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
new file mode 100644
index 0000000..276ac13
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; arm64 has a separate copy due to intrinsics
+
+define <4 x i32> @copyTuple.QPair(i32* %a, i32* %b) {
+; CHECK-LABEL: copyTuple.QPair:
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+entry:
+ %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, i32* %a)
+ %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
+ %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i64 1, i32* %b)
+ %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
+ ret <4 x i32> %vld1.fca.0.extract
+}
+
+define <4 x i32> @copyTuple.QTriple(i32* %a, i32* %b, <4 x i32> %c) {
+; CHECK-LABEL: copyTuple.QTriple:
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+entry:
+ %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
+ %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
+ %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i64 1, i32* %b)
+ %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
+ ret <4 x i32> %vld1.fca.0.extract
+}
+
+define <4 x i32> @copyTuple.QQuad(i32* %a, i32* %b, <4 x i32> %c) {
+; CHECK-LABEL: copyTuple.QQuad:
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+entry:
+ %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
+ %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
+ %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b)
+ %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
+ ret <4 x i32> %vld1.fca.0.extract
+}
+
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
diff --git a/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/test/CodeGen/AArch64/arm64-neon-mul-div.ll
new file mode 100644
index 0000000..720f3eb
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -0,0 +1,797 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; arm64 has its own copy of this because of the intrinsics
+
+define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: mul8xi8:
+; CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %tmp3 = mul <8 x i8> %A, %B;
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: mul16xi8:
+; CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %tmp3 = mul <16 x i8> %A, %B;
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: mul4xi16:
+; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+ %tmp3 = mul <4 x i16> %A, %B;
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: mul8xi16:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+ %tmp3 = mul <8 x i16> %A, %B;
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: mul2xi32:
+; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = mul <2 x i32> %A, %B;
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: mul4x32:
+; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = mul <4 x i32> %A, %B;
+ ret <4 x i32> %tmp3
+}
+
+define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
+; CHECK-LABEL: mul1xi64:
+; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+ %tmp3 = mul <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: mul2xi64:
+; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+ %tmp3 = mul <2 x i64> %A, %B;
+ ret <2 x i64> %tmp3
+}
+
+define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: mul2xfloat:
+; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = fmul <2 x float> %A, %B;
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: mul4xfloat:
+; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = fmul <4 x float> %A, %B;
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: mul2xdouble:
+; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = fmul <2 x double> %A, %B;
+ ret <2 x double> %tmp3
+}
+
+define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: div2xfloat:
+; CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+ %tmp3 = fdiv <2 x float> %A, %B;
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: div4xfloat:
+; CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+ %tmp3 = fdiv <4 x float> %A, %B;
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: div2xdouble:
+; CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+ %tmp3 = fdiv <2 x double> %A, %B;
+ ret <2 x double> %tmp3
+}
+
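+; There is no NEON integer divide instruction, so vector sdiv/udiv is
+; scalarized into one general-purpose-register divide per lane.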
+define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
+; CHECK-LABEL: sdiv1x8:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <1 x i8> %A, %B;
+ ret <1 x i8> %tmp3
+}
+
+define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: sdiv8x8:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <8 x i8> %A, %B;
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: sdiv16x8:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <16 x i8> %A, %B;
+ ret <16 x i8> %tmp3
+}
+
+define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
+; CHECK-LABEL: sdiv1x16:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <1 x i16> %A, %B;
+ ret <1 x i16> %tmp3
+}
+
+define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: sdiv4x16:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <4 x i16> %A, %B;
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: sdiv8x16:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <8 x i16> %A, %B;
+ ret <8 x i16> %tmp3
+}
+
+define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
+; CHECK-LABEL: sdiv1x32:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <1 x i32> %A, %B;
+ ret <1 x i32> %tmp3
+}
+
+define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: sdiv2x32:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <2 x i32> %A, %B;
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: sdiv4x32:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = sdiv <4 x i32> %A, %B;
+ ret <4 x i32> %tmp3
+}
+
+define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
+; CHECK-LABEL: sdiv1x64:
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = sdiv <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: sdiv2x64:
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = sdiv <2 x i64> %A, %B;
+ ret <2 x i64> %tmp3
+}
+
+define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
+; CHECK-LABEL: udiv1x8:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <1 x i8> %A, %B;
+ ret <1 x i8> %tmp3
+}
+
+define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: udiv8x8:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <8 x i8> %A, %B;
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: udiv16x8:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <16 x i8> %A, %B;
+ ret <16 x i8> %tmp3
+}
+
+define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
+; CHECK-LABEL: udiv1x16:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <1 x i16> %A, %B;
+ ret <1 x i16> %tmp3
+}
+
+define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: udiv4x16:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <4 x i16> %A, %B;
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: udiv8x16:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <8 x i16> %A, %B;
+ ret <8 x i16> %tmp3
+}
+
+define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
+; CHECK-LABEL: udiv1x32:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <1 x i32> %A, %B;
+ ret <1 x i32> %tmp3
+}
+
+define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: udiv2x32:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <2 x i32> %A, %B;
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: udiv4x32:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = udiv <4 x i32> %A, %B;
+ ret <4 x i32> %tmp3
+}
+
+define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
+; CHECK-LABEL: udiv1x64:
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = udiv <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: udiv2x64:
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = udiv <2 x i64> %A, %B;
+ ret <2 x i64> %tmp3
+}
+
+define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
+; CHECK-LABEL: srem1x8:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <1 x i8> %A, %B;
+ ret <1 x i8> %tmp3
+}
+
+define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: srem8x8:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <8 x i8> %A, %B;
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: srem16x8:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <16 x i8> %A, %B;
+ ret <16 x i8> %tmp3
+}
+
+define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
+; CHECK-LABEL: srem1x16:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <1 x i16> %A, %B;
+ ret <1 x i16> %tmp3
+}
+
+define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: srem4x16:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <4 x i16> %A, %B;
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: srem8x16:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <8 x i16> %A, %B;
+ ret <8 x i16> %tmp3
+}
+
+define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
+; CHECK-LABEL: srem1x32:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <1 x i32> %A, %B;
+ ret <1 x i32> %tmp3
+}
+
+define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: srem2x32:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <2 x i32> %A, %B;
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: srem4x32:
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = srem <4 x i32> %A, %B;
+ ret <4 x i32> %tmp3
+}
+
+define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
+; CHECK-LABEL: srem1x64:
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = srem <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: srem2x64:
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = srem <2 x i64> %A, %B;
+ ret <2 x i64> %tmp3
+}
+
+define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
+; CHECK-LABEL: urem1x8:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <1 x i8> %A, %B;
+ ret <1 x i8> %tmp3
+}
+
+define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: urem8x8:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <8 x i8> %A, %B;
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: urem16x8:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <16 x i8> %A, %B;
+ ret <16 x i8> %tmp3
+}
+
+define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
+; CHECK-LABEL: urem1x16:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <1 x i16> %A, %B;
+ ret <1 x i16> %tmp3
+}
+
+define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: urem4x16:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <4 x i16> %A, %B;
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: urem8x16:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <8 x i16> %A, %B;
+ ret <8 x i16> %tmp3
+}
+
+define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
+; CHECK-LABEL: urem1x32:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <1 x i32> %A, %B;
+ ret <1 x i32> %tmp3
+}
+
+define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: urem2x32:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <2 x i32> %A, %B;
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: urem4x32:
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ %tmp3 = urem <4 x i32> %A, %B;
+ ret <4 x i32> %tmp3
+}
+
+define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
+; CHECK-LABEL: urem1x64:
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = urem <1 x i64> %A, %B;
+ ret <1 x i64> %tmp3
+}
+
+define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: urem2x64:
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ %tmp3 = urem <2 x i64> %A, %B;
+ ret <2 x i64> %tmp3
+}
+
+define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: frem2f32:
+; CHECK: bl fmodf
+; CHECK: bl fmodf
+ %tmp3 = frem <2 x float> %A, %B;
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: frem4f32:
+; CHECK: bl fmodf
+; CHECK: bl fmodf
+; CHECK: bl fmodf
+; CHECK: bl fmodf
+ %tmp3 = frem <4 x float> %A, %B;
+ ret <4 x float> %tmp3
+}
+
+define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) {
+; CHECK-LABEL: frem1d64:
+; CHECK: bl fmod
+ %tmp3 = frem <1 x double> %A, %B;
+ ret <1 x double> %tmp3
+}
+
+define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: frem2d64:
+; CHECK: bl fmod
+; CHECK: bl fmod
+ %tmp3 = frem <2 x double> %A, %B;
+ ret <2 x double> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8>, <8 x i8>)
+declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>)
+
+define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: poly_mulv8i8:
+ %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
+; CHECK: pmul v0.8b, v0.8b, v1.8b
+ ret <8 x i8> %prod
+}
+
+define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
+; CHECK-LABEL: poly_mulv16i8:
+ %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
+; CHECK: pmul v0.16b, v0.16b, v1.16b
+ ret <16 x i8> %prod
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqdmulh_v4i16:
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqdmulh_v8i16:
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqdmulh_v2i32:
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqdmulh_v4i32:
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %prod
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmulh_v4i16:
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
+ ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmulh_v8i16:
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
+ ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmulh_v2i32:
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
+ ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmulh_v4i32:
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
+ ret <4 x i32> %prod
+}
+
+declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
+
+define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
+; CHECK-LABEL: fmulx_v2f32:
+; Using registers other than v0, v1 and v2 is possible, but would be odd.
+; CHECK: fmulx v0.2s, v0.2s, v1.2s
+ %val = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
+ ret <2 x float> %val
+}
+
+define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
+; CHECK-LABEL: fmulx_v4f32:
+; Using registers other than v0, v1 and v2 is possible, but would be odd.
+; CHECK: fmulx v0.4s, v0.4s, v1.4s
+ %val = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
+ ret <4 x float> %val
+}
+
+define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-LABEL: fmulx_v2f64:
+; Using registers other than v0, v1 and v2 is possible, but would be odd.
+; CHECK: fmulx v0.2d, v0.2d, v1.2d
+ %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
+ ret <2 x double> %val
+}
+
diff --git a/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
new file mode 100644
index 0000000..92ed239
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
+ ; CHECK-LABEL: test_fmul_lane_ss2S
+ ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1
+ %tmp2 = fmul float %a, %tmp1;
+ ret float %tmp2;
+}
+
+define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
+ ; CHECK-LABEL: test_fmul_lane_ss2S_swap
+ ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1
+ %tmp2 = fmul float %tmp1, %a;
+ ret float %tmp2;
+}
+
+
+define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmul_lane_ss4S
+ ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = fmul float %a, %tmp1;
+ ret float %tmp2;
+}
+
+define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmul_lane_ss4S_swap
+ ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = fmul float %tmp1, %a;
+ ret float %tmp2;
+}
+
+
+define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
+ ; CHECK-LABEL: test_fmul_lane_ddD
+ ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}}
+ %tmp1 = extractelement <1 x double> %v, i32 0
+ %tmp2 = fmul double %a, %tmp1;
+ ret double %tmp2;
+}
+
+
+
+define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmul_lane_dd2D
+ ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = fmul double %a, %tmp1;
+ ret double %tmp2;
+}
+
+
+define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmul_lane_dd2D_swap
+ ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = fmul double %tmp1, %a;
+ ret double %tmp2;
+}
+
+declare float @llvm.aarch64.neon.fmulx.f32(float, float)
+
+define float @test_fmulx_lane_f32(float %a, <2 x float> %v) {
+ ; CHECK-LABEL: test_fmulx_lane_f32
+ ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1
+ %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1)
+ ret float %tmp2;
+}
+
+define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmulx_laneq_f32
+ ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %tmp1)
+ ret float %tmp2;
+}
+
+define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmulx_laneq_f32_swap
+ ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = call float @llvm.aarch64.neon.fmulx.f32(float %tmp1, float %a)
+ ret float %tmp2;
+}
+
+declare double @llvm.aarch64.neon.fmulx.f64(double, double)
+
+define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
+ ; CHECK-LABEL: test_fmulx_lane_f64
+ ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0]|d[0-9]+}}
+ %tmp1 = extractelement <1 x double> %v, i32 0
+ %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1)
+ ret double %tmp2;
+}
+
+define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmulx_laneq_f64_0
+ ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+ %tmp1 = extractelement <2 x double> %v, i32 0
+ %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1)
+ ret double %tmp2;
+}
+
+
+define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmulx_laneq_f64_1
+ ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %tmp1)
+ ret double %tmp2;
+}
+
+define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmulx_laneq_f64_1_swap
+ ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = call double @llvm.aarch64.neon.fmulx.f64(double %tmp1, double %a)
+ ret double %tmp2;
+}
+
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
new file mode 100644
index 0000000..255b90d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -0,0 +1,206 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: test_select_cc_v8i8_i8:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
+; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
+; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
+ %cmp31 = icmp eq i8 %a, %b
+ %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
+ ret <8 x i8> %e
+}
+
+define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: test_select_cc_v8i8_f32:
+; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
+ %cmp31 = fcmp oeq float %a, %b
+ %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
+ ret <8 x i8> %e
+}
+
+define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
+; CHECK-LABEL: test_select_cc_v8i8_f64:
+; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
+; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
+ %cmp31 = fcmp oeq double %a, %b
+ %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
+ ret <8 x i8> %e
+}
+
+define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: test_select_cc_v16i8_i8:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
+; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
+; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
+ %cmp31 = icmp eq i8 %a, %b
+ %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
+ ret <16 x i8> %e
+}
+
+define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: test_select_cc_v16i8_f32:
+; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
+ %cmp31 = fcmp oeq float %a, %b
+ %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
+ ret <16 x i8> %e
+}
+
+define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: test_select_cc_v16i8_f64:
+; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
+ %cmp31 = fcmp oeq double %a, %b
+ %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
+ ret <16 x i8> %e
+}
+
+define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
+; CHECK-LABEL: test_select_cc_v4i16:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
+; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
+; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
+ %cmp31 = icmp eq i16 %a, %b
+ %e = select i1 %cmp31, <4 x i16> %c, <4 x i16> %d
+ ret <4 x i16> %e
+}
+
+define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
+; CHECK-LABEL: test_select_cc_v8i16:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
+; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
+; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
+ %cmp31 = icmp eq i16 %a, %b
+ %e = select i1 %cmp31, <8 x i16> %c, <8 x i16> %d
+ ret <8 x i16> %e
+}
+
+define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
+; CHECK-LABEL: test_select_cc_v2i32:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
+; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
+; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
+ %cmp31 = icmp eq i32 %a, %b
+ %e = select i1 %cmp31, <2 x i32> %c, <2 x i32> %d
+ ret <2 x i32> %e
+}
+
+define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-LABEL: test_select_cc_v4i32:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
+; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
+ %cmp31 = icmp eq i32 %a, %b
+ %e = select i1 %cmp31, <4 x i32> %c, <4 x i32> %d
+ ret <4 x i32> %e
+}
+
+define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
+; CHECK-LABEL: test_select_cc_v1i64:
+; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
+; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
+; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
+; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
+ %cmp31 = icmp eq i64 %a, %b
+ %e = select i1 %cmp31, <1 x i64> %c, <1 x i64> %d
+ ret <1 x i64> %e
+}
+
+define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
+; CHECK-LABEL: test_select_cc_v2i64:
+; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
+; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
+; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
+; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
+ %cmp31 = icmp eq i64 %a, %b
+ %e = select i1 %cmp31, <2 x i64> %c, <2 x i64> %d
+ ret <2 x i64> %e
+}
+
+define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
+; CHECK-LABEL: test_select_cc_v1f32:
+; CHECK: fcmp s0, s1
+; CHECK-NEXT: fcsel s0, s2, s3, eq
+ %cmp31 = fcmp oeq float %a, %b
+ %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
+ ret <1 x float> %e
+}
+
+define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: test_select_cc_v2f32:
+; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
+; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
+; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
+ %cmp31 = fcmp oeq float %a, %b
+ %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
+ ret <2 x float> %e
+}
+
+define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
+; CHECK-LABEL: test_select_cc_v4f32:
+; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
+; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
+ %cmp31 = fcmp oeq float %a, %b
+ %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
+ ret <4 x float> %e
+}
+
+define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
+; CHECK-LABEL: test_select_cc_v4f32_icmp:
+; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
+; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
+; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
+; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
+ %cmp31 = icmp eq i32 %a, %b
+ %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
+ ret <4 x float> %e
+}
+
+define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
+; CHECK-LABEL: test_select_cc_v1f64:
+; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
+; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
+ %cmp31 = fcmp oeq double %a, %b
+ %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
+ ret <1 x double> %e
+}
+
+define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
+; CHECK-LABEL: test_select_cc_v1f64_icmp:
+; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
+; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
+; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
+; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
+ %cmp31 = icmp eq i64 %a, %b
+ %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
+ ret <1 x double> %e
+}
+
+define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: test_select_cc_v2f64:
+; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
+; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
+ %cmp31 = fcmp oeq double %a, %b
+ %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
+ ret <2 x double> %e
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
new file mode 100644
index 0000000..cca6bfe
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -0,0 +1,482 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+
+%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
+%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
+%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
+%struct.int8x16x2_t = type { [2 x <16 x i8>] }
+%struct.int16x8x2_t = type { [2 x <8 x i16>] }
+%struct.int32x4x2_t = type { [2 x <4 x i32>] }
+%struct.int64x2x2_t = type { [2 x <2 x i64>] }
+%struct.float32x4x2_t = type { [2 x <4 x float>] }
+%struct.float64x2x2_t = type { [2 x <2 x double>] }
+%struct.int8x8x2_t = type { [2 x <8 x i8>] }
+%struct.int16x4x2_t = type { [2 x <4 x i16>] }
+%struct.int32x2x2_t = type { [2 x <2 x i32>] }
+%struct.int64x1x2_t = type { [2 x <1 x i64>] }
+%struct.float32x2x2_t = type { [2 x <2 x float>] }
+%struct.float64x1x2_t = type { [2 x <1 x double>] }
+%struct.int8x16x3_t = type { [3 x <16 x i8>] }
+%struct.int16x8x3_t = type { [3 x <8 x i16>] }
+%struct.int32x4x3_t = type { [3 x <4 x i32>] }
+%struct.int64x2x3_t = type { [3 x <2 x i64>] }
+%struct.float32x4x3_t = type { [3 x <4 x float>] }
+%struct.float64x2x3_t = type { [3 x <2 x double>] }
+%struct.int8x8x3_t = type { [3 x <8 x i8>] }
+%struct.int16x4x3_t = type { [3 x <4 x i16>] }
+%struct.int32x2x3_t = type { [3 x <2 x i32>] }
+%struct.int64x1x3_t = type { [3 x <1 x i64>] }
+%struct.float32x2x3_t = type { [3 x <2 x float>] }
+%struct.float64x1x3_t = type { [3 x <1 x double>] }
+%struct.int8x16x4_t = type { [4 x <16 x i8>] }
+%struct.int16x8x4_t = type { [4 x <8 x i16>] }
+%struct.int32x4x4_t = type { [4 x <4 x i32>] }
+%struct.int64x2x4_t = type { [4 x <2 x i64>] }
+%struct.float32x4x4_t = type { [4 x <4 x float>] }
+%struct.float64x2x4_t = type { [4 x <2 x double>] }
+%struct.int8x8x4_t = type { [4 x <8 x i8>] }
+%struct.int16x4x4_t = type { [4 x <4 x i16>] }
+%struct.int32x2x4_t = type { [4 x <2 x i32>] }
+%struct.int64x1x4_t = type { [4 x <1 x i64>] }
+%struct.float32x2x4_t = type { [4 x <2 x float>] }
+%struct.float64x1x4_t = type { [4 x <1 x double>] }
+
+define <16 x i8> @test_ld_from_pool_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_ld_from_pool_v16i8:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>
+ ret <16 x i8> %b
+}
+
+define <8 x i16> @test_ld_from_pool_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_ld_from_pool_v8i16:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+ ret <8 x i16> %b
+}
+
+define <4 x i32> @test_ld_from_pool_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_ld_from_pool_v4i32:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %b
+}
+
+define <2 x i64> @test_ld_from_pool_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_ld_from_pool_v2i64:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <2 x i64> %a, <i64 1, i64 2>
+ ret <2 x i64> %b
+}
+
+define <4 x float> @test_ld_from_pool_v4f32(<4 x float> %a) {
+; CHECK-LABEL: test_ld_from_pool_v4f32:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
+ ret <4 x float> %b
+}
+
+define <2 x double> @test_ld_from_pool_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_ld_from_pool_v2f64:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = fadd <2 x double> %a, <double 1.0, double 2.0>
+ ret <2 x double> %b
+}
+
+define <8 x i8> @test_ld_from_pool_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_ld_from_pool_v8i8:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+ ret <8 x i8> %b
+}
+
+define <4 x i16> @test_ld_from_pool_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_ld_from_pool_v4i16:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
+ ret <4 x i16> %b
+}
+
+define <2 x i32> @test_ld_from_pool_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_ld_from_pool_v2i32:
+; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+entry:
+ %b = add <2 x i32> %a, <i32 1, i32 2>
+ ret <2 x i32> %b
+}
+
+define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld1q_dup_s8:
+; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0]
+entry:
+ %0 = load i8* %a, align 1
+ %1 = insertelement <16 x i8> undef, i8 %0, i32 0
+ %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %lane
+}
+
+define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld1q_dup_s16:
+; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0]
+entry:
+ %0 = load i16* %a, align 2
+ %1 = insertelement <8 x i16> undef, i16 %0, i32 0
+ %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %lane
+}
+
+define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld1q_dup_s32:
+; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
+entry:
+ %0 = load i32* %a, align 4
+ %1 = insertelement <4 x i32> undef, i32 %0, i32 0
+ %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %lane
+}
+
+define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld1q_dup_s64:
+; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
+entry:
+ %0 = load i64* %a, align 8
+ %1 = insertelement <2 x i64> undef, i64 %0, i32 0
+ %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %lane
+}
+
+define <4 x float> @test_vld1q_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld1q_dup_f32:
+; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
+entry:
+ %0 = load float* %a, align 4
+ %1 = insertelement <4 x float> undef, float %0, i32 0
+ %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %lane
+}
+
+define <2 x double> @test_vld1q_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld1q_dup_f64:
+; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
+entry:
+ %0 = load double* %a, align 8
+ %1 = insertelement <2 x double> undef, double %0, i32 0
+ %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %lane
+}
+
+define <8 x i8> @test_vld1_dup_s8(i8* %a) {
+; CHECK-LABEL: test_vld1_dup_s8:
+; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0]
+entry:
+ %0 = load i8* %a, align 1
+ %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+ %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+ ret <8 x i8> %lane
+}
+
+define <4 x i16> @test_vld1_dup_s16(i16* %a) {
+; CHECK-LABEL: test_vld1_dup_s16:
+; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0]
+entry:
+ %0 = load i16* %a, align 2
+ %1 = insertelement <4 x i16> undef, i16 %0, i32 0
+ %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
+ ret <4 x i16> %lane
+}
+
+define <2 x i32> @test_vld1_dup_s32(i32* %a) {
+; CHECK-LABEL: test_vld1_dup_s32:
+; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
+entry:
+ %0 = load i32* %a, align 4
+ %1 = insertelement <2 x i32> undef, i32 %0, i32 0
+ %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
+ ret <2 x i32> %lane
+}
+
+define <1 x i64> @test_vld1_dup_s64(i64* %a) {
+; CHECK-LABEL: test_vld1_dup_s64:
+; CHECK: ldr {{d[0-9]+}}, [x0]
+entry:
+ %0 = load i64* %a, align 8
+ %1 = insertelement <1 x i64> undef, i64 %0, i32 0
+ ret <1 x i64> %1
+}
+
+define <2 x float> @test_vld1_dup_f32(float* %a) {
+; CHECK-LABEL: test_vld1_dup_f32:
+; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
+entry:
+ %0 = load float* %a, align 4
+ %1 = insertelement <2 x float> undef, float %0, i32 0
+ %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %lane
+}
+
+define <1 x double> @test_vld1_dup_f64(double* %a) {
+; CHECK-LABEL: test_vld1_dup_f64:
+; CHECK: ldr {{d[0-9]+}}, [x0]
+entry:
+ %0 = load double* %a, align 8
+ %1 = insertelement <1 x double> undef, double %0, i32 0
+ ret <1 x double> %1
+}
+
+define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
+; As there is a store operation depending on %1, the LD1R pattern can't be selected.
+; LDR and FMOV should be emitted instead.
+; CHECK-LABEL: testDUP.v1i64:
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
+; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}}
+; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}]
+ %1 = load i64* %a, align 8
+ store i64 %1, i64* %b, align 8
+ %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
+ ret <1 x i64> %vecinit.i
+}
+
+define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
+; As there is a store operation depending on %1, the LD1R pattern can't be selected.
+; LDR and FMOV should be emitted instead.
+; CHECK-LABEL: testDUP.v1f64:
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
+ %1 = load double* %a, align 8
+ store double %1, double* %b, align 8
+ %vecinit.i = insertelement <1 x double> undef, double %1, i32 0
+ ret <1 x double> %vecinit.i
+}
+
+define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vld1q_lane_s8:
+; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i8* %a, align 1
+ %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
+ ret <16 x i8> %vld1_lane
+}
+
+define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vld1q_lane_s16:
+; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i16* %a, align 2
+ %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
+ ret <8 x i16> %vld1_lane
+}
+
+define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vld1q_lane_s32:
+; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i32* %a, align 4
+ %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
+ ret <4 x i32> %vld1_lane
+}
+
+define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vld1q_lane_s64:
+; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i64* %a, align 8
+ %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
+ ret <2 x i64> %vld1_lane
+}
+
+define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
+; CHECK-LABEL: test_vld1q_lane_f32:
+; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load float* %a, align 4
+ %vld1_lane = insertelement <4 x float> %b, float %0, i32 3
+ ret <4 x float> %vld1_lane
+}
+
+define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
+; CHECK-LABEL: test_vld1q_lane_f64:
+; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load double* %a, align 8
+ %vld1_lane = insertelement <2 x double> %b, double %0, i32 1
+ ret <2 x double> %vld1_lane
+}
+
+define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vld1_lane_s8:
+; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i8* %a, align 1
+ %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
+ ret <8 x i8> %vld1_lane
+}
+
+define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vld1_lane_s16:
+; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i16* %a, align 2
+ %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
+ ret <4 x i16> %vld1_lane
+}
+
+define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vld1_lane_s32:
+; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load i32* %a, align 4
+ %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
+ ret <2 x i32> %vld1_lane
+}
+
+define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
+; CHECK-LABEL: test_vld1_lane_s64:
+; CHECK: ldr {{d[0-9]+}}, [x0]
+entry:
+ %0 = load i64* %a, align 8
+ %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
+ ret <1 x i64> %vld1_lane
+}
+
+define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
+; CHECK-LABEL: test_vld1_lane_f32:
+; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = load float* %a, align 4
+ %vld1_lane = insertelement <2 x float> %b, float %0, i32 1
+ ret <2 x float> %vld1_lane
+}
+
+define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
+; CHECK-LABEL: test_vld1_lane_f64:
+; CHECK: ldr {{d[0-9]+}}, [x0]
+entry:
+ %0 = load double* %a, align 8
+ %vld1_lane = insertelement <1 x double> undef, double %0, i32 0
+ ret <1 x double> %vld1_lane
+}
+
+define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vst1q_lane_s8:
+; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <16 x i8> %b, i32 15
+ store i8 %0, i8* %a, align 1
+ ret void
+}
+
+define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vst1q_lane_s16:
+; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <8 x i16> %b, i32 7
+ store i16 %0, i16* %a, align 2
+ ret void
+}
+
+define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vst1q_lane_s32:
+; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <4 x i32> %b, i32 3
+ store i32 %0, i32* %a, align 4
+ ret void
+}
+
+define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vst1q_lane_s64:
+; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <2 x i64> %b, i32 1
+ store i64 %0, i64* %a, align 8
+ ret void
+}
+
+define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
+; CHECK-LABEL: test_vst1q_lane_f32:
+; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <4 x float> %b, i32 3
+ store float %0, float* %a, align 4
+ ret void
+}
+
+define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
+; CHECK-LABEL: test_vst1q_lane_f64:
+; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <2 x double> %b, i32 1
+ store double %0, double* %a, align 8
+ ret void
+}
+
+define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
+; CHECK-LABEL: test_vst1_lane_s8:
+; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <8 x i8> %b, i32 7
+ store i8 %0, i8* %a, align 1
+ ret void
+}
+
+define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
+; CHECK-LABEL: test_vst1_lane_s16:
+; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <4 x i16> %b, i32 3
+ store i16 %0, i16* %a, align 2
+ ret void
+}
+
+define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
+; CHECK-LABEL: test_vst1_lane_s32:
+; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <2 x i32> %b, i32 1
+ store i32 %0, i32* %a, align 4
+ ret void
+}
+
+define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
+; CHECK-LABEL: test_vst1_lane_s64:
+; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <1 x i64> %b, i32 0
+ store i64 %0, i64* %a, align 8
+ ret void
+}
+
+define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
+; CHECK-LABEL: test_vst1_lane_f32:
+; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
+entry:
+ %0 = extractelement <2 x float> %b, i32 1
+ store float %0, float* %a, align 4
+ ret void
+}
+
+define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
+; CHECK-LABEL: test_vst1_lane_f64:
+; CHECK: str {{d[0-9]+}}, [x0]
+entry:
+ %0 = extractelement <1 x double> %b, i32 0
+ store double %0, double* %a, align 8
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-simd-shift.ll b/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
new file mode 100644
index 0000000..447fb63
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
@@ -0,0 +1,663 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
+; CHECK: test_vshr_n_s8
+; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+ %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <8 x i8> %vshr_n
+}
+
+define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
+; CHECK: test_vshr_n_s16
+; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+ %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
+ ret <4 x i16> %vshr_n
+}
+
+define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
+; CHECK: test_vshr_n_s32
+; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+ %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
+ ret <2 x i32> %vshr_n
+}
+
+define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
+; CHECK: test_vshrq_n_s8
+; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+ %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %vshr_n
+}
+
+define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
+; CHECK: test_vshrq_n_s16
+; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+ %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %vshr_n
+}
+
+define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
+; CHECK: test_vshrq_n_s32
+; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+ %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %vshr_n
+}
+
+define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
+; CHECK: test_vshrq_n_s64
+; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+ %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
+ ret <2 x i64> %vshr_n
+}
+
+define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
+; CHECK: test_vshr_n_u8
+; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+ %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <8 x i8> %vshr_n
+}
+
+define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
+; CHECK: test_vshr_n_u16
+; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+ %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
+ ret <4 x i16> %vshr_n
+}
+
+define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
+; CHECK: test_vshr_n_u32
+; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+ %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
+ ret <2 x i32> %vshr_n
+}
+
+define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
+; CHECK: test_vshrq_n_u8
+; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+ %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %vshr_n
+}
+
+define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
+; CHECK: test_vshrq_n_u16
+; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+ %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %vshr_n
+}
+
+define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
+; CHECK: test_vshrq_n_u32
+; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+ %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %vshr_n
+}
+
+define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
+; CHECK: test_vshrq_n_u64
+; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+ %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
+ ret <2 x i64> %vshr_n
+}
+
+define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vsra_n_s8
+; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+ %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ %1 = add <8 x i8> %vsra_n, %a
+ ret <8 x i8> %1
+}
+
+define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vsra_n_s16
+; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+ %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
+ %1 = add <4 x i16> %vsra_n, %a
+ ret <4 x i16> %1
+}
+
+define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vsra_n_s32
+; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+ %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
+ %1 = add <2 x i32> %vsra_n, %a
+ ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vsraq_n_s8
+; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+ %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ %1 = add <16 x i8> %vsra_n, %a
+ ret <16 x i8> %1
+}
+
+define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsraq_n_s16
+; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+ %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %1 = add <8 x i16> %vsra_n, %a
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsraq_n_s32
+; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+ %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
+ %1 = add <4 x i32> %vsra_n, %a
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsraq_n_s64
+; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+ %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
+ %1 = add <2 x i64> %vsra_n, %a
+ ret <2 x i64> %1
+}
+
+define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: test_vsra_n_u8
+; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
+ %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ %1 = add <8 x i8> %vsra_n, %a
+ ret <8 x i8> %1
+}
+
+define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: test_vsra_n_u16
+; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
+ %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
+ %1 = add <4 x i16> %vsra_n, %a
+ ret <4 x i16> %1
+}
+
+define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: test_vsra_n_u32
+; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
+ %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
+ %1 = add <2 x i32> %vsra_n, %a
+ ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: test_vsraq_n_u8
+; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
+ %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ %1 = add <16 x i8> %vsra_n, %a
+ ret <16 x i8> %1
+}
+
+define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: test_vsraq_n_u16
+; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
+ %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %1 = add <8 x i16> %vsra_n, %a
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: test_vsraq_n_u32
+; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
+ %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
+ %1 = add <4 x i32> %vsra_n, %a
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: test_vsraq_n_u64
+; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
+ %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
+ %1 = add <2 x i64> %vsra_n, %a
+ ret <2 x i64> %1
+}
+
+define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
+; CHECK: test_vshrn_n_s16
+; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+ %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+ ret <8 x i8> %vshrn_n
+}
+
+define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
+; CHECK: test_vshrn_n_s32
+; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+ %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
+ %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+ ret <4 x i16> %vshrn_n
+}
+
+define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
+; CHECK: test_vshrn_n_s64
+; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+ %1 = ashr <2 x i64> %a, <i64 19, i64 19>
+ %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
+ ret <2 x i32> %vshrn_n
+}
+
+define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
+; CHECK: test_vshrn_n_u16
+; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
+ %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+ ret <8 x i8> %vshrn_n
+}
+
+define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
+; CHECK: test_vshrn_n_u32
+; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
+ %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
+ %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+ ret <4 x i16> %vshrn_n
+}
+
+define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
+; CHECK: test_vshrn_n_u64
+; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
+ %1 = lshr <2 x i64> %a, <i64 19, i64 19>
+ %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
+ ret <2 x i32> %vshrn_n
+}
+
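+; The _high_n variants below narrow the shifted wide vector and concatenate
+; the result onto the low half passed in %a; the <1 x i64> bitcasts plus the
+; shufflevector are the IR idiom for that concatenation, which should select
+; the shrn2 form.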
+define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vshrn_high_n_s16
+; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+ %2 = bitcast <8 x i8> %a to <1 x i64>
+ %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+ %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %4
+}
+
+define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vshrn_high_n_s32
+; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
+ %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+ %2 = bitcast <4 x i16> %a to <1 x i64>
+ %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+ %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %4
+}
+
+define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vshrn_high_n_s64
+; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %2 = ashr <2 x i64> %b, <i64 19, i64 19>
+ %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
+ %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+ %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %4
+}
+
+define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vshrn_high_n_u16
+; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
+ %2 = bitcast <8 x i8> %a to <1 x i64>
+ %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+ %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %4
+}
+
+define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vshrn_high_n_u32
+; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
+ %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
+ %2 = bitcast <4 x i16> %a to <1 x i64>
+ %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+ %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %4
+}
+
+define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vshrn_high_n_u64
+; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %2 = lshr <2 x i64> %b, <i64 19, i64 19>
+ %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
+ %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
+ %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %4
+}
+
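+; The saturating and rounding narrowing shifts are only reachable through
+; target intrinsics (e.g. sqshrun takes a signed input and produces an
+; unsigned saturated narrow result); their _high_n forms use the same
+; concatenation idiom and should select the "2" variants such as sqshrun2.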
+define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqshrun_high_n_s16
+; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqshrun_high_n_s32
+; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqshrun_high_n_s64
+; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vrshrn_high_n_s16
+; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vrshrn_high_n_s32
+; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vrshrn_high_n_s64
+; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqrshrun_high_n_s16
+; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqrshrun_high_n_s32
+; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqrshrun_high_n_s64
+; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqshrn_high_n_s16
+; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqshrn_high_n_s32
+; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqshrn_high_n_s64
+; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqshrn_high_n_u16
+; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqshrn_high_n_u32
+; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqshrn_high_n_u64
+; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqrshrn_high_n_s16
+; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqrshrn_high_n_s32
+; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqrshrn_high_n_s64
+; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
+; CHECK: test_vqrshrn_high_n_u16
+; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
+ %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
+ %1 = bitcast <8 x i8> %a to <1 x i64>
+ %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
+; CHECK: test_vqrshrn_high_n_u32
+; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
+ %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
+ %1 = bitcast <4 x i16> %a to <1 x i64>
+ %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
+; CHECK: test_vqrshrn_high_n_u64
+; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
+ %1 = bitcast <2 x i32> %a to <1 x i64>
+ %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
+ %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)
+
+declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)
+
+declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)
+
+declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
+
+declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
+
+declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
+
+declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
+
+declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
+
+declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
+
+declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
+
+declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
+
+declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
+
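+; The _n conversions are fixed-point: fcvtzs/fcvtzu with #64 scale the input
+; by 2^64 before converting to an integer, while scvtf/ucvtf with #64 divide
+; by 2^64 after converting, i.e. the immediate is the number of fractional
+; bits.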
+define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
+ ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
+ ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+ ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+ %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+ ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
+declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
+declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
diff --git a/test/CodeGen/AArch64/arm64-neon-simd-vget.ll b/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
new file mode 100644
index 0000000..87f3956
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+
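+; vget_high_* extracts the upper 64-bit half, which lowers to an EXT by #8
+; bytes; vget_low_* is just the low 64-bit subregister, so nothing beyond the
+; ret is expected.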
+define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_s8:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_s16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_high_s32:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_s64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+ ret <1 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_u8:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_u16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_high_u32:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_u64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+ ret <1 x i64> %shuffle.i
+}
+
+define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_high_p64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
+ ret <1 x i64> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_f16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <2 x float> @test_vget_high_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vget_high_f32:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %shuffle.i
+}
+
+define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_high_p8:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_high_p16:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i16> %shuffle.i
+}
+
+define <1 x double> @test_vget_high_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vget_high_f64:
+; CHECK: ext v0.16b, v0.16b, {{v[0-9]+}}.16b, #8
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
+ ret <1 x double> %shuffle.i
+}
+
+define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_s8:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_s16:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_low_s32:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_s64:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+ ret <1 x i64> %shuffle.i
+}
+
+define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_u8:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_u16:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %shuffle.i
+}
+
+define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vget_low_u32:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32> %shuffle.i
+}
+
+define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_u64:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+ ret <1 x i64> %shuffle.i
+}
+
+define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
+; CHECK-LABEL: test_vget_low_p64:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
+ ret <1 x i64> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_f16:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %shuffle.i
+}
+
+define <2 x float> @test_vget_low_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vget_low_f32:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuffle.i
+}
+
+define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
+; CHECK-LABEL: test_vget_low_p8:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i8> %shuffle.i
+}
+
+define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
+; CHECK-LABEL: test_vget_low_p16:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %shuffle.i
+}
+
+define <1 x double> @test_vget_low_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vget_low_f64:
+; CHECK: ret
+entry:
+ %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer
+ ret <1 x double> %shuffle.i
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
new file mode 100644
index 0000000..74e3af8
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -0,0 +1,74 @@
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s
+
+; This is the analogue of AArch64's file of the same name. It's mostly testing
+; that some form of correct lowering occurs; the tests are a little artificial,
+; but I strongly suspect there's room for improved CodeGen (FIXME).
+
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_0:
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: cset
+ %1 = icmp sge <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_1:
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_0:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
+; CHECK-LABEL: test_select_v1i1_2:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
+ ret <1 x double> %res
+}
+
+define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_3:
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+ %tst = icmp eq i64 %lhs, %rhs
+ %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
+ %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_br_extr_cmp:
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ br i1 %2, label %if.end, label %if.then
+
+if.then:
+  ret i32 0
+
+if.end:
+  ret i32 1
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll b/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
new file mode 100644
index 0000000..8262fe4
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-vector-list-spill.ll
@@ -0,0 +1,175 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+
+; FIXME: We should not generate ld/st for such a register spill/fill: the
+; test case is very simple and register pressure is not high. If the
+; spill/fill algorithm is improved, this test case may no longer trigger,
+; and it can then be deleted.
+define i32 @spill.DPairReg(i32* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.DPairReg:
+; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+entry:
+ %vld = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %arg1)
+ %cmp = icmp eq i32 %arg2, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo()
+ br label %if.end
+
+if.end:
+ %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0
+ %res = extractelement <2 x i32> %vld.extract, i32 1
+ ret i32 %res
+}
+
+define i16 @spill.DTripleReg(i16* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.DTripleReg:
+; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+entry:
+ %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %arg1)
+ %cmp = icmp eq i32 %arg2, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo()
+ br label %if.end
+
+if.end:
+ %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
+ %res = extractelement <4 x i16> %vld.extract, i32 1
+ ret i16 %res
+}
+
+define i16 @spill.DQuadReg(i16* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.DQuadReg:
+; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+entry:
+ %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %arg1)
+ %cmp = icmp eq i32 %arg2, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo()
+ br label %if.end
+
+if.end:
+ %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
+ %res = extractelement <4 x i16> %vld.extract, i32 0
+ ret i16 %res
+}
+
+define i32 @spill.QPairReg(i32* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.QPairReg:
+; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+entry:
+ %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %arg1)
+ %cmp = icmp eq i32 %arg2, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo()
+ br label %if.end
+
+if.end:
+ %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
+ %res = extractelement <4 x i32> %vld.extract, i32 1
+ ret i32 %res
+}
+
+define float @spill.QTripleReg(float* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.QTripleReg:
+; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+entry:
+ %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %arg1)
+ %cmp = icmp eq i32 %arg2, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo()
+ br label %if.end
+
+if.end:
+ %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
+ %res = extractelement <4 x float> %vld3.extract, i32 1
+ ret float %res
+}
+
+define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) {
+; CHECK-LABEL: spill.QQuadReg:
+; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}]
+; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
+entry:
+ %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %arg1)
+ %cmp = icmp eq i32 %arg2, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo()
+ br label %if.end
+
+if.end:
+ %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0
+ %res = extractelement <16 x i8> %vld.extract, i32 1
+ ret i8 %res
+}
+
+declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
+declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
+declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
+declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
+declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
+
+declare void @foo()
+
+; FIXME: We should not generate ld/st for such a register spill/fill: the
+; test case is very simple and register pressure is not high. If the
+; spill/fill algorithm is improved, this test case may no longer trigger,
+; and it can then be deleted.
+; Check the spill for register class QPair_with_qsub_0_in_FPR128Lo.
+define <8 x i16> @test_2xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
+ tail call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
+ tail call void @foo()
+ %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
+ %1 = bitcast <2 x i64> %sv to <8 x i16>
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %3 = mul <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; Check the spill for register class QTriple_with_qsub_0_in_FPR128Lo.
+define <8 x i16> @test_3xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
+ tail call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
+ tail call void @foo()
+ %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
+ %1 = bitcast <2 x i64> %sv to <8 x i16>
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %3 = mul <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+; Check the spill for register class QQuad_with_qsub_0_in_FPR128Lo.
+define <8 x i16> @test_4xFPR128Lo(i64 %got, i64* %ptr, <1 x i64> %a) {
+ tail call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i64 0, i64* %ptr)
+ tail call void @foo()
+ %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
+ %1 = bitcast <2 x i64> %sv to <8 x i16>
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %3 = mul <8 x i16> %2, %2
+ ret <8 x i16> %3
+}
+
+declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
+declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
+declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
diff --git a/test/CodeGen/AArch64/arm64-patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll
new file mode 100644
index 0000000..039cdfc
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone | FileCheck %s
+
+; Trivial patchpoint codegen
+;
+define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: trivial_patchpoint_codegen:
+; CHECK: movz x16, #0xdead, lsl #32
+; CHECK-NEXT: movk x16, #0xbeef, lsl #16
+; CHECK-NEXT: movk x16, #0xcafe
+; CHECK-NEXT: blr x16
+; CHECK: movz x16, #0xdead, lsl #32
+; CHECK-NEXT: movk x16, #0xbeef, lsl #16
+; CHECK-NEXT: movk x16, #0xcaff
+; CHECK-NEXT: blr x16
+; CHECK: ret
+ %resolveCall2 = inttoptr i64 244837814094590 to i8*
+ %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %resolveCall3 = inttoptr i64 244837814094591 to i8*
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ ret i64 %result
+}
+
+; Caller frame metadata with stackmaps. This should not be optimized
+; as a leaf function.
+;
+; CHECK-LABEL: caller_meta_leaf
+; CHECK: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK: Ltmp
+; CHECK: mov sp, x29
+; CHECK: ret
+
+define void @caller_meta_leaf() {
+entry:
+ %metadata = alloca i64, i32 3, align 8
+ store i64 11, i64* %metadata
+ store i64 12, i64* %metadata
+ store i64 13, i64* %metadata
+ call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ ret void
+}
+
+; Test the webkit_jscc calling convention.
+; One argument will be passed in a register; the other will be pushed on the stack.
+; Return value in x0.
+define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen:
+; CHECK: Ltmp
+; CHECK: str x{{.+}}, [sp]
+; CHECK-NEXT: mov x0, x{{.+}}
+; CHECK: Ltmp
+; CHECK-NEXT: movz x16, #0xffff, lsl #32
+; CHECK-NEXT: movk x16, #0xdead, lsl #16
+; CHECK-NEXT: movk x16, #0xbeef
+; CHECK-NEXT: blr x16
+ %resolveCall2 = inttoptr i64 281474417671919 to i8*
+ %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+ %resolveCall3 = inttoptr i64 244837814038255 to i8*
+ tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+ ret void
+}
+
+; Test that the arguments are properly aligned and that we don't store undef arguments.
+define i64 @jscall_patchpoint_codegen2(i64 %callee) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen2:
+; CHECK: Ltmp
+; CHECK: orr w{{.+}}, wzr, #0x6
+; CHECK-NEXT: str x{{.+}}, [sp, #24]
+; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
+; CHECK-NEXT: str w{{.+}}, [sp, #16]
+; CHECK-NEXT: orr w{{.+}}, wzr, #0x2
+; CHECK-NEXT: str x{{.+}}, [sp]
+; CHECK: Ltmp
+; CHECK-NEXT: movz x16, #0xffff, lsl #32
+; CHECK-NEXT: movk x16, #0xdead, lsl #16
+; CHECK-NEXT: movk x16, #0xbeef
+; CHECK-NEXT: blr x16
+ %call = inttoptr i64 281474417671919 to i8*
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+ ret i64 %result
+}
+
+; Test that the arguments are properly aligned and that we don't store undef arguments.
+define i64 @jscall_patchpoint_codegen3(i64 %callee) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen3:
+; CHECK: Ltmp
+; CHECK: movz w{{.+}}, #0xa
+; CHECK-NEXT: str x{{.+}}, [sp, #48]
+; CHECK-NEXT: orr w{{.+}}, wzr, #0x8
+; CHECK-NEXT: str w{{.+}}, [sp, #36]
+; CHECK-NEXT: orr w{{.+}}, wzr, #0x6
+; CHECK-NEXT: str x{{.+}}, [sp, #24]
+; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
+; CHECK-NEXT: str w{{.+}}, [sp, #16]
+; CHECK-NEXT: orr w{{.+}}, wzr, #0x2
+; CHECK-NEXT: str x{{.+}}, [sp]
+; CHECK: Ltmp
+; CHECK-NEXT: movz x16, #0xffff, lsl #32
+; CHECK-NEXT: movk x16, #0xdead, lsl #16
+; CHECK-NEXT: movk x16, #0xbeef
+; CHECK-NEXT: blr x16
+ %call = inttoptr i64 281474417671919 to i8*
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+ ret i64 %result
+}
+
+; Test patchpoints reusing the same TargetConstant.
+; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
+; There is no way to verify this, since it depends on memory allocation.
+; But I think it's useful to include as a working example.
+define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
+entry:
+ %tmp80 = add i64 %tmp79, -16
+ %tmp81 = inttoptr i64 %tmp80 to i64*
+ %tmp82 = load i64* %tmp81, align 8
+ tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ %tmp83 = load i64* %tmp33, align 8
+ %tmp84 = add i64 %tmp83, -24
+ %tmp85 = inttoptr i64 %tmp84 to i64*
+ %tmp86 = load i64* %tmp85, align 8
+ tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ ret i64 10
+}
+
+; Test small patchpoints that don't emit calls.
+define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: small_patchpoint_codegen:
+; CHECK: Ltmp
+; CHECK: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: ldp
+; CHECK-NEXT: ret
+ %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+ ret void
+}
+
+; Test that scratch registers are spilled around patchpoints
+; CHECK: InlineAsm End
+; CHECK-NEXT: mov x{{[0-9]+}}, x16
+; CHECK-NEXT: mov x{{[0-9]+}}, x17
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: nop
+define void @clobberScratch(i32* %p) {
+ %v = load i32* %p
+ tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
+ store i32 %v, i32* %p
+ ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
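+
+; For reference, the patchpoint operands used above are
+;   (i64 <id>, i32 <numBytes>, i8* <target>, i32 <numCallArgs>, ...):
+; <numBytes> reserves encoding space for the patch, the first <numCallArgs>
+; trailing operands are call arguments, and any remaining operands are live
+; values recorded in the stack map.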
+
+; CHECK-LABEL: test_i16:
+; CHECK: ldrh [[BREG:w[0-9]+]], [sp]
+; CHECK: add w0, w0, [[BREG]]
+define webkit_jscc i16 @test_i16(i16 zeroext %a, i16 zeroext %b) {
+ %sum = add i16 %a, %b
+ ret i16 %sum
+}
diff --git a/test/CodeGen/AArch64/arm64-pic-local-symbol.ll b/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
new file mode 100644
index 0000000..627e741
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=arm64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
+
+@a = internal unnamed_addr global i32 0, align 4
+@.str = private unnamed_addr constant [6 x i8] c"test\0A\00", align 1
+
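+; With -relocation-model=pic, symbols with local linkage need no GOT entry,
+; so they are addressed directly with adrp plus a :lo12: offset.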
+define i32 @get() {
+; CHECK: get:
+; CHECK: adrp x{{[0-9]+}}, a
+; CHECK-NEXT: ldr w{{[0-9]+}}, [x{{[0-9]}}, :lo12:a]
+ %res = load i32* @a, align 4
+ ret i32 %res
+}
+
+define void @foo() nounwind {
+; CHECK: foo:
+; CHECK: adrp x{{[0-9]}}, .L.str
+; CHECK-NEXT: add x{{[0-9]}}, x{{[0-9]}}, :lo12:.L.str
+ tail call void @bar(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
+ ret void
+}
+
+declare void @bar(i8*)
diff --git a/test/CodeGen/ARM64/platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll
index 651c793..651c793 100644
--- a/test/CodeGen/ARM64/platform-reg.ll
+++ b/test/CodeGen/AArch64/arm64-platform-reg.ll
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll
new file mode 100644
index 0000000..2afade2
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
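+; AArch64 has no scalar popcount instruction, so ctpop is lowered through
+; NEON: move the value into a vector register, count bits per byte with cnt,
+; then sum the byte counts with uaddlv.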
+define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK: fmov s0, w0
+; CHECK: cnt.8b v0, v0
+; CHECK: uaddlv.8b h0, v0
+; CHECK: fmov w0, s0
+; CHECK: ret
+}
+
+define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK: fmov d0, x0
+; CHECK: cnt.8b v0, v0
+; CHECK: uaddlv.8b h0, v0
+; CHECK: fmov w0, s0
+; CHECK: ret
+}
+
+; Do not use AdvSIMD when -mno-implicit-float is specified.
+; rdar://9473858
+
+define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK-LABEL: cnt32:
+; CHECK-NOT: 16b
+; CHECK: ret
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK-LABEL: cnt64:
+; CHECK-NOT: 16b
+; CHECK: ret
+}
+
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/ARM64/prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll
index b2e06ed..b2e06ed 100644
--- a/test/CodeGen/ARM64/prefetch.ll
+++ b/test/CodeGen/AArch64/arm64-prefetch.ll
diff --git a/test/CodeGen/AArch64/arm64-promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll
new file mode 100644
index 0000000..380ff55
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-promote-const.ll
@@ -0,0 +1,255 @@
+; Disable machine CSE to stress the different paths of the algorithm.
+; Otherwise, we always fall into the simple case, i.e., only one definition.
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-stress-promote-const -mcpu=cyclone | FileCheck -check-prefix=PROMOTED %s
+; The REGULAR run just checks that the inputs passed to promote const expose
+; the appropriate patterns.
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -aarch64-promote-const=false -mcpu=cyclone | FileCheck -check-prefix=REGULAR %s
+
+%struct.uint8x16x4_t = type { [4 x <16 x i8>] }
+
+; Constant is a structure
+define %struct.uint8x16x4_t @test1() {
+; PROMOTED-LABEL: test1:
+; Constant promotion has created one big constant for the whole structure.
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], __PromotedConst@PAGE
+; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], __PromotedConst@PAGEOFF
+; Destination registers are defined by the ABI
+; PROMOTED-NEXT: ldp q0, q1, {{\[}}[[BASEADDR]]]
+; PROMOTED-NEXT: ldp q2, q3, {{\[}}[[BASEADDR]], #32]
+; PROMOTED-NEXT: ret
+
+; REGULAR-LABEL: test1:
+; Regular access is quite bad: it performs 4 loads, one for each chunk of
+; the structure.
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
+; Destination registers are defined by the ABI
+; REGULAR: ldr q0, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
+; REGULAR: ldr q1, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; REGULAR: adrp [[PAGEADDR2:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
+; REGULAR: ldr q2, {{\[}}[[PAGEADDR2]], [[CSTLABEL2]]@PAGEOFF]
+; REGULAR: adrp [[PAGEADDR3:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
+; REGULAR: ldr q3, {{\[}}[[PAGEADDR3]], [[CSTLABEL3]]@PAGEOFF]
+; REGULAR-NEXT: ret
+entry:
+ ret %struct.uint8x16x4_t { [4 x <16 x i8>] [<16 x i8> <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>, <16 x i8> <i8 32, i8 124, i8 121, i8 120, i8 8, i8 117, i8 -56, i8 113, i8 -76, i8 110, i8 -53, i8 107, i8 7, i8 105, i8 103, i8 102>, <16 x i8> <i8 -24, i8 99, i8 -121, i8 97, i8 66, i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8 85>, <16 x i8> <i8 -104, i8 83, i8 -20, i8 81, i8 81, i8 80, i8 -59, i8 78, i8 73, i8 77, i8 -37, i8 75, i8 122, i8 74, i8 37, i8 73>] }
+}
+
+; Two different uses of the same constant in the same basic block
+define <16 x i8> @test2(<16 x i8> %arg) {
+entry:
+; PROMOTED-LABEL: test2:
+; In stress mode, constant vectors are promoted.
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst[0-9]+]]@PAGE
+; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
+; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; Destination register is defined by ABI
+; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
+; PROMOTED-NEXT: mla.16b v0, v0, v[[REGNUM]]
+; PROMOTED-NEXT: ret
+
+; REGULAR-LABEL: test2:
+; Regular access is strictly the same as promoted access.
+; The difference is that the address (and thus the space in memory) is not
+; shared between constants.
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
+; REGULAR: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; Destination register is defined by ABI
+; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
+; REGULAR-NEXT: mla.16b v0, v0, v[[REGNUM]]
+; REGULAR-NEXT: ret
+ %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ %mul.i = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ %add.i9 = add <16 x i8> %add.i, %mul.i
+ ret <16 x i8> %add.i9
+}
+
+; Two different uses of the same constant in two different basic blocks;
+; one dominates the other.
+define <16 x i8> @test3(<16 x i8> %arg, i32 %path) {
+; PROMOTED-LABEL: test3:
+; In stress mode, constant vectors are promoted.
+; Since the constant is the same as in the previous function,
+; the same address must be used.
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
+; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
+; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; Destination register is defined by ABI
+; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
+; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]]
+; Next BB
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV2:__PromotedConst[0-9]+]]@PAGE
+; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV2]]@PAGEOFF
+; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[BASEADDR]]]
+; Next BB
+; PROMOTED-NEXT: [[LABEL]]:
+; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
+; PROMOTED-NEXT: add.16b v0, v0, [[DESTV]]
+; PROMOTED-NEXT: ret
+
+; REGULAR-LABEL: test3:
+; Regular mode does not eliminate common subexpressions on its own.
+; In other words, the same load appears several times.
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1:lCP.*]]@PAGE
+; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
+; Destination register is defined by ABI
+; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
+; REGULAR-NEXT: cbz w0, [[LABELelse:LBB.*]]
+; Next BB
+; Redundant load
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1]]@PAGE
+; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
+; REGULAR-NEXT: b [[LABELend:LBB.*]]
+; Next BB
+; REGULAR-NEXT: [[LABELelse]]
+; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
+; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL2]]@PAGEOFF]
+; Next BB
+; REGULAR-NEXT: [[LABELend]]:
+; REGULAR-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
+; REGULAR-NEXT: add.16b v0, v0, [[DESTV]]
+; REGULAR-NEXT: ret
+entry:
+ %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ %tobool = icmp eq i32 %path, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %mul.i13 = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul.i = mul <16 x i8> %add.i, <i8 -24, i8 99, i8 -121, i8 97, i8 66, i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8 85>
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %ret2.0 = phi <16 x i8> [ %mul.i13, %if.then ], [ %mul.i, %if.else ]
+ %add.i12 = add <16 x i8> %add.i, %ret2.0
+ ret <16 x i8> %add.i12
+}
+
+; Two different uses of the same constant in two different basic blocks;
+; neither dominates the other.
+define <16 x i8> @test4(<16 x i8> %arg, i32 %path) {
+; PROMOTED-LABEL: test4:
+; In stress mode, constant vectors are promoted.
+; Since the constant is the same as in the previous function,
+; the same address must be used.
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
+; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
+; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; Destination register is defined by ABI
+; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
+; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
+; Next BB
+; PROMOTED: mul.16b v0, v0, v[[REGNUM]]
+; Next BB
+; PROMOTED-NEXT: [[LABEL]]:
+; PROMOTED-NEXT: ret
+
+
+; REGULAR-LABEL: test4:
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
+; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
+; Destination register is defined by ABI
+; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
+; REGULAR-NEXT: cbz w0, [[LABEL:LBB.*]]
+; Next BB
+; Redundant expression
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3]]@PAGE
+; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
+; Destination register is defined by ABI
+; REGULAR-NEXT: mul.16b v0, v0, v[[REGNUM]]
+; Next BB
+; REGULAR-NEXT: [[LABEL]]:
+; REGULAR-NEXT: ret
+entry:
+ %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ %tobool = icmp eq i32 %path, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %mul.i = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %ret.0 = phi <16 x i8> [ %mul.i, %if.then ], [ %add.i, %entry ]
+ ret <16 x i8> %ret.0
+}
+
+; Two different uses of the same constant in two different basic blocks,
+; one is in a phi.
+define <16 x i8> @test5(<16 x i8> %arg, i32 %path) {
+; PROMOTED-LABEL: test5:
+; In stress mode, constant vectors are promoted
+; Since the constant is the same as in the previous function,
+; the same address must be used
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
+; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
+; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
+; Next BB
+; PROMOTED: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
+; PROMOTED-NEXT: mul.16b v[[REGNUM]], [[DESTV]], v[[REGNUM]]
+; Next BB
+; PROMOTED-NEXT: [[LABEL]]:
+; PROMOTED-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[REGNUM]], v[[REGNUM]]
+; PROMOTED-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
+; PROMOTED-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
+; PROMOTED-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
+; PROMOTED-NEXT: ret
+
+; REGULAR-LABEL: test5:
+; REGULAR: cbz w0, [[LABELelse:LBB.*]]
+; Next BB
+; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
+; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; REGULAR-NEXT: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
+; REGULAR-NEXT: mul.16b v[[DESTREGNUM:[0-9]+]], [[DESTV]], v[[REGNUM]]
+; REGULAR-NEXT: b [[LABELend:LBB.*]]
+; Next BB
+; REGULAR-NEXT: [[LABELelse]]
+; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
+; REGULAR-NEXT: ldr q[[DESTREGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
+; Next BB
+; REGULAR-NEXT: [[LABELend]]:
+; REGULAR-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[DESTREGNUM]], v[[DESTREGNUM]]
+; REGULAR-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
+; REGULAR-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
+; REGULAR-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
+; REGULAR-NEXT: ret
+entry:
+ %tobool = icmp eq i32 %path, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ %mul.i26 = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %ret.0 = phi <16 x i8> [ %mul.i26, %if.then ], [ <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>, %entry ]
+ %mul.i25 = mul <16 x i8> %ret.0, %ret.0
+ %mul.i24 = mul <16 x i8> %mul.i25, %mul.i25
+ %mul.i23 = mul <16 x i8> %mul.i24, %mul.i24
+ %mul.i = mul <16 x i8> %mul.i23, %mul.i23
+ ret <16 x i8> %mul.i
+}
+
+define void @accessBig(i64* %storage) {
+; PROMOTED-LABEL: accessBig:
+; PROMOTED: adrp
+; PROMOTED: ret
+ %addr = bitcast i64* %storage to <1 x i80>*
+ store <1 x i80> <i80 483673642326615442599424>, <1 x i80>* %addr
+ ret void
+}
+
+define void @asmStatement() {
+; PROMOTED-LABEL: asmStatement:
+; PROMOTED-NOT: adrp
+; PROMOTED: ret
+ call void asm sideeffect "bfxil w0, w0, $0, $1", "i,i"(i32 28, i32 4)
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-redzone.ll b/test/CodeGen/AArch64/arm64-redzone.ll
new file mode 100644
index 0000000..9b0c384
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-redzone.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm64 -aarch64-redzone | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) nounwind ssp {
+; CHECK-LABEL: foo:
+; CHECK-NOT: sub sp, sp
+; CHECK: ret
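+; (With the red zone enabled, this leaf function can keep its locals in the
+; ABI-reserved area below sp, so no stack adjustment is needed; hence the
+; CHECK-NOT above.)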
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ %x = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 %b, i32* %b.addr, align 4
+ %tmp = load i32* %a.addr, align 4
+ %tmp1 = load i32* %b.addr, align 4
+ %add = add nsw i32 %tmp, %tmp1
+ store i32 %add, i32* %x, align 4
+ %tmp2 = load i32* %x, align 4
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll b/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
new file mode 100644
index 0000000..29255ef
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-reg-copy-noneon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s | FileCheck %s
+
+define float @copy_FPR32(float %a, float %b) {
+;CHECK-LABEL: copy_FPR32:
+;CHECK: fmov s0, s1
+ ret float %b;
+}
+
+define double @copy_FPR64(double %a, double %b) {
+;CHECK-LABEL: copy_FPR64:
+;CHECK: fmov d0, d1
+ ret double %b;
+}
+
+define fp128 @copy_FPR128(fp128 %a, fp128 %b) {
+;CHECK-LABEL: copy_FPR128:
+;CHECK: str q1, [sp, #-16]!
+;CHECK-NEXT: ldr q0, [sp, #16]!
+ ret fp128 %b;
+}
diff --git a/test/CodeGen/AArch64/arm64-register-offset-addressing.ll b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
new file mode 100644
index 0000000..045712b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+
+define i8 @test_64bit_add(i16* %a, i64 %b) {
+; CHECK-LABEL: test_64bit_add:
+; CHECK: lsl [[REG:x[0-9]+]], x1, #1
+; CHECK: ldrb w0, [x0, [[REG]]]
+; CHECK: ret
+ %tmp1 = getelementptr inbounds i16* %a, i64 %b
+ %tmp2 = load i16* %tmp1
+ %tmp3 = trunc i16 %tmp2 to i8
+ ret i8 %tmp3
+}
+
+; These tests are trying to form SEXT and ZEXT operations that never leave i64
+; space, to make sure LLVM can adapt the offset register correctly.
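+; As a minimal sketch (illustrative only; %off is a placeholder name), the
+; two extension idioms built below, which should fold into the sxtw/uxtw
+; extended-register addressing modes, are:
+;   %tmp = shl i64 %off, 32
+;   %sext = ashr i64 %tmp, 32           ; sign extension of the low 32 bits
+;   %zext = and i64 %off, 4294967295    ; zero extension of the low 32 bits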
+define void @ldst_8bit(i8* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_8bit:
+
+ %off32.sext.tmp = shl i64 %offset, 32
+ %off32.sext = ashr i64 %off32.sext.tmp, 32
+ %addr8_sxtw = getelementptr i8* %base, i64 %off32.sext
+ %val8_sxtw = load volatile i8* %addr8_sxtw
+ %val32_signed = sext i8 %val8_sxtw to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %addrint_uxtw = ptrtoint i8* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
+ %val8_uxtw = load volatile i8* %addr_uxtw
+ %newval8 = add i8 %val8_uxtw, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ ret void
+}
+
+
+define void @ldst_16bit(i16* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_16bit:
+
+ %addrint_uxtw = ptrtoint i16* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
+ %val8_uxtw = load volatile i16* %addr_uxtw
+ %newval8 = add i16 %val8_uxtw, 1
+ store volatile i16 %newval8, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i16* %base to i64
+ %offset_sxtw.tmp = shl i64 %offset, 32
+ %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
+ %val16_sxtw = load volatile i16* %addr_sxtw
+ %val64_signed = sext i16 %val16_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_uxtwN = ptrtoint i16* %base to i64
+ %offset_uxtwN = and i64 %offset, 4294967295
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 1
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* %addr_uxtwN
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
+ ret void
+}
+
+define void @ldst_32bit(i32* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_32bit:
+
+ %addrint_uxtw = ptrtoint i32* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
+ %val32_uxtw = load volatile i32* %addr_uxtw
+ %newval32 = add i32 %val32_uxtw, 1
+ store volatile i32 %newval32, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i32* %base to i64
+ %offset_sxtw.tmp = shl i64 %offset, 32
+ %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
+ %val32_sxtw = load volatile i32* %addr_sxtw
+ %val64_signed = sext i32 %val32_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_uxtwN = ptrtoint i32* %base to i64
+ %offset_uxtwN = and i64 %offset, 4294967295
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
+ %val32 = load volatile i32* @var_32bit
+ store volatile i32 %val32, i32* %addr_uxtwN
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+ ret void
+}
+
+define void @ldst_64bit(i64* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_64bit:
+
+ %addrint_uxtw = ptrtoint i64* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
+ %val64_uxtw = load volatile i64* %addr_uxtw
+ %newval8 = add i64 %val64_uxtw, 1
+ store volatile i64 %newval8, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i64* %base to i64
+ %offset_sxtw.tmp = shl i64 %offset, 32
+ %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
+ %val64_sxtw = load volatile i64* %addr_sxtw
+ store volatile i64 %val64_sxtw, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_uxtwN = ptrtoint i64* %base to i64
+ %offset_uxtwN = and i64 %offset, 4294967295
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
+ %val64 = load volatile i64* @var_64bit
+ store volatile i64 %val64, i64* %addr_uxtwN
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+ ret void
+}
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
diff --git a/test/CodeGen/AArch64/arm64-register-pairing.ll b/test/CodeGen/AArch64/arm64-register-pairing.ll
new file mode 100644
index 0000000..99defb1
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-register-pairing.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
+;
+; rdar://14075006
+
+define void @odd() nounwind {
+; CHECK-LABEL: odd:
+; CHECK: stp d15, d14, [sp, #-144]!
+; CHECK: stp d13, d12, [sp, #16]
+; CHECK: stp d11, d10, [sp, #32]
+; CHECK: stp d9, d8, [sp, #48]
+; CHECK: stp x28, x27, [sp, #64]
+; CHECK: stp x26, x25, [sp, #80]
+; CHECK: stp x24, x23, [sp, #96]
+; CHECK: stp x22, x21, [sp, #112]
+; CHECK: stp x20, x19, [sp, #128]
+; CHECK: movz x0, #0x2a
+; CHECK: ldp x20, x19, [sp, #128]
+; CHECK: ldp x22, x21, [sp, #112]
+; CHECK: ldp x24, x23, [sp, #96]
+; CHECK: ldp x26, x25, [sp, #80]
+; CHECK: ldp x28, x27, [sp, #64]
+; CHECK: ldp d9, d8, [sp, #48]
+; CHECK: ldp d11, d10, [sp, #32]
+; CHECK: ldp d13, d12, [sp, #16]
+; CHECK: ldp d15, d14, [sp], #144
+ call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
+ ret void
+}
+
+define void @even() nounwind {
+; CHECK-LABEL: even:
+; CHECK: stp d15, d14, [sp, #-144]!
+; CHECK: stp d13, d12, [sp, #16]
+; CHECK: stp d11, d10, [sp, #32]
+; CHECK: stp d9, d8, [sp, #48]
+; CHECK: stp x28, x27, [sp, #64]
+; CHECK: stp x26, x25, [sp, #80]
+; CHECK: stp x24, x23, [sp, #96]
+; CHECK: stp x22, x21, [sp, #112]
+; CHECK: stp x20, x19, [sp, #128]
+; CHECK: movz x0, #0x2a
+; CHECK: ldp x20, x19, [sp, #128]
+; CHECK: ldp x22, x21, [sp, #112]
+; CHECK: ldp x24, x23, [sp, #96]
+; CHECK: ldp x26, x25, [sp, #80]
+; CHECK: ldp x28, x27, [sp, #64]
+; CHECK: ldp d9, d8, [sp, #48]
+; CHECK: ldp d11, d10, [sp, #32]
+; CHECK: ldp d13, d12, [sp, #16]
+; CHECK: ldp d15, d14, [sp], #144
+ call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll
index a1daf03..a1daf03 100644
--- a/test/CodeGen/ARM64/regress-f128csel-flags.ll
+++ b/test/CodeGen/AArch64/arm64-regress-f128csel-flags.ll
diff --git a/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
new file mode 100644
index 0000000..fec8933
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=arm64 -o - %s | FileCheck %s
+
+; This is mostly a "don't assert" test. The type of the RHS of a shift depended
+; on the phase of legalization, which led to the creation of an unexpected and
+; unselectable "rotr" node: (i32 (rotr i32, i64)).
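+; A hand-reduced sketch of the rotate idiom in question (%v and %amt are
+; placeholder names; the real pattern is spelled out in @foo below):
+;   %amt = and i64 %x, 31
+;   %inv = sub i64 32, %amt
+;   %hi = shl i64 %v, %inv
+;   %lo = lshr i64 %v, %amt
+;   %rot = or i64 %hi, %lo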
+
+; FIXME: This test is xfailed because it relies on an optimization that has
+; been reverted (see PR17975).
+; XFAIL: *
+
+define void @foo(i64* nocapture %d) {
+; CHECK-LABEL: foo:
+; CHECK: rorv
+ %tmp = load i64* undef, align 8
+ %sub397 = sub i64 0, %tmp
+ %and398 = and i64 %sub397, 4294967295
+ %shr404 = lshr i64 %and398, 0
+ %or405 = or i64 0, %shr404
+ %xor406 = xor i64 %or405, 0
+ %xor417 = xor i64 0, %xor406
+ %xor428 = xor i64 0, %xor417
+ %sub430 = sub i64 %xor417, 0
+ %and431 = and i64 %sub430, 4294967295
+ %and432 = and i64 %xor428, 31
+ %sub433 = sub i64 32, %and432
+ %shl434 = shl i64 %and431, %sub433
+ %shr437 = lshr i64 %and431, %and432
+ %or438 = or i64 %shl434, %shr437
+ %xor439 = xor i64 %or438, %xor428
+ %sub441 = sub i64 %xor439, 0
+ store i64 %sub441, i64* %d, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM64/return-vector.ll b/test/CodeGen/AArch64/arm64-return-vector.ll
index 9457d8b..9457d8b 100644
--- a/test/CodeGen/ARM64/return-vector.ll
+++ b/test/CodeGen/AArch64/arm64-return-vector.ll
diff --git a/test/CodeGen/AArch64/arm64-returnaddr.ll b/test/CodeGen/AArch64/arm64-returnaddr.ll
new file mode 100644
index 0000000..285b295
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-returnaddr.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+define i8* @rt0(i32 %x) nounwind readnone {
+entry:
+; CHECK-LABEL: rt0:
+; CHECK: mov x0, x30
+; CHECK: ret
+ %0 = tail call i8* @llvm.returnaddress(i32 0)
+ ret i8* %0
+}
+
+define i8* @rt2() nounwind readnone {
+entry:
+; CHECK-LABEL: rt2:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; CHECK: ldr x[[REG:[0-9]+]], [x29]
+; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]]]
+; CHECK: ldr x0, [x[[REG2]], #8]
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
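+; (Each AArch64 frame record is a {fp, lr} pair: the first two loads walk
+; the saved-fp chain two levels up, and the return address is then read
+; from offset #8 within that record.)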
+ %0 = tail call i8* @llvm.returnaddress(i32 2)
+ ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-rev.ll b/test/CodeGen/AArch64/arm64-rev.ll
new file mode 100644
index 0000000..30d9f4f
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-rev.ll
@@ -0,0 +1,235 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define i32 @test_rev_w(i32 %a) nounwind {
+entry:
+; CHECK-LABEL: test_rev_w:
+; CHECK: rev w0, w0
+ %0 = tail call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %0
+}
+
+define i64 @test_rev_x(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: test_rev_x:
+; CHECK: rev x0, x0
+ %0 = tail call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %0
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
+define i32 @test_rev16_w(i32 %X) nounwind {
+entry:
+; CHECK-LABEL: test_rev16_w:
+; CHECK: rev16 w0, w0
+ %tmp1 = lshr i32 %X, 8
+ %X15 = bitcast i32 %X to i32
+ %tmp4 = shl i32 %X15, 8
+ %tmp2 = and i32 %tmp1, 16711680
+ %tmp5 = and i32 %tmp4, -16777216
+ %tmp9 = and i32 %tmp1, 255
+ %tmp13 = and i32 %tmp4, 65280
+ %tmp6 = or i32 %tmp5, %tmp2
+ %tmp10 = or i32 %tmp6, %tmp13
+ %tmp14 = or i32 %tmp10, %tmp9
+ ret i32 %tmp14
+}
+
+; 64-bit REV16 is *not* a swap then a 16-bit rotation:
+; 01234567 ->(bswap) 76543210 ->(rotr) 10765432
+; 01234567 ->(rev16) 10325476
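+; An IR sketch of what the 64-bit rev16 would compute (a byte swap within
+; each 16-bit lane; illustrative only, not part of this test):
+;   %hi = shl i64 %a, 8
+;   %hm = and i64 %hi, -71777214294589696   ; 0xFF00FF00FF00FF00
+;   %lo = lshr i64 %a, 8
+;   %lm = and i64 %lo, 71777214294589695    ; 0x00FF00FF00FF00FF
+;   %r = or i64 %hm, %lm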
+define i64 @test_rev16_x(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: test_rev16_x:
+; CHECK-NOT: rev16 x0, x0
+ %0 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %1 = lshr i64 %0, 16
+ %2 = shl i64 %0, 48
+ %3 = or i64 %1, %2
+ ret i64 %3
+}
+
+define i64 @test_rev32_x(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: test_rev32_x:
+; CHECK: rev32 x0, x0
+ %0 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %1 = lshr i64 %0, 32
+ %2 = shl i64 %0, 32
+ %3 = or i64 %1, %2
+ ret i64 %3
+}
+
+define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev64D8:
+;CHECK: rev64.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: test_vrev64D16:
+;CHECK: rev64.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: test_vrev64D32:
+;CHECK: rev64.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK-LABEL: test_vrev64Df:
+;CHECK: rev64.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev64Q8:
+;CHECK: rev64.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: test_vrev64Q16:
+;CHECK: rev64.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: test_vrev64Q32:
+;CHECK: rev64.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK-LABEL: test_vrev64Qf:
+;CHECK: rev64.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev32D8:
+;CHECK: rev32.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: test_vrev32D16:
+;CHECK: rev32.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i16> %tmp2
+}
+
+define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev32Q8:
+;CHECK: rev32.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: test_vrev32Q16:
+;CHECK: rev32.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev16D8:
+;CHECK: rev16.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i8> %tmp2
+}
+
+define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev16Q8:
+;CHECK: rev16.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i8> %tmp2
+}
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: test_vrev64D8_undef:
+;CHECK: rev64.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: test_vrev32Q16_undef:
+;CHECK: rev32.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+ ret <8 x i16> %tmp2
+}
+
+; vrev <4 x i16> should use REV32 and not REV64
+define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
+; CHECK-LABEL: test_vrev64:
+; CHECK: ldr [[DEST:q[0-9]+]],
+; CHECK: st1.h
+; CHECK: st1.h
+entry:
+ %0 = bitcast <4 x i16>* %source to <8 x i16>*
+ %tmp2 = load <8 x i16>* %0, align 4
+ %tmp3 = extractelement <8 x i16> %tmp2, i32 6
+ %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
+ %tmp9 = extractelement <8 x i16> %tmp2, i32 5
+ %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
+ store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
+ ret void
+}
+
+; Test vrev of float4
+define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
+; CHECK-LABEL: float_vrev64:
+; CHECK: ldr [[DEST:q[0-9]+]],
+; CHECK: rev64.4s
+entry:
+ %0 = bitcast float* %source to <4 x float>*
+ %tmp2 = load <4 x float>* %0, align 4
+ %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
+ %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
+ store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
+ ret void
+}
+
+
+define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
+; CHECK-LABEL: test_vrev32_bswap:
+; CHECK: rev32.16b
+; CHECK-NOT: rev
+; CHECK: ret
+ %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
+ ret <4 x i32> %bswap
+}
+
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-rounding.ll b/test/CodeGen/AArch64/arm64-rounding.ll
new file mode 100644
index 0000000..9311144
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-rounding.ll
@@ -0,0 +1,208 @@
+; RUN: llc -O3 < %s -mcpu=cyclone | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "arm64-apple-ios6.0.0"
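+; In the #0 functions, floor/ceil/trunc/round lower to frintx followed by
+; frint{m,p,z,a}; the #1 variants at the end of the file differ only in
+; "unsafe-fp-math"="true" (see the attributes) and drop the frintx.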
+
+; CHECK: test1
+; CHECK: frintx
+; CHECK: frintm
+define float @test1(float %a) #0 {
+entry:
+ %call = tail call float @floorf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @floorf(float) nounwind readnone
+
+; CHECK: test2
+; CHECK: frintx
+; CHECK: frintm
+define double @test2(double %a) #0 {
+entry:
+ %call = tail call double @floor(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @floor(double) nounwind readnone
+
+; CHECK: test3
+; CHECK: frinti
+define float @test3(float %a) #0 {
+entry:
+ %call = tail call float @nearbyintf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+; CHECK: test4
+; CHECK: frinti
+define double @test4(double %a) #0 {
+entry:
+ %call = tail call double @nearbyint(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+; CHECK: test5
+; CHECK: frintx
+; CHECK: frintp
+define float @test5(float %a) #0 {
+entry:
+ %call = tail call float @ceilf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @ceilf(float) nounwind readnone
+
+; CHECK: test6
+; CHECK: frintx
+; CHECK: frintp
+define double @test6(double %a) #0 {
+entry:
+ %call = tail call double @ceil(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @ceil(double) nounwind readnone
+
+; CHECK: test7
+; CHECK: frintx
+define float @test7(float %a) #0 {
+entry:
+ %call = tail call float @rintf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @rintf(float) nounwind readnone
+
+; CHECK: test8
+; CHECK: frintx
+define double @test8(double %a) #0 {
+entry:
+ %call = tail call double @rint(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @rint(double) nounwind readnone
+
+; CHECK: test9
+; CHECK: frintx
+; CHECK: frintz
+define float @test9(float %a) #0 {
+entry:
+ %call = tail call float @truncf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @truncf(float) nounwind readnone
+
+; CHECK: test10
+; CHECK: frintx
+; CHECK: frintz
+define double @test10(double %a) #0 {
+entry:
+ %call = tail call double @trunc(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @trunc(double) nounwind readnone
+
+; CHECK: test11
+; CHECK: frintx
+; CHECK: frinta
+define float @test11(float %a) #0 {
+entry:
+ %call = tail call float @roundf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @roundf(float %a) nounwind readnone
+
+; CHECK: test12
+; CHECK: frintx
+; CHECK: frinta
+define double @test12(double %a) #0 {
+entry:
+ %call = tail call double @round(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @round(double %a) nounwind readnone
+
+; CHECK: test13
+; CHECK-NOT: frintx
+; CHECK: frintm
+define float @test13(float %a) #1 {
+entry:
+ %call = tail call float @floorf(float %a) nounwind readnone
+ ret float %call
+}
+
+; CHECK: test14
+; CHECK-NOT: frintx
+; CHECK: frintm
+define double @test14(double %a) #1 {
+entry:
+ %call = tail call double @floor(double %a) nounwind readnone
+ ret double %call
+}
+
+; CHECK: test15
+; CHECK-NOT: frintx
+; CHECK: frintp
+define float @test15(float %a) #1 {
+entry:
+ %call = tail call float @ceilf(float %a) nounwind readnone
+ ret float %call
+}
+
+; CHECK: test16
+; CHECK-NOT: frintx
+; CHECK: frintp
+define double @test16(double %a) #1 {
+entry:
+ %call = tail call double @ceil(double %a) nounwind readnone
+ ret double %call
+}
+
+; CHECK: test17
+; CHECK-NOT: frintx
+; CHECK: frintz
+define float @test17(float %a) #1 {
+entry:
+ %call = tail call float @truncf(float %a) nounwind readnone
+ ret float %call
+}
+
+; CHECK: test18
+; CHECK-NOT: frintx
+; CHECK: frintz
+define double @test18(double %a) #1 {
+entry:
+ %call = tail call double @trunc(double %a) nounwind readnone
+ ret double %call
+}
+
+; CHECK: test19
+; CHECK-NOT: frintx
+; CHECK: frinta
+define float @test19(float %a) #1 {
+entry:
+ %call = tail call float @roundf(float %a) nounwind readnone
+ ret float %call
+}
+
+; CHECK: test20
+; CHECK-NOT: frintx
+; CHECK: frinta
+define double @test20(double %a) #1 {
+entry:
+ %call = tail call double @round(double %a) nounwind readnone
+ ret double %call
+}
+
+
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/ARM64/scaled_iv.ll b/test/CodeGen/AArch64/arm64-scaled_iv.ll
index 987373e..987373e 100644
--- a/test/CodeGen/ARM64/scaled_iv.ll
+++ b/test/CodeGen/AArch64/arm64-scaled_iv.ll
diff --git a/test/CodeGen/AArch64/arm64-scvt.ll b/test/CodeGen/AArch64/arm64-scvt.ll
new file mode 100644
index 0000000..2e006cf
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-scvt.ll
@@ -0,0 +1,830 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; rdar://13082402
+
+define float @t1(i32* nocapture %src) nounwind ssp {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: ldr s0, [x0]
+; CHECK: scvtf s0, s0
+ %tmp1 = load i32* %src, align 4
+ %tmp2 = sitofp i32 %tmp1 to float
+ ret float %tmp2
+}
+
+define float @t2(i32* nocapture %src) nounwind ssp {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: ldr s0, [x0]
+; CHECK: ucvtf s0, s0
+ %tmp1 = load i32* %src, align 4
+ %tmp2 = uitofp i32 %tmp1 to float
+ ret float %tmp2
+}
+
+define double @t3(i64* nocapture %src) nounwind ssp {
+entry:
+; CHECK-LABEL: t3:
+; CHECK: ldr d0, [x0]
+; CHECK: scvtf d0, d0
+ %tmp1 = load i64* %src, align 4
+ %tmp2 = sitofp i64 %tmp1 to double
+ ret double %tmp2
+}
+
+define double @t4(i64* nocapture %src) nounwind ssp {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: ldr d0, [x0]
+; CHECK: ucvtf d0, d0
+ %tmp1 = load i64* %src, align 4
+ %tmp2 = uitofp i64 %tmp1 to double
+ ret double %tmp2
+}
+
+; rdar://13136456
+define double @t5(i32* nocapture %src) nounwind ssp optsize {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: ldr [[REG:w[0-9]+]], [x0]
+; CHECK: scvtf d0, [[REG]]
+ %tmp1 = load i32* %src, align 4
+ %tmp2 = sitofp i32 %tmp1 to double
+ ret double %tmp2
+}
+
+; Check that we load into an FP register when we want to convert to a
+; floating-point value.
+; This is much faster than loading into a GPR and then doing the
+; conversion GPR -> FPR.
+; <rdar://problem/14599607>
+;
+; Check the following patterns for signed/unsigned:
+; 1. load with scaled imm to float.
+; 2. load with scaled register to float.
+; 3. load with scaled imm to double.
+; 4. load with scaled register to double.
+; 5. load with unscaled imm to float.
+; 6. load with unscaled imm to double.
+; With load sizes of 8, 16, 32, and 64 bits.
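+; As an assembly sketch of the difference (assumed syntax, not itself part
+; of the checks), pattern 1 for a 32-bit load should come out as
+;   ldr s0, [x0, #4]     ; load straight into an FP/SIMD register
+;   ucvtf s0, s0
+; rather than
+;   ldr w8, [x0, #4]     ; load into a GPR ...
+;   ucvtf s0, w8         ; ... then cross the GPR -> FPR boundary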
+
+; ********* 1. load with scaled imm to float. *********
+define float @fct1(i8* nocapture %sp0) {
+; CHECK-LABEL: fct1:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = uitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @fct2(i16* nocapture %sp0) {
+; CHECK-LABEL: fct2:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = uitofp i16 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @fct3(i32* nocapture %sp0) {
+; CHECK-LABEL: fct3:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = uitofp i32 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; i64 -> f32 is not supported on floating point unit.
+define float @fct4(i64* nocapture %sp0) {
+; CHECK-LABEL: fct4:
+; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = uitofp i64 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; ********* 2. load with scaled register to float. *********
+define float @fct5(i8* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct5:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = uitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @fct6(i16* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct6:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = uitofp i16 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @fct7(i32* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct7:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = uitofp i32 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; i64 -> f32 is not supported on floating point unit.
+define float @fct8(i64* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct8:
+; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = uitofp i64 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+
+; ********* 3. load with scaled imm to double. *********
+define double @fct9(i8* nocapture %sp0) {
+; CHECK-LABEL: fct9:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = uitofp i8 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct10(i16* nocapture %sp0) {
+; CHECK-LABEL: fct10:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = uitofp i16 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct11(i32* nocapture %sp0) {
+; CHECK-LABEL: fct11:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = uitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct12(i64* nocapture %sp0) {
+; CHECK-LABEL: fct12:
+; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = uitofp i64 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+; ********* 4. load with scaled register to double. *********
+define double @fct13(i8* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct13:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = uitofp i8 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct14(i16* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct14:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = uitofp i16 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct15(i32* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct15:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = uitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct16(i64* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct16:
+; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = uitofp i64 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+; ********* 5. load with unscaled imm to float. *********
+define float @fct17(i8* nocapture %sp0) {
+entry:
+; CHECK-LABEL: fct17:
+; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i8* %sp0 to i64
+ %add = add i64 %bitcast, -1
+ %addr = inttoptr i64 %add to i8*
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = uitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @fct18(i16* nocapture %sp0) {
+; CHECK-LABEL: fct18:
+; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i16* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i16*
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = uitofp i16 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @fct19(i32* nocapture %sp0) {
+; CHECK-LABEL: fct19:
+; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i32* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i32*
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = uitofp i32 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; i64 -> f32 is not supported on floating point unit.
+define float @fct20(i64* nocapture %sp0) {
+; CHECK-LABEL: fct20:
+; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i64* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i64*
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = uitofp i64 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+
+}
+
+; ********* 6. load with unscaled imm to double. *********
+define double @fct21(i8* nocapture %sp0) {
+entry:
+; CHECK-LABEL: fct21:
+; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i8* %sp0 to i64
+ %add = add i64 %bitcast, -1
+ %addr = inttoptr i64 %add to i8*
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = uitofp i8 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct22(i16* nocapture %sp0) {
+; CHECK-LABEL: fct22:
+; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i16* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i16*
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = uitofp i16 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct23(i32* nocapture %sp0) {
+; CHECK-LABEL: fct23:
+; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i32* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i32*
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = uitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @fct24(i64* nocapture %sp0) {
+; CHECK-LABEL: fct24:
+; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i64* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i64*
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = uitofp i64 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+
+}
+
+; ********* 1s. load with scaled imm to float. *********
+define float @sfct1(i8* nocapture %sp0) {
+; CHECK-LABEL: sfct1:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
+; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
+; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @sfct2(i16* nocapture %sp0) {
+; CHECK-LABEL: sfct2:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
+; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = sitofp i16 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @sfct3(i32* nocapture %sp0) {
+; CHECK-LABEL: sfct3:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; i64 -> f32 is not supported on floating point unit.
+define float @sfct4(i64* nocapture %sp0) {
+; CHECK-LABEL: sfct4:
+; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = sitofp i64 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; ********* 2s. load with scaled register to float. *********
+define float @sfct5(i8* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct5:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
+; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
+; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @sfct6(i16* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct6:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
+; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = sitofp i16 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @sfct7(i32* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct7:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; i64 -> f32 is not supported on floating point unit.
+define float @sfct8(i64* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct8:
+; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = sitofp i64 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; ********* 3s. load with scaled imm to double. *********
+define double @sfct9(i8* nocapture %sp0) {
+; CHECK-LABEL: sfct9:
+; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct10(i16* nocapture %sp0) {
+; CHECK-LABEL: sfct10:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
+; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
+; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = sitofp i16 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct11(i32* nocapture %sp0) {
+; CHECK-LABEL: sfct11:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
+; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct12(i64* nocapture %sp0) {
+; CHECK-LABEL: sfct12:
+; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = sitofp i64 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+; ********* 4s. load with scaled register to double. *********
+define double @sfct13(i8* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct13:
+; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct14(i16* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct14:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
+; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
+; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = sitofp i16 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct15(i32* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct15:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
+; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
+; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct16(i64* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: sfct16:
+; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = sitofp i64 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+; ********* 5s. load with unscaled imm to float. *********
+define float @sfct17(i8* nocapture %sp0) {
+entry:
+; CHECK-LABEL: sfct17:
+; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
+; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
+; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
+; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i8* %sp0 to i64
+ %add = add i64 %bitcast, -1
+ %addr = inttoptr i64 %add to i8*
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @sfct18(i16* nocapture %sp0) {
+; CHECK-LABEL: sfct18:
+; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
+; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i16* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i16*
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = sitofp i16 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define float @sfct19(i32* nocapture %sp0) {
+; CHECK-LABEL: sfct19:
+; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i32* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i32*
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+; i64 -> f32 is not supported on floating point unit.
+define float @sfct20(i64* nocapture %sp0) {
+; CHECK-LABEL: sfct20:
+; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i64* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i64*
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = sitofp i64 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+
+}
+
+; ********* 6s. load with unscaled imm to double. *********
+define double @sfct21(i8* nocapture %sp0) {
+entry:
+; CHECK-LABEL: sfct21:
+; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i8* %sp0 to i64
+ %add = add i64 %bitcast, -1
+ %addr = inttoptr i64 %add to i8*
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct22(i16* nocapture %sp0) {
+; CHECK-LABEL: sfct22:
+; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
+; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
+; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i16* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i16*
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %val = sitofp i16 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct23(i32* nocapture %sp0) {
+; CHECK-LABEL: sfct23:
+; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
+; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i32* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i32*
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+define double @sfct24(i64* nocapture %sp0) {
+; CHECK-LABEL: sfct24:
+; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i64* %sp0 to i64
+ %add = add i64 %bitcast, 1
+ %addr = inttoptr i64 %add to i64*
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %val = sitofp i64 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+
+}
+
+; Check that we do not use the SSHLL code sequence when code size is a concern.
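+; (Sketch of the trade-off, taken from the checks: @sfct17 above costs
+; ldur b + sshll.8h + sshll.4s + scvtf, while the optsize path below is
+; just ldursb + scvtf.)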
+define float @codesize_sfct17(i8* nocapture %sp0) optsize {
+entry:
+; CHECK-LABEL: codesize_sfct17:
+; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
+; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
+; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+ %bitcast = ptrtoint i8* %sp0 to i64
+ %add = add i64 %bitcast, -1
+ %addr = inttoptr i64 %add to i8*
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %val = sitofp i8 %pix_sp0.0.copyload to float
+ %vmull.i = fmul float %val, %val
+ ret float %vmull.i
+}
+
+define double @codesize_sfct11(i32* nocapture %sp0) minsize {
+; CHECK-LABEL: codesize_sfct11:
+; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %val = sitofp i32 %pix_sp0.0.copyload to double
+ %vmull.i = fmul double %val, %val
+ ret double %vmull.i
+}
+
+; Adding fp128 custom lowering makes these a little fragile since we have to
+; return the correct mix of Legal/Expand from the custom method.
+;
+; rdar://problem/14991489
+
+define float @float_from_i128(i128 %in) {
+; CHECK-LABEL: float_from_i128:
+; CHECK: bl {{_?__floatuntisf}}
+ %conv = uitofp i128 %in to float
+ ret float %conv
+}
+
+define double @double_from_i128(i128 %in) {
+; CHECK-LABEL: double_from_i128:
+; CHECK: bl {{_?__floattidf}}
+ %conv = sitofp i128 %in to double
+ ret double %conv
+}
+
+define fp128 @fp128_from_i128(i128 %in) {
+; CHECK-LABEL: fp128_from_i128:
+; CHECK: bl {{_?__floatuntitf}}
+ %conv = uitofp i128 %in to fp128
+ ret fp128 %conv
+}
+
+define i128 @i128_from_float(float %in) {
+; CHECK-LABEL: i128_from_float
+; CHECK: bl {{_?__fixsfti}}
+ %conv = fptosi float %in to i128
+ ret i128 %conv
+}
+
+define i128 @i128_from_double(double %in) {
+; CHECK-LABEL: i128_from_double
+; CHECK: bl {{_?__fixunsdfti}}
+ %conv = fptoui double %in to i128
+ ret i128 %conv
+}
+
+define i128 @i128_from_fp128(fp128 %in) {
+; CHECK-LABEL: i128_from_fp128
+; CHECK: bl {{_?__fixtfti}}
+ %conv = fptosi fp128 %in to i128
+ ret i128 %conv
+}
+
diff --git a/test/CodeGen/AArch64/arm64-shifted-sext.ll b/test/CodeGen/AArch64/arm64-shifted-sext.ll
new file mode 100644
index 0000000..b7b4e5d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -0,0 +1,277 @@
+; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
+;
+; <rdar://problem/13820218>
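+; (Pattern summary, inferred from the checks below: a sext followed by a
+; shift folds into a single sbfiz/sbfx where a bitfield instruction can
+; express it; the right shift by the full 8-bit source width instead comes
+; out as sxtb + asr.)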
+
+define signext i16 @extendedLeftShiftcharToshortBy4(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftcharToshortBy4:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfiz w0, [[REG]], #4, #8
+ %inc = add i8 %a, 1
+ %conv1 = sext i8 %inc to i32
+ %shl = shl nsw i32 %conv1, 4
+ %conv2 = trunc i32 %shl to i16
+ ret i16 %conv2
+}
+
+define signext i16 @extendedRightShiftcharToshortBy4(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftcharToshortBy4:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfx w0, [[REG]], #4, #4
+ %inc = add i8 %a, 1
+ %conv1 = sext i8 %inc to i32
+ %shr4 = lshr i32 %conv1, 4
+ %conv2 = trunc i32 %shr4 to i16
+ ret i16 %conv2
+}
+
+define signext i16 @extendedLeftShiftcharToshortBy8(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftcharToshortBy8:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfiz w0, [[REG]], #8, #8
+ %inc = add i8 %a, 1
+ %conv1 = sext i8 %inc to i32
+ %shl = shl nsw i32 %conv1, 8
+ %conv2 = trunc i32 %shl to i16
+ ret i16 %conv2
+}
+
+define signext i16 @extendedRightShiftcharToshortBy8(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftcharToshortBy8:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sxtb [[REG]], [[REG]]
+; CHECK: asr w0, [[REG]], #8
+ %inc = add i8 %a, 1
+ %conv1 = sext i8 %inc to i32
+ %shr4 = lshr i32 %conv1, 8
+ %conv2 = trunc i32 %shr4 to i16
+ ret i16 %conv2
+}
+
+define i32 @extendedLeftShiftcharTointBy4(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftcharTointBy4:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfiz w0, [[REG]], #4, #8
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i32
+ %shl = shl nsw i32 %conv, 4
+ ret i32 %shl
+}
+
+define i32 @extendedRightShiftcharTointBy4(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftcharTointBy4:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfx w0, [[REG]], #4, #4
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i32
+ %shr = ashr i32 %conv, 4
+ ret i32 %shr
+}
+
+define i32 @extendedLeftShiftcharTointBy8(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftcharTointBy8:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfiz w0, [[REG]], #8, #8
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i32
+ %shl = shl nsw i32 %conv, 8
+ ret i32 %shl
+}
+
+define i32 @extendedRightShiftcharTointBy8(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftcharTointBy8:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sxtb [[REG]], [[REG]]
+; CHECK: asr w0, [[REG]], #8
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i32
+ %shr = ashr i32 %conv, 8
+ ret i32 %shr
+}
+
+define i64 @extendedLeftShiftcharToint64By4(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftcharToint64By4:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfiz x0, x[[REG]], #4, #8
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i64
+ %shl = shl nsw i64 %conv, 4
+ ret i64 %shl
+}
+
+define i64 @extendedRightShiftcharToint64By4(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftcharToint64By4:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfx x0, x[[REG]], #4, #4
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i64
+ %shr = ashr i64 %conv, 4
+ ret i64 %shr
+}
+
+define i64 @extendedLeftShiftcharToint64By8(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftcharToint64By8:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfiz x0, x[[REG]], #8, #8
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i64
+ %shl = shl nsw i64 %conv, 8
+ ret i64 %shl
+}
+
+define i64 @extendedRightShiftcharToint64By8(i8 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftcharToint64By8:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sxtb x[[REG]], w[[REG]]
+; CHECK: asr x0, x[[REG]], #8
+ %inc = add i8 %a, 1
+ %conv = sext i8 %inc to i64
+ %shr = ashr i64 %conv, 8
+ ret i64 %shr
+}
+
+define i32 @extendedLeftShiftshortTointBy4(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftshortTointBy4:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfiz w0, [[REG]], #4, #16
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i32
+ %shl = shl nsw i32 %conv, 4
+ ret i32 %shl
+}
+
+define i32 @extendedRightShiftshortTointBy4(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftshortTointBy4:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sbfx w0, [[REG]], #4, #12
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i32
+ %shr = ashr i32 %conv, 4
+ ret i32 %shr
+}
+
+define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftshortTointBy16:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: lsl w0, [[REG]], #16
+ %inc = add i16 %a, 1
+ %conv2 = zext i16 %inc to i32
+ %shl = shl nuw i32 %conv2, 16
+ ret i32 %shl
+}
+
+define i32 @extendedRightShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftshortTointBy16:
+; CHECK: add [[REG:w[0-9]+]], w0, #1
+; CHECK: sxth [[REG]], [[REG]]
+; CHECK: asr w0, [[REG]], #16
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i32
+ %shr = ashr i32 %conv, 16
+ ret i32 %shr
+}
+
+define i64 @extendedLeftShiftshortToint64By4(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftshortToint64By4:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfiz x0, x[[REG]], #4, #16
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i64
+ %shl = shl nsw i64 %conv, 4
+ ret i64 %shl
+}
+
+define i64 @extendedRightShiftshortToint64By4(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftshortToint64By4:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfx x0, x[[REG]], #4, #12
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i64
+ %shr = ashr i64 %conv, 4
+ ret i64 %shr
+}
+
+define i64 @extendedLeftShiftshortToint64By16(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftshortToint64By16:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfiz x0, x[[REG]], #16, #16
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i64
+ %shl = shl nsw i64 %conv, 16
+ ret i64 %shl
+}
+
+define i64 @extendedRightShiftshortToint64By16(i16 signext %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftshortToint64By16:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sxth x[[REG]], w[[REG]]
+; CHECK: asr x0, x[[REG]], #16
+ %inc = add i16 %a, 1
+ %conv = sext i16 %inc to i64
+ %shr = ashr i64 %conv, 16
+ ret i64 %shr
+}
+
+define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftintToint64By4:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfiz x0, x[[REG]], #4, #32
+ %inc = add nsw i32 %a, 1
+ %conv = sext i32 %inc to i64
+ %shl = shl nsw i64 %conv, 4
+ ret i64 %shl
+}
+
+define i64 @extendedRightShiftintToint64By4(i32 %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftintToint64By4:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sbfx x0, x[[REG]], #4, #28
+ %inc = add nsw i32 %a, 1
+ %conv = sext i32 %inc to i64
+ %shr = ashr i64 %conv, 4
+ ret i64 %shr
+}
+
+define i64 @extendedLeftShiftintToint64By32(i32 %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedLeftShiftintToint64By32:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: lsl x0, x[[REG]], #32
+ %inc = add nsw i32 %a, 1
+ %conv2 = zext i32 %inc to i64
+ %shl = shl nuw i64 %conv2, 32
+ ret i64 %shl
+}
+
+define i64 @extendedRightShiftintToint64By32(i32 %a) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: extendedRightShiftintToint64By32:
+; CHECK: add w[[REG:[0-9]+]], w0, #1
+; CHECK: sxtw x[[REG]], w[[REG]]
+; CHECK: asr x0, x[[REG]], #32
+ %inc = add nsw i32 %a, 1
+ %conv = sext i32 %inc to i64
+ %shr = ashr i64 %conv, 32
+ ret i64 %shr
+}
diff --git a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll
new file mode 100644
index 0000000..aed39e7
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST
+
+define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp {
+; CHECK: uaddlv.16b h0, v0
+; CHECK: rshrn.8b v0, v0, #4
+; CHECK: dup.16b v0, v0[0]
+; CHECK: ret
+
+; CHECK-FAST: uaddlv.16b
+; CHECK-FAST: rshrn.8b
+; CHECK-FAST: dup.16b
+ %tmp = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) nounwind
+ %tmp1 = trunc i32 %tmp to i16
+ %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0
+ %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp2, i32 4)
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp4
+}
+
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM64/simplest-elf.ll b/test/CodeGen/AArch64/arm64-simplest-elf.ll
index 1254365..1254365 100644
--- a/test/CodeGen/ARM64/simplest-elf.ll
+++ b/test/CodeGen/AArch64/arm64-simplest-elf.ll
diff --git a/test/CodeGen/ARM64/sincos.ll b/test/CodeGen/AArch64/arm64-sincos.ll
index 06157b2..06157b2 100644
--- a/test/CodeGen/ARM64/sincos.ll
+++ b/test/CodeGen/AArch64/arm64-sincos.ll
diff --git a/test/CodeGen/ARM64/sitofp-combine-chains.ll b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
index 10b433b..10b433b 100644
--- a/test/CodeGen/ARM64/sitofp-combine-chains.ll
+++ b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
diff --git a/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
new file mode 100644
index 0000000..7fec539
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -0,0 +1,41 @@
+; RUN: llc -aarch64-shift-insert-generation=true -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
+define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLeftGood:
+; CHECK: sli.16b v0, v1, #3
+ %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
+ %vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ %result = or <16 x i8> %and.i, %vshl_n
+ store <16 x i8> %result, <16 x i8>* %dest, align 16
+ ret void
+}
+
+define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLeftBad:
+; CHECK-NOT: sli
+ %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
+ %vshl_n = shl <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %result = or <16 x i8> %and.i, %vshl_n
+ store <16 x i8> %result, <16 x i8>* %dest, align 16
+ ret void
+}
+
+define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testRightGood:
+; CHECK: sri.16b v0, v1, #3
+ %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
+ %vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ %result = or <16 x i8> %and.i, %vshl_n
+ store <16 x i8> %result, <16 x i8>* %dest, align 16
+ ret void
+}
+
+define void @testRightBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testRightBad:
+; CHECK-NOT: sri
+ %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
+ %vshl_n = lshr <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %result = or <16 x i8> %and.i, %vshl_n
+ store <16 x i8> %result, <16 x i8>* %dest, align 16
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-smaxv.ll b/test/CodeGen/AArch64/arm64-smaxv.ll
new file mode 100644
index 0000000..183e667
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-smaxv.ll
@@ -0,0 +1,74 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
+define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
+; CHECK: test_vmaxv_s8
+; CHECK: smaxv.8b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a1)
+ %0 = trunc i32 %vmaxv.i to i8
+ ret i8 %0
+}
+
+define signext i16 @test_vmaxv_s16(<4 x i16> %a1) {
+; CHECK: test_vmaxv_s16
+; CHECK: smaxv.4h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a1)
+ %0 = trunc i32 %vmaxv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vmaxv_s32(<2 x i32> %a1) {
+; CHECK: test_vmaxv_s32
+; 2 x i32 is not supported by the ISA, so this is a special case: a pairwise
+; smaxp of the vector with itself leaves the maximum in lane 0.
+; CHECK: smaxp.2s v[[REGNUM:[0-9]+]], v0, v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a1)
+ ret i32 %vmaxv.i
+}
+
+define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) {
+; CHECK: test_vmaxvq_s8
+; CHECK: smaxv.16b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a1)
+ %0 = trunc i32 %vmaxv.i to i8
+ ret i8 %0
+}
+
+define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) {
+; CHECK: test_vmaxvq_s16
+; CHECK: smaxv.8h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a1)
+ %0 = trunc i32 %vmaxv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vmaxvq_s32(<4 x i32> %a1) {
+; CHECK: test_vmaxvq_s32
+; CHECK: smaxv.4s [[REGNUM:s[0-9]+]], v0
+; CHECK-NEXT: fmov w0, [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a1)
+ ret i32 %vmaxv.i
+}
+
+declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
+declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
+declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
+declare i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32>)
+declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
+declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)
+
diff --git a/test/CodeGen/AArch64/arm64-sminv.ll b/test/CodeGen/AArch64/arm64-sminv.ll
new file mode 100644
index 0000000..195c4e5
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-sminv.ll
@@ -0,0 +1,74 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
+define signext i8 @test_vminv_s8(<8 x i8> %a1) {
+; CHECK: test_vminv_s8
+; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a1)
+ %0 = trunc i32 %vminv.i to i8
+ ret i8 %0
+}
+
+define signext i16 @test_vminv_s16(<4 x i16> %a1) {
+; CHECK: test_vminv_s16
+; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a1)
+ %0 = trunc i32 %vminv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vminv_s32(<2 x i32> %a1) {
+; CHECK: test_vminv_s32
+; 2 x i32 is not supported by the ISA, so this is a special case: a pairwise
+; sminp of the vector with itself leaves the minimum in lane 0.
+; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v0, v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a1)
+ ret i32 %vminv.i
+}
+
+define signext i8 @test_vminvq_s8(<16 x i8> %a1) {
+; CHECK: test_vminvq_s8
+; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a1)
+ %0 = trunc i32 %vminv.i to i8
+ ret i8 %0
+}
+
+define signext i16 @test_vminvq_s16(<8 x i16> %a1) {
+; CHECK: test_vminvq_s16
+; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a1)
+ %0 = trunc i32 %vminv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vminvq_s32(<4 x i32> %a1) {
+; CHECK: test_vminvq_s32
+; CHECK: sminv.4s [[REGNUM:s[0-9]+]], v0
+; CHECK-NEXT: fmov w0, [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a1)
+ ret i32 %vminv.i
+}
+
+declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
+declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
+declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
+declare i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32>)
+declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
+declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)
+
diff --git a/test/CodeGen/ARM64/spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll
index fb6588e..fb6588e 100644
--- a/test/CodeGen/ARM64/spill-lr.ll
+++ b/test/CodeGen/AArch64/arm64-spill-lr.ll
diff --git a/test/CodeGen/AArch64/arm64-spill.ll b/test/CodeGen/AArch64/arm64-spill.ll
new file mode 100644
index 0000000..47cdc2b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-spill.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+
+; CHECK: fpr128
+; CHECK: ld1.2d
+; CHECK: str q
+; CHECK: inlineasm
+; CHECK: ldr q
+; CHECK: st1.2d
+define void @fpr128(<4 x float>* %p) nounwind ssp {
+entry:
+ %x = load <4 x float>* %p, align 16
+ call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
+ store <4 x float> %x, <4 x float>* %p, align 16
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll
new file mode 100644
index 0000000..4370484
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-st1.ll
@@ -0,0 +1,676 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+
+define void @st1lane_16b(<16 x i8> %A, i8* %D) {
+; CHECK-LABEL: st1lane_16b
+; CHECK: st1.b
+ %tmp = extractelement <16 x i8> %A, i32 1
+ store i8 %tmp, i8* %D
+ ret void
+}
+
+define void @st1lane_8h(<8 x i16> %A, i16* %D) {
+; CHECK-LABEL: st1lane_8h
+; CHECK: st1.h
+ %tmp = extractelement <8 x i16> %A, i32 1
+ store i16 %tmp, i16* %D
+ ret void
+}
+
+define void @st1lane_4s(<4 x i32> %A, i32* %D) {
+; CHECK-LABEL: st1lane_4s
+; CHECK: st1.s
+ %tmp = extractelement <4 x i32> %A, i32 1
+ store i32 %tmp, i32* %D
+ ret void
+}
+
+define void @st1lane_4s_float(<4 x float> %A, float* %D) {
+; CHECK-LABEL: st1lane_4s_float
+; CHECK: st1.s
+ %tmp = extractelement <4 x float> %A, i32 1
+ store float %tmp, float* %D
+ ret void
+}
+
+define void @st1lane_2d(<2 x i64> %A, i64* %D) {
+; CHECK-LABEL: st1lane_2d
+; CHECK: st1.d
+ %tmp = extractelement <2 x i64> %A, i32 1
+ store i64 %tmp, i64* %D
+ ret void
+}
+
+define void @st1lane_2d_double(<2 x double> %A, double* %D) {
+; CHECK-LABEL: st1lane_2d_double
+; CHECK: st1.d
+ %tmp = extractelement <2 x double> %A, i32 1
+ store double %tmp, double* %D
+ ret void
+}
+
+define void @st1lane_8b(<8 x i8> %A, i8* %D) {
+; CHECK-LABEL: st1lane_8b
+; CHECK: st1.b
+ %tmp = extractelement <8 x i8> %A, i32 1
+ store i8 %tmp, i8* %D
+ ret void
+}
+
+define void @st1lane_4h(<4 x i16> %A, i16* %D) {
+; CHECK-LABEL: st1lane_4h
+; CHECK: st1.h
+ %tmp = extractelement <4 x i16> %A, i32 1
+ store i16 %tmp, i16* %D
+ ret void
+}
+
+define void @st1lane_2s(<2 x i32> %A, i32* %D) {
+; CHECK-LABEL: st1lane_2s
+; CHECK: st1.s
+ %tmp = extractelement <2 x i32> %A, i32 1
+ store i32 %tmp, i32* %D
+ ret void
+}
+
+define void @st1lane_2s_float(<2 x float> %A, float* %D) {
+; CHECK-LABEL: st1lane_2s_float
+; CHECK: st1.s
+ %tmp = extractelement <2 x float> %A, i32 1
+ store float %tmp, float* %D
+ ret void
+}
+
+define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
+; CHECK-LABEL: st2lane_16b
+; CHECK: st2.b
+ call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
+ ret void
+}
+
+define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
+; CHECK-LABEL: st2lane_8h
+; CHECK: st2.h
+ call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
+ ret void
+}
+
+define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
+; CHECK-LABEL: st2lane_4s
+; CHECK: st2.s
+ call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
+ ret void
+}
+
+define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
+; CHECK-LABEL: st2lane_2d
+; CHECK: st2.d
+ call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
+declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
+declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
+declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
+
+define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
+; CHECK-LABEL: st3lane_16b
+; CHECK: st3.b
+ call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
+ ret void
+}
+
+define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
+; CHECK-LABEL: st3lane_8h
+; CHECK: st3.h
+ call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
+ ret void
+}
+
+define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
+; CHECK-LABEL: st3lane_4s
+; CHECK: st3.s
+ call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
+ ret void
+}
+
+define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
+; CHECK-LABEL: st3lane_2d
+; CHECK: st3.d
+ call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
+declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
+declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
+declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
+
+define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
+; CHECK-LABEL: st4lane_16b
+; CHECK: st4.b
+ call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
+ ret void
+}
+
+define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
+; CHECK-LABEL: st4lane_8h
+; CHECK: st4.h
+ call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
+ ret void
+}
+
+define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
+; CHECK-LABEL: st4lane_4s
+; CHECK: st4.s
+ call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
+ ret void
+}
+
+define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
+; CHECK-LABEL: st4lane_2d
+; CHECK: st4.d
+ call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
+declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
+declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
+declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
+
+
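+; Structured stores: each stN intrinsic should select the corresponding
+; stN.<size> instruction.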
+define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
+; CHECK-LABEL: st2_8b
+; CHECK: st2.8b
+ call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
+ ret void
+}
+
+define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
+; CHECK-LABEL: st3_8b
+; CHECK: st3.8b
+ call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
+ ret void
+}
+
+define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
+; CHECK-LABEL: st4_8b
+; CHECK: st4.8b
+ call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
+
+define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
+; CHECK-LABEL: st2_16b
+; CHECK: st2.16b
+ call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
+ ret void
+}
+
+define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
+; CHECK-LABEL: st3_16b
+; CHECK: st3.16b
+ call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
+ ret void
+}
+
+define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
+; CHECK-LABEL: st4_16b
+; CHECK: st4.16b
+ call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
+
+define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
+; CHECK-LABEL: st2_4h
+; CHECK: st2.4h
+ call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
+ ret void
+}
+
+define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
+; CHECK-LABEL: st3_4h
+; CHECK: st3.4h
+ call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
+ ret void
+}
+
+define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
+; CHECK-LABEL: st4_4h
+; CHECK: st4.4h
+ call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
+
+define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
+; CHECK-LABEL: st2_8h
+; CHECK: st2.8h
+ call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
+ ret void
+}
+
+define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
+; CHECK-LABEL: st3_8h
+; CHECK: st3.8h
+ call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
+ ret void
+}
+
+define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
+; CHECK-LABEL: st4_8h
+; CHECK: st4.8h
+ call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
+
+define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
+; CHECK-LABEL: st2_2s
+; CHECK: st2.2s
+ call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
+ ret void
+}
+
+define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
+; CHECK-LABEL: st3_2s
+; CHECK: st3.2s
+ call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
+ ret void
+}
+
+define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
+; CHECK-LABEL: st4_2s
+; CHECK: st4.2s
+ call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
+
+define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
+; CHECK-LABEL: st2_4s
+; CHECK: st2.4s
+ call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
+ ret void
+}
+
+define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
+; CHECK-LABEL: st3_4s
+; CHECK: st3.4s
+ call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
+ ret void
+}
+
+define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
+; CHECK-LABEL: st4_4s
+; CHECK: st4.4s
+ call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
+
+define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
+; CHECK-LABEL: st2_1d
+; CHECK: st1.1d
+ call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
+ ret void
+}
+
+define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
+; CHECK-LABEL: st3_1d
+; CHECK: st1.1d
+ call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
+ ret void
+}
+
+define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
+; CHECK-LABEL: st4_1d
+; CHECK: st1.1d
+ call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
+
+define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
+; CHECK-LABEL: st2_2d
+; CHECK: st2.2d
+ call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
+ ret void
+}
+
+define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
+; CHECK-LABEL: st3_2d
+; CHECK: st3.2d
+ call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
+ ret void
+}
+
+define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
+; CHECK-LABEL: st4_2d
+; CHECK: st4.2d
+ call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
+
+declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly
+
+define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
+; CHECK-LABEL: st1_x2_v8i8:
+; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
+ ret void
+}
+
+define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
+; CHECK-LABEL: st1_x2_v4i16:
+; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
+ ret void
+}
+
+define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
+; CHECK-LABEL: st1_x2_v2i32:
+; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
+ ret void
+}
+
+define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
+; CHECK-LABEL: st1_x2_v2f32:
+; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
+ ret void
+}
+
+define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
+; CHECK-LABEL: st1_x2_v1i64:
+; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
+ ret void
+}
+
+define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
+; CHECK-LABEL: st1_x2_v1f64:
+; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly
+
+define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
+; CHECK-LABEL: st1_x2_v16i8:
+; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
+ ret void
+}
+
+define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
+; CHECK-LABEL: st1_x2_v8i16:
+; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
+ ret void
+}
+
+define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
+; CHECK-LABEL: st1_x2_v4i32:
+; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
+ ret void
+}
+
+define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
+; CHECK-LABEL: st1_x2_v4f32:
+; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
+ ret void
+}
+
+define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
+; CHECK-LABEL: st1_x2_v2i64:
+; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
+ ret void
+}
+
+define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
+; CHECK-LABEL: st1_x2_v2f64:
+; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly
+
+define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
+; CHECK-LABEL: st1_x3_v8i8:
+; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
+ ret void
+}
+
+define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
+; CHECK-LABEL: st1_x3_v4i16:
+; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
+ ret void
+}
+
+define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
+; CHECK-LABEL: st1_x3_v2i32:
+; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
+ ret void
+}
+
+define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
+; CHECK-LABEL: st1_x3_v2f32:
+; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
+ ret void
+}
+
+define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
+; CHECK-LABEL: st1_x3_v1i64:
+; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
+ ret void
+}
+
+define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
+; CHECK-LABEL: st1_x3_v1f64:
+; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly
+
+define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
+; CHECK-LABEL: st1_x3_v16i8:
+; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
+ ret void
+}
+
+define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
+; CHECK-LABEL: st1_x3_v8i16:
+; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
+ ret void
+}
+
+define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
+; CHECK-LABEL: st1_x3_v4i32:
+; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
+ ret void
+}
+
+define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
+; CHECK-LABEL: st1_x3_v4f32:
+; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
+ ret void
+}
+
+define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
+; CHECK-LABEL: st1_x3_v2i64:
+; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
+ ret void
+}
+
+define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
+; CHECK-LABEL: st1_x3_v2f64:
+; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
+ ret void
+}
+
+
+declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly
+
+define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
+; CHECK-LABEL: st1_x4_v8i8:
+; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
+ ret void
+}
+
+define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
+; CHECK-LABEL: st1_x4_v4i16:
+; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
+ ret void
+}
+
+define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
+; CHECK-LABEL: st1_x4_v2i32:
+; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
+ ret void
+}
+
+define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
+; CHECK-LABEL: st1_x4_v2f32:
+; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
+ ret void
+}
+
+define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
+; CHECK-LABEL: st1_x4_v1i64:
+; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
+ ret void
+}
+
+define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
+; CHECK-LABEL: st1_x4_v1f64:
+; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
+ ret void
+}
+
+declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
+declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly
+
+define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
+; CHECK-LABEL: st1_x4_v16i8:
+; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
+ ret void
+}
+
+define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) {
+; CHECK-LABEL: st1_x4_v8i16:
+; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
+ ret void
+}
+
+define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) {
+; CHECK-LABEL: st1_x4_v4i32:
+; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
+ ret void
+}
+
+define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) {
+; CHECK-LABEL: st1_x4_v4f32:
+; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
+ ret void
+}
+
+define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) {
+; CHECK-LABEL: st1_x4_v2i64:
+; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
+ ret void
+}
+
+define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) {
+; CHECK-LABEL: st1_x4_v2f64:
+; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
+ call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr)
+ ret void
+}
diff --git a/test/CodeGen/ARM64/stack-no-frame.ll b/test/CodeGen/AArch64/arm64-stack-no-frame.ll
index b5970c0..b5970c0 100644
--- a/test/CodeGen/ARM64/stack-no-frame.ll
+++ b/test/CodeGen/AArch64/arm64-stack-no-frame.ll
diff --git a/test/CodeGen/ARM64/stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 2c7c6ae..2c7c6ae 100644
--- a/test/CodeGen/ARM64/stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
diff --git a/test/CodeGen/AArch64/arm64-stackpointer.ll b/test/CodeGen/AArch64/arm64-stackpointer.ll
new file mode 100644
index 0000000..581faf1
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-stackpointer.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+
+define i64 @get_stack() nounwind {
+entry:
+; CHECK-LABEL: get_stack:
+; CHECK: mov x0, sp
+ %sp = call i64 @llvm.read_register.i64(metadata !0)
+ ret i64 %sp
+}
+
+define void @set_stack(i64 %val) nounwind {
+entry:
+; CHECK-LABEL: set_stack:
+; CHECK: mov sp, x0
+ call void @llvm.write_register.i64(metadata !0, i64 %val)
+ ret void
+}
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+declare void @llvm.write_register.i64(metadata, i64) nounwind
+
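+; The !0 metadata below names the physical register used by read_register and
+; write_register; the name itself must not leak into the emitted object as data.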
+; register unsigned long current_stack_pointer asm("sp");
+; CHECK-NOT: .asciz "sp"
+!0 = metadata !{metadata !"sp\00"}
diff --git a/test/CodeGen/ARM64/stacksave.ll b/test/CodeGen/AArch64/arm64-stacksave.ll
index a79e99b..a79e99b 100644
--- a/test/CodeGen/ARM64/stacksave.ll
+++ b/test/CodeGen/AArch64/arm64-stacksave.ll
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
new file mode 100644
index 0000000..40bdf22
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
+; RUN: -verify-machineinstrs -mcpu=cyclone | FileCheck -check-prefix=STUR_CHK %s
+
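+; Two adjacent same-width stores should be merged into a single stp.
+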
+; CHECK: stp_int
+; CHECK: stp w0, w1, [x2]
+define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
+ store i32 %a, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32* %p, i64 1
+ store i32 %b, i32* %add.ptr, align 4
+ ret void
+}
+
+; CHECK: stp_long
+; CHECK: stp x0, x1, [x2]
+define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
+ store i64 %a, i64* %p, align 8
+ %add.ptr = getelementptr inbounds i64* %p, i64 1
+ store i64 %b, i64* %add.ptr, align 8
+ ret void
+}
+
+; CHECK: stp_float
+; CHECK: stp s0, s1, [x0]
+define void @stp_float(float %a, float %b, float* nocapture %p) nounwind {
+ store float %a, float* %p, align 4
+ %add.ptr = getelementptr inbounds float* %p, i64 1
+ store float %b, float* %add.ptr, align 4
+ ret void
+}
+
+; CHECK: stp_double
+; CHECK: stp d0, d1, [x0]
+define void @stp_double(double %a, double %b, double* nocapture %p) nounwind {
+ store double %a, double* %p, align 8
+ %add.ptr = getelementptr inbounds double* %p, i64 1
+ store double %b, double* %add.ptr, align 8
+ ret void
+}
+
+; Test the load/store optimizer: combine sturs into an stp, if appropriate.
+define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
+; STUR_CHK: stur_int
+; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
+; STUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32* %p, i32 -1
+ store i32 %a, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32* %p, i32 -2
+ store i32 %b, i32* %p2, align 2
+ ret void
+}
+
+define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
+; STUR_CHK: stur_long
+; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
+; STUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i64* %p, i32 -1
+ store i64 %a, i64* %p1, align 2
+ %p2 = getelementptr inbounds i64* %p, i32 -2
+ store i64 %b, i64* %p2, align 2
+ ret void
+}
+
+define void @stur_float(float %a, float %b, float* nocapture %p) nounwind {
+; STUR_CHK: stur_float
+; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
+; STUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds float* %p, i32 -1
+ store float %a, float* %p1, align 2
+ %p2 = getelementptr inbounds float* %p, i32 -2
+ store float %b, float* %p2, align 2
+ ret void
+}
+
+define void @stur_double(double %a, double %b, double* nocapture %p) nounwind {
+; STUR_CHK: stur_double
+; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
+; STUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds double* %p, i32 -1
+ store double %a, double* %p1, align 2
+ %p2 = getelementptr inbounds double* %p, i32 -2
+ store double %b, double* %p2, align 2
+ ret void
+}
+
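+; A store of a splatted <4 x i32> should become two stp stores of the scalar
+; paired with itself.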
+define void @splat_v4i32(i32 %v, i32 *%p) {
+entry:
+
+; CHECK-LABEL: splat_v4i32
+; CHECK-DAG: stp w0, w0, [x1]
+; CHECK-DAG: stp w0, w0, [x1, #8]
+; CHECK: ret
+
+ %p17 = insertelement <4 x i32> undef, i32 %v, i32 0
+ %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
+ %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
+ %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
+ %p21 = bitcast i32* %p to <4 x i32>*
+ store <4 x i32> %p20, <4 x i32>* %p21, align 4
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll
new file mode 100644
index 0000000..5d13704
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+
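+; Under -aarch64-strict-align, underaligned loads are split into naturally
+; aligned pieces and recombined with bfi; otherwise a single load is used.
+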
+define i32 @f0(i32* nocapture %p) nounwind {
+; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
+; CHECK-STRICT: ldrh [[LOW:w[0-9]+]], [x0]
+; CHECK-STRICT: bfi [[LOW]], [[HIGH]], #16, #16
+; CHECK-STRICT: ret
+
+; CHECK: ldr w0, [x0]
+; CHECK: ret
+ %tmp = load i32* %p, align 2
+ ret i32 %tmp
+}
+
+define i64 @f1(i64* nocapture %p) nounwind {
+; CHECK-STRICT: ldp w[[LOW:[0-9]+]], w[[HIGH:[0-9]+]], [x0]
+; CHECK-STRICT: bfi x[[LOW]], x[[HIGH]], #32, #32
+; CHECK-STRICT: ret
+
+; CHECK: ldr x0, [x0]
+; CHECK: ret
+ %tmp = load i64* %p, align 4
+ ret i64 %tmp
+}
diff --git a/test/CodeGen/AArch64/arm64-stur.ll b/test/CodeGen/AArch64/arm64-stur.ll
new file mode 100644
index 0000000..a2e684d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-stur.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s
+%struct.X = type <{ i32, i64, i64 }>
+
+define void @foo1(i32* %p, i64 %val) nounwind {
+; CHECK-LABEL: foo1:
+; CHECK: stur w1, [x0, #-4]
+; CHECK-NEXT: ret
+ %tmp1 = trunc i64 %val to i32
+ %ptr = getelementptr inbounds i32* %p, i64 -1
+ store i32 %tmp1, i32* %ptr, align 4
+ ret void
+}
+define void @foo2(i16* %p, i64 %val) nounwind {
+; CHECK-LABEL: foo2:
+; CHECK: sturh w1, [x0, #-2]
+; CHECK-NEXT: ret
+ %tmp1 = trunc i64 %val to i16
+ %ptr = getelementptr inbounds i16* %p, i64 -1
+ store i16 %tmp1, i16* %ptr, align 2
+ ret void
+}
+define void @foo3(i8* %p, i64 %val) nounwind {
+; CHECK-LABEL: foo3:
+; CHECK: sturb w1, [x0, #-1]
+; CHECK-NEXT: ret
+ %tmp1 = trunc i64 %val to i8
+ %ptr = getelementptr inbounds i8* %p, i64 -1
+ store i8 %tmp1, i8* %ptr, align 1
+ ret void
+}
+define void @foo4(i16* %p, i32 %val) nounwind {
+; CHECK-LABEL: foo4:
+; CHECK: sturh w1, [x0, #-2]
+; CHECK-NEXT: ret
+ %tmp1 = trunc i32 %val to i16
+ %ptr = getelementptr inbounds i16* %p, i32 -1
+ store i16 %tmp1, i16* %ptr, align 2
+ ret void
+}
+define void @foo5(i8* %p, i32 %val) nounwind {
+; CHECK-LABEL: foo5:
+; CHECK: sturb w1, [x0, #-1]
+; CHECK-NEXT: ret
+ %tmp1 = trunc i32 %val to i8
+ %ptr = getelementptr inbounds i8* %p, i32 -1
+ store i8 %tmp1, i8* %ptr, align 1
+ ret void
+}
+
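+; The 16-byte memset should lower to two stur xzr stores covering bytes 4-19
+; of the packed struct.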
+define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
+; CHECK-LABEL: foo:
+; CHECK-NOT: str
+; CHECK: stur xzr, [x0, #12]
+; CHECK-NEXT: stur xzr, [x0, #4]
+; CHECK-NEXT: ret
+ %B = getelementptr inbounds %struct.X* %p, i64 0, i32 1
+ %val = bitcast i64* %B to i8*
+ call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; Unaligned 16b stores are split into 8b stores for performance.
+; radar://15424193
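+; The high half is extracted with ext.16b #8 so both halves can be written
+; with 8-byte str d stores.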
+
+; CHECK-LABEL: unaligned:
+; CHECK-NOT: str q0
+; CHECK: str d[[REG:[0-9]+]], [x0]
+; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
+; CHECK: str d[[REG2]], [x0, #8]
+define void @unaligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
+ store <4 x i32> %v, <4 x i32>* %p, align 4
+ ret void
+}
+
+; CHECK-LABEL: aligned:
+; CHECK: str q0
+define void @aligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
+ store <4 x i32> %v, <4 x i32>* %p
+ ret void
+}
+
+; Don't split one- and two-byte-aligned stores.
+; radar://16349308
+
+; CHECK-LABEL: twobytealign:
+; CHECK: str q0
+define void @twobytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
+ store <4 x i32> %v, <4 x i32>* %p, align 2
+ ret void
+}
+; CHECK-LABEL: onebytealign:
+; CHECK: str q0
+define void @onebytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
+ store <4 x i32> %v, <4 x i32>* %p, align 1
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-subsections.ll b/test/CodeGen/AArch64/arm64-subsections.ll
new file mode 100644
index 0000000..316e7c3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-subsections.ll
@@ -0,0 +1,5 @@
+; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ELF
+
+; CHECK-MACHO: .subsections_via_symbols
+; CHECK-ELF-NOT: .subsections_via_symbols
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/arm64-subvector-extend.ll b/test/CodeGen/AArch64/arm64-subvector-extend.ll
new file mode 100644
index 0000000..d5a178a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
+; Test efficient codegen of vector extends from a legal type up to 128-bit
+; and 256-bit vector types.
+
+;-----
+; Vectors of i16.
+;-----
+define <8 x i16> @func1(<8 x i8> %v0) nounwind {
+; CHECK-LABEL: func1:
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <8 x i8> %v0 to <8 x i16>
+ ret <8 x i16> %r
+}
+
+define <8 x i16> @func2(<8 x i8> %v0) nounwind {
+; CHECK-LABEL: func2:
+; CHECK-NEXT: sshll.8h v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <8 x i8> %v0 to <8 x i16>
+ ret <8 x i16> %r
+}
+
+define <16 x i16> @func3(<16 x i8> %v0) nounwind {
+; CHECK-LABEL: func3:
+; CHECK-NEXT: ushll2.8h v1, v0, #0
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <16 x i8> %v0 to <16 x i16>
+ ret <16 x i16> %r
+}
+
+define <16 x i16> @func4(<16 x i8> %v0) nounwind {
+; CHECK-LABEL: func4:
+; CHECK-NEXT: sshll2.8h v1, v0, #0
+; CHECK-NEXT: sshll.8h v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <16 x i8> %v0 to <16 x i16>
+ ret <16 x i16> %r
+}
+
+;-----
+; Vectors of i32.
+;-----
+
+define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
+; CHECK-LABEL: afunc1:
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <4 x i16> %v0 to <4 x i32>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
+; CHECK-LABEL: afunc2:
+; CHECK-NEXT: sshll.4s v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <4 x i16> %v0 to <4 x i32>
+ ret <4 x i32> %r
+}
+
+define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
+; CHECK-LABEL: afunc3:
+; CHECK-NEXT: ushll2.4s v1, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <8 x i16> %v0 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
+; CHECK-LABEL: afunc4:
+; CHECK-NEXT: sshll2.4s v1, v0, #0
+; CHECK-NEXT: sshll.4s v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <8 x i16> %v0 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
+; CHECK-LABEL: bfunc1:
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: ushll2.4s v1, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <8 x i8> %v0 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
+; CHECK-LABEL: bfunc2:
+; CHECK-NEXT: sshll.8h v0, v0, #0
+; CHECK-NEXT: sshll2.4s v1, v0, #0
+; CHECK-NEXT: sshll.4s v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <8 x i8> %v0 to <8 x i32>
+ ret <8 x i32> %r
+}
+
+;-----
+; Vectors of i64.
+;-----
+
+define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
+; CHECK-LABEL: zfunc1:
+; CHECK-NEXT: ushll2.2d v1, v0, #0
+; CHECK-NEXT: ushll.2d v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <4 x i32> %v0 to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
+; CHECK-LABEL: zfunc2:
+; CHECK-NEXT: sshll2.2d v1, v0, #0
+; CHECK-NEXT: sshll.2d v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <4 x i32> %v0 to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
+; CHECK-LABEL: bfunc3:
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ushll2.2d v1, v0, #0
+; CHECK-NEXT: ushll.2d v0, v0, #0
+; CHECK-NEXT: ret
+ %r = zext <4 x i16> %v0 to <4 x i64>
+ ret <4 x i64> %r
+}
+
+define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
+; CHECK-LABEL: cfunc4:
+; CHECK-NEXT: sshll.4s v0, v0, #0
+; CHECK-NEXT: sshll2.2d v1, v0, #0
+; CHECK-NEXT: sshll.2d v0, v0, #0
+; CHECK-NEXT: ret
+ %r = sext <4 x i16> %v0 to <4 x i64>
+ ret <4 x i64> %r
+}
diff --git a/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll b/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
index 4ab2bee..4ab2bee 100644
--- a/test/CodeGen/ARM64/swizzle-tbl-i16-layout.ll
+++ b/test/CodeGen/AArch64/arm64-swizzle-tbl-i16-layout.ll
diff --git a/test/CodeGen/AArch64/arm64-tbl.ll b/test/CodeGen/AArch64/arm64-tbl.ll
new file mode 100644
index 0000000..b1ce15a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-tbl.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
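+; Check that the aarch64.neon.tbl1-tbl4 and tbx1-tbx4 intrinsics each lower
+; to a single tbl/tbx table-lookup instruction in the requested arrangement.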
+
+define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
+; CHECK: tbl1_8b
+; CHECK: tbl.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
+; CHECK: tbl1_16b
+; CHECK: tbl.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
+; CHECK: tbl2_8b
+; CHECK: tbl.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
+; CHECK: tbl2_16b
+; CHECK: tbl.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK: tbl3_8b
+; CHECK: tbl.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK: tbl3_16b
+; CHECK: tbl.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK: tbl4_8b
+; CHECK: tbl.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK: tbl4_16b
+; CHECK: tbl.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+ ret <16 x i8> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
+; CHECK: tbx1_8b
+; CHECK: tbx.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
+; CHECK: tbx1_16b
+; CHECK: tbx.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK: tbx2_8b
+; CHECK: tbx.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK: tbx2_16b
+; CHECK: tbx.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK: tbx3_8b
+; CHECK: tbx.8b
+  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK: tbx3_16b
+; CHECK: tbx.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
+; CHECK: tbx4_8b
+; CHECK: tbx.8b
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
+; CHECK: tbx4_16b
+; CHECK: tbx.16b
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
+ ret <16 x i8> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
diff --git a/test/CodeGen/ARM64/this-return.ll b/test/CodeGen/AArch64/arm64-this-return.ll
index 30f5b9b..30f5b9b 100644
--- a/test/CodeGen/ARM64/this-return.ll
+++ b/test/CodeGen/AArch64/arm64-this-return.ll
diff --git a/test/CodeGen/ARM64/tls-darwin.ll b/test/CodeGen/AArch64/arm64-tls-darwin.ll
index 5e8ec33..5e8ec33 100644
--- a/test/CodeGen/ARM64/tls-darwin.ll
+++ b/test/CodeGen/AArch64/arm64-tls-darwin.ll
diff --git a/test/CodeGen/ARM64/tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
index 3daae62..3daae62 100644
--- a/test/CodeGen/ARM64/tls-dynamic-together.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
diff --git a/test/CodeGen/ARM64/tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index e8a83fd..e8a83fd 100644
--- a/test/CodeGen/ARM64/tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
diff --git a/test/CodeGen/ARM64/tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll
index f0130d8..f0130d8 100644
--- a/test/CodeGen/ARM64/tls-execs.ll
+++ b/test/CodeGen/AArch64/arm64-tls-execs.ll
diff --git a/test/CodeGen/AArch64/arm64-trap.ll b/test/CodeGen/AArch64/arm64-trap.ll
new file mode 100644
index 0000000..5e99c32
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-trap.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
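+; llvm.trap should lower to a single brk (breakpoint) instruction.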
+define void @foo() nounwind {
+; CHECK: foo
+; CHECK: brk #0x1
+ tail call void @llvm.trap()
+ ret void
+}
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/AArch64/arm64-trn.ll b/test/CodeGen/AArch64/arm64-trn.ll
new file mode 100644
index 0000000..2db7a14
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-trn.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
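+; Check that even/odd interleaving shuffles of two vectors match the
+; trn1/trn2 transpose instructions.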
+
+define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vtrni8:
+;CHECK: trn1.8b
+;CHECK: trn2.8b
+;CHECK-NEXT: add.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: vtrni16:
+;CHECK: trn1.4h
+;CHECK: trn2.4h
+;CHECK-NEXT: add.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; 2xi32 TRN is redundant with ZIP
+define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: vtrni32:
+;CHECK: zip1.2s
+;CHECK: zip2.2s
+;CHECK-NEXT: add.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: vtrnf:
+;CHECK: zip1.2s
+;CHECK: zip2.2s
+;CHECK-NEXT: fadd.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = fadd <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: vtrnQi8:
+;CHECK: trn1.16b
+;CHECK: trn2.16b
+;CHECK-NEXT: add.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vtrnQi16:
+;CHECK: trn1.8h
+;CHECK: trn2.8h
+;CHECK-NEXT: add.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: vtrnQi32:
+;CHECK: trn1.4s
+;CHECK: trn2.4s
+;CHECK-NEXT: add.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: vtrnQf:
+;CHECK: trn1.4s
+;CHECK: trn2.4s
+;CHECK-NEXT: fadd.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
+
+; Undef shuffle indices should not prevent matching to TRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vtrni8_undef:
+;CHECK: trn1.8b
+;CHECK: trn2.8b
+;CHECK-NEXT: add.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vtrnQi16_undef:
+;CHECK: trn1.8h
+;CHECK: trn2.8h
+;CHECK-NEXT: add.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
diff --git a/test/CodeGen/AArch64/arm64-trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll
new file mode 100644
index 0000000..cf15247
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
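+; Check that truncations feeding stores fold into the store itself: xtn plus a
+; d-register str for vectors, and narrow str/strh/sturb forms with
+; extended-register addressing for scalars.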
+
+define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind {
+; CHECK-LABEL: bar:
+; CHECK: xtn.8b v[[REG:[0-9]+]], v0
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+ %tmp = trunc <8 x i16> %arg to <8 x i8>
+ store <8 x i8> %tmp, <8 x i8>* %p, align 8
+ ret void
+}
+
+@zptr8 = common global i8* null, align 8
+@zptr16 = common global i16* null, align 8
+@zptr32 = common global i32* null, align 8
+
+define void @fct32(i32 %arg, i64 %var) {
+; CHECK: fct32
+; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr32@GOTPAGE
+; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr32@GOTPAGEOFF]
+; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
+; w0 is %arg
+; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
+; w1 is %var truncated
+; CHECK-NEXT: str w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #2]
+; CHECK-NEXT: ret
+bb:
+ %.pre37 = load i32** @zptr32, align 8
+ %dec = add nsw i32 %arg, -1
+ %idxprom8 = sext i32 %dec to i64
+ %arrayidx9 = getelementptr inbounds i32* %.pre37, i64 %idxprom8
+ %tmp = trunc i64 %var to i32
+ store i32 %tmp, i32* %arrayidx9, align 4
+ ret void
+}
+
+define void @fct16(i32 %arg, i64 %var) {
+; CHECK: fct16
+; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr16@GOTPAGE
+; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr16@GOTPAGEOFF]
+; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
+; w0 is %arg
+; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
+; w1 is %var truncated
+; CHECK-NEXT: strh w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #1]
+; CHECK-NEXT: ret
+bb:
+ %.pre37 = load i16** @zptr16, align 8
+ %dec = add nsw i32 %arg, -1
+ %idxprom8 = sext i32 %dec to i64
+ %arrayidx9 = getelementptr inbounds i16* %.pre37, i64 %idxprom8
+ %tmp = trunc i64 %var to i16
+ store i16 %tmp, i16* %arrayidx9, align 4
+ ret void
+}
+
+define void @fct8(i32 %arg, i64 %var) {
+; CHECK: fct8
+; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr8@GOTPAGE
+; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr8@GOTPAGEOFF]
+; CHECK: ldr [[BASEADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
+; w0 is %arg
+; CHECK-NEXT: add [[ADDR:x[0-9]+]], [[BASEADDR]], w0, sxtw
+; w1 is %var truncated
+; CHECK-NEXT: sturb w1, {{\[}}[[ADDR]], #-1]
+; CHECK-NEXT: ret
+bb:
+ %.pre37 = load i8** @zptr8, align 8
+ %dec = add nsw i32 %arg, -1
+ %idxprom8 = sext i32 %dec to i64
+ %arrayidx9 = getelementptr inbounds i8* %.pre37, i64 %idxprom8
+ %tmp = trunc i64 %var to i8
+ store i8 %tmp, i8* %arrayidx9, align 4
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-umaxv.ll b/test/CodeGen/AArch64/arm64-umaxv.ll
new file mode 100644
index 0000000..d523f31
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-umaxv.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
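+; The across-vector umaxv result fits in the element width, so no masking
+; 'and' should be emitted before the cbz on the truncated result.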
+
+define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp {
+; CHECK-LABEL: vmax_u8x8:
+; CHECK: umaxv.8b b[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind
+ %tmp = trunc i32 %vmaxv.i to i8
+ %tobool = icmp eq i8 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @bar(...)
+
+define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp {
+; CHECK-LABEL: vmax_u4x16:
+; CHECK: umaxv.4h h[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind
+ %tmp = trunc i32 %vmaxv.i to i16
+ %tobool = icmp eq i16 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp {
+; CHECK-LABEL: vmax_u8x16:
+; CHECK: umaxv.8h h[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind
+ %tmp = trunc i32 %vmaxv.i to i16
+ %tobool = icmp eq i16 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp {
+; CHECK-LABEL: vmax_u16x8:
+; CHECK: umaxv.16b b[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind
+ %tmp = trunc i32 %vmaxv.i to i8
+ %tobool = icmp eq i8 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone
+declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone
+declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone
+declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-uminv.ll b/test/CodeGen/AArch64/arm64-uminv.ll
new file mode 100644
index 0000000..3bade4b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-uminv.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
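+; The across-vector uminv result fits in the element width, so no masking
+; 'and' should be emitted before the cbz on the truncated result.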
+
+define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp {
+; CHECK-LABEL: vmin_u8x8:
+; CHECK: uminv.8b b[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind
+ %tmp = trunc i32 %vminv.i to i8
+ %tobool = icmp eq i8 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @bar(...)
+
+define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp {
+; CHECK-LABEL: vmin_u4x16:
+; CHECK: uminv.4h h[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind
+ %tmp = trunc i32 %vminv.i to i16
+ %tobool = icmp eq i16 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp {
+; CHECK-LABEL: vmin_u8x16:
+; CHECK: uminv.8h h[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind
+ %tmp = trunc i32 %vminv.i to i16
+ %tobool = icmp eq i16 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp {
+; CHECK-LABEL: vmin_u16x8:
+; CHECK: uminv.16b b[[REG:[0-9]+]], v0
+; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
+; CHECK-NOT: and
+; CHECK: cbz [[REG2]],
+entry:
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind
+ %tmp = trunc i32 %vminv.i to i8
+ %tobool = icmp eq i8 %tmp, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then:
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone
+declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone
+declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone
+declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-umov.ll b/test/CodeGen/AArch64/arm64-umov.ll
new file mode 100644
index 0000000..a1ef990
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-umov.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
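+; Check that lane extracts lower to a single umov (mov.b/mov.h/mov.s/mov.d)
+; from the source vector.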
+
+define zeroext i8 @f1(<16 x i8> %a) {
+; CHECK-LABEL: f1:
+; CHECK: mov.b w0, v0[3]
+; CHECK-NEXT: ret
+ %vecext = extractelement <16 x i8> %a, i32 3
+ ret i8 %vecext
+}
+
+define zeroext i16 @f2(<4 x i16> %a) {
+; CHECK-LABEL: f2:
+; CHECK: mov.h w0, v0[2]
+; CHECK-NEXT: ret
+ %vecext = extractelement <4 x i16> %a, i32 2
+ ret i16 %vecext
+}
+
+define i32 @f3(<2 x i32> %a) {
+; CHECK-LABEL: f3:
+; CHECK: mov.s w0, v0[1]
+; CHECK-NEXT: ret
+ %vecext = extractelement <2 x i32> %a, i32 1
+ ret i32 %vecext
+}
+
+define i64 @f4(<2 x i64> %a) {
+; CHECK-LABEL: f4:
+; CHECK: mov.d x0, v0[1]
+; CHECK-NEXT: ret
+ %vecext = extractelement <2 x i64> %a, i32 1
+ ret i64 %vecext
+}
diff --git a/test/CodeGen/ARM64/unaligned_ldst.ll b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
index 20b80c0..20b80c0 100644
--- a/test/CodeGen/ARM64/unaligned_ldst.ll
+++ b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
diff --git a/test/CodeGen/AArch64/arm64-uzp.ll b/test/CodeGen/AArch64/arm64-uzp.ll
new file mode 100644
index 0000000..cdd8d31
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-uzp.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
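+; Check that even-element and odd-element deinterleaving shuffles match the
+; uzp1/uzp2 unzip instructions.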
+
+define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vuzpi8:
+;CHECK: uzp1.8b
+;CHECK: uzp2.8b
+;CHECK-NEXT: add.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: vuzpi16:
+;CHECK: uzp1.4h
+;CHECK: uzp2.4h
+;CHECK-NEXT: add.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: vuzpQi8:
+;CHECK: uzp1.16b
+;CHECK: uzp2.16b
+;CHECK-NEXT: add.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vuzpQi16:
+;CHECK: uzp1.8h
+;CHECK: uzp2.8h
+;CHECK-NEXT: add.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: vuzpQi32:
+;CHECK: uzp1.4s
+;CHECK: uzp2.4s
+;CHECK-NEXT: add.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: vuzpQf:
+;CHECK: uzp1.4s
+;CHECK: uzp2.4s
+;CHECK-NEXT: fadd.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
+
+; Undef shuffle indices should not prevent matching to UZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vuzpi8_undef:
+;CHECK: uzp1.8b
+;CHECK: uzp2.8b
+;CHECK-NEXT: add.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vuzpQi16_undef:
+;CHECK: uzp1.8h
+;CHECK: uzp2.8h
+;CHECK-NEXT: add.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
diff --git a/test/CodeGen/ARM64/vaargs.ll b/test/CodeGen/AArch64/arm64-vaargs.ll
index ce07635..ce07635 100644
--- a/test/CodeGen/ARM64/vaargs.ll
+++ b/test/CodeGen/AArch64/arm64-vaargs.ll
diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
new file mode 100644
index 0000000..5afc8d9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -0,0 +1,804 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
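+; Check lowering of the absolute-difference family: sabd/uabd, their widening
+; (sabdl/uabdl) and accumulating (saba/uaba, sabal/uabal) forms, fabd, and the
+; saturating sqabs/sqneg and plain abs operations.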
+
+
+define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sabdl8h:
+;CHECK: sabdl.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sabdl4s:
+;CHECK: sabdl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sabdl2d:
+;CHECK: sabdl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sabdl2_8h:
+;CHECK: sabdl2.8h
+ %load1 = load <16 x i8>* %A
+ %load2 = load <16 x i8>* %B
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sabdl2_4s:
+;CHECK: sabdl2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sabdl2_2d:
+;CHECK: sabdl2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uabdl8h:
+;CHECK: uabdl.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uabdl4s:
+;CHECK: uabdl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uabdl2d:
+;CHECK: uabdl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uabdl2_8h:
+;CHECK: uabdl2.8h
+ %load1 = load <16 x i8>* %A
+ %load2 = load <16 x i8>* %B
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uabdl2_4s:
+;CHECK: uabdl2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uabdl2_2d:
+;CHECK: uabdl2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fabd_2s:
+;CHECK: fabd.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fabd_4s:
+;CHECK: fabd.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fabd_2d:
+;CHECK: fabd.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sabd_8b:
+;CHECK: sabd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sabd_16b:
+;CHECK: sabd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sabd_4h:
+;CHECK: sabd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sabd_8h:
+;CHECK: sabd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sabd_2s:
+;CHECK: sabd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sabd_4s:
+;CHECK: sabd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uabd_8b:
+;CHECK: uabd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uabd_16b:
+;CHECK: uabd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uabd_4h:
+;CHECK: uabd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uabd_8h:
+;CHECK: uabd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uabd_2s:
+;CHECK: uabd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uabd_4s:
+;CHECK: uabd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: sqabs_8b:
+;CHECK: sqabs.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: sqabs_16b:
+;CHECK: sqabs.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: sqabs_4h:
+;CHECK: sqabs.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqabs_8h:
+;CHECK: sqabs.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: sqabs_2s:
+;CHECK: sqabs.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqabs_4s:
+;CHECK: sqabs.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: sqneg_8b:
+;CHECK: sqneg.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: sqneg_16b:
+;CHECK: sqneg.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: sqneg_4h:
+;CHECK: sqneg.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqneg_8h:
+;CHECK: sqneg.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: sqneg_2s:
+;CHECK: sqneg.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqneg_4s:
+;CHECK: sqneg.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: abs_8b:
+;CHECK: abs.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: abs_16b:
+;CHECK: abs.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: abs_4h:
+;CHECK: abs.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: abs_8h:
+;CHECK: abs.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: abs_2s:
+;CHECK: abs.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: abs_4s:
+;CHECK: abs.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
+; CHECK-LABEL: abs_1d:
+; CHECK: abs d0, d0
+ %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A)
+ ret <1 x i64> %abs
+}
+
+define i64 @abs_1d_honestly(i64 %A) nounwind {
+; CHECK-LABEL: abs_1d_honestly:
+; CHECK: abs d0, d0
+ %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A)
+ ret i64 %abs
+}
+
+declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone
+
+define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
+;CHECK-LABEL: sabal8h:
+;CHECK: sabal.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4.1
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sabal4s:
+;CHECK: sabal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4.1
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sabal2d:
+;CHECK: sabal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
+  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
+;CHECK-LABEL: sabal2_8h:
+;CHECK: sabal2.8h
+ %load1 = load <16 x i8>* %A
+ %load2 = load <16 x i8>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4.1
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sabal2_4s:
+;CHECK: sabal2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4.1
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sabal2_2d:
+;CHECK: sabal2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4.1
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
+;CHECK-LABEL: uabal8h:
+;CHECK: uabal.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4.1
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: uabal4s:
+;CHECK: uabal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4.1
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: uabal2d:
+;CHECK: uabal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4.1
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
+;CHECK-LABEL: uabal2_8h:
+;CHECK: uabal2.8h
+ %load1 = load <16 x i8>* %A
+ %load2 = load <16 x i8>* %B
+ %tmp3 = load <8 x i16>* %C
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4.1
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: uabal2_4s:
+;CHECK: uabal2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4.1
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: uabal2_2d:
+;CHECK: uabal2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4.1
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK-LABEL: saba_8b:
+;CHECK: saba.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = load <8 x i8>* %C
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK-LABEL: saba_16b:
+;CHECK: saba.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ %tmp4 = load <16 x i8>* %C
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK-LABEL: saba_4h:
+;CHECK: saba.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = load <4 x i16>* %C
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK-LABEL: saba_8h:
+;CHECK: saba.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ %tmp4 = load <8 x i16>* %C
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK-LABEL: saba_2s:
+;CHECK: saba.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = load <2 x i32>* %C
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: saba_4s:
+;CHECK: saba.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ %tmp4 = load <4 x i32>* %C
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK-LABEL: uaba_8b:
+;CHECK: uaba.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = load <8 x i8>* %C
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK-LABEL: uaba_16b:
+;CHECK: uaba.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ %tmp4 = load <16 x i8>* %C
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK-LABEL: uaba_4h:
+;CHECK: uaba.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = load <4 x i16>* %C
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK-LABEL: uaba_8h:
+;CHECK: uaba.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ %tmp4 = load <8 x i16>* %C
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK-LABEL: uaba_2s:
+;CHECK: uaba.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = load <2 x i32>* %C
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: uaba_4s:
+;CHECK: uaba.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ %tmp4 = load <4 x i32>* %C
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+; Scalar FABD
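+; fabd computes the floating-point absolute difference |a - b| in one
+; instruction; these cover the scalar s- and d-register forms.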
+define float @fabds(float %a, float %b) nounwind {
+; CHECK-LABEL: fabds:
+; CHECK: fabd s0, s0, s1
+ %vabd.i = tail call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) nounwind
+ ret float %vabd.i
+}
+
+define double @fabdd(double %a, double %b) nounwind {
+; CHECK-LABEL: fabdd:
+; CHECK: fabd d0, d0, d1
+ %vabd.i = tail call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) nounwind
+ ret double %vabd.i
+}
+
+declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone
+declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone
+
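+; The dup of %rhs and the high-half extract of %lhs must fold straight into
+; the widening '2' form; the CHECK-NOT guards against a stray ext.16b.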
+define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
+; CHECK-LABEL: uabdl_from_extract_dup:
+; CHECK-NOT: ext.16b
+; CHECK: uabdl2.2d
+ %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
+ %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
+ %res1 = zext <2 x i32> %res to <2 x i64>
+ ret <2 x i64> %res1
+}
+
+define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
+; CHECK-LABEL: sabdl_from_extract_dup:
+; CHECK-NOT: ext.16b
+; CHECK: sabdl2.2d
+ %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
+ %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
+ %res1 = zext <2 x i32> %res to <2 x i64>
+ ret <2 x i64> %res1
+}
diff --git a/test/CodeGen/AArch64/arm64-vadd.ll b/test/CodeGen/AArch64/arm64-vadd.ll
new file mode 100644
index 0000000..9ed8aa6
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vadd.ll
@@ -0,0 +1,941 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
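+; addhn (add returning high narrow) adds the two wide operands and keeps only
+; the high half of each element of the sum.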
+define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: addhn8b:
+;CHECK: addhn.8b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: addhn4h:
+;CHECK: addhn.4h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: addhn2s:
+;CHECK: addhn.2s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
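+; For the concatenation of two narrowing adds, the backend computes the low
+; half with addhn and inserts the second result with addhn2.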
+define <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
+;CHECK-LABEL: addhn2_16b:
+;CHECK: addhn.8b
+;CHECK-NEXT: addhn2.16b
+ %vaddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %vaddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
+;CHECK-LABEL: addhn2_8h:
+;CHECK: addhn.4h
+;CHECK-NEXT: addhn2.8h
+ %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %vaddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
+;CHECK-LABEL: addhn2_4s:
+;CHECK: addhn.2s
+;CHECK-NEXT: addhn2.4s
+ %vaddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %vaddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+
+
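+; raddhn is the rounding variant: a rounding constant of 1 << (half-width - 1)
+; is added to the sum before the high half is taken.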
+define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: raddhn8b:
+;CHECK: raddhn.8b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: raddhn4h:
+;CHECK: raddhn.4h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: raddhn2s:
+;CHECK: raddhn.2s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
+;CHECK-LABEL: raddhn2_16b:
+;CHECK: raddhn.8b
+;CHECK-NEXT: raddhn2.16b
+ %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %vraddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
+;CHECK-LABEL: raddhn2_8h:
+;CHECK: raddhn.4h
+;CHECK-NEXT: raddhn2.8h
+ %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %vraddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16> %vraddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
+;CHECK-LABEL: raddhn2_4s:
+;CHECK: raddhn.2s
+;CHECK-NEXT: raddhn2.4s
+ %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %vraddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32> %vraddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+
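+; The add-long forms are written as a sext/zext of each narrow operand plus an
+; ordinary add; the backend matches the widening pattern to saddl/uaddl.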
+define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: saddl8h:
+;CHECK: saddl.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: saddl4s:
+;CHECK: saddl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @saddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: saddl2d:
+;CHECK: saddl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
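+; In the '2' (high-half) forms, the bitcast-and-shufflevector sequence extracts
+; the top 64 bits of each operand; the CHECK-NEXT lines verify that everything
+; folds into a single saddl2 with no separate ext.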
+define <8 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECK-LABEL: saddl2_8h:
+; CHECK-NEXT: saddl2.8h v0, v0, v1
+; CHECK-NEXT: ret
+ %tmp = bitcast <16 x i8> %a to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
+ %vmovl.i.i.i = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp2 = bitcast <16 x i8> %b to <2 x i64>
+ %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
+ %vmovl.i.i5.i = sext <8 x i8> %tmp3 to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind {
+; CHECK-LABEL: saddl2_4s:
+; CHECK-NEXT: saddl2.4s v0, v0, v1
+; CHECK-NEXT: ret
+ %tmp = bitcast <8 x i16> %a to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
+ %vmovl.i.i.i = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp2 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
+ %vmovl.i.i5.i = sext <4 x i16> %tmp3 to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind {
+; CHECK-LABEL: saddl2_2d:
+; CHECK-NEXT: saddl2.2d v0, v0, v1
+; CHECK-NEXT: ret
+ %tmp = bitcast <4 x i32> %a to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
+ %vmovl.i.i.i = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp2 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
+ %vmovl.i.i5.i = sext <2 x i32> %tmp3 to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uaddl8h:
+;CHECK: uaddl.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uaddl4s:
+;CHECK: uaddl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @uaddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uaddl2d:
+;CHECK: uaddl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+
+define <8 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECK-LABEL: uaddl2_8h:
+; CHECK-NEXT: uaddl2.8h v0, v0, v1
+; CHECK-NEXT: ret
+ %tmp = bitcast <16 x i8> %a to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
+ %vmovl.i.i.i = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp2 = bitcast <16 x i8> %b to <2 x i64>
+ %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
+ %vmovl.i.i5.i = zext <8 x i8> %tmp3 to <8 x i16>
+ %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind {
+; CHECK-LABEL: uaddl2_4s:
+; CHECK-NEXT: uaddl2.4s v0, v0, v1
+; CHECK-NEXT: ret
+ %tmp = bitcast <8 x i16> %a to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
+ %vmovl.i.i.i = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp2 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
+ %vmovl.i.i5.i = zext <4 x i16> %tmp3 to <4 x i32>
+ %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind {
+; CHECK-LABEL: uaddl2_2d:
+; CHECK-NEXT: uaddl2.2d v0, v0, v1
+; CHECK-NEXT: ret
+ %tmp = bitcast <4 x i32> %a to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
+ %vmovl.i.i.i = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp2 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
+ %vmovl.i.i5.i = zext <2 x i32> %tmp3 to <2 x i64>
+ %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
+ ret <2 x i64> %add.i
+}
+
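+; The add-wide forms extend only the second, narrower operand; the first is
+; already at the result width.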
+define <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uaddw8h:
+;CHECK: uaddw.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uaddw4s:
+;CHECK: uaddw.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uaddw2d:
+;CHECK: uaddw.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uaddw2_8h:
+;CHECK: uaddw2.8h
+ %tmp1 = load <8 x i16>* %A
+
+ %tmp2 = load <16 x i8>* %B
+ %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext2 = zext <8 x i8> %high2 to <8 x i16>
+
+ %res = add <8 x i16> %tmp1, %ext2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uaddw2_4s:
+;CHECK: uaddw2.4s
+ %tmp1 = load <4 x i32>* %A
+
+ %tmp2 = load <8 x i16>* %B
+ %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext2 = zext <4 x i16> %high2 to <4 x i32>
+
+ %res = add <4 x i32> %tmp1, %ext2
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uaddw2_2d:
+;CHECK: uaddw2.2d
+ %tmp1 = load <2 x i64>* %A
+
+ %tmp2 = load <4 x i32>* %B
+ %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext2 = zext <2 x i32> %high2 to <2 x i64>
+
+ %res = add <2 x i64> %tmp1, %ext2
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: saddw8h:
+;CHECK: saddw.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: saddw4s:
+;CHECK: saddw.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: saddw2d:
+;CHECK: saddw.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: saddw2_8h:
+;CHECK: saddw2.8h
+ %tmp1 = load <8 x i16>* %A
+
+ %tmp2 = load <16 x i8>* %B
+ %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext2 = sext <8 x i8> %high2 to <8 x i16>
+
+ %res = add <8 x i16> %tmp1, %ext2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: saddw2_4s:
+;CHECK: saddw2.4s
+ %tmp1 = load <4 x i32>* %A
+
+ %tmp2 = load <8 x i16>* %B
+ %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext2 = sext <4 x i16> %high2 to <4 x i32>
+
+ %res = add <4 x i32> %tmp1, %ext2
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: saddw2_2d:
+;CHECK: saddw2.2d
+ %tmp1 = load <2 x i64>* %A
+
+ %tmp2 = load <4 x i32>* %B
+ %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext2 = sext <2 x i32> %high2 to <2 x i64>
+
+ %res = add <2 x i64> %tmp1, %ext2
+ ret <2 x i64> %res
+}
+
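+; saddlp/uaddlp add adjacent element pairs, producing half as many elements at
+; twice the width.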
+define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: saddlp4h:
+;CHECK: saddlp.4h
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: saddlp2s:
+;CHECK: saddlp.2s
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: saddlp1d:
+;CHECK: saddlp.1d
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: saddlp8h:
+;CHECK: saddlp.8h
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: saddlp4s:
+;CHECK: saddlp.4s
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: saddlp2d:
+;CHECK: saddlp.2d
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: uaddlp4h:
+;CHECK: uaddlp.4h
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: uaddlp2s:
+;CHECK: uaddlp.2s
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: uaddlp1d:
+;CHECK: uaddlp.1d
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp3
+}
+
+define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: uaddlp8h:
+;CHECK: uaddlp.8h
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: uaddlp4s:
+;CHECK: uaddlp.4s
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: uaddlp2d:
+;CHECK: uaddlp.2d
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone
+
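+; sadalp/uadalp are the accumulating pairwise forms: a [su]addlp intrinsic
+; followed by an add, folded by the backend into one instruction.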
+define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sadalp4h:
+;CHECK: sadalp.4h
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
+ %tmp4 = load <4 x i16>* %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sadalp2s:
+;CHECK: sadalp.2s
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
+ %tmp4 = load <2 x i32>* %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sadalp8h:
+;CHECK: sadalp.8h
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
+ %tmp4 = load <8 x i16>* %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sadalp4s:
+;CHECK: sadalp.4s
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
+ %tmp4 = load <4 x i32>* %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sadalp2d:
+;CHECK: sadalp.2d
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
+ %tmp4 = load <2 x i64>* %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uadalp4h:
+;CHECK: uadalp.4h
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
+ %tmp4 = load <4 x i16>* %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uadalp2s:
+;CHECK: uadalp.2s
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
+ %tmp4 = load <2 x i32>* %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uadalp8h:
+;CHECK: uadalp.8h
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
+ %tmp4 = load <8 x i16>* %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uadalp4s:
+;CHECK: uadalp.4s
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
+ %tmp4 = load <4 x i32>* %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: uadalp2d:
+;CHECK: uadalp.2d
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
+ %tmp4 = load <2 x i64>* %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
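+; addp adds adjacent pairs across the concatenation of the two source vectors.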
+define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: addp_8b:
+;CHECK: addp.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: addp_16b:
+;CHECK: addp.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: addp_4h:
+;CHECK: addp.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: addp_8h:
+;CHECK: addp.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: addp_2s:
+;CHECK: addp.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: addp_4s:
+;CHECK: addp.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: addp_2d:
+;CHECK: addp.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: faddp_2s:
+;CHECK: faddp.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: faddp_4s:
+;CHECK: faddp.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: faddp_2d:
+;CHECK: faddp.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
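+; As in the widening-abd tests, the dup'd scalar and the high-half extract must
+; fold into the '2' forms with no ext.16b.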
+define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
+; CHECK-LABEL: uaddl2_duprhs:
+; CHECK-NOT: ext.16b
+; CHECK: uaddl2.2d
+ %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
+ %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
+ %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
+
+ %res = add <2 x i64> %lhs.ext, %rhs.ext
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: saddl2_duplhs:
+; CHECK-NOT: ext.16b
+; CHECK: saddl2.2d
+ %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
+ %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
+
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
+ %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
+
+ %res = add <2 x i64> %lhs.ext, %rhs.ext
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
+; CHECK-LABEL: usubl2_duprhs:
+; CHECK-NOT: ext.16b
+; CHECK: usubl2.2d
+ %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
+ %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
+ %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
+
+ %res = sub <2 x i64> %lhs.ext, %rhs.ext
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: ssubl2_duplhs:
+; CHECK-NOT: ext.16b
+; CHECK: ssubl2.2d
+ %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
+ %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
+
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
+ %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
+
+ %res = sub <2 x i64> %lhs.ext, %rhs.ext
+ ret <2 x i64> %res
+}
+
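+; The '_natural' tests express the narrowing-high-half operations in plain IR
+; (add/sub, lshr by half the element width, trunc) instead of intrinsics.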
+define <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: addhn8b_natural:
+;CHECK: addhn.8b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %sum = add <8 x i16> %tmp1, %tmp2
+ %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
+ ret <8 x i8> %narrowed
+}
+
+define <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: addhn4h_natural:
+;CHECK: addhn.4h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %sum = add <4 x i32> %tmp1, %tmp2
+ %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+ %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
+ ret <4 x i16> %narrowed
+}
+
+define <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: addhn2s_natural:
+;CHECK: addhn.2s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %sum = add <2 x i64> %tmp1, %tmp2
+ %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
+ %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
+ ret <2 x i32> %narrowed
+}
+
+define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: addhn2_16b_natural:
+;CHECK: addhn2.16b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %sum = add <8 x i16> %tmp1, %tmp2
+ %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
+ %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: addhn2_8h_natural:
+;CHECK: addhn2.8h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %sum = add <4 x i32> %tmp1, %tmp2
+ %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+ %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
+ %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: addhn2_4s_natural:
+;CHECK: addhn2.4s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %sum = add <2 x i64> %tmp1, %tmp2
+ %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
+ %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
+ %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+define <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: subhn8b_natural:
+;CHECK: subhn.8b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %diff = sub <8 x i16> %tmp1, %tmp2
+ %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
+ ret <8 x i8> %narrowed
+}
+
+define <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: subhn4h_natural:
+;CHECK: subhn.4h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %diff = sub <4 x i32> %tmp1, %tmp2
+ %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
+ %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
+ ret <4 x i16> %narrowed
+}
+
+define <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: subhn2s_natural:
+;CHECK: subhn.2s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %diff = sub <2 x i64> %tmp1, %tmp2
+ %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
+ %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
+ ret <2 x i32> %narrowed
+}
+
+define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: subhn2_16b_natural:
+;CHECK: subhn2.16b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %diff = sub <8 x i16> %tmp1, %tmp2
+ %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
+ %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: subhn2_8h_natural:
+;CHECK: subhn2.8h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %diff = sub <4 x i32> %tmp1, %tmp2
+ %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
+ %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
+ %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: subhn2_4s_natural:
+;CHECK: subhn2.4s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %diff = sub <2 x i64> %tmp1, %tmp2
+ %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
+ %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
+ %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
diff --git a/test/CodeGen/AArch64/arm64-vaddlv.ll b/test/CodeGen/AArch64/arm64-vaddlv.ll
new file mode 100644
index 0000000..2d64138
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vaddlv.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
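+; For a 64-bit v2i32 input the add-long-across reduction is a single pairwise
+; add into a d register, followed by an fmov to the general-purpose result.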
+define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone {
+; CHECK: test_vaddlv_s32
+; CHECK: saddlp.1d v[[REGNUM:[0-9]+]], v[[INREG:[0-9]+]]
+; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddlv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind
+ ret i64 %vaddlv.i
+}
+
+define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone {
+; CHECK: test_vaddlv_u32
+; CHECK: uaddlp.1d v[[REGNUM:[0-9]+]], v[[INREG:[0-9]+]]
+; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddlv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind
+ ret i64 %vaddlv.i
+}
+
+declare i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone
+
+declare i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone
+
diff --git a/test/CodeGen/AArch64/arm64-vaddv.ll b/test/CodeGen/AArch64/arm64-vaddv.ll
new file mode 100644
index 0000000..2d92ce6
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vaddv.ll
@@ -0,0 +1,245 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+
+define signext i8 @test_vaddv_s8(<8 x i8> %a1) {
+; CHECK-LABEL: test_vaddv_s8:
+; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a1)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define signext i16 @test_vaddv_s16(<4 x i16> %a1) {
+; CHECK-LABEL: test_vaddv_s16:
+; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a1)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddv_s32(<2 x i32> %a1) {
+; CHECK-LABEL: test_vaddv_s32:
+; There is no addv.2s form in the ISA, so 2 x i32 is a special case: it
+; lowers to a pairwise add instead
+; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a1)
+ ret i32 %vaddv.i
+}
+
+define i64 @test_vaddv_s64(<2 x i64> %a1) {
+; CHECK-LABEL: test_vaddv_s64:
+; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
+; CHECK-NEXT: fmov x0, [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a1)
+ ret i64 %vaddv.i
+}
+
+define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) {
+; CHECK-LABEL: test_vaddv_u8:
+; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define i32 @test_vaddv_u8_masked(<8 x i8> %a1) {
+; CHECK-LABEL: test_vaddv_u8_masked:
+; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
+ %0 = and i32 %vaddv.i, 511 ; 0x1ff
+ ret i32 %0
+}
+
+define zeroext i16 @test_vaddv_u16(<4 x i16> %a1) {
+; CHECK-LABEL: test_vaddv_u16:
+; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddv_u16_masked(<4 x i16> %a1) {
+; CHECK-LABEL: test_vaddv_u16_masked:
+; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a1)
+ %0 = and i32 %vaddv.i, 3276799 ; 0x31ffff
+ ret i32 %0
+}
+
+define i32 @test_vaddv_u32(<2 x i32> %a1) {
+; CHECK-LABEL: test_vaddv_u32:
+; There is no addv.2s form in the ISA, so 2 x i32 is a special case: it
+; lowers to a pairwise add instead
+; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a1)
+ ret i32 %vaddv.i
+}
+
+define float @test_vaddv_f32(<2 x float> %a1) {
+; CHECK-LABEL: test_vaddv_f32:
+; CHECK: faddp.2s s0, v0
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1)
+ ret float %vaddv.i
+}
+
+define float @test_vaddv_v4f32(<4 x float> %a1) {
+; CHECK-LABEL: test_vaddv_v4f32:
+; CHECK: faddp.4s [[REGNUM:v[0-9]+]], v0, v0
+; CHECK: faddp.2s s0, [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1)
+ ret float %vaddv.i
+}
+
+define double @test_vaddv_f64(<2 x double> %a1) {
+; CHECK-LABEL: test_vaddv_f64:
+; CHECK: faddp.2d d0, v0
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1)
+ ret double %vaddv.i
+}
+
+define i64 @test_vaddv_u64(<2 x i64> %a1) {
+; CHECK-LABEL: test_vaddv_u64:
+; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
+; CHECK-NEXT: fmov x0, [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
+ ret i64 %vaddv.i
+}
+
+define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
+; CHECK-LABEL: test_vaddv_u64_to_vec:
+; CHECK: addp.2d d0, v0
+; CHECK-NOT: fmov
+; CHECK-NOT: ins
+; CHECK: ret
+entry:
+ %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
+ %vec = insertelement <1 x i64> undef, i64 %vaddv.i, i32 0
+ ret <1 x i64> %vec
+}
+
+define signext i8 @test_vaddvq_s8(<16 x i8> %a1) {
+; CHECK-LABEL: test_vaddvq_s8:
+; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a1)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define signext i16 @test_vaddvq_s16(<8 x i16> %a1) {
+; CHECK-LABEL: test_vaddvq_s16:
+; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a1)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddvq_s32(<4 x i32> %a1) {
+; CHECK-LABEL: test_vaddvq_s32:
+; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
+; CHECK-NEXT: fmov w0, [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a1)
+ ret i32 %vaddv.i
+}
+
+define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) {
+; CHECK-LABEL: test_vaddvq_u8:
+; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a1)
+ %0 = trunc i32 %vaddv.i to i8
+ ret i8 %0
+}
+
+define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) {
+; CHECK-LABEL: test_vaddvq_u16:
+; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
+; CHECK-NEXT: fmov w0, s[[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a1)
+ %0 = trunc i32 %vaddv.i to i16
+ ret i16 %0
+}
+
+define i32 @test_vaddvq_u32(<4 x i32> %a1) {
+; CHECK-LABEL: test_vaddvq_u32:
+; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
+; CHECK-NEXT: fmov [[FMOVRES:w[0-9]+]], [[REGNUM]]
+; CHECK-NEXT: ret
+entry:
+ %vaddv.i = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a1)
+ ret i32 %vaddv.i
+}
+
+declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)
+
+declare i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64>)
+
+declare i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32>)
+
+declare i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>)
+
+declare i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)
+
+declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)
+
+declare float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a1)
+declare float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a1)
+declare double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a1)
diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
new file mode 100644
index 0000000..36a7bfd
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -0,0 +1,143 @@
+; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false < %s | FileCheck %s
+
+%va_list = type {i8*, i8*, i8*, i32, i32}
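+; Fields, in AAPCS64 order: __stack, __gr_top, __vr_top, __gr_offs, __vr_offs.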
+
+@var = global %va_list zeroinitializer, align 8
+
+declare void @llvm.va_start(i8*)
+
+define void @test_simple(i32 %n, ...) {
+; CHECK-LABEL: test_simple:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
+
+; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+
+; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
+; ... omit middle ones ...
+; CHECK: str x7, [sp, #
+
+; CHECK: stp q0, q1, [sp]
+; ... omit middle ones ...
+; CHECK: stp q6, q7, [sp, #
+
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+
+; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
+; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+
+; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
+; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #0x37
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+
+; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
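+; Offset arithmetic: %n consumes x0, so x1-x7 (7 x 8 = 56 bytes) and v0-v7
+; (8 x 16 = 128 bytes) are saved; __gr_offs = -56 (movn #0x37) and
+; __vr_offs = -128 (0xffffff80).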
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+
+ ret void
+}
+
+define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
+; CHECK-LABEL: test_fewargs:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
+
+; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+
+; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
+; ... omit middle ones ...
+; CHECK: str x7, [sp, #
+
+; CHECK: stp q1, q2, [sp]
+; ... omit middle ones ...
+; CHECK: str q7, [sp, #
+
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+
+; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
+; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+
+; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
+; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #0x27
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #0x6f
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+
+ ret void
+}
+
+define void @test_nospare([8 x i64], [8 x float], ...) {
+; CHECK-LABEL: test_nospare:
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK-NOT: sub sp, sp
+; CHECK: mov [[STACK:x[0-9]+]], sp
+; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
+
+ ret void
+}
+
+; If there are non-variadic arguments on the stack (here two i64s), then the
+; __stack field should point just past them.
+define void @test_offsetstack([10 x i64], [3 x float], ...) {
+; CHECK-LABEL: test_offsetstack:
+; CHECK: sub sp, sp, #80
+; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
+; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
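+; Arithmetic: 10 i64s fill x0-x7 and spill two to the stack (16 bytes); the 3
+; floats take s0-s2, leaving v3-v7 to save (5 x 16 = 80 bytes, hence sub #80).
+; __stack = entry sp + 16 = (sp + 80) + 16 = sp + 96.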
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+ ret void
+}
+
+declare void @llvm.va_end(i8*)
+
+define void @test_va_end() nounwind {
+; CHECK-LABEL: test_va_end:
+; CHECK-NEXT: BB#0
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_end(i8* %addr)
+
+ ret void
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.va_copy(i8* %dest, i8* %src)
+
+@second_list = global %va_list zeroinitializer
+
+define void @test_va_copy() {
+; CHECK-LABEL: test_va_copy:
+ %srcaddr = bitcast %va_list* @var to i8*
+ %dstaddr = bitcast %va_list* @second_list to i8*
+ call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
+
+; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
+
+; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
+; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
+; CHECK: str [[BLOCK]], [x[[DST]]]
+
+; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]], #16]
+; CHECK: str [[BLOCK]], [x[[DST]], #16]
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/arm64-vbitwise.ll b/test/CodeGen/AArch64/arm64-vbitwise.ll
new file mode 100644
index 0000000..93de95e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vbitwise.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
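+; rbit reverses the bit order within each byte element.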
+define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: rbit_8b:
+;CHECK: rbit.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: rbit_16b:
+;CHECK: rbit.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
+
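+; sxtl/uxtl are aliases for sshll/ushll with a #0 shift; a plain sext/zext
+; from a 64-bit vector matches them directly.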
+define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: sxtl8h:
+;CHECK: sshll.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+ ret <8 x i16> %tmp2
+}
+
+define <8 x i16> @uxtl8h(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: uxtl8h:
+;CHECK: ushll.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @sxtl4s(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: sxtl4s:
+;CHECK: sshll.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @uxtl4s(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: uxtl4s:
+;CHECK: ushll.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @sxtl2d(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: sxtl2d:
+;CHECK: sshll.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @uxtl2d(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: uxtl2d:
+;CHECK: ushll.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
+}
+
+; Check for incorrect use of vector bic.
+; rdar://11553859
+define void @test_vsliq(i8* nocapture %src, i8* nocapture %dest) nounwind noinline ssp {
+entry:
+; CHECK-LABEL: test_vsliq:
+; CHECK-NOT: bic
+; CHECK: movi.2d [[REG1:v[0-9]+]], #0x0000ff000000ff
+; CHECK: and.16b v{{[0-9]+}}, v{{[0-9]+}}, [[REG1]]
+ %0 = bitcast i8* %src to <16 x i8>*
+ %1 = load <16 x i8>* %0, align 16
+ %and.i = and <16 x i8> %1, <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>
+ %2 = bitcast <16 x i8> %and.i to <8 x i16>
+ %vshl_n = shl <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %3 = or <8 x i16> %2, %vshl_n
+ %4 = bitcast <8 x i16> %3 to <4 x i32>
+ %vshl_n8 = shl <4 x i32> %4, <i32 16, i32 16, i32 16, i32 16>
+ %5 = or <4 x i32> %4, %vshl_n8
+ %6 = bitcast <4 x i32> %5 to <16 x i8>
+ %7 = bitcast i8* %dest to <16 x i8>*
+ store <16 x i8> %6, <16 x i8>* %7, align 16
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-vclz.ll b/test/CodeGen/AArch64/arm64-vclz.ll
new file mode 100644
index 0000000..cf5670a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vclz.ll
@@ -0,0 +1,109 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
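+; AArch64 clz is defined for a zero input (it returns the element width), so
+; llvm.ctlz with a false is-zero-undef flag maps directly to it.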
+define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_u8:
+ ; CHECK: clz.8b v0, v0
+ ; CHECK-NEXT: ret
+ %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
+ ret <8 x i8> %vclz.i
+}
+
+define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_s8:
+ ; CHECK: clz.8b v0, v0
+ ; CHECK-NEXT: ret
+ %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
+ ret <8 x i8> %vclz.i
+}
+
+define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_u16:
+ ; CHECK: clz.4h v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
+ ret <4 x i16> %vclz1.i
+}
+
+define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_s16:
+ ; CHECK: clz.4h v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
+ ret <4 x i16> %vclz1.i
+}
+
+define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_u32:
+ ; CHECK: clz.2s v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
+ ret <2 x i32> %vclz1.i
+}
+
+define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclz_s32:
+ ; CHECK: clz.2s v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
+ ret <2 x i32> %vclz1.i
+}
+
+define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_u8:
+ ; CHECK: clz.16b v0, v0
+ ; CHECK-NEXT: ret
+ %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
+ ret <16 x i8> %vclz.i
+}
+
+define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_s8:
+ ; CHECK: clz.16b v0, v0
+ ; CHECK-NEXT: ret
+ %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
+ ret <16 x i8> %vclz.i
+}
+
+define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_u16:
+ ; CHECK: clz.8h v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
+ ret <8 x i16> %vclz1.i
+}
+
+define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_s16:
+ ; CHECK: clz.8h v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
+ ret <8 x i16> %vclz1.i
+}
+
+define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_u32:
+ ; CHECK: clz.4s v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
+ ret <4 x i32> %vclz1.i
+}
+
+define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
+ ; CHECK-LABEL: test_vclzq_s32:
+ ; CHECK: clz.4s v0, v0
+ ; CHECK-NEXT: ret
+ %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
+ ret <4 x i32> %vclz1.i
+}
+
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
+
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
+
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
+
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vcmp.ll b/test/CodeGen/AArch64/arm64-vcmp.ll
new file mode 100644
index 0000000..982ab09
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcmp.ll
@@ -0,0 +1,236 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
+;CHECK-LABEL: fcmltz_4s:
+;CHECK: fcmlt.4s [[REG:v[0-9]+]], v0, #0
+;CHECK-NEXT: xtn.4h v[[REG_1:[0-9]+]], [[REG]]
+;CHECK-NEXT: str d[[REG_1]], [x0]
+;CHECK-NEXT: ret
+ %tmp = fcmp olt <4 x float> %a, zeroinitializer
+ %tmp2 = sext <4 x i1> %tmp to <4 x i16>
+ store <4 x i16> %tmp2, <4 x i16>* %p, align 8
+ ret void
+}
+
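+; facge/facgt compare the absolute values of their operands
+; (|a| >= |b| and |a| > |b| respectively).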
+define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: facge_2s:
+;CHECK: facge.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: facge_4s:
+;CHECK: facge.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: facge_2d:
+;CHECK: facge.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: facgt_2s:
+;CHECK: facgt.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: facgt_4s:
+;CHECK: facgt.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: facgt_2d:
+;CHECK: facgt.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define i32 @facge_s(float %A, float %B) nounwind {
+; CHECK-LABEL: facge_s:
+; CHECK: facge {{s[0-9]+}}, s0, s1
+ %mask = call i32 @llvm.aarch64.neon.facge.i32.f32(float %A, float %B)
+ ret i32 %mask
+}
+
+define i64 @facge_d(double %A, double %B) nounwind {
+; CHECK-LABEL: facge_d:
+; CHECK: facge {{d[0-9]+}}, d0, d1
+ %mask = call i64 @llvm.aarch64.neon.facge.i64.f64(double %A, double %B)
+ ret i64 %mask
+}
+
+declare i64 @llvm.aarch64.neon.facge.i64.f64(double, double)
+declare i32 @llvm.aarch64.neon.facge.i32.f32(float, float)
+
+define i32 @facgt_s(float %A, float %B) nounwind {
+; CHECK-LABEL: facgt_s:
+; CHECK: facgt {{s[0-9]+}}, s0, s1
+ %mask = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %A, float %B)
+ ret i32 %mask
+}
+
+define i64 @facgt_d(double %A, double %B) nounwind {
+; CHECK-LABEL: facgt_d:
+; CHECK: facgt {{d[0-9]+}}, d0, d1
+ %mask = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %A, double %B)
+ ret i64 %mask
+}
+
+declare i64 @llvm.aarch64.neon.facgt.i64.f64(double, double)
+declare i32 @llvm.aarch64.neon.facgt.i32.f32(float, float)
+
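+; cmtst (compare bitwise test) sets each lane to all-ones when the AND of the
+; two operands is nonzero, i.e. the and + icmp ne + sext pattern used below.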
+define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: cmtst_8b:
+;CHECK: cmtst.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %commonbits = and <8 x i8> %tmp1, %tmp2
+ %mask = icmp ne <8 x i8> %commonbits, zeroinitializer
+ %res = sext <8 x i1> %mask to <8 x i8>
+ ret <8 x i8> %res
+}
+
+define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: cmtst_16b:
+;CHECK: cmtst.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %commonbits = and <16 x i8> %tmp1, %tmp2
+ %mask = icmp ne <16 x i8> %commonbits, zeroinitializer
+ %res = sext <16 x i1> %mask to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: cmtst_4h:
+;CHECK: cmtst.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %commonbits = and <4 x i16> %tmp1, %tmp2
+ %mask = icmp ne <4 x i16> %commonbits, zeroinitializer
+ %res = sext <4 x i1> %mask to <4 x i16>
+ ret <4 x i16> %res
+}
+
+define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: cmtst_8h:
+;CHECK: cmtst.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %commonbits = and <8 x i16> %tmp1, %tmp2
+ %mask = icmp ne <8 x i16> %commonbits, zeroinitializer
+ %res = sext <8 x i1> %mask to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: cmtst_2s:
+;CHECK: cmtst.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %commonbits = and <2 x i32> %tmp1, %tmp2
+ %mask = icmp ne <2 x i32> %commonbits, zeroinitializer
+ %res = sext <2 x i1> %mask to <2 x i32>
+ ret <2 x i32> %res
+}
+
+define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: cmtst_4s:
+;CHECK: cmtst.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %commonbits = and <4 x i32> %tmp1, %tmp2
+ %mask = icmp ne <4 x i32> %commonbits, zeroinitializer
+ %res = sext <4 x i1> %mask to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @cmtst_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: cmtst_2d:
+;CHECK: cmtst.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %commonbits = and <2 x i64> %tmp1, %tmp2
+ %mask = icmp ne <2 x i64> %commonbits, zeroinitializer
+ %res = sext <2 x i1> %mask to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <1 x i64> @fcmeq_d(<1 x double> %A, <1 x double> %B) nounwind {
+; CHECK-LABEL: fcmeq_d:
+; CHECK: fcmeq {{d[0-9]+}}, d0, d1
+ %tst = fcmp oeq <1 x double> %A, %B
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmge_d(<1 x double> %A, <1 x double> %B) nounwind {
+; CHECK-LABEL: fcmge_d:
+; CHECK: fcmge {{d[0-9]+}}, d0, d1
+ %tst = fcmp oge <1 x double> %A, %B
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
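+; There are no fcmle/fcmlt encodings; the operands are commuted and fcmge/fcmgt
+; used instead, so the CHECK lines below expect swapped operands (d1, d0).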
+define <1 x i64> @fcmle_d(<1 x double> %A, <1 x double> %B) nounwind {
+; CHECK-LABEL: fcmle_d:
+; CHECK: fcmge {{d[0-9]+}}, d1, d0
+ %tst = fcmp ole <1 x double> %A, %B
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmgt_d(<1 x double> %A, <1 x double> %B) nounwind {
+; CHECK-LABEL: fcmgt_d:
+; CHECK: fcmgt {{d[0-9]+}}, d0, d1
+ %tst = fcmp ogt <1 x double> %A, %B
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
+define <1 x i64> @fcmlt_d(<1 x double> %A, <1 x double> %B) nounwind {
+; CHECK-LABEL: fcmlt_d:
+; CHECK: fcmgt {{d[0-9]+}}, d1, d0
+ %tst = fcmp olt <1 x double> %A, %B
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
+
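+; icmp ne against zero has no direct compare; it is lowered as cmeq with #0
+; followed by a bitwise NOT (mvn).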
+define <1 x i64> @cmnez_d(<1 x i64> %A) nounwind {
+; CHECK-LABEL: cmnez_d:
+; CHECK: cmeq d[[EQ:[0-9]+]], d0, #0
+; CHECK: mvn.8b v0, v[[EQ]]
+ %tst = icmp ne <1 x i64> %A, zeroinitializer
+ %mask = sext <1 x i1> %tst to <1 x i64>
+ ret <1 x i64> %mask
+}
diff --git a/test/CodeGen/AArch64/arm64-vcnt.ll b/test/CodeGen/AArch64/arm64-vcnt.ll
new file mode 100644
index 0000000..903501e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcnt.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: cls_8b:
+;CHECK: cls.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: cls_16b:
+;CHECK: cls.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: cls_4h:
+;CHECK: cls.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: cls_8h:
+;CHECK: cls.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: cls_2s:
+;CHECK: cls.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: cls_4s:
+;CHECK: cls.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vcombine.ll b/test/CodeGen/AArch64/arm64-vcombine.ll
new file mode 100644
index 0000000..fa12996
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcombine.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+; LowerCONCAT_VECTORS() was reversing the order of two parts.
+; rdar://11558157
+; rdar://11559553
+define <16 x i8> @test(<16 x i8> %q0, <16 x i8> %q1, i8* nocapture %dest) nounwind {
+entry:
+; CHECK-LABEL: test:
+; CHECK: ins.d v0[1], v1[0]
+ %0 = bitcast <16 x i8> %q0 to <2 x i64>
+ %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> zeroinitializer
+ %1 = bitcast <16 x i8> %q1 to <2 x i64>
+ %shuffle.i4 = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i3 = shufflevector <1 x i64> %shuffle.i, <1 x i64> %shuffle.i4, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i3 to <16 x i8>
+ ret <16 x i8> %2
+}
diff --git a/test/CodeGen/AArch64/arm64-vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll
new file mode 100644
index 0000000..8c9e4e9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -0,0 +1,686 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
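+; The fcvt?s/fcvt?u tests below cover each rounding mode: a = to nearest, ties
+; away from zero; m = toward minus infinity; n = to nearest, ties to even;
+; p = toward plus infinity; z = toward zero (the fcvtz forms are the only ones
+; reached directly from IR fptosi/fptoui rather than via target intrinsics).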
+define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtas_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtas.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtas_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtas.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtas_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtas.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtau_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtau.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtau_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtau.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtau_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtau.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtms_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtms.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtms_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtms.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtms_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtms.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtmu_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtmu.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtmu_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtmu.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtmu_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtmu.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtps_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtps.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtps_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtps.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtps_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtps.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtpu_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtpu.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtpu_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtpu.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtpu_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtpu.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtns_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtns.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtns_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtns.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtns_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtns.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtnu_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtnu.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtnu_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtnu.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %A)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtnu_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtnu.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %A)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzs_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzs.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = fptosi <2 x float> %A to <2 x i32>
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzs_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzs.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = fptosi <4 x float> %A to <4 x i32>
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtzs_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtzs.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = fptosi <2 x double> %A to <2 x i64>
+ ret <2 x i64> %tmp3
+}
+
+define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzu_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzu.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = fptoui <2 x float> %A to <2 x i32>
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzu_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzu.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = fptoui <4 x float> %A to <4 x i32>
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtzu_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtzu.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = fptoui <2 x double> %A to <2 x i64>
+ ret <2 x i64> %tmp3
+}
+
+define <2 x float> @frinta_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frinta_2s:
+;CHECK-NOT: ld1
+;CHECK: frinta.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frinta_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frinta_4s:
+;CHECK-NOT: ld1
+;CHECK: frinta.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frinta_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frinta_2d:
+;CHECK-NOT: ld1
+;CHECK: frinta.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.round.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.round.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.round.v2f64(<2 x double>) nounwind readnone
+
+define <2 x float> @frinti_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frinti_2s:
+;CHECK-NOT: ld1
+;CHECK: frinti.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frinti_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frinti_4s:
+;CHECK-NOT: ld1
+;CHECK: frinti.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frinti_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frinti_2d:
+;CHECK-NOT: ld1
+;CHECK: frinti.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) nounwind readnone
+
+define <2 x float> @frintm_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintm_2s:
+;CHECK-NOT: ld1
+;CHECK: frintm.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintm_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintm_4s:
+;CHECK-NOT: ld1
+;CHECK: frintm.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintm_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintm_2d:
+;CHECK-NOT: ld1
+;CHECK: frintm.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.floor.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
+
+define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintn_2s:
+;CHECK-NOT: ld1
+;CHECK: frintn.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintn_4s:
+;CHECK-NOT: ld1
+;CHECK: frintn.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintn_2d:
+;CHECK-NOT: ld1
+;CHECK: frintn.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) nounwind readnone
+
+define <2 x float> @frintp_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintp_2s:
+;CHECK-NOT: ld1
+;CHECK: frintp.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintp_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintp_4s:
+;CHECK-NOT: ld1
+;CHECK: frintp.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintp_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintp_2d:
+;CHECK-NOT: ld1
+;CHECK: frintp.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
+
+define <2 x float> @frintx_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintx_2s:
+;CHECK-NOT: ld1
+;CHECK: frintx.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.rint.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintx_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintx_4s:
+;CHECK-NOT: ld1
+;CHECK: frintx.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.rint.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintx_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintx_2d:
+;CHECK-NOT: ld1
+;CHECK: frintx.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.rint.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.rint.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.rint.v2f64(<2 x double>) nounwind readnone
+
+define <2 x float> @frintz_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: frintz_2s:
+;CHECK-NOT: ld1
+;CHECK: frintz.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frintz_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: frintz_4s:
+;CHECK-NOT: ld1
+;CHECK: frintz.4s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frintz_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: frintz_2d:
+;CHECK-NOT: ld1
+;CHECK: frintz.2d v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
+
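+; fcvtxn narrows double to single precision with round-to-odd; the fcvtxn2
+; form writes its result into the high half of the destination register.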
+define <2 x float> @fcvtxn_2s(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtxn_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtxn v0.2s, v0.2d
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fcvtxn_4s(<2 x float> %ret, <2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtxn_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtxn2 v0.4s, v1.2d
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A)
+ %res = shufflevector <2 x float> %ret, <2 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %res
+}
+
+declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone
+
+define <2 x i32> @fcvtzsc_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzsc_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzs.2s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtzsc_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzsc_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzs.4s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtzsc_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtzsc_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtzs.2d v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) nounwind readnone
+
+define <2 x i32> @fcvtzuc_2s(<2 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzuc_2s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzu.2s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @fcvtzuc_4s(<4 x float> %A) nounwind {
+;CHECK-LABEL: fcvtzuc_4s:
+;CHECK-NOT: ld1
+;CHECK: fcvtzu.4s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @fcvtzuc_2d(<2 x double> %A) nounwind {
+;CHECK-LABEL: fcvtzuc_2d:
+;CHECK-NOT: ld1
+;CHECK: fcvtzu.2d v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1)
+ ret <2 x i64> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) nounwind readnone
+
+define <2 x float> @scvtf_2sc(<2 x i32> %A) nounwind {
+;CHECK-LABEL: scvtf_2sc:
+;CHECK-NOT: ld1
+;CHECK: scvtf.2s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @scvtf_4sc(<4 x i32> %A) nounwind {
+;CHECK-LABEL: scvtf_4sc:
+;CHECK-NOT: ld1
+;CHECK: scvtf.4s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @scvtf_2dc(<2 x i64> %A) nounwind {
+;CHECK-LABEL: scvtf_2dc:
+;CHECK-NOT: ld1
+;CHECK: scvtf.2d v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
+
+define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind {
+;CHECK-LABEL: ucvtf_2sc:
+;CHECK-NOT: ld1
+;CHECK: ucvtf.2s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @ucvtf_4sc(<4 x i32> %A) nounwind {
+;CHECK-LABEL: ucvtf_4sc:
+;CHECK-NOT: ld1
+;CHECK: ucvtf.4s v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind {
+;CHECK-LABEL: ucvtf_2dc:
+;CHECK-NOT: ld1
+;CHECK: ucvtf.2d v0, v0, #1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1)
+ ret <2 x double> %tmp3
+}
+
+;CHECK-LABEL: autogen_SD28458:
+;CHECK: fcvt
+;CHECK: ret
+define void @autogen_SD28458() {
+ %Tr53 = fptrunc <8 x double> undef to <8 x float>
+ store <8 x float> %Tr53, <8 x float>* undef
+ ret void
+}
+
+;CHECK-LABEL: autogen_SD19225:
+;CHECK: fcvt
+;CHECK: ret
+define void @autogen_SD19225() {
+ %A = load <8 x float>* undef
+ %Tr53 = fpext <8 x float> %A to <8 x double>
+ store <8 x double> %Tr53, <8 x double>* undef
+ ret void
+}
+
+declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vcvt_f.ll b/test/CodeGen/AArch64/arm64-vcvt_f.ll
new file mode 100644
index 0000000..d244958
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
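+; fcvtl/fcvtn convert between f32 and f64 vectors; the fcvtl2/fcvtn2 forms
+; read from or write to the high half of the 128-bit register.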
+define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
+; CHECK-LABEL: test_vcvt_f64_f32:
+ %vcvt1.i = fpext <2 x float> %x to <2 x double>
+; CHECK: fcvtl v0.2d, v0.2s
+ ret <2 x double> %vcvt1.i
+; CHECK: ret
+}
+
+define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
+; CHECK-LABEL: test_vcvt_high_f64_f32:
+ %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
+; CHECK: fcvtl2 v0.2d, v0.4s
+ ret <2 x double> %vcvt1.i
+; CHECK: ret
+}
+
+define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
+; CHECK-LABEL: test_vcvt_f32_f64:
+ %vcvt1.i = fptrunc <2 x double> %v to <2 x float>
+; CHECK: fcvtn
+ ret <2 x float> %vcvt1.i
+; CHECK: ret
+}
+
+define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
+; CHECK-LABEL: test_vcvt_high_f32_f64:
+ %cvt = fptrunc <2 x double> %v to <2 x float>
+ %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: fcvtn2
+ ret <4 x float> %vcvt2.i
+; CHECK: ret
+}
+
+define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
+; CHECK-LABEL: test_vcvtx_f32_f64:
+ %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
+; CHECK: fcvtxn
+ ret <2 x float> %vcvtx1.i
+; CHECK: ret
+}
+
+define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
+; CHECK-LABEL: test_vcvtx_high_f32_f64:
+ %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
+ %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: fcvtxn2
+ ret <4 x float> %res
+; CHECK: ret
+}
+
+declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone
+
+declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone
+
+declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone
+
+define i16 @to_half(float %in) {
+; CHECK-LABEL: to_half:
+; CHECK: fcvt h[[HALFVAL:[0-9]+]], s0
+; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+ %res = call i16 @llvm.convert.to.fp16(float %in)
+ ret i16 %res
+}
+
+define float @from_half(i16 %in) {
+; CHECK-LABEL: from_half:
+; CHECK: fmov s[[HALFVAL:[0-9]+]], {{w[0-9]+}}
+; CHECK: fcvt s0, h[[HALFVAL]]
+ %res = call float @llvm.convert.from.fp16(i16 %in)
+ ret float %res
+}
+
+declare float @llvm.convert.from.fp16(i16) #1
+declare i16 @llvm.convert.to.fp16(float) #1
diff --git a/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
new file mode 100644
index 0000000..1eb7b43
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcvt_f32_su32.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <2 x float> @ucvt(<2 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: ucvt:
+; CHECK: ucvtf.2s v0, v0
+; CHECK: ret
+ %vcvt.i = uitofp <2 x i32> %a to <2 x float>
+ ret <2 x float> %vcvt.i
+}
+
+define <2 x float> @scvt(<2 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: scvt:
+; CHECK: scvtf.2s v0, v0
+; CHECK: ret
+ %vcvt.i = sitofp <2 x i32> %a to <2 x float>
+ ret <2 x float> %vcvt.i
+}
+
+define <4 x float> @ucvtq(<4 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: ucvtq:
+; CHECK: ucvtf.4s v0, v0
+; CHECK: ret
+ %vcvt.i = uitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %vcvt.i
+}
+
+define <4 x float> @scvtq(<4 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: scvtq:
+; CHECK: scvtf.4s v0, v0
+; CHECK: ret
+ %vcvt.i = sitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %vcvt.i
+}
+
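+; vcvthf2fp/vcvtfp2hf are the v4f16 <-> v4f32 conversions; the *_high variants
+; below use fcvtl2/fcvtn2 to work on the upper half of the wider register.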
+define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtf16:
+; CHECK: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: ret
+ %vcvt1.i = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> %a) nounwind
+ ret <4 x float> %vcvt1.i
+}
+
+define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtf16_high:
+; CHECK: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: ret
+ %in = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %vcvt1.i = tail call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> %in) nounwind
+ ret <4 x float> %vcvt1.i
+}
+
+define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtf16f32:
+; CHECK: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %vcvt1.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a) nounwind
+ ret <4 x i16> %vcvt1.i
+}
+
+define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> %high_big) {
+; CHECK-LABEL: cvtf16f32_high:
+; CHECK: fcvtn2 v0.8h, v1.4s
+; CHECK-NEXT: ret
+ %high = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %high_big)
+ %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+declare <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vcvt_n.ll b/test/CodeGen/AArch64/arm64-vcvt_n.ll
new file mode 100644
index 0000000..7ed5be6
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcvt_n.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
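+; The immediate on ucvtf/scvtf is the number of fractional bits in the
+; fixed-point source, taken from the i32 argument of the vcvtfxu2fp and
+; vcvtfxs2fp intrinsics.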
+define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtf32fxpu:
+; CHECK: ucvtf.2s v0, v0, #9
+; CHECK: ret
+ %vcvt_n1 = tail call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9)
+ ret <2 x float> %vcvt_n1
+}
+
+define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtf32fxps:
+; CHECK: scvtf.2s v0, v0, #12
+; CHECK: ret
+ %vcvt_n1 = tail call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12)
+ ret <2 x float> %vcvt_n1
+}
+
+define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtqf32fxpu:
+; CHECK: ucvtf.4s v0, v0, #18
+; CHECK: ret
+ %vcvt_n1 = tail call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18)
+ ret <4 x float> %vcvt_n1
+}
+
+define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp {
+; CHECK-LABEL: cvtqf32fxps:
+; CHECK: scvtf.4s v0, v0, #30
+; CHECK: ret
+ %vcvt_n1 = tail call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30)
+ ret <4 x float> %vcvt_n1
+}
+
+define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp {
+; CHECK-LABEL: f1:
+; CHECK: ucvtf.2d v0, v0, #12
+; CHECK: ret
+ %vcvt_n1 = tail call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12)
+ ret <2 x double> %vcvt_n1
+}
+
+define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp {
+; CHECK-LABEL: f2:
+; CHECK: scvtf.2d v0, v0, #9
+; CHECK: ret
+ %vcvt_n1 = tail call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9)
+ ret <2 x double> %vcvt_n1
+}
+
+declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll
new file mode 100644
index 0000000..985a5f7
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcvt_su32_f32.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <2 x i32> @c1(<2 x float> %a) nounwind readnone ssp {
+; CHECK-LABEL: c1:
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+ %vcvt.i = fptosi <2 x float> %a to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+define <2 x i32> @c2(<2 x float> %a) nounwind readnone ssp {
+; CHECK-LABEL: c2:
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+ %vcvt.i = fptoui <2 x float> %a to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+define <4 x i32> @c3(<4 x float> %a) nounwind readnone ssp {
+; CHECK-LABEL: c3:
+; CHECK: fcvtzs.4s v0, v0
+; CHECK: ret
+ %vcvt.i = fptosi <4 x float> %a to <4 x i32>
+ ret <4 x i32> %vcvt.i
+}
+
+define <4 x i32> @c4(<4 x float> %a) nounwind readnone ssp {
+; CHECK-LABEL: c4:
+; CHECK: fcvtzu.4s v0, v0
+; CHECK: ret
+ %vcvt.i = fptoui <4 x float> %a to <4 x i32>
+ ret <4 x i32> %vcvt.i
+}
+
diff --git a/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll
new file mode 100644
index 0000000..b29c22c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vcvtxd_f32_f64.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+define float @fcvtxn(double %a) {
+; CHECK-LABEL: fcvtxn:
+; CHECK: fcvtxn s0, d0
+; CHECK-NEXT: ret
+ %vcvtxd.i = tail call float @llvm.aarch64.sisd.fcvtxn(double %a) nounwind
+ ret float %vcvtxd.i
+}
+
+declare float @llvm.aarch64.sisd.fcvtxn(double) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vecCmpBr.ll b/test/CodeGen/AArch64/arm64-vecCmpBr.ll
new file mode 100644
index 0000000..c7321e4
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vecCmpBr.ll
@@ -0,0 +1,207 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+; ModuleID = 'arm64_vecCmpBr.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+target triple = "arm64-apple-ios3.0.0"
+
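+; Each any*/all* function reduces the vector with uminv/umaxv and branches on
+; the scalar result: the unsigned minimum is zero iff some lane is zero, and
+; the unsigned maximum is zero iff every lane is zero.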
+define i32 @anyZero64(<4 x i16> %a) #0 {
+; CHECK: _anyZero64:
+; CHECK: uminv.8b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: b _bar
+entry:
+ %0 = bitcast <4 x i16> %a to <8 x i8>
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3
+ %1 = trunc i32 %vminv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @bar(...) #1
+
+define i32 @anyZero128(<8 x i16> %a) #0 {
+; CHECK: _anyZero128:
+; CHECK: uminv.16b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: b _bar
+
+entry:
+ %0 = bitcast <8 x i16> %a to <16 x i8>
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3
+ %1 = trunc i32 %vminv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @anyNonZero64(<4 x i16> %a) #0 {
+; CHECK: _anyNonZero64:
+; CHECK: umaxv.8b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: movz w0, #0
+
+entry:
+ %0 = bitcast <4 x i16> %a to <8 x i8>
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3
+ %1 = trunc i32 %vmaxv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @anyNonZero128(<8 x i16> %a) #0 {
+; CHECK: _anyNonZero128:
+; CHECK: umaxv.16b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: movz w0, #0
+entry:
+ %0 = bitcast <8 x i16> %a to <16 x i8>
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3
+ %1 = trunc i32 %vmaxv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @allZero64(<4 x i16> %a) #0 {
+; CHECK: _allZero64:
+; CHECK: umaxv.8b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: b _bar
+entry:
+ %0 = bitcast <4 x i16> %a to <8 x i8>
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3
+ %1 = trunc i32 %vmaxv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @allZero128(<8 x i16> %a) #0 {
+; CHECK: _allZero128:
+; CHECK: umaxv.16b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: b _bar
+entry:
+ %0 = bitcast <8 x i16> %a to <16 x i8>
+ %vmaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3
+ %1 = trunc i32 %vmaxv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @allNonZero64(<4 x i16> %a) #0 {
+; CHECK: _allNonZero64:
+; CHECK: uminv.8b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: movz w0, #0
+entry:
+ %0 = bitcast <4 x i16> %a to <8 x i8>
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %0) #3
+ %1 = trunc i32 %vminv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @allNonZero128(<8 x i16> %a) #0 {
+; CHECK: _allNonZero128:
+; CHECK: uminv.16b b[[REGNO1:[0-9]+]], v0
+; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
+; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: movz w0, #0
+entry:
+ %0 = bitcast <8 x i16> %a to <16 x i8>
+ %vminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %0) #3
+ %1 = trunc i32 %vminv.i to i8
+ %tobool = icmp eq i8 %1, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) #2
+
+declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) #2
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) #2
+
+declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) #2
+
+attributes #0 = { nounwind ssp "target-cpu"="cyclone" }
+attributes #1 = { "target-cpu"="cyclone" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { nobuiltin nounwind }
diff --git a/test/CodeGen/AArch64/arm64-vecFold.ll b/test/CodeGen/AArch64/arm64-vecFold.ll
new file mode 100644
index 0000000..aeacfcc
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vecFold.ll
@@ -0,0 +1,145 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -o - %s| FileCheck %s
+
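+; These tests check that two narrowing operations whose results are
+; concatenated fold into a low/high pair (e.g. shrn + shrn2, addhn + addhn2)
+; writing both halves of a single register.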
+define <16 x i8> @foov16i8(<8 x i16> %a0, <8 x i16> %b0) nounwind readnone ssp {
+; CHECK-LABEL: foov16i8:
+ %vshrn_low_shift = lshr <8 x i16> %a0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ %vshrn_low = trunc <8 x i16> %vshrn_low_shift to <8 x i8>
+ %vshrn_high_shift = lshr <8 x i16> %b0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ %vshrn_high = trunc <8 x i16> %vshrn_high_shift to <8 x i8>
+; CHECK: shrn.8b v0, v0, #5
+; CHECK-NEXT: shrn2.16b v0, v1, #5
+; CHECK-NEXT: ret
+ %1 = bitcast <8 x i8> %vshrn_low to <1 x i64>
+ %2 = bitcast <8 x i8> %vshrn_high to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @foov8i16(<4 x i32> %a0, <4 x i32> %b0) nounwind readnone ssp {
+; CHECK-LABEL: foov8i16:
+ %vshrn_low_shift = lshr <4 x i32> %a0, <i32 5, i32 5, i32 5, i32 5>
+ %vshrn_low = trunc <4 x i32> %vshrn_low_shift to <4 x i16>
+ %vshrn_high_shift = lshr <4 x i32> %b0, <i32 5, i32 5, i32 5, i32 5>
+ %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16>
+; CHECK: shrn.4h v0, v0, #5
+; CHECK-NEXT: shrn2.8h v0, v1, #5
+; CHECK-NEXT: ret
+ %1 = bitcast <4 x i16> %vshrn_low to <1 x i64>
+ %2 = bitcast <4 x i16> %vshrn_high to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @foov4i32(<2 x i64> %a0, <2 x i64> %b0) nounwind readnone ssp {
+; CHECK-LABEL: foov4i32:
+ %vshrn_low_shift = lshr <2 x i64> %a0, <i64 5, i64 5>
+ %vshrn_low = trunc <2 x i64> %vshrn_low_shift to <2 x i32>
+ %vshrn_high_shift = lshr <2 x i64> %b0, <i64 5, i64 5>
+ %vshrn_high = trunc <2 x i64> %vshrn_high_shift to <2 x i32>
+; CHECK: shrn.2s v0, v0, #5
+; CHECK-NEXT: shrn2.4s v0, v1, #5
+; CHECK-NEXT: ret
+ %1 = bitcast <2 x i32> %vshrn_low to <1 x i64>
+ %2 = bitcast <2 x i32> %vshrn_high to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
+; CHECK-LABEL: bar:
+ %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind
+ %vaddhn2.i10 = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind
+; CHECK: addhn.4h v0, v0, v1
+; CHECK-NEXT: addhn2.8h v0, v2, v3
+; CHECK-NEXT: ret
+ %1 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
+ %2 = bitcast <4 x i16> %vaddhn2.i10 to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @baz(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
+; CHECK-LABEL: baz:
+ %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind
+ %vshrn_high_shift = ashr <4 x i32> %b0, <i32 5, i32 5, i32 5, i32 5>
+ %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16>
+; CHECK: addhn.4h v0, v0, v1
+; CHECK-NEXT: shrn2.8h v0, v2, #5
+; CHECK-NEXT: ret
+ %1 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
+ %2 = bitcast <4 x i16> %vshrn_high to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @raddhn(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
+; CHECK-LABEL: raddhn:
+entry:
+; CHECK: raddhn.4h v0, v0, v1
+; CHECK-NEXT: raddhn2.8h v0, v2, v3
+; CHECK-NEXT: ret
+ %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind
+ %vraddhn2.i10 = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind
+ %0 = bitcast <4 x i16> %vraddhn2.i to <1 x i64>
+ %1 = bitcast <4 x i16> %vraddhn2.i10 to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @vrshrn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> %b1) nounwind readnone ssp {
+; CHECK-LABEL: vrshrn:
+; CHECK: rshrn.8b v0, v0, #5
+; CHECK-NEXT: rshrn2.16b v0, v2, #6
+; CHECK-NEXT: ret
+ %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a0, i32 5)
+ %vrshrn_n4 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b0, i32 6)
+ %1 = bitcast <8 x i8> %vrshrn_n1 to <1 x i64>
+ %2 = bitcast <8 x i8> %vrshrn_n4 to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> %b1) nounwind readnone ssp {
+; CHECK-LABEL: vrsubhn:
+; CHECK: rsubhn.8b v0, v0, v1
+; CHECK: rsubhn2.16b v0, v2, v3
+; CHECK-NEXT: ret
+ %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a0, <8 x i16> %a1) nounwind
+ %vrsubhn2.i10 = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %b0, <8 x i16> %b1) nounwind
+ %1 = bitcast <8 x i8> %vrsubhn2.i to <1 x i64>
+ %2 = bitcast <8 x i8> %vrsubhn2.i10 to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
+; CHECK-LABEL: noOpt1:
+ %vqsub2.i = tail call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a0, <2 x i32> %a1) nounwind
+ %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind
+; CHECK: sqsub.2s v0, v0, v1
+; CHECK-NEXT: addhn2.8h v0, v2, v3
+ %1 = bitcast <2 x i32> %vqsub2.i to <1 x i64>
+ %2 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
+ ret <8 x i16> %3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+
diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll
new file mode 100644
index 0000000..650ff1e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK-LABEL: func30:
+;CHECK: ushll.4s v0, v0, #0
+;CHECK: movi.4s v1, #0x1
+;CHECK: and.16b v0, v0, v1
+;CHECK: str q0, [x0]
+;CHECK: ret
+
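+; A rough sketch of why this sequence is expected (assuming the usual
+; AdvSIMD lowering of illegal predicate vectors): there is no native
+; <4 x i1> register, so the i1 lanes arrive in the low bits of wider
+; integer lanes. The ushll widens them to 32 bits, and the movi/and
+; pair masks each lane down to bit 0, so the stored zext result is
+; exactly 0 or 1 per lane.
+;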
+%T0_30 = type <4 x i1>
+%T1_30 = type <4 x i32>
+define void @func30(%T0_30 %v0, %T1_30* %p1) {
+ %r = zext %T0_30 %v0 to %T1_30
+ store %T1_30 %r, %T1_30* %p1
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-vector-imm.ll b/test/CodeGen/AArch64/arm64-vector-imm.ll
new file mode 100644
index 0000000..9fb088b
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vector-imm.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
+; CHECK-LABEL: v_orrimm:
+; CHECK-NOT: mov
+; CHECK-NOT: mvn
+; CHECK: orr
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
+; CHECK-LABEL: v_orrimmQ:
+; CHECK-NOT: mov
+; CHECK-NOT: mvn
+; CHECK: orr
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
+; CHECK-LABEL: v_bicimm:
+; CHECK-NOT: mov
+; CHECK-NOT: mvn
+; CHECK: bic
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
+; CHECK-LABEL: v_bicimmQ:
+; CHECK-NOT: mov
+; CHECK-NOT: mvn
+; CHECK: bic
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
+ ret <16 x i8> %tmp3
+}
+
+define <2 x double> @foo(<2 x double> %bar) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: fmov.2d v1, #1.0000000
+ %add = fadd <2 x double> %bar, <double 1.0, double 1.0>
+ ret <2 x double> %add
+}
+
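+; The movi_* tests below enumerate the AdvSIMD modified-immediate
+; encodings. As a sanity check on the constants: 75 = 0x4b, and the
+; lsl forms shift it, so 0x4b << 8 = 19200, 0x4b << 16 = 4915200, and
+; 0x4b << 24 = 1258291200. The msl ("shift ones in") forms also fill
+; the low bits with ones, giving 0x4bff = 19455 and 0x4bffff = 4980735.
+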
+define <4 x i32> @movi_4s_imm_t1() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t1:
+; CHECK: movi.4s v0, #0x4b
+ ret <4 x i32> <i32 75, i32 75, i32 75, i32 75>
+}
+
+define <4 x i32> @movi_4s_imm_t2() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t2:
+; CHECK: movi.4s v0, #0x4b, lsl #8
+ ret <4 x i32> <i32 19200, i32 19200, i32 19200, i32 19200>
+}
+
+define <4 x i32> @movi_4s_imm_t3() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t3:
+; CHECK: movi.4s v0, #0x4b, lsl #16
+ ret <4 x i32> <i32 4915200, i32 4915200, i32 4915200, i32 4915200>
+}
+
+define <4 x i32> @movi_4s_imm_t4() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t4:
+; CHECK: movi.4s v0, #0x4b, lsl #24
+ ret <4 x i32> <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200>
+}
+
+define <8 x i16> @movi_8h_imm_t5() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_8h_imm_t5:
+; CHECK: movi.8h v0, #0x4b
+ ret <8 x i16> <i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16 75>
+}
+
+; rdar://11989841
+define <8 x i16> @movi_8h_imm_t6() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_8h_imm_t6:
+; CHECK: movi.8h v0, #0x4b, lsl #8
+ ret <8 x i16> <i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200>
+}
+
+define <4 x i32> @movi_4s_imm_t7() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t7:
+; CHECK: movi.4s v0, #0x4b, msl #8
+ ret <4 x i32> <i32 19455, i32 19455, i32 19455, i32 19455>
+}
+
+define <4 x i32> @movi_4s_imm_t8() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t8:
+; CHECK: movi.4s v0, #0x4b, msl #16
+ ret <4 x i32> <i32 4980735, i32 4980735, i32 4980735, i32 4980735>
+}
+
+define <16 x i8> @movi_16b_imm_t9() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_16b_imm_t9:
+; CHECK: movi.16b v0, #0x4b
+ ret <16 x i8> <i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75,
+                i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75>
+}
+
+define <2 x i64> @movi_2d_imm_t10() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_2d_imm_t10:
+; CHECK: movi.2d v0, #0xff00ff00ff00ff
+ ret <2 x i64> <i64 71777214294589695, i64 71777214294589695>
+}
+
+define <4 x i32> @movi_4s_imm_t11() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_4s_imm_t11:
+; CHECK: fmov.4s v0, #-0.32812500
+ ret <4 x i32> <i32 3198681088, i32 3198681088, i32 3198681088, i32 3198681088>
+}
+
+define <2 x i64> @movi_2d_imm_t12() nounwind readnone ssp {
+entry:
+; CHECK-LABEL: movi_2d_imm_t12:
+; CHECK: fmov.2d v0, #-0.17187500
+ ret <2 x i64> <i64 13818732506632945664, i64 13818732506632945664>
+}
diff --git a/test/CodeGen/AArch64/arm64-vector-insertion.ll b/test/CodeGen/AArch64/arm64-vector-insertion.ll
new file mode 100644
index 0000000..8fbff71
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vector-insertion.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=arm64 -mcpu=generic -aarch64-neon-syntax=apple < %s | FileCheck %s
+
+define void @test0f(float* nocapture %x, float %a) #0 {
+entry:
+ %0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
+ %1 = bitcast float* %x to <4 x float>*
+ store <4 x float> %0, <4 x float>* %1, align 16
+ ret void
+
+ ; CHECK-LABEL: test0f
+ ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000
+ ; CHECK: ins.s v[[TEMP]][0], v{{[0-9]+}}[0]
+ ; CHECK: str q[[TEMP]], [x0]
+ ; CHECK: ret
+}
+
+define void @test1f(float* nocapture %x, float %a) #0 {
+entry:
+ %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
+ %1 = bitcast float* %x to <4 x float>*
+ store <4 x float> %0, <4 x float>* %1, align 16
+ ret void
+
+ ; CHECK-LABEL: test1f
+ ; CHECK: fmov s[[TEMP:[0-9]+]], #1.0000000
+ ; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0]
+ ; CHECK: ins.s v[[TEMP2]][0], v0[0]
+ ; CHECK: str q[[TEMP2]], [x0]
+ ; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/arm64-vector-ldst.ll b/test/CodeGen/AArch64/arm64-vector-ldst.ll
new file mode 100644
index 0000000..c001915
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -0,0 +1,601 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+
+; rdar://9428579
+
+%type1 = type { <16 x i8> }
+%type2 = type { <8 x i8> }
+%type3 = type { <4 x i16> }
+
+
+define hidden fastcc void @t1(%type1** %argtable) nounwind {
+entry:
+; CHECK-LABEL: t1:
+; CHECK: ldr x[[REG:[0-9]+]], [x0]
+; CHECK: str q0, [x[[REG]]]
+ %tmp1 = load %type1** %argtable, align 8
+ %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
+ store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
+ ret void
+}
+
+define hidden fastcc void @t2(%type2** %argtable) nounwind {
+entry:
+; CHECK-LABEL: t2:
+; CHECK: ldr x[[REG:[0-9]+]], [x0]
+; CHECK: str d0, [x[[REG]]]
+ %tmp1 = load %type2** %argtable, align 8
+ %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
+ store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
+ ret void
+}
+
+; Add a bunch of tests for rdar://11246289.
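+;
+; The fct1_* tests exercise register-offset addressing, where the
+; index is scaled by the element size (lsl #4 for 16-byte vectors,
+; lsl #3 for 8-byte ones). The fct2_* tests exercise scaled immediate
+; offsets; for example, with <2 x i64> elements, index 3 becomes #48
+; and index 5 becomes #80 (index times 16 bytes).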
+
+@globalArray64x2 = common global <2 x i64>* null, align 8
+@globalArray32x4 = common global <4 x i32>* null, align 8
+@globalArray16x8 = common global <8 x i16>* null, align 8
+@globalArray8x16 = common global <16 x i8>* null, align 8
+@globalArray64x1 = common global <1 x i64>* null, align 8
+@globalArray32x2 = common global <2 x i32>* null, align 8
+@globalArray16x4 = common global <4 x i16>* null, align 8
+@globalArray8x8 = common global <8 x i8>* null, align 8
+@floatglobalArray64x2 = common global <2 x double>* null, align 8
+@floatglobalArray32x4 = common global <4 x float>* null, align 8
+@floatglobalArray64x1 = common global <1 x double>* null, align 8
+@floatglobalArray32x2 = common global <2 x float>* null, align 8
+
+define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_64x2:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
+ %tmp = load <2 x i64>* %arrayidx, align 16
+ %tmp1 = load <2 x i64>** @globalArray64x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
+ store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_64x2:
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+ %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
+ %tmp = load <2 x i64>* %arrayidx, align 16
+ %tmp1 = load <2 x i64>** @globalArray64x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
+ store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_32x4:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
+ %tmp = load <4 x i32>* %arrayidx, align 16
+ %tmp1 = load <4 x i32>** @globalArray32x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
+ store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_32x4:
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+ %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
+ %tmp = load <4 x i32>* %arrayidx, align 16
+ %tmp1 = load <4 x i32>** @globalArray32x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
+ store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_16x8:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
+ %tmp = load <8 x i16>* %arrayidx, align 16
+ %tmp1 = load <8 x i16>** @globalArray16x8, align 8
+ %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
+ store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_16x8:
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+ %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
+ %tmp = load <8 x i16>* %arrayidx, align 16
+ %tmp1 = load <8 x i16>** @globalArray16x8, align 8
+ %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
+ store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_8x16:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
+ %tmp = load <16 x i8>* %arrayidx, align 16
+ %tmp1 = load <16 x i8>** @globalArray8x16, align 8
+ %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
+ store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_8x16:
+; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
+ %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
+ %tmp = load <16 x i8>* %arrayidx, align 16
+ %tmp1 = load <16 x i8>** @globalArray8x16, align 8
+ %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
+ store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
+ ret void
+}
+
+define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_64x1:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
+ %tmp = load <1 x i64>* %arrayidx, align 8
+ %tmp1 = load <1 x i64>** @globalArray64x1, align 8
+ %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
+ store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
+ ret void
+}
+
+define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_64x1:
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
+ %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
+ %tmp = load <1 x i64>* %arrayidx, align 8
+ %tmp1 = load <1 x i64>** @globalArray64x1, align 8
+ %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
+ store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
+ ret void
+}
+
+define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_32x2:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
+ %tmp = load <2 x i32>* %arrayidx, align 8
+ %tmp1 = load <2 x i32>** @globalArray32x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
+ store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
+ ret void
+}
+
+define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_32x2:
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
+ %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
+ %tmp = load <2 x i32>* %arrayidx, align 8
+ %tmp1 = load <2 x i32>** @globalArray32x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
+ store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
+ ret void
+}
+
+define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_16x4:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
+ %tmp = load <4 x i16>* %arrayidx, align 8
+ %tmp1 = load <4 x i16>** @globalArray16x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
+ store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
+ ret void
+}
+
+define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
+entry:
+; CHECK-LABEL: fct2_16x4:
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
+ %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
+ %tmp = load <4 x i16>* %arrayidx, align 8
+ %tmp1 = load <4 x i16>** @globalArray16x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
+ store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
+ ret void
+}
+
+define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
+entry:
+; CHECK-LABEL: fct1_8x8:
+; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
+; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
+; CHECK: ldr [[BASE:x[0-9]+]],
+; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
+ %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
+ %tmp = load <8 x i8>* %arrayidx, align 8
+ %tmp1 = load <8 x i8>** @globalArray8x8, align 8
+ %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
+ store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
+ ret void
+}
+
+; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
+; registers for unscaled vector accesses
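+; The expectation (assuming the usual AArch64 addressing rules) is
+; that ldr/str immediates must be a multiple of the access size, so
+; an odd offset like #3 cannot use the scaled form and should instead
+; match the unscaled ldur/stur, which the CHECK lines below verify.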
+@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
+
+define <1 x i64> @fct0() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct0:
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+ ret <1 x i64> %0
+}
+
+define <2 x i32> @fct1() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct1:
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+ ret <2 x i32> %0
+}
+
+define <4 x i16> @fct2() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct2:
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+ ret <4 x i16> %0
+}
+
+define <8 x i8> @fct3() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct3:
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+ ret <8 x i8> %0
+}
+
+define <2 x i64> @fct4() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct4:
+; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+ ret <2 x i64> %0
+}
+
+define <4 x i32> @fct5() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct5:
+; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+ ret <4 x i32> %0
+}
+
+define <8 x i16> @fct6() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct6:
+; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+ ret <8 x i16> %0
+}
+
+define <16 x i8> @fct7() nounwind readonly ssp {
+entry:
+; CHECK-LABEL: fct7:
+; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
+ %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+ ret <16 x i8> %0
+}
+
+define void @fct8() nounwind ssp {
+entry:
+; CHECK-LABEL: fct8:
+; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+ store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+ ret void
+}
+
+define void @fct9() nounwind ssp {
+entry:
+; CHECK-LABEL: fct9:
+; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+ store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+ ret void
+}
+
+define void @fct10() nounwind ssp {
+entry:
+; CHECK-LABEL: fct10:
+; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+ store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+ ret void
+}
+
+define void @fct11() nounwind ssp {
+entry:
+; CHECK-LABEL: fct11:
+; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+ store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+ ret void
+}
+
+define void @fct12() nounwind ssp {
+entry:
+; CHECK-LABEL: fct12:
+; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+ store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+ ret void
+}
+
+define void @fct13() nounwind ssp {
+entry:
+; CHECK-LABEL: fct13:
+; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+ store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+ ret void
+}
+
+define void @fct14() nounwind ssp {
+entry:
+; CHECK-LABEL: fct14:
+; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+ store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+ ret void
+}
+
+define void @fct15() nounwind ssp {
+entry:
+; CHECK-LABEL: fct15:
+; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
+; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
+ %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+ store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+ ret void
+}
+
+; Check the building of a vector from a single loaded value.
+; Part of <rdar://problem/14170854>
+;
+; Single loads with immediate offset.
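+; A load feeding only lane 0 of an otherwise-undef vector should match
+; a scalar FP/SIMD load directly: ldr into a b/h/s/d register zeroes
+; the remaining bytes of the V register, so no separate ins or dup is
+; needed before the mul consumes the value.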
+define <8 x i8> @fct16(i8* nocapture %sp0) {
+; CHECK-LABEL: fct16:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <8 x i8> %vec, %vec
+ ret <8 x i8> %vmull.i
+}
+
+define <16 x i8> @fct17(i8* nocapture %sp0) {
+; CHECK-LABEL: fct17:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <16 x i8> %vec, %vec
+ ret <16 x i8> %vmull.i
+}
+
+define <4 x i16> @fct18(i16* nocapture %sp0) {
+; CHECK-LABEL: fct18:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <4 x i16> %vec, %vec
+ ret <4 x i16> %vmull.i
+}
+
+define <8 x i16> @fct19(i16* nocapture %sp0) {
+; CHECK-LABEL: fct19:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <8 x i16> %vec, %vec
+ ret <8 x i16> %vmull.i
+}
+
+define <2 x i32> @fct20(i32* nocapture %sp0) {
+; CHECK-LABEL: fct20:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <2 x i32> %vec, %vec
+ ret <2 x i32> %vmull.i
+}
+
+define <4 x i32> @fct21(i32* nocapture %sp0) {
+; CHECK-LABEL: fct21:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
+; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <4 x i32> %vec, %vec
+ ret <4 x i32> %vmull.i
+}
+
+define <1 x i64> @fct22(i64* nocapture %sp0) {
+; CHECK-LABEL: fct22:
+; CHECK: ldr d0, [x0, #8]
+entry:
+ %addr = getelementptr i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
+ ret <1 x i64> %vec
+}
+
+define <2 x i64> @fct23(i64* nocapture %sp0) {
+; CHECK-LABEL: fct23:
+; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
+entry:
+ %addr = getelementptr i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
+ ret <2 x i64> %vec
+}
+
+;
+; Single loads with register offset.
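+; Same idea as above, but with the index folded into the
+; register-offset form, scaled by the element size (lsl #1 for i16,
+; lsl #2 for i32, lsl #3 for i64; byte loads need no shift).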
+define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct24:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <8 x i8> %vec, %vec
+ ret <8 x i8> %vmull.i
+}
+
+define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct25:
+; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8* %addr, align 1
+ %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <16 x i8> %vec, %vec
+ ret <16 x i8> %vmull.i
+}
+
+define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct26:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <4 x i16> %vec, %vec
+ ret <4 x i16> %vmull.i
+}
+
+define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct27:
+; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16* %addr, align 1
+ %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <8 x i16> %vec, %vec
+ ret <8 x i16> %vmull.i
+}
+
+define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct28:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
+; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <2 x i32> %vec, %vec
+ ret <2 x i32> %vmull.i
+}
+
+define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct29:
+; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
+; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
+entry:
+ %addr = getelementptr i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32* %addr, align 1
+ %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
+ %vmull.i = mul <4 x i32> %vec, %vec
+ ret <4 x i32> %vmull.i
+}
+
+define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct30:
+; CHECK: ldr d0, [x0, x1, lsl #3]
+entry:
+ %addr = getelementptr i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
+ ret <1 x i64> %vec
+}
+
+define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
+; CHECK-LABEL: fct31:
+; CHECK: ldr d0, [x0, x1, lsl #3]
+entry:
+ %addr = getelementptr i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64* %addr, align 1
+ %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
+ ret <2 x i64> %vec
+}
diff --git a/test/CodeGen/AArch64/arm64-vext.ll b/test/CodeGen/AArch64/arm64-vext.ll
new file mode 100644
index 0000000..2240dfd
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vext.ll
@@ -0,0 +1,464 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
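+; In all of these, the EXT byte immediate is the shuffle's starting
+; lane times the element size: a <4 x i16> shuffle starting at lane 1
+; gives ext ... #2, lane 2 gives #4, and lane 3 gives #6; the 128-bit
+; variants scale the same way.
+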
+define void @test_vext_s8() nounwind ssp {
+ ; CHECK-LABEL: test_vext_s8:
+ ; CHECK: {{ext.8.*#1}}
+ %xS8x8 = alloca <8 x i8>, align 8
+ %__a = alloca <8 x i8>, align 8
+ %__b = alloca <8 x i8>, align 8
+ %tmp = load <8 x i8>* %xS8x8, align 8
+ store <8 x i8> %tmp, <8 x i8>* %__a, align 8
+ %tmp1 = load <8 x i8>* %xS8x8, align 8
+ store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
+ %tmp2 = load <8 x i8>* %__a, align 8
+ %tmp3 = load <8 x i8>* %__b, align 8
+ %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ store <8 x i8> %vext, <8 x i8>* %xS8x8, align 8
+ ret void
+}
+
+define void @test_vext_u8() nounwind ssp {
+ ; CHECK-LABEL: test_vext_u8:
+ ; CHECK: {{ext.8.*#2}}
+ %xU8x8 = alloca <8 x i8>, align 8
+ %__a = alloca <8 x i8>, align 8
+ %__b = alloca <8 x i8>, align 8
+ %tmp = load <8 x i8>* %xU8x8, align 8
+ store <8 x i8> %tmp, <8 x i8>* %__a, align 8
+ %tmp1 = load <8 x i8>* %xU8x8, align 8
+ store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
+ %tmp2 = load <8 x i8>* %__a, align 8
+ %tmp3 = load <8 x i8>* %__b, align 8
+ %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+ store <8 x i8> %vext, <8 x i8>* %xU8x8, align 8
+ ret void
+}
+
+define void @test_vext_p8() nounwind ssp {
+ ; CHECK-LABEL: test_vext_p8:
+ ; CHECK: {{ext.8.*#3}}
+ %xP8x8 = alloca <8 x i8>, align 8
+ %__a = alloca <8 x i8>, align 8
+ %__b = alloca <8 x i8>, align 8
+ %tmp = load <8 x i8>* %xP8x8, align 8
+ store <8 x i8> %tmp, <8 x i8>* %__a, align 8
+ %tmp1 = load <8 x i8>* %xP8x8, align 8
+ store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
+ %tmp2 = load <8 x i8>* %__a, align 8
+ %tmp3 = load <8 x i8>* %__b, align 8
+ %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ store <8 x i8> %vext, <8 x i8>* %xP8x8, align 8
+ ret void
+}
+
+define void @test_vext_s16() nounwind ssp {
+ ; CHECK-LABEL: test_vext_s16:
+ ; CHECK: {{ext.8.*#2}}
+ %xS16x4 = alloca <4 x i16>, align 8
+ %__a = alloca <4 x i16>, align 8
+ %__b = alloca <4 x i16>, align 8
+ %tmp = load <4 x i16>* %xS16x4, align 8
+ store <4 x i16> %tmp, <4 x i16>* %__a, align 8
+ %tmp1 = load <4 x i16>* %xS16x4, align 8
+ store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
+ %tmp2 = load <4 x i16>* %__a, align 8
+ %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
+ %tmp4 = load <4 x i16>* %__b, align 8
+ %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
+ %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ store <4 x i16> %vext, <4 x i16>* %xS16x4, align 8
+ ret void
+}
+
+define void @test_vext_u16() nounwind ssp {
+ ; CHECK-LABEL: test_vext_u16:
+ ; CHECK: {{ext.8.*#4}}
+ %xU16x4 = alloca <4 x i16>, align 8
+ %__a = alloca <4 x i16>, align 8
+ %__b = alloca <4 x i16>, align 8
+ %tmp = load <4 x i16>* %xU16x4, align 8
+ store <4 x i16> %tmp, <4 x i16>* %__a, align 8
+ %tmp1 = load <4 x i16>* %xU16x4, align 8
+ store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
+ %tmp2 = load <4 x i16>* %__a, align 8
+ %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
+ %tmp4 = load <4 x i16>* %__b, align 8
+ %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
+ %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+ store <4 x i16> %vext, <4 x i16>* %xU16x4, align 8
+ ret void
+}
+
+define void @test_vext_p16() nounwind ssp {
+ ; CHECK-LABEL: test_vext_p16:
+ ; CHECK: {{ext.8.*#6}}
+ %xP16x4 = alloca <4 x i16>, align 8
+ %__a = alloca <4 x i16>, align 8
+ %__b = alloca <4 x i16>, align 8
+ %tmp = load <4 x i16>* %xP16x4, align 8
+ store <4 x i16> %tmp, <4 x i16>* %__a, align 8
+ %tmp1 = load <4 x i16>* %xP16x4, align 8
+ store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
+ %tmp2 = load <4 x i16>* %__a, align 8
+ %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
+ %tmp4 = load <4 x i16>* %__b, align 8
+ %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
+ %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ store <4 x i16> %vext, <4 x i16>* %xP16x4, align 8
+ ret void
+}
+
+define void @test_vext_s32() nounwind ssp {
+ ; CHECK-LABEL: test_vext_s32:
+ ; CHECK: {{ext.8.*#4}}
+ %xS32x2 = alloca <2 x i32>, align 8
+ %__a = alloca <2 x i32>, align 8
+ %__b = alloca <2 x i32>, align 8
+ %tmp = load <2 x i32>* %xS32x2, align 8
+ store <2 x i32> %tmp, <2 x i32>* %__a, align 8
+ %tmp1 = load <2 x i32>* %xS32x2, align 8
+ store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
+ %tmp2 = load <2 x i32>* %__a, align 8
+ %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
+ %tmp4 = load <2 x i32>* %__b, align 8
+ %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
+ %vext = shufflevector <2 x i32> %tmp6, <2 x i32> %tmp7, <2 x i32> <i32 1, i32 2>
+ store <2 x i32> %vext, <2 x i32>* %xS32x2, align 8
+ ret void
+}
+
+define void @test_vext_u32() nounwind ssp {
+ ; CHECK-LABEL: test_vext_u32:
+ ; CHECK: {{ext.8.*#4}}
+ %xU32x2 = alloca <2 x i32>, align 8
+ %__a = alloca <2 x i32>, align 8
+ %__b = alloca <2 x i32>, align 8
+ %tmp = load <2 x i32>* %xU32x2, align 8
+ store <2 x i32> %tmp, <2 x i32>* %__a, align 8
+ %tmp1 = load <2 x i32>* %xU32x2, align 8
+ store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
+ %tmp2 = load <2 x i32>* %__a, align 8
+ %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
+ %tmp4 = load <2 x i32>* %__b, align 8
+ %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
+ %vext = shufflevector <2 x i32> %tmp6, <2 x i32> %tmp7, <2 x i32> <i32 1, i32 2>
+ store <2 x i32> %vext, <2 x i32>* %xU32x2, align 8
+ ret void
+}
+
+define void @test_vext_f32() nounwind ssp {
+ ; CHECK-LABEL: test_vext_f32:
+ ; CHECK: {{ext.8.*#4}}
+ %xF32x2 = alloca <2 x float>, align 8
+ %__a = alloca <2 x float>, align 8
+ %__b = alloca <2 x float>, align 8
+ %tmp = load <2 x float>* %xF32x2, align 8
+ store <2 x float> %tmp, <2 x float>* %__a, align 8
+ %tmp1 = load <2 x float>* %xF32x2, align 8
+ store <2 x float> %tmp1, <2 x float>* %__b, align 8
+ %tmp2 = load <2 x float>* %__a, align 8
+ %tmp3 = bitcast <2 x float> %tmp2 to <8 x i8>
+ %tmp4 = load <2 x float>* %__b, align 8
+ %tmp5 = bitcast <2 x float> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <2 x float>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <2 x float>
+ %vext = shufflevector <2 x float> %tmp6, <2 x float> %tmp7, <2 x i32> <i32 1, i32 2>
+ store <2 x float> %vext, <2 x float>* %xF32x2, align 8
+ ret void
+}
+
+define void @test_vext_s64() nounwind ssp {
+ ; CHECK-LABEL: test_vext_s64:
+ ; CHECK_FIXME: {{ext.8.*#1}}
+  ; This just turns into a load of the second element.
+ %xS64x1 = alloca <1 x i64>, align 8
+ %__a = alloca <1 x i64>, align 8
+ %__b = alloca <1 x i64>, align 8
+ %tmp = load <1 x i64>* %xS64x1, align 8
+ store <1 x i64> %tmp, <1 x i64>* %__a, align 8
+ %tmp1 = load <1 x i64>* %xS64x1, align 8
+ store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
+ %tmp2 = load <1 x i64>* %__a, align 8
+ %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
+ %tmp4 = load <1 x i64>* %__b, align 8
+ %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
+ %vext = shufflevector <1 x i64> %tmp6, <1 x i64> %tmp7, <1 x i32> <i32 1>
+ store <1 x i64> %vext, <1 x i64>* %xS64x1, align 8
+ ret void
+}
+
+define void @test_vext_u64() nounwind ssp {
+ ; CHECK-LABEL: test_vext_u64:
+ ; CHECK_FIXME: {{ext.8.*#1}}
+  ; This is turned into a simple load of the second element.
+ %xU64x1 = alloca <1 x i64>, align 8
+ %__a = alloca <1 x i64>, align 8
+ %__b = alloca <1 x i64>, align 8
+ %tmp = load <1 x i64>* %xU64x1, align 8
+ store <1 x i64> %tmp, <1 x i64>* %__a, align 8
+ %tmp1 = load <1 x i64>* %xU64x1, align 8
+ store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
+ %tmp2 = load <1 x i64>* %__a, align 8
+ %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
+ %tmp4 = load <1 x i64>* %__b, align 8
+ %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
+ %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
+ %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
+ %vext = shufflevector <1 x i64> %tmp6, <1 x i64> %tmp7, <1 x i32> <i32 1>
+ store <1 x i64> %vext, <1 x i64>* %xU64x1, align 8
+ ret void
+}
+
+define void @test_vextq_s8() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_s8:
+ ; CHECK: {{ext.16.*#4}}
+ %xS8x16 = alloca <16 x i8>, align 16
+ %__a = alloca <16 x i8>, align 16
+ %__b = alloca <16 x i8>, align 16
+ %tmp = load <16 x i8>* %xS8x16, align 16
+ store <16 x i8> %tmp, <16 x i8>* %__a, align 16
+ %tmp1 = load <16 x i8>* %xS8x16, align 16
+ store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
+ %tmp2 = load <16 x i8>* %__a, align 16
+ %tmp3 = load <16 x i8>* %__b, align 16
+ %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+ store <16 x i8> %vext, <16 x i8>* %xS8x16, align 16
+ ret void
+}
+
+define void @test_vextq_u8() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_u8:
+ ; CHECK: {{ext.16.*#5}}
+ %xU8x16 = alloca <16 x i8>, align 16
+ %__a = alloca <16 x i8>, align 16
+ %__b = alloca <16 x i8>, align 16
+ %tmp = load <16 x i8>* %xU8x16, align 16
+ store <16 x i8> %tmp, <16 x i8>* %__a, align 16
+ %tmp1 = load <16 x i8>* %xU8x16, align 16
+ store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
+ %tmp2 = load <16 x i8>* %__a, align 16
+ %tmp3 = load <16 x i8>* %__b, align 16
+ %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
+ store <16 x i8> %vext, <16 x i8>* %xU8x16, align 16
+ ret void
+}
+
+define void @test_vextq_p8() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_p8:
+ ; CHECK: {{ext.16.*#6}}
+ %xP8x16 = alloca <16 x i8>, align 16
+ %__a = alloca <16 x i8>, align 16
+ %__b = alloca <16 x i8>, align 16
+ %tmp = load <16 x i8>* %xP8x16, align 16
+ store <16 x i8> %tmp, <16 x i8>* %__a, align 16
+ %tmp1 = load <16 x i8>* %xP8x16, align 16
+ store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
+ %tmp2 = load <16 x i8>* %__a, align 16
+ %tmp3 = load <16 x i8>* %__b, align 16
+ %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
+ store <16 x i8> %vext, <16 x i8>* %xP8x16, align 16
+ ret void
+}
+
+define void @test_vextq_s16() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_s16:
+ ; CHECK: {{ext.16.*#14}}
+ %xS16x8 = alloca <8 x i16>, align 16
+ %__a = alloca <8 x i16>, align 16
+ %__b = alloca <8 x i16>, align 16
+ %tmp = load <8 x i16>* %xS16x8, align 16
+ store <8 x i16> %tmp, <8 x i16>* %__a, align 16
+ %tmp1 = load <8 x i16>* %xS16x8, align 16
+ store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
+ %tmp2 = load <8 x i16>* %__a, align 16
+ %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
+ %tmp4 = load <8 x i16>* %__b, align 16
+ %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
+ %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+ store <8 x i16> %vext, <8 x i16>* %xS16x8, align 16
+ ret void
+}
+
+define void @test_vextq_u16() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_u16:
+ ; CHECK: {{ext.16.*#8}}
+ %xU16x8 = alloca <8 x i16>, align 16
+ %__a = alloca <8 x i16>, align 16
+ %__b = alloca <8 x i16>, align 16
+ %tmp = load <8 x i16>* %xU16x8, align 16
+ store <8 x i16> %tmp, <8 x i16>* %__a, align 16
+ %tmp1 = load <8 x i16>* %xU16x8, align 16
+ store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
+ %tmp2 = load <8 x i16>* %__a, align 16
+ %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
+ %tmp4 = load <8 x i16>* %__b, align 16
+ %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
+ %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ store <8 x i16> %vext, <8 x i16>* %xU16x8, align 16
+ ret void
+}
+
+define void @test_vextq_p16() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_p16:
+ ; CHECK: {{ext.16.*#10}}
+ %xP16x8 = alloca <8 x i16>, align 16
+ %__a = alloca <8 x i16>, align 16
+ %__b = alloca <8 x i16>, align 16
+ %tmp = load <8 x i16>* %xP16x8, align 16
+ store <8 x i16> %tmp, <8 x i16>* %__a, align 16
+ %tmp1 = load <8 x i16>* %xP16x8, align 16
+ store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
+ %tmp2 = load <8 x i16>* %__a, align 16
+ %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
+ %tmp4 = load <8 x i16>* %__b, align 16
+ %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
+ %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+ store <8 x i16> %vext, <8 x i16>* %xP16x8, align 16
+ ret void
+}
+
+define void @test_vextq_s32() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_s32:
+ ; CHECK: {{ext.16.*#4}}
+ %xS32x4 = alloca <4 x i32>, align 16
+ %__a = alloca <4 x i32>, align 16
+ %__b = alloca <4 x i32>, align 16
+ %tmp = load <4 x i32>* %xS32x4, align 16
+ store <4 x i32> %tmp, <4 x i32>* %__a, align 16
+ %tmp1 = load <4 x i32>* %xS32x4, align 16
+ store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
+ %tmp2 = load <4 x i32>* %__a, align 16
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = load <4 x i32>* %__b, align 16
+ %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
+ %vext = shufflevector <4 x i32> %tmp6, <4 x i32> %tmp7, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ store <4 x i32> %vext, <4 x i32>* %xS32x4, align 16
+ ret void
+}
+
+define void @test_vextq_u32() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_u32:
+ ; CHECK: {{ext.16.*#8}}
+ %xU32x4 = alloca <4 x i32>, align 16
+ %__a = alloca <4 x i32>, align 16
+ %__b = alloca <4 x i32>, align 16
+ %tmp = load <4 x i32>* %xU32x4, align 16
+ store <4 x i32> %tmp, <4 x i32>* %__a, align 16
+ %tmp1 = load <4 x i32>* %xU32x4, align 16
+ store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
+ %tmp2 = load <4 x i32>* %__a, align 16
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = load <4 x i32>* %__b, align 16
+ %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
+ %vext = shufflevector <4 x i32> %tmp6, <4 x i32> %tmp7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+ store <4 x i32> %vext, <4 x i32>* %xU32x4, align 16
+ ret void
+}
+
+define void @test_vextq_f32() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_f32:
+ ; CHECK: {{ext.16.*#12}}
+ %xF32x4 = alloca <4 x float>, align 16
+ %__a = alloca <4 x float>, align 16
+ %__b = alloca <4 x float>, align 16
+ %tmp = load <4 x float>* %xF32x4, align 16
+ store <4 x float> %tmp, <4 x float>* %__a, align 16
+ %tmp1 = load <4 x float>* %xF32x4, align 16
+ store <4 x float> %tmp1, <4 x float>* %__b, align 16
+ %tmp2 = load <4 x float>* %__a, align 16
+ %tmp3 = bitcast <4 x float> %tmp2 to <16 x i8>
+ %tmp4 = load <4 x float>* %__b, align 16
+ %tmp5 = bitcast <4 x float> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <4 x float>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <4 x float>
+ %vext = shufflevector <4 x float> %tmp6, <4 x float> %tmp7, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ store <4 x float> %vext, <4 x float>* %xF32x4, align 16
+ ret void
+}
+
+define void @test_vextq_s64() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_s64:
+ ; CHECK: {{ext.16.*#8}}
+ %xS64x2 = alloca <2 x i64>, align 16
+ %__a = alloca <2 x i64>, align 16
+ %__b = alloca <2 x i64>, align 16
+ %tmp = load <2 x i64>* %xS64x2, align 16
+ store <2 x i64> %tmp, <2 x i64>* %__a, align 16
+ %tmp1 = load <2 x i64>* %xS64x2, align 16
+ store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
+ %tmp2 = load <2 x i64>* %__a, align 16
+ %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
+ %tmp4 = load <2 x i64>* %__b, align 16
+ %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
+ %vext = shufflevector <2 x i64> %tmp6, <2 x i64> %tmp7, <2 x i32> <i32 1, i32 2>
+ store <2 x i64> %vext, <2 x i64>* %xS64x2, align 16
+ ret void
+}
+
+define void @test_vextq_u64() nounwind ssp {
+ ; CHECK-LABEL: test_vextq_u64:
+ ; CHECK: {{ext.16.*#8}}
+ %xU64x2 = alloca <2 x i64>, align 16
+ %__a = alloca <2 x i64>, align 16
+ %__b = alloca <2 x i64>, align 16
+ %tmp = load <2 x i64>* %xU64x2, align 16
+ store <2 x i64> %tmp, <2 x i64>* %__a, align 16
+ %tmp1 = load <2 x i64>* %xU64x2, align 16
+ store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
+ %tmp2 = load <2 x i64>* %__a, align 16
+ %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
+ %tmp4 = load <2 x i64>* %__b, align 16
+ %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
+ %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
+ %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
+ %vext = shufflevector <2 x i64> %tmp6, <2 x i64> %tmp7, <2 x i32> <i32 1, i32 2>
+ store <2 x i64> %vext, <2 x i64>* %xU64x2, align 16
+ ret void
+}
+
+; Shuffles with an undef second operand can also use an EXT, so long
+; as the indices wrap and stay sequential.
+; rdar://12051674
+define <16 x i8> @vext1(<16 x i8> %_a) nounwind {
+; CHECK-LABEL: vext1:
+; CHECK: ext.16b v0, v0, v0, #8
+ %vext = shufflevector <16 x i8> %_a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i8> %vext
+}
+
+; <rdar://problem/12212062>
+define <2 x i64> @vext2(<2 x i64> %p0, <2 x i64> %p1) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: vext2:
+; CHECK: ext.16b v1, v1, v1, #8
+; CHECK: ext.16b v0, v0, v0, #8
+; CHECK: add.2d v0, v0, v1
+ %t0 = shufflevector <2 x i64> %p1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+ %t1 = shufflevector <2 x i64> %p0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+ %t2 = add <2 x i64> %t1, %t0
+ ret <2 x i64> %t2
+}
diff --git a/test/CodeGen/AArch64/arm64-vext_reverse.ll b/test/CodeGen/AArch64/arm64-vext_reverse.ll
new file mode 100644
index 0000000..c45e55e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vext_reverse.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mtriple=arm64-linux-gnuabi < %s | FileCheck %s
+
+; The following tests check that the input operands of vext are
+; reversed correctly, enumerating all cases of using two undefs in the
+; shuffle masks.
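+; Naming scheme: the first group of digits is the shuffle mask, with
+; lanes 0-3 taken from %a1 and lanes 4-7 from %a2; the suffix lists
+; which mask positions (1-based) are replaced by undef, with _0
+; meaning no undefs.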
+
+define <4 x i16> @vext_6701_0(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_0:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_6701_12(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_12:
+; CHECK: ext v0.8b, v0.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_6701_13(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_13:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 7, i32 undef, i32 1>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_6701_14(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_14:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 7, i32 0, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_6701_23(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_23:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 6, i32 undef, i32 undef, i32 1>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_6701_24(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_24:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 6, i32 undef, i32 0, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_6701_34(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_6701_34:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #4
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 6, i32 7, i32 undef, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_0(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_0:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #2
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_12(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_12:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #2
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 undef, i32 7, i32 0>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_13(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_13:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #2
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 6, i32 undef, i32 0>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_14(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_14:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #2
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 6, i32 7, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_23(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_23:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #2
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 5, i32 undef, i32 undef, i32 0>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_24(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_24:
+; CHECK: rev32 v0.4h, v1.4h
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 5, i32 undef, i32 7, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_5670_34(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_5670_34:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #2
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 5, i32 6, i32 undef, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_0(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_0:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #6
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_12(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_12:
+; CHECK: ext v0.8b, v0.8b, v0.8b, #6
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 undef, i32 1, i32 2>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_13(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_13:
+; CHECK: rev32 v0.4h, v0.4h
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 0, i32 undef, i32 2>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_14(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_14:
+; CHECK: ext v0.8b, v0.8b, v0.8b, #6
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_23(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_23:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #6
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 7, i32 undef, i32 undef, i32 2>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_24(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_24:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #6
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 7, i32 undef, i32 1, i32 undef>
+ ret <4 x i16> %x
+}
+
+define <4 x i16> @vext_7012_34(<4 x i16> %a1, <4 x i16> %a2) {
+entry:
+; CHECK-LABEL: vext_7012_34:
+; CHECK: ext v0.8b, v1.8b, v0.8b, #6
+ %x = shufflevector <4 x i16> %a1, <4 x i16> %a2, <4 x i32> <i32 7, i32 0, i32 undef, i32 undef>
+ ret <4 x i16> %x
+}
diff --git a/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
new file mode 100644
index 0000000..255a182
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
@@ -0,0 +1,375 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+
+;;; Float vectors
+
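+; These tests pin down which intrinsics are expected to lower to
+; AdvSIMD instructions (sqrt, fabs, fma, and the frint* rounding
+; family) and which remain libcalls; for the latter the CHECK lines
+; only look for the libcall name (sin, cos, pow, exp, log).
+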
+%v2f32 = type <2 x float>
+; CHECK: test_v2f32.sqrt:
+define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
+ ; CHECK: fsqrt.2s
+ %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.powi:
+define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
+ ; CHECK: pow
+ %1 = call %v2f32 @llvm.powi.v2f32(%v2f32 %a, i32 %b)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.sin:
+define %v2f32 @test_v2f32.sin(%v2f32 %a) {
+ ; CHECK: sin
+ %1 = call %v2f32 @llvm.sin.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.cos:
+define %v2f32 @test_v2f32.cos(%v2f32 %a) {
+ ; CHECK: cos
+ %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.pow:
+define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
+ ; CHECK: pow
+ %1 = call %v2f32 @llvm.pow.v2f32(%v2f32 %a, %v2f32 %b)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.exp:
+define %v2f32 @test_v2f32.exp(%v2f32 %a) {
+ ; CHECK: exp
+ %1 = call %v2f32 @llvm.exp.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.exp2:
+define %v2f32 @test_v2f32.exp2(%v2f32 %a) {
+ ; CHECK: exp
+ %1 = call %v2f32 @llvm.exp2.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.log:
+define %v2f32 @test_v2f32.log(%v2f32 %a) {
+ ; CHECK: log
+ %1 = call %v2f32 @llvm.log.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.log10:
+define %v2f32 @test_v2f32.log10(%v2f32 %a) {
+ ; CHECK: log
+ %1 = call %v2f32 @llvm.log10.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.log2:
+define %v2f32 @test_v2f32.log2(%v2f32 %a) {
+ ; CHECK: log
+ %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.fma:
+define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
+ ; CHECK: fma
+ %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.fabs:
+define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
+ ; CHECK: fabs
+ %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.floor:
+define %v2f32 @test_v2f32.floor(%v2f32 %a) {
+ ; CHECK: frintm.2s
+ %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.ceil:
+define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
+ ; CHECK: frintp.2s
+ %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.trunc:
+define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
+ ; CHECK: frintz.2s
+ %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.rint:
+define %v2f32 @test_v2f32.rint(%v2f32 %a) {
+ ; CHECK: frintx.2s
+ %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.nearbyint:
+define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
+ ; CHECK: frinti.2s
+ %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+
+declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
+declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0
+declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
+declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
+declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
+declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
+declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
+declare %v2f32 @llvm.log.v2f32(%v2f32) #0
+declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
+declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
+declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
+declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
+declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
+declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
+declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
+declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
+declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
+
+;;;
+
+%v4f32 = type <4 x float>
+; CHECK: test_v4f32.sqrt:
+define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
+ ; CHECK: fsqrt.4s
+ %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.powi:
+define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) {
+ ; CHECK: pow
+ %1 = call %v4f32 @llvm.powi.v4f32(%v4f32 %a, i32 %b)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.sin:
+define %v4f32 @test_v4f32.sin(%v4f32 %a) {
+ ; CHECK: sin
+ %1 = call %v4f32 @llvm.sin.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.cos:
+define %v4f32 @test_v4f32.cos(%v4f32 %a) {
+ ; CHECK: cos
+ %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.pow:
+define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
+ ; CHECK: pow
+ %1 = call %v4f32 @llvm.pow.v4f32(%v4f32 %a, %v4f32 %b)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.exp:
+define %v4f32 @test_v4f32.exp(%v4f32 %a) {
+ ; CHECK: exp
+ %1 = call %v4f32 @llvm.exp.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.exp2:
+define %v4f32 @test_v4f32.exp2(%v4f32 %a) {
+ ; CHECK: exp
+ %1 = call %v4f32 @llvm.exp2.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.log:
+define %v4f32 @test_v4f32.log(%v4f32 %a) {
+ ; CHECK: log
+ %1 = call %v4f32 @llvm.log.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.log10:
+define %v4f32 @test_v4f32.log10(%v4f32 %a) {
+ ; CHECK: log
+ %1 = call %v4f32 @llvm.log10.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.log2:
+define %v4f32 @test_v4f32.log2(%v4f32 %a) {
+ ; CHECK: log
+ %1 = call %v4f32 @llvm.log2.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.fma:
+define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
+ ; CHECK: fma
+ %1 = call %v4f32 @llvm.fma.v4f32(%v4f32 %a, %v4f32 %b, %v4f32 %c)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.fabs:
+define %v4f32 @test_v4f32.fabs(%v4f32 %a) {
+ ; CHECK: fabs
+ %1 = call %v4f32 @llvm.fabs.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.floor:
+define %v4f32 @test_v4f32.floor(%v4f32 %a) {
+ ; CHECK: frintm.4s
+ %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.ceil:
+define %v4f32 @test_v4f32.ceil(%v4f32 %a) {
+ ; CHECK: frintp.4s
+ %1 = call %v4f32 @llvm.ceil.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.trunc:
+define %v4f32 @test_v4f32.trunc(%v4f32 %a) {
+ ; CHECK: frintz.4s
+ %1 = call %v4f32 @llvm.trunc.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.rint:
+define %v4f32 @test_v4f32.rint(%v4f32 %a) {
+ ; CHECK: frintx.4s
+ %1 = call %v4f32 @llvm.rint.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.nearbyint:
+define %v4f32 @test_v4f32.nearbyint(%v4f32 %a) {
+ ; CHECK: frinti.4s
+ %1 = call %v4f32 @llvm.nearbyint.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+
+declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
+declare %v4f32 @llvm.powi.v4f32(%v4f32, i32) #0
+declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
+declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
+declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
+declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
+declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
+declare %v4f32 @llvm.log.v4f32(%v4f32) #0
+declare %v4f32 @llvm.log10.v4f32(%v4f32) #0
+declare %v4f32 @llvm.log2.v4f32(%v4f32) #0
+declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0
+declare %v4f32 @llvm.fabs.v4f32(%v4f32) #0
+declare %v4f32 @llvm.floor.v4f32(%v4f32) #0
+declare %v4f32 @llvm.ceil.v4f32(%v4f32) #0
+declare %v4f32 @llvm.trunc.v4f32(%v4f32) #0
+declare %v4f32 @llvm.rint.v4f32(%v4f32) #0
+declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0
+
+;;; Double vector
+
+%v2f64 = type <2 x double>
+; CHECK: test_v2f64.sqrt:
+define %v2f64 @test_v2f64.sqrt(%v2f64 %a) {
+ ; CHECK: fsqrt.2d
+ %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.powi:
+define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) {
+ ; CHECK: pow
+ %1 = call %v2f64 @llvm.powi.v2f64(%v2f64 %a, i32 %b)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.sin:
+define %v2f64 @test_v2f64.sin(%v2f64 %a) {
+ ; CHECK: sin
+ %1 = call %v2f64 @llvm.sin.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.cos:
+define %v2f64 @test_v2f64.cos(%v2f64 %a) {
+ ; CHECK: cos
+ %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.pow:
+define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
+ ; CHECK: pow
+ %1 = call %v2f64 @llvm.pow.v2f64(%v2f64 %a, %v2f64 %b)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.exp:
+define %v2f64 @test_v2f64.exp(%v2f64 %a) {
+ ; CHECK: exp
+ %1 = call %v2f64 @llvm.exp.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.exp2:
+define %v2f64 @test_v2f64.exp2(%v2f64 %a) {
+ ; CHECK: exp
+ %1 = call %v2f64 @llvm.exp2.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.log:
+define %v2f64 @test_v2f64.log(%v2f64 %a) {
+ ; CHECK: log
+ %1 = call %v2f64 @llvm.log.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.log10:
+define %v2f64 @test_v2f64.log10(%v2f64 %a) {
+ ; CHECK: log
+ %1 = call %v2f64 @llvm.log10.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.log2:
+define %v2f64 @test_v2f64.log2(%v2f64 %a) {
+ ; CHECK: log
+ %1 = call %v2f64 @llvm.log2.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.fma:
+define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
+ ; CHECK: fma
+ %1 = call %v2f64 @llvm.fma.v2f64(%v2f64 %a, %v2f64 %b, %v2f64 %c)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.fabs:
+define %v2f64 @test_v2f64.fabs(%v2f64 %a) {
+ ; CHECK: fabs
+ %1 = call %v2f64 @llvm.fabs.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.floor:
+define %v2f64 @test_v2f64.floor(%v2f64 %a) {
+ ; CHECK: frintm.2d
+ %1 = call %v2f64 @llvm.floor.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.ceil:
+define %v2f64 @test_v2f64.ceil(%v2f64 %a) {
+ ; CHECK: frintp.2d
+ %1 = call %v2f64 @llvm.ceil.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.trunc:
+define %v2f64 @test_v2f64.trunc(%v2f64 %a) {
+ ; CHECK: frintz.2d
+ %1 = call %v2f64 @llvm.trunc.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.rint:
+define %v2f64 @test_v2f64.rint(%v2f64 %a) {
+ ; CHECK: frintx.2d
+ %1 = call %v2f64 @llvm.rint.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.nearbyint:
+define %v2f64 @test_v2f64.nearbyint(%v2f64 %a) {
+ ; CHECK: frinti.2d
+ %1 = call %v2f64 @llvm.nearbyint.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+
+declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
+declare %v2f64 @llvm.powi.v2f64(%v2f64, i32) #0
+declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
+declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
+declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
+declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
+declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
+declare %v2f64 @llvm.log.v2f64(%v2f64) #0
+declare %v2f64 @llvm.log10.v2f64(%v2f64) #0
+declare %v2f64 @llvm.log2.v2f64(%v2f64) #0
+declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0
+declare %v2f64 @llvm.fabs.v2f64(%v2f64) #0
+declare %v2f64 @llvm.floor.v2f64(%v2f64) #0
+declare %v2f64 @llvm.ceil.v2f64(%v2f64) #0
+declare %v2f64 @llvm.trunc.v2f64(%v2f64) #0
+declare %v2f64 @llvm.rint.v2f64(%v2f64) #0
+declare %v2f64 @llvm.nearbyint.v2f64(%v2f64) #0
+
+attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/AArch64/arm64-vhadd.ll b/test/CodeGen/AArch64/arm64-vhadd.ll
new file mode 100644
index 0000000..6178bf9
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -0,0 +1,249 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
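+; shadd/uhadd are the signed/unsigned halving adds: each lane computes
+; (a + b) >> 1 using a wider intermediate, so the sum cannot overflow.
+; srhadd/urhadd further down are the rounding variants, (a + b + 1) >> 1.
+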
+define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: shadd8b:
+;CHECK: shadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: shadd16b:
+;CHECK: shadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: shadd4h:
+;CHECK: shadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: shadd8h:
+;CHECK: shadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: shadd2s:
+;CHECK: shadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: shadd4s:
+;CHECK: shadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uhadd8b:
+;CHECK: uhadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uhadd16b:
+;CHECK: uhadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uhadd4h:
+;CHECK: uhadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uhadd8h:
+;CHECK: uhadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uhadd2s:
+;CHECK: uhadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uhadd4s:
+;CHECK: uhadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: srhadd8b:
+;CHECK: srhadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: srhadd16b:
+;CHECK: srhadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: srhadd4h:
+;CHECK: srhadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: srhadd8h:
+;CHECK: srhadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: srhadd2s:
+;CHECK: srhadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: srhadd4s:
+;CHECK: srhadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: urhadd8b:
+;CHECK: urhadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: urhadd16b:
+;CHECK: urhadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: urhadd4h:
+;CHECK: urhadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: urhadd8h:
+;CHECK: urhadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: urhadd2s:
+;CHECK: urhadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: urhadd4s:
+;CHECK: urhadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vhsub.ll b/test/CodeGen/AArch64/arm64-vhsub.ll
new file mode 100644
index 0000000..13bfda3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vhsub.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
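+; shsub/uhsub are the signed/unsigned halving subtracts: each lane computes
+; (a - b) >> 1 using a wider intermediate, so the difference cannot wrap.
+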
+define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: shsub8b:
+;CHECK: shsub.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: shsub16b:
+;CHECK: shsub.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: shsub4h:
+;CHECK: shsub.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: shsub8h:
+;CHECK: shsub.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: shsub2s:
+;CHECK: shsub.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: shsub4s:
+;CHECK: shsub.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uhsub8b:
+;CHECK: uhsub.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uhsub16b:
+;CHECK: uhsub.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uhsub4h:
+;CHECK: uhsub.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uhsub8h:
+;CHECK: uhsub.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uhsub2s:
+;CHECK: uhsub.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @uhsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uhsub4s:
+;CHECK: uhsub.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM64/virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll
index cb95954..cb95954 100644
--- a/test/CodeGen/ARM64/virtual_base.ll
+++ b/test/CodeGen/AArch64/arm64-virtual_base.ll
diff --git a/test/CodeGen/AArch64/arm64-vmax.ll b/test/CodeGen/AArch64/arm64-vmax.ll
new file mode 100644
index 0000000..3f2c134
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vmax.ll
@@ -0,0 +1,679 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
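+; This file covers the integer max/min family: the elementwise
+; smax/umax/smin/umin instructions first, then the pairwise (..p) forms,
+; then the floating-point variants.
+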
+define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: smax_8b:
+;CHECK: smax.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: smax_16b:
+;CHECK: smax.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: smax_4h:
+;CHECK: smax.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: smax_8h:
+;CHECK: smax.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: smax_2s:
+;CHECK: smax.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: smax_4s:
+;CHECK: smax.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: umax_8b:
+;CHECK: umax.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: umax_16b:
+;CHECK: umax.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: umax_4h:
+;CHECK: umax.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: umax_8h:
+;CHECK: umax.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: umax_2s:
+;CHECK: umax.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: umax_4s:
+;CHECK: umax.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: smin_8b:
+;CHECK: smin.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: smin_16b:
+;CHECK: smin.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: smin_4h:
+;CHECK: smin.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: smin_8h:
+;CHECK: smin.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: smin_2s:
+;CHECK: smin.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: smin_4s:
+;CHECK: smin.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: umin_8b:
+;CHECK: umin.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: umin_16b:
+;CHECK: umin.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: umin_4h:
+;CHECK: umin.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: umin_8h:
+;CHECK: umin.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: umin_2s:
+;CHECK: umin.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: umin_4s:
+;CHECK: umin.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
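+; smaxp/umaxp are the pairwise forms: the two source vectors are concatenated
+; and each result lane is the maximum of one adjacent pair of lanes.
+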
+define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: smaxp_8b:
+;CHECK: smaxp.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: smaxp_16b:
+;CHECK: smaxp.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: smaxp_4h:
+;CHECK: smaxp.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: smaxp_8h:
+;CHECK: smaxp.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: smaxp_2s:
+;CHECK: smaxp.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: smaxp_4s:
+;CHECK: smaxp.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: umaxp_8b:
+;CHECK: umaxp.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: umaxp_16b:
+;CHECK: umaxp.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: umaxp_4h:
+;CHECK: umaxp.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: umaxp_8h:
+;CHECK: umaxp.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: umaxp_2s:
+;CHECK: umaxp.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: umaxp_4s:
+;CHECK: umaxp.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
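+; sminp/uminp are the pairwise minimum forms, mirroring smaxp/umaxp above.
+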
+define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sminp_8b:
+;CHECK: sminp.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sminp_16b:
+;CHECK: sminp.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sminp_4h:
+;CHECK: sminp.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sminp_8h:
+;CHECK: sminp.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sminp_2s:
+;CHECK: sminp.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sminp_4s:
+;CHECK: sminp.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uminp_8b:
+;CHECK: uminp.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uminp_16b:
+;CHECK: uminp.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uminp_4h:
+;CHECK: uminp.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uminp_8h:
+;CHECK: uminp.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uminp_2s:
+;CHECK: uminp.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uminp_4s:
+;CHECK: uminp.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
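+; fmax/fmin are the NaN-propagating floating-point maximum/minimum;
+; fmaxp/fminp are their pairwise forms, and fmaxnmp/fminnmp apply the IEEE
+; maxNum/minNum semantics (a quiet NaN in one operand yields the other
+; operand) pairwise.
+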
+define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmax_2s:
+;CHECK: fmax.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmax_4s:
+;CHECK: fmax.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmax_2d:
+;CHECK: fmax.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmaxp_2s:
+;CHECK: fmaxp.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmaxp_4s:
+;CHECK: fmaxp.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmaxp_2d:
+;CHECK: fmaxp.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmin_2s:
+;CHECK: fmin.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmin_4s:
+;CHECK: fmin.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmin_2d:
+;CHECK: fmin.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fminp_2s:
+;CHECK: fminp.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fminp_4s:
+;CHECK: fminp.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fminp_2d:
+;CHECK: fminp.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fminnmp_2s:
+;CHECK: fminnmp.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fminnmp_4s:
+;CHECK: fminnmp.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fminnmp_2d:
+;CHECK: fminnmp.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmaxnmp_2s:
+;CHECK: fmaxnmp.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmaxnmp_4s:
+;CHECK: fmaxnmp.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmaxnmp_2d:
+;CHECK: fmaxnmp.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
new file mode 100644
index 0000000..b5aca45
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
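+; fmaxnm/fminnm implement the IEEE 754-2008 maxNum/minNum semantics: when
+; exactly one operand is a quiet NaN, the numeric operand is returned rather
+; than the NaN.
+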
+define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
+; CHECK: fmaxnm.2s v0, v0, v1
+; CHECK: ret
+ %vmaxnm2.i = tail call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind
+ ret <2 x float> %vmaxnm2.i
+}
+
+define <4 x float> @f2(<4 x float> %a, <4 x float> %b) nounwind readnone ssp {
+; CHECK: fmaxnm.4s v0, v0, v1
+; CHECK: ret
+ %vmaxnm2.i = tail call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind
+ ret <4 x float> %vmaxnm2.i
+}
+
+define <2 x double> @f3(<2 x double> %a, <2 x double> %b) nounwind readnone ssp {
+; CHECK: fmaxnm.2d v0, v0, v1
+; CHECK: ret
+ %vmaxnm2.i = tail call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind
+ ret <2 x double> %vmaxnm2.i
+}
+
+define <2 x float> @f4(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
+; CHECK: fminnm.2s v0, v0, v1
+; CHECK: ret
+ %vminnm2.i = tail call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind
+ ret <2 x float> %vminnm2.i
+}
+
+define <4 x float> @f5(<4 x float> %a, <4 x float> %b) nounwind readnone ssp {
+; CHECK: fminnm.4s v0, v0, v1
+; CHECK: ret
+ %vminnm2.i = tail call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind
+ ret <4 x float> %vminnm2.i
+}
+
+define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp {
+; CHECK: fminnm.2d v0, v0, v1
+; CHECK: ret
+ %vminnm2.i = tail call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind
+ ret <2 x double> %vminnm2.i
+}
+
+declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+
+define double @test_fmaxnmv(<2 x double> %in) {
+; CHECK-LABEL: test_fmaxnmv:
+; CHECK: fmaxnmp.2d d0, v0
+ %max = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+ ret double %max
+}
+
+define double @test_fminnmv(<2 x double> %in) {
+; CHECK-LABEL: test_fminnmv:
+; CHECK: fminnmp.2d d0, v0
+ %min = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+ ret double %min
+}
+
+declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>)
+declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>)
diff --git a/test/CodeGen/AArch64/arm64-vmovn.ll b/test/CodeGen/AArch64/arm64-vmovn.ll
new file mode 100644
index 0000000..67e2816
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vmovn.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
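+; xtn truncates each lane to half its width; sqxtn/uqxtn narrow with
+; signed/unsigned saturation, and sqxtun narrows a signed input with unsigned
+; saturation. The ..2 forms write the high half of the destination, which the
+; shufflevector with the low-half argument models here.
+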
+define <8 x i8> @xtn8b(<8 x i16> %A) nounwind {
+;CHECK-LABEL: xtn8b:
+;CHECK-NOT: ld1
+;CHECK: xtn.8b v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = trunc <8 x i16> %A to <8 x i8>
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @xtn4h(<4 x i32> %A) nounwind {
+;CHECK-LABEL: xtn4h:
+;CHECK-NOT: ld1
+;CHECK: xtn.4h v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = trunc <4 x i32> %A to <4 x i16>
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @xtn2s(<2 x i64> %A) nounwind {
+;CHECK-LABEL: xtn2s:
+;CHECK-NOT: ld1
+;CHECK: xtn.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = trunc <2 x i64> %A to <2 x i32>
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @xtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
+;CHECK-LABEL: xtn2_16b:
+;CHECK-NOT: ld1
+;CHECK: xtn2.16b v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = trunc <8 x i16> %A to <8 x i8>
+ %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @xtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
+;CHECK-LABEL: xtn2_8h:
+;CHECK-NOT: ld1
+;CHECK: xtn2.8h v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = trunc <4 x i32> %A to <4 x i16>
+ %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @xtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
+;CHECK-LABEL: xtn2_4s:
+;CHECK-NOT: ld1
+;CHECK: xtn2.4s v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = trunc <2 x i64> %A to <2 x i32>
+ %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind {
+;CHECK-LABEL: sqxtn8b:
+;CHECK-NOT: ld1
+;CHECK: sqxtn.8b v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %A)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind {
+;CHECK-LABEL: sqxtn4h:
+;CHECK-NOT: ld1
+;CHECK: sqxtn.4h v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %A)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind {
+;CHECK-LABEL: sqxtn2s:
+;CHECK-NOT: ld1
+;CHECK: sqxtn.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
+;CHECK-LABEL: sqxtn2_16b:
+;CHECK-NOT: ld1
+;CHECK: sqxtn2.16b v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %A)
+ %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
+;CHECK-LABEL: sqxtn2_8h:
+;CHECK-NOT: ld1
+;CHECK: sqxtn2.8h v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %A)
+ %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
+;CHECK-LABEL: sqxtn2_4s:
+;CHECK-NOT: ld1
+;CHECK: sqxtn2.4s v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %A)
+ %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind {
+;CHECK-LABEL: uqxtn8b:
+;CHECK-NOT: ld1
+;CHECK: uqxtn.8b v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %A)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind {
+;CHECK-LABEL: uqxtn4h:
+;CHECK-NOT: ld1
+;CHECK: uqxtn.4h v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %A)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind {
+;CHECK-LABEL: uqxtn2s:
+;CHECK-NOT: ld1
+;CHECK: uqxtn.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
+;CHECK-LABEL: uqxtn2_16b:
+;CHECK-NOT: ld1
+;CHECK: uqxtn2.16b v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %A)
+ %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
+;CHECK-LABEL: uqxtn2_8h:
+;CHECK-NOT: ld1
+;CHECK: uqxtn2.8h v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %A)
+ %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
+;CHECK-LABEL: uqxtn2_4s:
+;CHECK-NOT: ld1
+;CHECK: uqxtn2.4s v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %A)
+ %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind {
+;CHECK-LABEL: sqxtun8b:
+;CHECK-NOT: ld1
+;CHECK: sqxtun.8b v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %A)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind {
+;CHECK-LABEL: sqxtun4h:
+;CHECK-NOT: ld1
+;CHECK: sqxtun.4h v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %A)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind {
+;CHECK-LABEL: sqxtun2s:
+;CHECK-NOT: ld1
+;CHECK: sqxtun.2s v0, v0
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %A)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
+;CHECK-LABEL: sqxtun2_16b:
+;CHECK-NOT: ld1
+;CHECK: sqxtun2.16b v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %A)
+ %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
+;CHECK-LABEL: sqxtun2_8h:
+;CHECK-NOT: ld1
+;CHECK: sqxtun2.8h v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %A)
+ %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
+;CHECK-LABEL: sqxtun2_4s:
+;CHECK-NOT: ld1
+;CHECK: sqxtun2.4s v0, v1
+;CHECK-NEXT: ret
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %A)
+ %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64>) nounwind readnone
+
diff --git a/test/CodeGen/AArch64/arm64-vmul.ll b/test/CodeGen/AArch64/arm64-vmul.ll
new file mode 100644
index 0000000..6fa60fe
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vmul.ll
@@ -0,0 +1,2036 @@
+; RUN: llc -asm-verbose=false < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
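+; This file checks instruction selection for the NEON multiply family:
+; widening multiplies (smull/umull/sqdmull/pmull), the (rounding) saturating
+; doubling-multiply-high forms, fmulx, and the corresponding
+; multiply-accumulate patterns, including their high-half ("2") and
+; by-element ("lane") variants.
+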
+define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: smull8h:
+;CHECK: smull.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: smull4s:
+;CHECK: smull.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: smull2d:
+;CHECK: smull.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: umull8h:
+;CHECK: umull.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: umull4s:
+;CHECK: umull.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: umull2d:
+;CHECK: umull.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmull4s:
+;CHECK: sqdmull.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmull2d:
+;CHECK: sqdmull.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
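+; The "2" variants consume the high halves of their sources, written here as
+; shufflevectors extracting the upper lanes, and must select the sqdmull2
+; forms.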
+define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmull2_4s:
+;CHECK: sqdmull2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmull2_2d:
+;CHECK: sqdmull2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+
+declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: pmull8h:
+;CHECK: pmull.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+
+define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_4h:
+;CHECK: sqdmulh.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_8h:
+;CHECK: sqdmulh.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_2s:
+;CHECK: sqdmulh.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_4s:
+;CHECK: sqdmulh.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind {
+;CHECK-LABEL: sqdmulh_1s:
+;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
+ %tmp1 = load i32* %A
+ %tmp2 = load i32* %B
+ %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
+ ret i32 %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone
+
+define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_4h:
+;CHECK: sqrdmulh.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_8h:
+;CHECK: sqrdmulh.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_2s:
+;CHECK: sqrdmulh.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_4s:
+;CHECK: sqrdmulh.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_1s:
+;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
+ %tmp1 = load i32* %A
+ %tmp2 = load i32* %B
+ %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
+ ret i32 %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone
+
+define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmulx_2s:
+;CHECK: fmulx.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmulx_4s:
+;CHECK: fmulx.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmulx_2d:
+;CHECK: fmulx.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
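+; smlal/smlsl (and umlal/umlsl below) are matched from a plain add/sub of an
+; smull/umull intrinsic result rather than from a dedicated intrinsic.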
+define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: smlal4s:
+;CHECK: smlal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: smlal2d:
+;CHECK: smlal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: smlsl4s:
+;CHECK: smlsl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = sub <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: smlsl2d:
+;CHECK: smlsl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = sub <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
+
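+; The saturating accumulate forms pair sqdmull with the sqadd/sqsub
+; intrinsics declared above and must fuse into sqdmlal/sqdmlsl.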
+define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlal4s:
+;CHECK: sqdmlal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlal2d:
+;CHECK: sqdmlal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlal2_4s:
+;CHECK: sqdmlal2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlal2_2d:
+;CHECK: sqdmlal2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl4s:
+;CHECK: sqdmlsl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl2d:
+;CHECK: sqdmlsl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl2_4s:
+;CHECK: sqdmlsl2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl2_2d:
+;CHECK: sqdmlsl2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: umlal4s:
+;CHECK: umlal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: umlal2d:
+;CHECK: umlal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: umlsl4s:
+;CHECK: umlsl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp5 = sub <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: umlsl2d:
+;CHECK: umlsl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp5 = sub <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK-LABEL: fmla_2s:
+;CHECK: fmla.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = load <2 x float>* %C
+ %tmp4 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
+ ret <2 x float> %tmp4
+}
+
+define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK-LABEL: fmla_4s:
+;CHECK: fmla.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = load <4 x float>* %C
+ %tmp4 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
+ ret <4 x float> %tmp4
+}
+
+define <2 x double> @fmla_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
+;CHECK-LABEL: fmla_2d:
+;CHECK: fmla.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = load <2 x double>* %C
+ %tmp4 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
+ ret <2 x double> %tmp4
+}
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
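+; Negation is spelled as an fsub from a -0.0 splat (the IR idiom for fneg);
+; folding it into one fma operand is what turns fmla into fmls.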
+define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK-LABEL: fmls_2s:
+;CHECK: fmls.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = load <2 x float>* %C
+ %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
+ %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp4, <2 x float> %tmp3)
+ ret <2 x float> %tmp5
+}
+
+define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK-LABEL: fmls_4s:
+;CHECK: fmls.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = load <4 x float>* %C
+ %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
+ %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp4, <4 x float> %tmp3)
+ ret <4 x float> %tmp5
+}
+
+define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
+;CHECK-LABEL: fmls_2d:
+;CHECK: fmls.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = load <2 x double>* %C
+ %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
+ %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp4, <2 x double> %tmp3)
+ ret <2 x double> %tmp5
+}
+
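+; fma is commutative in its first two multiplicands, so the negation may sit
+; on either one and should still select fmls.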
+define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK-LABEL: fmls_commuted_neg_2s:
+;CHECK: fmls.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = load <2 x float>* %C
+ %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
+ %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp4, <2 x float> %tmp1, <2 x float> %tmp3)
+ ret <2 x float> %tmp5
+}
+
+define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK-LABEL: fmls_commuted_neg_4s:
+;CHECK: fmls.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = load <4 x float>* %C
+ %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
+ %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp4, <4 x float> %tmp1, <4 x float> %tmp3)
+ ret <4 x float> %tmp5
+}
+
+define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
+;CHECK-LABEL: fmls_commuted_neg_2d:
+;CHECK: fmls.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = load <2 x double>* %C
+ %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
+ %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp4, <2 x double> %tmp1, <2 x double> %tmp3)
+ ret <2 x double> %tmp5
+}
+
+define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+;CHECK-LABEL: fmls_indexed_2s:
+;CHECK: fmls.2s
+entry:
+ %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c
+ %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer
+ %fmls1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a)
+ ret <2 x float> %fmls1
+}
+
+define <4 x float> @fmls_indexed_4s(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
+;CHECK-LABEL: fmls_indexed_4s:
+;CHECK: fmls.4s
+entry:
+ %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
+ %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+ %fmls1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a)
+ ret <4 x float> %fmls1
+}
+
+define <2 x double> @fmls_indexed_2d(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp {
+;CHECK-LABEL: fmls_indexed_2d:
+;CHECK: fmls.2d
+entry:
+ %0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
+ %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
+ %fmls1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a)
+ ret <2 x double> %fmls1
+}
+
+define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fmla_indexed_scalar_2s:
+; CHECK-NEXT: fmla.2s
+; CHECK-NEXT: ret
+ %v1 = insertelement <2 x float> undef, float %c, i32 0
+ %v2 = insertelement <2 x float> %v1, float %c, i32 1
+  %fmla1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a) nounwind
+ ret <2 x float> %fmla1
+}
+
+define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: fmla_indexed_scalar_4s:
+; CHECK-NEXT: fmla.4s
+; CHECK-NEXT: ret
+ %v1 = insertelement <4 x float> undef, float %c, i32 0
+ %v2 = insertelement <4 x float> %v1, float %c, i32 1
+ %v3 = insertelement <4 x float> %v2, float %c, i32 2
+ %v4 = insertelement <4 x float> %v3, float %c, i32 3
+ %fmla1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %fmla1
+}
+
+define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fmla_indexed_scalar_2d:
+; CHECK-NEXT: fmla.2d
+; CHECK-NEXT: ret
+entry:
+ %v1 = insertelement <2 x double> undef, double %c, i32 0
+ %v2 = insertelement <2 x double> %v1, double %c, i32 1
+ %fmla1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %fmla1
+}
+
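+; In the by-element tests that follow, a splat shufflevector of lane 1 feeds
+; the multiply; it must fold into the indexed form of the instruction rather
+; than being materialized with a dup (hence CHECK-NOT: dup).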
+define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: mul_4h:
+;CHECK-NOT: dup
+;CHECK: mul.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = mul <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @mul_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: mul_8h:
+;CHECK-NOT: dup
+;CHECK: mul.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = mul <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @mul_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: mul_2s:
+;CHECK-NOT: dup
+;CHECK: mul.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = mul <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @mul_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: mul_4s:
+;CHECK-NOT: dup
+;CHECK: mul.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = mul <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
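+; There is no mul.2d instruction, so a <2 x i64> multiply is scalarized into
+; two GPR multiplies.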
+define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK-LABEL: mul_2d:
+; CHECK: mul
+; CHECK: mul
+ %tmp1 = mul <2 x i64> %A, %B
+ ret <2 x i64> %tmp1
+}
+
+define <2 x float> @fmul_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmul_lane_2s:
+;CHECK-NOT: dup
+;CHECK: fmul.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = fmul <2 x float> %tmp1, %tmp3
+ ret <2 x float> %tmp4
+}
+
+define <4 x float> @fmul_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmul_lane_4s:
+;CHECK-NOT: dup
+;CHECK: fmul.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = fmul <4 x float> %tmp1, %tmp3
+ ret <4 x float> %tmp4
+}
+
+define <2 x double> @fmul_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmul_lane_2d:
+;CHECK-NOT: dup
+;CHECK: fmul.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = fmul <2 x double> %tmp1, %tmp3
+ ret <2 x double> %tmp4
+}
+
+define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind {
+;CHECK-LABEL: fmul_lane_s:
+;CHECK-NOT: dup
+;CHECK: fmul.s s0, s0, v1[3]
+ %B = extractelement <4 x float> %vec, i32 3
+ %res = fmul float %A, %B
+ ret float %res
+}
+
+define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
+;CHECK-LABEL: fmul_lane_d:
+;CHECK-NOT: dup
+;CHECK: fmul.d d0, d0, v1[1]
+ %B = extractelement <2 x double> %vec, i32 1
+ %res = fmul double %A, %B
+ ret double %res
+}
+
+define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: fmulx_lane_2s:
+;CHECK-NOT: dup
+;CHECK: fmulx.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
+ ret <2 x float> %tmp4
+}
+
+define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: fmulx_lane_4s:
+;CHECK-NOT: dup
+;CHECK: fmulx.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
+ ret <4 x float> %tmp4
+}
+
+define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: fmulx_lane_2d:
+;CHECK-NOT: dup
+;CHECK: fmulx.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
+ ret <2 x double> %tmp4
+}
+
+define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_lane_4h:
+;CHECK-NOT: dup
+;CHECK: sqdmulh.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_lane_8h:
+;CHECK-NOT: dup
+;CHECK: sqdmulh.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_lane_2s:
+;CHECK-NOT: dup
+;CHECK: sqdmulh.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmulh_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmulh.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
+;CHECK-LABEL: sqdmulh_lane_1s:
+;CHECK-NOT: dup
+;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
+ %tmp1 = extractelement <4 x i32> %B, i32 1
+ %tmp2 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %tmp1)
+ ret i32 %tmp2
+}
+
+define <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_lane_4h:
+;CHECK-NOT: dup
+;CHECK: sqrdmulh.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ ret <4 x i16> %tmp4
+}
+
+define <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_lane_8h:
+;CHECK-NOT: dup
+;CHECK: sqrdmulh.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_lane_2s:
+;CHECK-NOT: dup
+;CHECK: sqrdmulh.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmulh_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqrdmulh.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
+;CHECK-LABEL: sqrdmulh_lane_1s:
+;CHECK-NOT: dup
+;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
+ %tmp1 = extractelement <4 x i32> %B, i32 1
+ %tmp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1)
+ ret i32 %tmp2
+}
+
+define <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmull_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmull.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmull_lane_2d:
+;CHECK-NOT: dup
+;CHECK: sqdmull.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqdmull2_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmull2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqdmull2_lane_2d:
+;CHECK-NOT: dup
+;CHECK: sqdmull2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp4
+}
+
+define <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: umull_lane_4s:
+;CHECK-NOT: dup
+;CHECK: umull.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: umull_lane_2d:
+;CHECK-NOT: dup
+;CHECK: umull.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: smull_lane_4s:
+;CHECK-NOT: dup
+;CHECK: smull.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: smull_lane_2d:
+;CHECK-NOT: dup
+;CHECK: smull.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: smlal_lane_4s:
+;CHECK-NOT: dup
+;CHECK: smlal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+ %tmp6 = add <4 x i32> %tmp3, %tmp5
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: smlal_lane_2d:
+;CHECK-NOT: dup
+;CHECK: smlal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+ %tmp6 = add <2 x i64> %tmp3, %tmp5
+ ret <2 x i64> %tmp6
+}
+
+define <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlal_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmlal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlal_lane_2d:
+;CHECK-NOT: dup
+;CHECK: sqdmlal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ ret <2 x i64> %tmp6
+}
+
+define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlal2_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmlal2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlal2_lane_2d:
+;CHECK-NOT: dup
+;CHECK: sqdmlal2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ ret <2 x i64> %tmp6
+}
+
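+; Scalar saturating multiply-accumulate: the i16-input cases build the
+; product in lane 0 of a vector and expect the .4s instruction, while the
+; i32-input cases use the dedicated sqdmulls scalar form.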
+define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: sqdmlal_lane_1s:
+;CHECK: sqdmlal.4s
+ %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
+ %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
+ %prod = extractelement <4 x i32> %prod.vec, i32 0
+ %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
+ ret i32 %res
+}
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
+
+define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
+;CHECK-LABEL: sqdmlsl_lane_1s:
+;CHECK: sqdmlsl.4s
+ %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
+ %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
+ %prod = extractelement <4 x i32> %prod.vec, i32 0
+ %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
+ ret i32 %res
+}
+declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
+
+define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: sqdmlal_lane_1d:
+;CHECK: sqdmlal.s
+ %rhs = extractelement <2 x i32> %C, i32 1
+ %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
+ %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod)
+ ret i64 %res
+}
+declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32)
+declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64)
+
+define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
+;CHECK-LABEL: sqdmlsl_lane_1d:
+;CHECK: sqdmlsl.s
+ %rhs = extractelement <2 x i32> %C, i32 1
+ %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
+ %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod)
+ ret i64 %res
+}
+declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
+
+
+define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: umlal_lane_4s:
+;CHECK-NOT: dup
+;CHECK: umlal.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+ %tmp6 = add <4 x i32> %tmp3, %tmp5
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: umlal_lane_2d:
+;CHECK-NOT: dup
+;CHECK: umlal.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+ %tmp6 = add <2 x i64> %tmp3, %tmp5
+ ret <2 x i64> %tmp6
+}
+
+
+define <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: smlsl_lane_4s:
+;CHECK-NOT: dup
+;CHECK: smlsl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+ %tmp6 = sub <4 x i32> %tmp3, %tmp5
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: smlsl_lane_2d:
+;CHECK-NOT: dup
+;CHECK: smlsl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+ %tmp6 = sub <2 x i64> %tmp3, %tmp5
+ ret <2 x i64> %tmp6
+}
+
+define <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmlsl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl_lane_2d:
+;CHECK-NOT: dup
+;CHECK: sqdmlsl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ ret <2 x i64> %tmp6
+}
+
+define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl2_lane_4s:
+;CHECK-NOT: dup
+;CHECK: sqdmlsl2.4s
+ %load1 = load <8 x i16>* %A
+ %load2 = load <8 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: sqdmlsl2_lane_2d:
+;CHECK-NOT: dup
+;CHECK: sqdmlsl2.2d
+ %load1 = load <4 x i32>* %A
+ %load2 = load <4 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
+ ret <2 x i64> %tmp6
+}
+
+define <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
+;CHECK-LABEL: umlsl_lane_4s:
+;CHECK-NOT: dup
+;CHECK: umlsl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i32>* %C
+ %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
+ %tmp6 = sub <4 x i32> %tmp3, %tmp5
+ ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
+;CHECK-LABEL: umlsl_lane_2d:
+;CHECK-NOT: dup
+;CHECK: umlsl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i64>* %C
+ %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
+ %tmp6 = sub <2 x i64> %tmp3, %tmp5
+ ret <2 x i64> %tmp6
+}
+
+; Scalar FMULX
+define float @fmulxs(float %a, float %b) nounwind {
+; CHECK-LABEL: fmulxs:
+; CHECK-NEXT: fmulx s0, s0, s1
+ %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
+; CHECK-NEXT: ret
+ ret float %fmulx.i
+}
+
+define double @fmulxd(double %a, double %b) nounwind {
+; CHECK-LABEL: fmulxd:
+; CHECK-NEXT: fmulx d0, d0, d1
+ %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
+; CHECK-NEXT: ret
+ ret double %fmulx.i
+}
+
+define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind {
+; CHECK-LABEL: fmulxs_lane:
+; CHECK-NEXT: fmulx.s s0, s0, v1[3]
+ %b = extractelement <4 x float> %vec, i32 3
+ %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
+; CHECK-NEXT: ret
+ ret float %fmulx.i
+}
+
+define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind {
+; CHECK-LABEL: fmulxd_lane:
+; CHECK-NEXT: fmulx.d d0, d0, v1[1]
+ %b = extractelement <2 x double> %vec, i32 1
+ %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
+; CHECK-NEXT: ret
+ ret double %fmulx.i
+}
+
+declare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone
+declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone
+
+
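+; smull2_8h_simple takes the high halves directly; foo0-foo9 instead use the
+; bitcast-to-<2 x i64>/extract-lane-1/bitcast-back pattern that C vmull_high
+; intrinsics commonly lower to, e.g. (illustrative C, not part of the test):
+;   int16x8_t f(int8x16_t a, int8x16_t b) { return vmull_high_s8(a, b); }
+; Either form must select the smull2/umull2 instructions.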
+define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECK-LABEL: smull2_8h_simple:
+; CHECK-NEXT: smull2.8h v0, v0, v1
+; CHECK-NEXT: ret
+ %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2)
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECK-LABEL: foo0:
+; CHECK: smull2.8h v0, v0, v1
+ %tmp = bitcast <16 x i8> %a to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8>
+ %tmp2 = bitcast <16 x i8> %b to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+ ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind {
+; CHECK-LABEL: foo1:
+; CHECK: smull2.4s v0, v0, v1
+ %tmp = bitcast <8 x i16> %a to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
+ %tmp2 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind {
+; CHECK-LABEL: foo2:
+; CHECK: smull2.2d v0, v0, v1
+ %tmp = bitcast <4 x i32> %a to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
+ %tmp2 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECK-LABEL: foo3:
+; CHECK: umull2.8h v0, v0, v1
+ %tmp = bitcast <16 x i8> %a to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8>
+ %tmp2 = bitcast <16 x i8> %b to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
+ %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+ ret <8 x i16> %vmull.i.i
+}
+
+define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind {
+; CHECK-LABEL: foo4:
+; CHECK: umull2.4s v0, v0, v1
+ %tmp = bitcast <8 x i16> %a to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
+ %tmp2 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind {
+; CHECK-LABEL: foo5:
+; CHECK: umull2.2d v0, v0, v1
+ %tmp = bitcast <4 x i32> %a to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
+ %tmp2 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: foo6:
+; CHECK-NEXT: smull2.4s v0, v1, v2[1]
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
+ %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: foo7:
+; CHECK-NEXT: smull2.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
+ %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: foo8:
+; CHECK-NEXT: umull2.4s v0, v1, v2[1]
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
+ %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
+; CHECK-LABEL: foo9:
+; CHECK-NEXT: umull2.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
+ %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
+ ret <2 x i64> %vmull2.i
+}
+
+define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
+; CHECK-LABEL: bar0:
+; CHECK: smlal2.8h v0, v1, v2
+; CHECK-NEXT: ret
+
+ %tmp = bitcast <16 x i8> %b to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
+ %tmp2 = bitcast <16 x i8> %c to <2 x i64>
+ %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+ %add.i = add <8 x i16> %vmull.i.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind {
+; CHECK-LABEL: bar1:
+; CHECK: smlal2.4s v0, v1, v2
+; CHECK-NEXT: ret
+
+ %tmp = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
+ %tmp2 = bitcast <8 x i16> %c to <2 x i64>
+ %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+ %add.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
+; CHECK-LABEL: bar2:
+; CHECK: smlal2.2d v0, v1, v2
+; CHECK-NEXT: ret
+
+ %tmp = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
+ %tmp2 = bitcast <4 x i32> %c to <2 x i64>
+ %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+ %add.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
+; CHECK-LABEL: bar3:
+; CHECK: umlal2.8h v0, v1, v2
+; CHECK-NEXT: ret
+
+ %tmp = bitcast <16 x i8> %b to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
+ %tmp2 = bitcast <16 x i8> %c to <2 x i64>
+ %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
+ %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
+ %add.i = add <8 x i16> %vmull.i.i.i, %a
+ ret <8 x i16> %add.i
+}
+
+define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind {
+; CHECK-LABEL: bar4:
+; CHECK: umlal2.4s v0, v1, v2
+; CHECK-NEXT: ret
+
+ %tmp = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
+ %tmp2 = bitcast <8 x i16> %c to <2 x i64>
+ %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
+ %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+ %add.i = add <4 x i32> %vmull2.i.i.i, %a
+ ret <4 x i32> %add.i
+}
+
+define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
+; CHECK-LABEL: bar5:
+; CHECK: umlal2.2d v0, v1, v2
+; CHECK-NEXT: ret
+
+ %tmp = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
+ %tmp2 = bitcast <4 x i32> %c to <2 x i64>
+ %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
+ %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+ %add.i = add <2 x i64> %vmull2.i.i.i, %a
+ ret <2 x i64> %add.i
+}
+
+define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
+; CHECK-LABEL: mlal2_1:
+; CHECK: smlal2.4s v0, v1, v2[3]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %tmp = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
+ %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+ %add = add <4 x i32> %vmull2.i.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
+; CHECK-LABEL: mlal2_2:
+; CHECK: smlal2.2d v0, v1, v2[1]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
+ %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+ %add = add <2 x i64> %vmull2.i.i, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
+; CHECK-LABEL: mlal2_4:
+; CHECK: umlal2.4s v0, v1, v2[2]
+; CHECK-NEXT: ret
+
+ %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %tmp = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
+ %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
+ %add = add <4 x i32> %vmull2.i.i, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
+; CHECK-LABEL: mlal2_5:
+; CHECK: umlal2.2d v0, v1, v2[0]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer
+ %tmp = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
+ %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
+ %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
+ %add = add <2 x i64> %vmull2.i.i, %a
+ ret <2 x i64> %add
+}
+
+; rdar://12328502
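+; The vmul*_n tests below build the splat operand with insertelement chains;
+; codegen is expected to fold that into the by-element fmul (v1[0]) instead
+; of materializing the splat with a dup, hence the CHECK-NOT lines.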
+define <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: vmulq_n_f64:
+; CHECK-NOT: dup.2d
+; CHECK: fmul.2d v0, v0, v1[0]
+ %vecinit.i = insertelement <2 x double> undef, double %y, i32 0
+ %vecinit1.i = insertelement <2 x double> %vecinit.i, double %y, i32 1
+ %mul.i = fmul <2 x double> %vecinit1.i, %x
+ ret <2 x double> %mul.i
+}
+
+define <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: vmulq_n_f32:
+; CHECK-NOT: dup.4s
+; CHECK: fmul.4s v0, v0, v1[0]
+ %vecinit.i = insertelement <4 x float> undef, float %y, i32 0
+ %vecinit1.i = insertelement <4 x float> %vecinit.i, float %y, i32 1
+ %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %y, i32 2
+ %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %y, i32 3
+ %mul.i = fmul <4 x float> %vecinit3.i, %x
+ ret <4 x float> %mul.i
+}
+
+define <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: vmul_n_f32:
+; CHECK-NOT: dup.2s
+; CHECK: fmul.2s v0, v0, v1[0]
+ %vecinit.i = insertelement <2 x float> undef, float %y, i32 0
+ %vecinit1.i = insertelement <2 x float> %vecinit.i, float %y, i32 1
+ %mul.i = fmul <2 x float> %vecinit1.i, %x
+ ret <2 x float> %mul.i
+}
+
+define <4 x i16> @vmla_laneq_s16_test(<4 x i16> %a, <4 x i16> %b, <8 x i16> %c) nounwind readnone ssp {
+entry:
+; CHECK: vmla_laneq_s16_test
+; CHECK-NOT: ext
+; CHECK: mla.4h v0, v1, v2[6]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+ %mul = mul <4 x i16> %shuffle, %b
+ %add = add <4 x i16> %mul, %a
+ ret <4 x i16> %add
+}
+
+define <2 x i32> @vmla_laneq_s32_test(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) nounwind readnone ssp {
+entry:
+; CHECK: vmla_laneq_s32_test
+; CHECK-NOT: ext
+; CHECK: mla.2s v0, v1, v2[3]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
+ %mul = mul <2 x i32> %shuffle, %b
+ %add = add <2 x i32> %mul, %a
+ ret <2 x i32> %add
+}
+
+define <8 x i16> @not_really_vmlaq_laneq_s16_test(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone ssp {
+entry:
+; CHECK: not_really_vmlaq_laneq_s16_test
+; CHECK-NOT: ext
+; CHECK: mla.8h v0, v1, v2[5]
+; CHECK-NEXT: ret
+ %shuffle1 = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuffle2 = shufflevector <4 x i16> %shuffle1, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %mul = mul <8 x i16> %shuffle2, %b
+ %add = add <8 x i16> %mul, %a
+ ret <8 x i16> %add
+}
+
+define <4 x i32> @not_really_vmlaq_laneq_s32_test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone ssp {
+entry:
+; CHECK: not_really_vmlaq_laneq_s32_test
+; CHECK-NOT: ext
+; CHECK: mla.4s v0, v1, v2[3]
+; CHECK-NEXT: ret
+ %shuffle1 = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %shuffle2 = shufflevector <2 x i32> %shuffle1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = mul <4 x i32> %shuffle2, %b
+ %add = add <4 x i32> %mul, %a
+ ret <4 x i32> %add
+}
+
+define <4 x i32> @vmull_laneq_s16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
+entry:
+; CHECK: vmull_laneq_s16_test
+; CHECK-NOT: ext
+; CHECK: smull.4s v0, v0, v1[6]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @vmull_laneq_s32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+; CHECK: vmull_laneq_s32_test
+; CHECK-NOT: ext
+; CHECK: smull.2d v0, v0, v1[2]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
+ ret <2 x i64> %vmull2.i
+}
+
+define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
+entry:
+; CHECK: vmull_laneq_u16_test
+; CHECK-NOT: ext
+; CHECK: umull.4s v0, v0, v1[6]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+ %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
+ ret <4 x i32> %vmull2.i
+}
+
+define <2 x i64> @vmull_laneq_u32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+; CHECK: vmull_laneq_u32_test
+; CHECK-NOT: ext
+; CHECK: umull.2d v0, v0, v1[2]
+; CHECK-NEXT: ret
+ %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
+ %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
+ ret <2 x i64> %vmull2.i
+}
+
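+; The vmull_high_n tests take the high half of %b with an explicit shuffle
+; and splat the scalar by hand; both steps should fold into a single
+; smull2/umull2 with no ext.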
+define <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
+entry:
+; CHECK: vmull_high_n_s16_test
+; CHECK-NOT: ext
+; CHECK: smull2.4s
+; CHECK-NEXT: ret
+ %conv = trunc i32 %d to i16
+ %0 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
+ %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @vmull_high_n_s32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp {
+entry:
+; CHECK: vmull_high_n_s32_test
+; CHECK-NOT: ext
+; CHECK: smull2.2d
+; CHECK-NEXT: ret
+ %0 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
+ %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
+ ret <2 x i64> %vmull2.i.i
+}
+
+define <4 x i32> @vmull_high_n_u16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
+entry:
+; CHECK: vmull_high_n_u16_test
+; CHECK-NOT: ext
+; CHECK: umull2.4s
+; CHECK-NEXT: ret
+ %conv = trunc i32 %d to i16
+ %0 = bitcast <8 x i16> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
+ %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
+ %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
+ ret <4 x i32> %vmull2.i.i
+}
+
+define <2 x i64> @vmull_high_n_u32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp {
+entry:
+; CHECK: vmull_high_n_u32_test
+; CHECK-NOT: ext
+; CHECK: umull2.2d
+; CHECK-NEXT: ret
+ %0 = bitcast <4 x i32> %b to <2 x i64>
+ %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
+ %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
+ %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
+ %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
+ %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
+ ret <2 x i64> %vmull2.i.i
+}
+
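+; A splat rebuilt from an extracted lane should select the by-element mul
+; directly, with no intermediate ins or dup.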
+define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vmul_built_dup_test:
+; CHECK-NOT: ins
+; CHECK-NOT: dup
+; CHECK: mul.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[1]
+ %vget_lane = extractelement <4 x i32> %b, i32 1
+ %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
+ %prod = mul <4 x i32> %a, %vecinit3.i
+ ret <4 x i32> %prod
+}
+
+define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmul_built_dup_fromsmall_test:
+; CHECK-NOT: ins
+; CHECK-NOT: dup
+; CHECK: mul.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[3]
+ %vget_lane = extractelement <4 x i16> %b, i32 3
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ %prod = mul <4 x i16> %a, %vecinit3.i
+ ret <4 x i16> %prod
+}
+
+define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vmulq_built_dup_fromsmall_test:
+; CHECK-NOT: ins
+; CHECK-NOT: dup
+; CHECK: mul.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+ %vget_lane = extractelement <4 x i16> %b, i32 0
+ %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
+ %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
+ %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
+ %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
+ %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
+ %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
+ %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
+ %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
+ %prod = mul <8 x i16> %a, %vecinit7.i
+ ret <8 x i16> %prod
+}
+
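+; Taking the high halves with explicit shufflevectors should still select the
+; "2" (high) variants such as sqdmull2, rather than an ext plus the low form.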
+define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: mull_from_two_extracts:
+; CHECK-NOT: ext
+; CHECK: sqdmull2.2d
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: mlal_from_two_extracts:
+; CHECK-NOT: ext
+; CHECK: sqdmlal2.2d
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+ %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
+ ret <2 x i64> %sum
+}
+
+define <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
+; CHECK-LABEL: mull_from_extract_dup:
+; CHECK-NOT: ext
+; CHECK: sqdmull2.2d
+ %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
+ %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) {
+; CHECK-LABEL: pmull_from_extract_dup:
+; CHECK-NOT: ext
+; CHECK: pmull2.8h
+ %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
+ %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+ %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) {
+; CHECK-LABEL: pmull_from_extract_duplane:
+; CHECK-NOT: ext
+; CHECK: pmull2.8h
+
+ %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
+ ret <8 x i16> %res
+}
+
+define <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: sqdmull_from_extract_duplane:
+; CHECK-NOT: ext
+; CHECK: sqdmull2.2d
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
+
+ %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: sqdmlal_from_extract_duplane:
+; CHECK-NOT: ext
+; CHECK: sqdmlal2.2d
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
+
+ %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+ %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
+ ret <2 x i64> %sum
+}
+
+define <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: umlal_from_extract_duplane:
+; CHECK-NOT: ext
+; CHECK: umlal2.2d
+
+ %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
+
+ %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
+ %sum = add <2 x i64> %accum, %res
+ ret <2 x i64> %sum
+}
+
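+; Scalar fma where one multiplicand comes straight from a vector lane should
+; use the indexed fmla/fmls forms; the fsub from -0.0 in the fmls tests is
+; the usual fneg idiom.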
+define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
+; CHECK-LABEL: scalar_fmla_from_extract_v4f32:
+; CHECK: fmla.s s0, s1, v2[3]
+ %rhs = extractelement <4 x float> %rvec, i32 3
+ %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
+ ret float %res
+}
+
+define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
+; CHECK-LABEL: scalar_fmla_from_extract_v2f32:
+; CHECK: fmla.s s0, s1, v2[1]
+ %rhs = extractelement <2 x float> %rvec, i32 1
+ %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
+ ret float %res
+}
+
+define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
+; CHECK-LABEL: scalar_fmls_from_extract_v4f32:
+; CHECK: fmls.s s0, s1, v2[3]
+ %rhs.scal = extractelement <4 x float> %rvec, i32 3
+ %rhs = fsub float -0.0, %rhs.scal
+ %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
+ ret float %res
+}
+
+define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
+; CHECK-LABEL: scalar_fmls_from_extract_v2f32:
+; CHECK: fmls.s s0, s1, v2[1]
+ %rhs.scal = extractelement <2 x float> %rvec, i32 1
+ %rhs = fsub float -0.0, %rhs.scal
+ %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
+ ret float %res
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+define double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
+; CHECK-LABEL: scalar_fmla_from_extract_v2f64:
+; CHECK: fmla.d d0, d1, v2[1]
+ %rhs = extractelement <2 x double> %rvec, i32 1
+ %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum)
+ ret double %res
+}
+
+define double @scalar_fmls_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
+; CHECK-LABEL: scalar_fmls_from_extract_v2f64:
+; CHECK: fmls.d d0, d1, v2[1]
+ %rhs.scal = extractelement <2 x double> %rvec, i32 1
+ %rhs = fsub double -0.0, %rhs.scal
+ %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum)
+ ret double %res
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
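+; In these tests the fneg is applied to the whole vector before the lane is
+; splatted; the combine should look through it and still emit fmls by element.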
+define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x float> %lhs, <4 x float> %rhs) {
+; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32:
+; CHECK: fmls.2s v0, v1, v2[3]
+ %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
+ %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <2 x i32> <i32 3, i32 3>
+ %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
+ ret <2 x float> %res
+}
+
+define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
+; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1:
+; CHECK: fmls.2s v0, v1, v2[1]
+ %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
+ %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <2 x i32> <i32 1, i32 1>
+ %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
+ ret <2 x float> %res
+}
+
+define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) {
+; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32:
+; CHECK: fmls.4s v0, v1, v2[3]
+ %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
+ %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
+ ret <4 x float> %res
+}
+
+define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
+; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1:
+; CHECK: fmls.4s v0, v1, v2[1]
+ %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
+ %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
+ ret <4 x float> %res
+}
+
+define <2 x double> @fmls_with_fneg_before_extract_v2f64(<2 x double> %accum, <2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-LABEL: fmls_with_fneg_before_extract_v2f64:
+; CHECK: fmls.2d v0, v1, v2[1]
+ %rhs_neg = fsub <2 x double> <double -0.0, double -0.0>, %rhs
+ %splat = shufflevector <2 x double> %rhs_neg, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %lhs, <2 x double> %splat, <2 x double> %accum)
+ ret <2 x double> %res
+}
+
+define <1 x double> @test_fmul_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
+; CHECK-LABEL: test_fmul_v1f64:
+; CHECK: fmul
+ %prod = fmul <1 x double> %L, %R
+ ret <1 x double> %prod
+}
+
+define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
+; CHECK-LABEL: test_fdiv_v1f64:
+; CHECK: fdiv
+ %prod = fdiv <1 x double> %L, %R
+ ret <1 x double> %prod
+}
+
+define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
+;CHECK-LABEL: sqdmlal_d:
+;CHECK: sqdmlal
+ %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
+ %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4)
+ ret i64 %tmp5
+}
+
+define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
+;CHECK-LABEL: sqdmlsl_d:
+;CHECK: sqdmlsl
+ %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
+ %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4)
+ ret i64 %tmp5
+}
+
+define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
+; CHECK-LABEL: test_pmull_64:
+; CHECK: pmull.1q
+ %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
+ ret <16 x i8> %val
+}
+
+define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
+; CHECK-LABEL: test_pmull_high_64:
+; CHECK: pmull2.1q
+ %l_hi = extractelement <2 x i64> %l, i32 1
+ %r_hi = extractelement <2 x i64> %r, i32 1
+ %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
+ ret <16 x i8> %val
+}
+
+declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
+
+define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
+; CHECK-LABEL: test_mul_v1i64:
+; CHECK: mul
+ %prod = mul <1 x i64> %lhs, %rhs
+ ret <1 x i64> %prod
+}
diff --git a/test/CodeGen/ARM64/volatile.ll b/test/CodeGen/AArch64/arm64-volatile.ll
index e00ac5a..e00ac5a 100644
--- a/test/CodeGen/ARM64/volatile.ll
+++ b/test/CodeGen/AArch64/arm64-volatile.ll
diff --git a/test/CodeGen/AArch64/arm64-vpopcnt.ll b/test/CodeGen/AArch64/arm64-vpopcnt.ll
new file mode 100644
index 0000000..25306eb
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vpopcnt.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
+target triple = "arm64-apple-ios"
+
+; The non-byte ones used to fail with "Cannot select"
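+; (Sketch of the expected lowering: the wider element types are presumably
+; handled by counting bits per byte with cnt and then combining the per-byte
+; counts, so the checks below only pin down the cnt step.)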
+
+; CHECK-LABEL: ctpopv8i8
+; CHECK: cnt.8b
+define <8 x i8> @ctpopv8i8(<8 x i8> %x) nounwind readnone {
+ %cnt = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %x)
+ ret <8 x i8> %cnt
+}
+
+declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
+
+; CHECK-LABEL: ctpopv4i16
+; CHECK: cnt.8b
+define <4 x i16> @ctpopv4i16(<4 x i16> %x) nounwind readnone {
+ %cnt = tail call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %x)
+ ret <4 x i16> %cnt
+}
+
+declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
+
+; CHECK-LABEL: ctpopv2i32
+; CHECK: cnt.8b
+define <2 x i32> @ctpopv2i32(<2 x i32> %x) nounwind readnone {
+ %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
+ ret <2 x i32> %cnt
+}
+
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
+
+
+; CHECK-LABEL: ctpopv16i8
+; CHECK: cnt.16b
+define <16 x i8> @ctpopv16i8(<16 x i8> %x) nounwind readnone {
+ %cnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x)
+ ret <16 x i8> %cnt
+}
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
+
+; CHECK-LABEL: ctpopv8i16
+; CHECK: cnt.8b
+define <8 x i16> @ctpopv8i16(<8 x i16> %x) nounwind readnone {
+ %cnt = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %x)
+ ret <8 x i16> %cnt
+}
+
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
+
+; CHECK-LABEL: ctpopv4i32
+; CHECK: cnt.8b
+define <4 x i32> @ctpopv4i32(<4 x i32> %x) nounwind readnone {
+ %cnt = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
+ ret <4 x i32> %cnt
+}
+
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
+
+; CHECK-LABEL: ctpopv2i64
+; CHECK: cnt.8b
+define <2 x i64> @ctpopv2i64(<2 x i64> %x) nounwind readnone {
+ %cnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
+ ret <2 x i64> %cnt
+}
+
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vqadd.ll b/test/CodeGen/AArch64/arm64-vqadd.ll
new file mode 100644
index 0000000..20f7e2c
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vqadd.ll
@@ -0,0 +1,332 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sqadd8b:
+;CHECK: sqadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqadd4h:
+;CHECK: sqadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqadd2s:
+;CHECK: sqadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uqadd8b:
+;CHECK: uqadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uqadd4h:
+;CHECK: uqadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uqadd2s:
+;CHECK: uqadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sqadd16b:
+;CHECK: sqadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqadd8h:
+;CHECK: sqadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqadd4s:
+;CHECK: sqadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sqadd2d:
+;CHECK: sqadd.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uqadd16b:
+;CHECK: uqadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uqadd8h:
+;CHECK: uqadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uqadd4s:
+;CHECK: uqadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: uqadd2d:
+;CHECK: uqadd.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
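+; usqadd accumulates a signed addend into an unsigned value with unsigned
+; saturation; suqadd further below is the mirror image (unsigned addend,
+; signed saturation).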
+define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: usqadd8b:
+;CHECK: usqadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: usqadd4h:
+;CHECK: usqadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: usqadd2s:
+;CHECK: usqadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: usqadd16b:
+;CHECK: usqadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: usqadd8h:
+;CHECK: usqadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: usqadd4s:
+;CHECK: usqadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: usqadd2d:
+;CHECK: usqadd.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
+; CHECK-LABEL: usqadd_d:
+; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
+ %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
+ ret i64 %sum
+}
+
+define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
+; CHECK-LABEL: usqadd_s:
+; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
+ %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
+ ret i32 %sum
+}
+
+declare <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: suqadd8b:
+;CHECK: suqadd.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: suqadd4h:
+;CHECK: suqadd.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: suqadd2s:
+;CHECK: suqadd.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: suqadd16b:
+;CHECK: suqadd.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: suqadd8h:
+;CHECK: suqadd.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: suqadd4s:
+;CHECK: suqadd.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: suqadd2d:
+;CHECK: suqadd.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
+; CHECK-LABEL: suqadd_1d:
+; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
+ %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
+ ret <1 x i64> %sum
+}
+
+define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
+; CHECK-LABEL: suqadd_d:
+; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
+ %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
+ ret i64 %sum
+}
+
+define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
+; CHECK-LABEL: suqadd_s:
+; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
+ %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
+ ret i32 %sum
+}
+
+declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vqsub.ll b/test/CodeGen/AArch64/arm64-vqsub.ll
new file mode 100644
index 0000000..dde3ac3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vqsub.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sqsub8b:
+;CHECK: sqsub.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqsub4h:
+;CHECK: sqsub.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqsub2s:
+;CHECK: sqsub.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uqsub8b:
+;CHECK: uqsub.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uqsub4h:
+;CHECK: uqsub.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uqsub2s:
+;CHECK: uqsub.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sqsub16b:
+;CHECK: sqsub.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqsub8h:
+;CHECK: sqsub.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqsub4s:
+;CHECK: sqsub.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sqsub2d:
+;CHECK: sqsub.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uqsub16b:
+;CHECK: uqsub.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uqsub8h:
+;CHECK: uqsub.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uqsub4s:
+;CHECK: uqsub.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: uqsub2d:
+;CHECK: uqsub.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vselect.ll b/test/CodeGen/AArch64/arm64-vselect.ll
new file mode 100644
index 0000000..9988512
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vselect.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK: @func63
+;CHECK: cmeq.4h v0, v0, v1
+
+;FIXME: currently, it will generate 3 instructions:
+; ushll.4s v0, v0, #0
+; shl.4s v0, v0, #31
+; sshr.4s v0, v0, #31
+;But these instructions can be optimized into 1 instruction:
+; sshll.4s v0, v0, #0
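+; (The single sshll suffices because each cmeq.4h lane is already all-ones or
+; all-zeros, so sign-extending it yields the same 32-bit mask that the
+; ushll/shl/sshr sequence builds by splatting bit 0.)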
+
+;CHECK: bsl.16b v0, v2, v3
+;CHECK: str q0, [x0]
+;CHECK: ret
+
+%T0_63 = type <4 x i16>
+%T1_63 = type <4 x i32>
+%T2_63 = type <4 x i1>
+define void @func63(%T1_63* %out, %T0_63 %v0, %T0_63 %v1, %T1_63 %v2, %T1_63 %v3) {
+ %cond = icmp eq %T0_63 %v0, %v1
+ %r = select %T2_63 %cond, %T1_63 %v2, %T1_63 %v3
+ store %T1_63 %r, %T1_63* %out
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-vsetcc_fp.ll b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll
new file mode 100644
index 0000000..f4f4714
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vsetcc_fp.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
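+; "one" (ordered and not-equal) has no single NEON compare, so it is expected
+; to expand to (x > y) | (y > x): two fcmgt with swapped operands, orr'd.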
+define <2 x i32> @fcmp_one(<2 x float> %x, <2 x float> %y) nounwind optsize readnone {
+; CHECK-LABEL: fcmp_one:
+; CHECK-NEXT: fcmgt.2s [[REG:v[0-9]+]], v0, v1
+; CHECK-NEXT: fcmgt.2s [[REG2:v[0-9]+]], v1, v0
+; CHECK-NEXT: orr.8b v0, [[REG2]], [[REG]]
+; CHECK-NEXT: ret
+ %tmp = fcmp one <2 x float> %x, %y
+ %or = sext <2 x i1> %tmp to <2 x i32>
+ ret <2 x i32> %or
+}
diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll
new file mode 100644
index 0000000..82ae486
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vshift.ll
@@ -0,0 +1,1917 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s
+
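+; sqshl/uqshl are the saturating shifts by a register amount and srshl/urshl
+; the rounding ones; a negative shift amount shifts right.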
+define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sqshl8b:
+;CHECK: sqshl.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqshl4h:
+;CHECK: sqshl.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqshl2s:
+;CHECK: sqshl.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uqshl8b:
+;CHECK: uqshl.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uqshl4h:
+;CHECK: uqshl.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uqshl2s:
+;CHECK: uqshl.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sqshl16b:
+;CHECK: sqshl.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqshl8h:
+;CHECK: sqshl.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqshl4s:
+;CHECK: sqshl.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sqshl2d:
+;CHECK: sqshl.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uqshl16b:
+;CHECK: uqshl.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uqshl8h:
+;CHECK: uqshl.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uqshl4s:
+;CHECK: uqshl.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: uqshl2d:
+;CHECK: uqshl.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
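+; srshl/urshl: rounding shift left where the per-lane shift amount comes from a second vector register.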
+define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: srshl8b:
+;CHECK: srshl.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: srshl4h:
+;CHECK: srshl.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: srshl2s:
+;CHECK: srshl.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: urshl8b:
+;CHECK: urshl.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: urshl4h:
+;CHECK: urshl.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: urshl2s:
+;CHECK: urshl.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: srshl16b:
+;CHECK: srshl.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: srshl8h:
+;CHECK: srshl.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: srshl4s:
+;CHECK: srshl.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: srshl2d:
+;CHECK: srshl.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: urshl16b:
+;CHECK: urshl.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: urshl8h:
+;CHECK: urshl.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: urshl4s:
+;CHECK: urshl.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: urshl2d:
+;CHECK: urshl.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
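+; sqrshl/uqrshl: saturating rounding shift left, again with a register shift amount.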
+define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sqrshl8b:
+;CHECK: sqrshl.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrshl4h:
+;CHECK: sqrshl.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrshl2s:
+;CHECK: sqrshl.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: uqrshl8b:
+;CHECK: uqrshl.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: uqrshl4h:
+;CHECK: uqrshl.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: uqrshl2s:
+;CHECK: uqrshl.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sqrshl16b:
+;CHECK: sqrshl.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrshl8h:
+;CHECK: sqrshl.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrshl4s:
+;CHECK: sqrshl.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sqrshl2d:
+;CHECK: sqrshl.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: uqrshl16b:
+;CHECK: uqrshl.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: uqrshl8h:
+;CHECK: uqrshl.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: uqrshl4s:
+;CHECK: uqrshl.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: uqrshl2d:
+;CHECK: uqrshl.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
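+; urshr by immediate is written in IR as urshl with a splat of -1 and should select the urshr form.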
+define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: urshr8b:
+;CHECK: urshr.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: urshr4h:
+;CHECK: urshr.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: urshr2s:
+;CHECK: urshr.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: urshr16b:
+;CHECK: urshr.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: urshr8h:
+;CHECK: urshr.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: urshr4s:
+;CHECK: urshr.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: urshr2d:
+;CHECK: urshr.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ ret <2 x i64> %tmp3
+}
+
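+; srshr: the signed counterpart, via srshl with an all -1 shift vector.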
+define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: srshr8b:
+;CHECK: srshr.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: srshr4h:
+;CHECK: srshr.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: srshr2s:
+;CHECK: srshr.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: srshr16b:
+;CHECK: srshr.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: srshr8h:
+;CHECK: srshr.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: srshr4s:
+;CHECK: srshr.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: srshr2d:
+;CHECK: srshr.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ ret <2 x i64> %tmp3
+}
+
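+; sqshlu (signed saturating shift left unsigned) with a constant splat selects the immediate form.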
+define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: sqshlu8b:
+;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshlu4h:
+;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshlu2s:
+;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: sqshlu16b:
+;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshlu8h:
+;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshlu4s:
+;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshlu2d:
+;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
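+; rshrn/rshrn2: rounding shift right narrow; the "2" variants concatenate the narrowed result onto the low half loaded from %ret.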
+define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: rshrn8b:
+;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: rshrn4h:
+;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: rshrn2s:
+;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @rshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: rshrn16b:
+;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: rshrn8h:
+;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: rshrn4s:
+;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
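+; shrn is matched from plain lshr + trunc rather than an intrinsic call.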
+define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: shrn8b:
+;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: shrn4h:
+;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: shrn2s:
+;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: shrn16b:
+;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: shrn8h:
+;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: shrn4s:
+;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
+
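+; sqshrn: signed saturating shift right narrow, in scalar, vector, and high-half "2" forms.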
+define i32 @sqshrn1s(i64 %A) nounwind {
+;CHECK-LABEL: sqshrn1s:
+;CHECK: sqshrn {{s[0-9]+}}, d0, #1
+ %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
+ ret i32 %tmp
+}
+
+define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshrn8b:
+;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshrn4h:
+;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshrn2s:
+;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshrn16b:
+;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshrn8h:
+;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshrn4s:
+;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
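+; sqshrun: signed saturating shift right unsigned narrow.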
+define i32 @sqshrun1s(i64 %A) nounwind {
+;CHECK-LABEL: sqshrun1s:
+;CHECK: sqshrun {{s[0-9]+}}, d0, #1
+ %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
+ ret i32 %tmp
+}
+
+define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshrun8b:
+;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshrun4h:
+;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshrun2s:
+;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshrun16b:
+;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshrun8h:
+;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshrun4s:
+;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
+
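+; sqrshrn: signed saturating rounding shift right narrow.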
+define i32 @sqrshrn1s(i64 %A) nounwind {
+;CHECK-LABEL: sqrshrn1s:
+;CHECK: sqrshrn {{s[0-9]+}}, d0, #1
+ %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
+ ret i32 %tmp
+}
+
+define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqrshrn8b:
+;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqrshrn4h:
+;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqrshrn2s:
+;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqrshrn16b:
+;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqrshrn8h:
+;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqrshrn4s:
+;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
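+; sqrshrun: signed saturating rounding shift right unsigned narrow.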
+define i32 @sqrshrun1s(i64 %A) nounwind {
+;CHECK-LABEL: sqrshrun1s:
+;CHECK: sqrshrun {{s[0-9]+}}, d0, #1
+ %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
+ ret i32 %tmp
+}
+
+define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqrshrun8b:
+;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqrshrun4h:
+;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqrshrun2s:
+;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqrshrun16b:
+;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqrshrun8h:
+;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqrshrun4s:
+;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
+
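+; uqrshrn: unsigned saturating rounding shift right narrow.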
+define i32 @uqrshrn1s(i64 %A) nounwind {
+;CHECK-LABEL: uqrshrn1s:
+;CHECK: uqrshrn {{s[0-9]+}}, d0, #1
+ %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
+ ret i32 %tmp
+}
+
+define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: uqrshrn8b:
+;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: uqrshrn4h:
+;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: uqrshrn2s:
+;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: uqrshrn16b:
+;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: uqrshrn8h:
+;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: uqrshrn4s:
+;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
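+; uqshrn: unsigned saturating shift right narrow.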
+define i32 @uqshrn1s(i64 %A) nounwind {
+;CHECK-LABEL: uqshrn1s:
+;CHECK: uqshrn {{s[0-9]+}}, d0, #1
+ %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
+ ret i32 %tmp
+}
+
+define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: uqshrn8b:
+;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: uqshrn4h:
+;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: uqshrn2s:
+;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
+;CHECK-LABEL: uqshrn16b:
+;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
+ %out = load <8 x i8>* %ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
+;CHECK-LABEL: uqshrn8h:
+;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
+ %out = load <4 x i16>* %ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
+;CHECK-LABEL: uqshrn4s:
+;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
+ %out = load <2 x i32>* %ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
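+; ushll/ushll2 are matched from zext + shl; the "2" variants take the high half via shufflevector.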
+define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: ushll8h:
+;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: ushll4s:
+;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: ushll2d:
+;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: ushll2_8h:
+;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
+ %load1 = load <16 x i8>* %A
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: ushll2_4s:
+;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
+ %load1 = load <8 x i16>* %A
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: ushll2_2d:
+;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
+ %load1 = load <4 x i32>* %A
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
+ ret <2 x i64> %tmp3
+}
+
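+; sshll/sshll2: the sext + shl analogue.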
+define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: sshll8h:
+;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: sshll4s:
+;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: sshll2d:
+;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: sshll2_8h:
+;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
+ %load1 = load <16 x i8>* %A
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sshll2_4s:
+;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
+ %load1 = load <8 x i16>* %A
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sshll2_2d:
+;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
+ %load1 = load <4 x i32>* %A
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
+ ret <2 x i64> %tmp3
+}
+
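+; sqshl by a constant splat selects the immediate form.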
+define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: sqshli8b:
+;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshli4h:
+;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshli2s:
+;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: sqshli16b:
+;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: sqshli8h:
+;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: sqshli4s:
+;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: sqshli2d:
+;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
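+; uqshl by a constant splat selects the immediate form.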
+define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
+;CHECK-LABEL: uqshli8b:
+;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
+;CHECK-LABEL: uqshli4h:
+;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: uqshli2s:
+;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
+;CHECK-LABEL: uqshli16b:
+;CHECK: uqshl.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
+;CHECK-LABEL: uqshli8h:
+;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: uqshli4s:
+;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
+;CHECK-LABEL: uqshli2d:
+;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
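+; ursra: a rounding shift right (urshl by -1) followed by add folds into a single accumulate instruction.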
+define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: ursra8b:
+;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <8 x i8>* %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: ursra4h:
+;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <4 x i16>* %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: ursra2s:
+;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ %tmp4 = load <2 x i32>* %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: ursra16b:
+;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <16 x i8>* %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: ursra8h:
+;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <8 x i16>* %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: ursra4s:
+;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ %tmp4 = load <4 x i32>* %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: ursra2d:
+;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ %tmp4 = load <2 x i64>* %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
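+; srsra: the signed rounding shift-right-accumulate.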
+define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: srsra8b:
+;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <8 x i8>* %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: srsra4h:
+;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <4 x i16>* %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: srsra2s:
+;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ %tmp4 = load <2 x i32>* %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: srsra16b:
+;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <16 x i8>* %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: srsra8h:
+;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <8 x i16>* %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: srsra4s:
+;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ %tmp4 = load <4 x i32>* %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: srsra2d:
+;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ %tmp4 = load <2 x i64>* %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
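+; usra: lshr followed by add folds into usra.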
+define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: usra8b:
+;CHECK: usra.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <8 x i8>* %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: usra4h:
+;CHECK: usra.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
+ %tmp4 = load <4 x i16>* %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: usra2s:
+;CHECK: usra.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
+ %tmp4 = load <2 x i32>* %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: usra16b:
+;CHECK: usra.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <16 x i8>* %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: usra8h:
+;CHECK: usra.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp4 = load <8 x i16>* %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: usra4s:
+;CHECK: usra.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = load <4 x i32>* %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: usra2d:
+;CHECK: usra.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp4 = load <2 x i64>* %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
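+; ssra: ashr followed by add folds into ssra.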
+define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: ssra8b:
+;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <8 x i8>* %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: ssra4h:
+;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
+ %tmp4 = load <4 x i16>* %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: ssra2s:
+;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
+ %tmp4 = load <2 x i32>* %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: ssra16b:
+;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <16 x i8>* %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: ssra8h:
+;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp4 = load <8 x i16>* %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: ssra4s:
+;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = load <4 x i32>* %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: ssra2d:
+;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp4 = load <2 x i64>* %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
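+; or has no accumulate form, so a shift followed by orr must stay two instructions.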
+define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: shr_orr8b:
+;CHECK: shr.8b v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.8b
+;CHECK-NEXT: ret
+ %tmp1 = load <8 x i8>* %A
+ %tmp4 = load <8 x i8>* %B
+ %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp5 = or <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: shr_orr4h:
+;CHECK: shr.4h v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.8b
+;CHECK-NEXT: ret
+ %tmp1 = load <4 x i16>* %A
+ %tmp4 = load <4 x i16>* %B
+ %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
+ %tmp5 = or <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: shr_orr2s:
+;CHECK: shr.2s v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.8b
+;CHECK-NEXT: ret
+ %tmp1 = load <2 x i32>* %A
+ %tmp4 = load <2 x i32>* %B
+ %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
+ %tmp5 = or <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: shr_orr16b:
+;CHECK: shr.16b v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <16 x i8>* %A
+ %tmp4 = load <16 x i8>* %B
+ %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp5 = or <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: shr_orr8h:
+;CHECK: shr.8h v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp4 = load <8 x i16>* %B
+ %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp5 = or <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: shr_orr4s:
+;CHECK: shr.4s v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp4 = load <4 x i32>* %B
+ %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = or <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: shr_orr2d:
+;CHECK: shr.2d v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp4 = load <2 x i64>* %B
+ %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp5 = or <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: shl_orr8b:
+;CHECK: shl.8b v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.8b
+;CHECK-NEXT: ret
+ %tmp1 = load <8 x i8>* %A
+ %tmp4 = load <8 x i8>* %B
+ %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp5 = or <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: shl_orr4h:
+;CHECK: shl.4h v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.8b
+;CHECK-NEXT: ret
+ %tmp1 = load <4 x i16>* %A
+ %tmp4 = load <4 x i16>* %B
+ %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
+ %tmp5 = or <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: shl_orr2s:
+;CHECK: shl.2s v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.8b
+;CHECK-NEXT: ret
+ %tmp1 = load <2 x i32>* %A
+ %tmp4 = load <2 x i32>* %B
+ %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
+ %tmp5 = or <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: shl_orr16b:
+;CHECK: shl.16b v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <16 x i8>* %A
+ %tmp4 = load <16 x i8>* %B
+ %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp5 = or <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: shl_orr8h:
+;CHECK: shl.8h v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <8 x i16>* %A
+ %tmp4 = load <8 x i16>* %B
+ %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp5 = or <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: shl_orr4s:
+;CHECK: shl.4s v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <4 x i32>* %A
+ %tmp4 = load <4 x i32>* %B
+ %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = or <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: shl_orr2d:
+;CHECK: shl.2d v0, {{v[0-9]+}}, #1
+;CHECK-NEXT: orr.16b
+;CHECK-NEXT: ret
+ %tmp1 = load <2 x i64>* %A
+ %tmp4 = load <2 x i64>* %B
+ %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp5 = or <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
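+; shll/shll2 match a zero-extend followed by a shift left by exactly the
+; source element width.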
+define <8 x i16> @shll(<8 x i8> %in) {
+; CHECK-LABEL: shll:
+; CHECK: shll.8h v0, {{v[0-9]+}}, #8
+ %ext = zext <8 x i8> %in to <8 x i16>
+ %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @shll_high(<8 x i16> %in) {
+; CHECK-LABEL: shll_high
+; CHECK: shll2.4s v0, {{v[0-9]+}}, #16
+ %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext = zext <4 x i16> %extract to <4 x i32>
+ %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
+ ret <4 x i32> %res
+}
+
+define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: sli8b:
+;CHECK: sli.8b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sli4h:
+;CHECK: sli.4h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sli2s:
+;CHECK: sli.2s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: sli1d:
+;CHECK: sli d0, {{d[0-9]+}}, #1
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
+ ret <1 x i64> %tmp3
+}
+
+define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: sli16b:
+;CHECK: sli.16b v0, {{v[0-9]+}}, #1
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sli8h:
+;CHECK: sli.8h v0, {{v[0-9]+}}, #1
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sli4s:
+;CHECK: sli.4s v0, {{v[0-9]+}}, #1
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: sli2d:
+;CHECK: sli.2d v0, {{v[0-9]+}}, #1
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
+
+define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: ashr_v1i64:
+; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %c = ashr <1 x i64> %a, %b
+ ret <1 x i64> %c
+}
diff --git a/test/CodeGen/AArch64/arm64-vshr.ll b/test/CodeGen/AArch64/arm64-vshr.ll
new file mode 100644
index 0000000..21eb579
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vshr.ll
@@ -0,0 +1,63 @@
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+
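+; NEON has no right shift by a register amount; a variable ashr/lshr lowers
+; to a negate of the shift amounts followed by a register left shift
+; (sshl/ushl), since negative sshl/ushl amounts shift right.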
+define <8 x i16> @testShiftRightArith_v8i16(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: testShiftRightArith_v8i16:
+; CHECK: neg.8h [[REG1:v[0-9]+]], [[REG1]]
+; CHECK-NEXT: sshl.8h [[REG2:v[0-9]+]], [[REG2]], [[REG1]]
+
+entry:
+ %a.addr = alloca <8 x i16>, align 16
+ %b.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %a, <8 x i16>* %a.addr, align 16
+ store <8 x i16> %b, <8 x i16>* %b.addr, align 16
+ %0 = load <8 x i16>* %a.addr, align 16
+ %1 = load <8 x i16>* %b.addr, align 16
+ %shr = ashr <8 x i16> %0, %1
+ ret <8 x i16> %shr
+}
+
+define <4 x i32> @testShiftRightArith_v4i32(<4 x i32> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: testShiftRightArith_v4i32:
+; CHECK: neg.4s [[REG3:v[0-9]+]], [[REG3]]
+; CHECK-NEXT: sshl.4s [[REG4:v[0-9]+]], [[REG4]], [[REG3]]
+entry:
+ %a.addr = alloca <4 x i32>, align 32
+ %b.addr = alloca <4 x i32>, align 32
+ store <4 x i32> %a, <4 x i32>* %a.addr, align 32
+ store <4 x i32> %b, <4 x i32>* %b.addr, align 32
+ %0 = load <4 x i32>* %a.addr, align 32
+ %1 = load <4 x i32>* %b.addr, align 32
+ %shr = ashr <4 x i32> %0, %1
+ ret <4 x i32> %shr
+}
+
+define <8 x i16> @testShiftRightLogical(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: testShiftRightLogical:
+; CHECK: neg.8h [[REG5:v[0-9]+]], [[REG5]]
+; CHECK-NEXT: ushl.8h [[REG6:v[0-9]+]], [[REG6]], [[REG5]]
+entry:
+ %a.addr = alloca <8 x i16>, align 16
+ %b.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %a, <8 x i16>* %a.addr, align 16
+ store <8 x i16> %b, <8 x i16>* %b.addr, align 16
+ %0 = load <8 x i16>* %a.addr, align 16
+ %1 = load <8 x i16>* %b.addr, align 16
+ %shr = lshr <8 x i16> %0, %1
+ ret <8 x i16> %shr
+}
+
+define <1 x i64> @sshr_v1i64(<1 x i64> %A) nounwind {
+; CHECK-LABEL: sshr_v1i64:
+; CHECK: sshr d0, d0, #63
+ %tmp3 = ashr <1 x i64> %A, < i64 63 >
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @ushr_v1i64(<1 x i64> %A) nounwind {
+; CHECK-LABEL: ushr_v1i64:
+; CHECK: ushr d0, d0, #63
+ %tmp3 = lshr <1 x i64> %A, < i64 63 >
+ ret <1 x i64> %tmp3
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll
new file mode 100644
index 0000000..62fd961
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
+
+
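+; Shuffles of i1 vectors are legalized to a byte table lookup (tbl) whose
+; permutation mask is materialized in the constant pool.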
+; The mask:
+; CHECK: lCPI0_0:
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 255 ; 0xff
+; The second vector is legalized to undef and the elements of the first vector
+; are used instead.
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 4 ; 0x4
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 0 ; 0x0
+; CHECK: test1
+; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
+; CHECK: movi.8h v[[REG1:[0-9]+]], #0x1, lsl #8
+; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+define <8 x i1> @test1() {
+entry:
+ %Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
+ i1 7>,
+ <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
+ i1 7>,
+ <8 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10,
+ i32 12, i32 14, i32 0>
+ ret <8 x i1> %Shuff
+}
+
+; CHECK: lCPI1_0:
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 10 ; 0xa
+; CHECK: .byte 12 ; 0xc
+; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: test2
+; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
+; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1@PAGE
+; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1@PAGEOFF]
+; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+define <8 x i1> @test2() {
+bb:
+ %Shuff = shufflevector <8 x i1> zeroinitializer,
+ <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>,
+ <8 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14,
+ i32 0>
+ ret <8 x i1> %Shuff
+}
+
+; CHECK: lCPI2_0:
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 10 ; 0xa
+; CHECK: .byte 12 ; 0xc
+; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 255 ; 0xff
+; CHECK: .byte 10 ; 0xa
+; CHECK: .byte 12 ; 0xc
+; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: test3
+; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0@PAGE
+; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0@PAGEOFF]
+; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG3]], lCPI2_1@PAGEOFF]
+; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+define <16 x i1> @test3(i1* %ptr, i32 %v) {
+bb:
+ %Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
+ <16 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14,
+ i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12,
+ i32 14, i32 0>
+ ret <16 x i1> %Shuff
+}
+; CHECK: lCPI3_1:
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 1 ; 0x1
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 18 ; 0x12
+; CHECK: .byte 10 ; 0xa
+; CHECK: .byte 12 ; 0xc
+; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 2 ; 0x2
+; CHECK: .byte 31 ; 0x1f
+; CHECK: .byte 6 ; 0x6
+; CHECK: .byte 30 ; 0x1e
+; CHECK: .byte 10 ; 0xa
+; CHECK: .byte 12 ; 0xc
+; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: _test4:
+; CHECK: ldr q[[REG1:[0-9]+]]
+; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
+; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1@PAGE
+; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1@PAGEOFF]
+; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
+define <16 x i1> @test4(i1* %ptr, i32 %v) {
+bb:
+ %Shuff = shufflevector <16 x i1> zeroinitializer,
+ <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1,
+ i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>,
+ <16 x i32> <i32 2, i32 1, i32 6, i32 18, i32 10, i32 12, i32 14, i32 0,
+ i32 2, i32 31, i32 6, i32 30, i32 10, i32 12, i32 14, i32 0>
+ ret <16 x i1> %Shuff
+}
diff --git a/test/CodeGen/AArch64/arm64-vsqrt.ll b/test/CodeGen/AArch64/arm64-vsqrt.ll
new file mode 100644
index 0000000..02b7c7e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vsqrt.ll
@@ -0,0 +1,232 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
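+; frecps and frsqrts compute the refinement step used in Newton-Raphson
+; reciprocal and reciprocal-square-root sequences; frecpe, frsqrte, urecpe
+; and ursqrte supply the initial estimates.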
+define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: frecps_2s:
+;CHECK: frecps.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: frecps_4s:
+;CHECK: frecps.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: frecps_2d:
+;CHECK: frecps.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK-LABEL: frsqrts_2s:
+;CHECK: frsqrts.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: frsqrts_4s:
+;CHECK: frsqrts.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: frsqrts_2d:
+;CHECK: frsqrts.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = load <2 x double>* %B
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
+ ret <2 x double> %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
+;CHECK-LABEL: frecpe_2s:
+;CHECK: frecpe.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
+;CHECK-LABEL: frecpe_4s:
+;CHECK: frecpe.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
+;CHECK-LABEL: frecpe_2d:
+;CHECK: frecpe.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1)
+ ret <2 x double> %tmp3
+}
+
+define float @frecpe_s(float* %A) nounwind {
+;CHECK-LABEL: frecpe_s:
+;CHECK: frecpe s0, {{s[0-9]+}}
+ %tmp1 = load float* %A
+ %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1)
+ ret float %tmp3
+}
+
+define double @frecpe_d(double* %A) nounwind {
+;CHECK-LABEL: frecpe_d:
+;CHECK: frecpe d0, {{d[0-9]+}}
+ %tmp1 = load double* %A
+ %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1)
+ ret double %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
+declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone
+declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone
+
+define float @frecpx_s(float* %A) nounwind {
+;CHECK-LABEL: frecpx_s:
+;CHECK: frecpx s0, {{s[0-9]+}}
+ %tmp1 = load float* %A
+ %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1)
+ ret float %tmp3
+}
+
+define double @frecpx_d(double* %A) nounwind {
+;CHECK-LABEL: frecpx_d:
+;CHECK: frecpx d0, {{d[0-9]+}}
+ %tmp1 = load double* %A
+ %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1)
+ ret double %tmp3
+}
+
+declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone
+declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone
+
+define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
+;CHECK-LABEL: frsqrte_2s:
+;CHECK: frsqrte.2s
+ %tmp1 = load <2 x float>* %A
+ %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
+;CHECK-LABEL: frsqrte_4s:
+;CHECK: frsqrte.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1)
+ ret <4 x float> %tmp3
+}
+
+define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
+;CHECK-LABEL: frsqrte_2d:
+;CHECK: frsqrte.2d
+ %tmp1 = load <2 x double>* %A
+ %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1)
+ ret <2 x double> %tmp3
+}
+
+define float @frsqrte_s(float* %A) nounwind {
+;CHECK-LABEL: frsqrte_s:
+;CHECK: frsqrte s0, {{s[0-9]+}}
+ %tmp1 = load float* %A
+ %tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1)
+ ret float %tmp3
+}
+
+define double @frsqrte_d(double* %A) nounwind {
+;CHECK-LABEL: frsqrte_d:
+;CHECK: frsqrte d0, {{d[0-9]+}}
+ %tmp1 = load double* %A
+ %tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1)
+ ret double %tmp3
+}
+
+declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
+declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
+declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone
+
+define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: urecpe_2s:
+;CHECK: urecpe.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: urecpe_4s:
+;CHECK: urecpe.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
+
+define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
+;CHECK-LABEL: ursqrte_2s:
+;CHECK: ursqrte.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind {
+;CHECK-LABEL: ursqrte_4s:
+;CHECK: ursqrte.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp3
+}
+
+declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
+
+define float @f1(float %a, float %b) nounwind readnone optsize ssp {
+; CHECK-LABEL: f1:
+; CHECK: frsqrts s0, s0, s1
+; CHECK-NEXT: ret
+ %vrsqrtss.i = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind
+ ret float %vrsqrtss.i
+}
+
+define double @f2(double %a, double %b) nounwind readnone optsize ssp {
+; CHECK-LABEL: f2:
+; CHECK: frsqrts d0, d0, d1
+; CHECK-NEXT: ret
+ %vrsqrtsd.i = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind
+ ret double %vrsqrtsd.i
+}
+
+declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone
+declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-vsra.ll b/test/CodeGen/AArch64/arm64-vsra.ll
new file mode 100644
index 0000000..5e9cef3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vsra.ll
@@ -0,0 +1,150 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
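+; A right shift by a constant whose result feeds an add folds into a single
+; shift-right-and-accumulate: ssra for arithmetic shifts, usra for logical
+; ones.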
+define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vsras8:
+;CHECK: ssra.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+ %tmp4 = add <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: vsras16:
+;CHECK: ssra.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
+ %tmp4 = add <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: vsras32:
+;CHECK: ssra.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
+ %tmp4 = add <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: vsraQs8:
+;CHECK: ssra.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+ %tmp4 = add <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vsraQs16:
+;CHECK: ssra.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: vsraQs32:
+;CHECK: ssra.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: vsraQs64:
+;CHECK: ssra.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vsrau8:
+;CHECK: usra.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+ %tmp4 = add <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: vsrau16:
+;CHECK: usra.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
+ %tmp4 = add <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: vsrau32:
+;CHECK: usra.2s
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
+ %tmp4 = add <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+
+define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: vsraQu8:
+;CHECK: usra.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
+ %tmp4 = add <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vsraQu16:
+;CHECK: usra.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: vsraQu32:
+;CHECK: usra.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: vsraQu64:
+;CHECK: usra.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <1 x i64> @vsra_v1i64(<1 x i64> %A, <1 x i64> %B) nounwind {
+; CHECK-LABEL: vsra_v1i64:
+; CHECK: ssra d0, d1, #63
+ %tmp3 = ashr <1 x i64> %B, < i64 63 >
+ %tmp4 = add <1 x i64> %A, %tmp3
+ ret <1 x i64> %tmp4
+}
diff --git a/test/CodeGen/AArch64/arm64-vsub.ll b/test/CodeGen/AArch64/arm64-vsub.ll
new file mode 100644
index 0000000..c2c8755
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-vsub.ll
@@ -0,0 +1,417 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
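+; subhn narrows a wide subtract by keeping the high half of each lane; the
+; '2' variants write the narrow result into the high half of a 128-bit
+; register, expressed below as a shufflevector concatenation.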
+define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: subhn8b:
+;CHECK: subhn.8b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: subhn4h:
+;CHECK: subhn.4h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: subhn2s:
+;CHECK: subhn.2s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @subhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
+;CHECK-LABEL: subhn2_16b:
+;CHECK: subhn.8b
+;CHECK-NEXT: subhn2.16b
+ %vsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %vsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %res = shufflevector <8 x i8> %vsubhn2.i, <8 x i8> %vsubhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @subhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
+;CHECK-LABEL: subhn2_8h:
+;CHECK: subhn.4h
+;CHECK-NEXT: subhn2.8h
+ %vsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %vsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %res = shufflevector <4 x i16> %vsubhn2.i, <4 x i16> %vsubhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @subhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
+;CHECK-LABEL: subhn2_4s:
+;CHECK: subhn.2s
+;CHECK-NEXT: subhn2.4s
+ %vsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %vsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %res = shufflevector <2 x i32> %vsubhn2.i, <2 x i32> %vsubhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+
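+; rsubhn is the rounding form: half of the discarded low part is added
+; before the high half of each lane is taken.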
+define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: rsubhn8b:
+;CHECK: rsubhn.8b
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: rsubhn4h:
+;CHECK: rsubhn.4h
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: rsubhn2s:
+;CHECK: rsubhn.2s
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @rsubhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
+;CHECK-LABEL: rsubhn2_16b:
+;CHECK: rsubhn.8b
+;CHECK-NEXT: rsubhn2.16b
+ %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %vrsubhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
+ %res = shufflevector <8 x i8> %vrsubhn2.i, <8 x i8> %vrsubhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @rsubhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
+;CHECK-LABEL: rsubhn2_8h:
+;CHECK: rsubhn.4h
+;CHECK-NEXT: rsubhn2.8h
+ %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %vrsubhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
+ %res = shufflevector <4 x i16> %vrsubhn2.i, <4 x i16> %vrsubhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @rsubhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
+;CHECK-LABEL: rsubhn2_4s:
+;CHECK: rsubhn.2s
+;CHECK-NEXT: rsubhn2.4s
+ %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %vrsubhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
+ %res = shufflevector <2 x i32> %vrsubhn2.i, <2 x i32> %vrsubhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %res
+}
+
+declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+
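+; ssubl/usubl sign- or zero-extend both operands before subtracting; the
+; '2' forms consume the high halves of 128-bit sources, matched below via
+; shufflevector.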
+define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: ssubl8h:
+;CHECK: ssubl.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = sub <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @ssubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: ssubl4s:
+;CHECK: ssubl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = sub <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: ssubl2d:
+;CHECK: ssubl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = sub <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: ssubl2_8h:
+;CHECK: ssubl2.8h
+ %tmp1 = load <16 x i8>* %A
+ %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext1 = sext <8 x i8> %high1 to <8 x i16>
+
+ %tmp2 = load <16 x i8>* %B
+ %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext2 = sext <8 x i8> %high2 to <8 x i16>
+
+ %res = sub <8 x i16> %ext1, %ext2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: ssubl2_4s:
+;CHECK: ssubl2.4s
+ %tmp1 = load <8 x i16>* %A
+ %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext1 = sext <4 x i16> %high1 to <4 x i32>
+
+ %tmp2 = load <8 x i16>* %B
+ %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext2 = sext <4 x i16> %high2 to <4 x i32>
+
+ %res = sub <4 x i32> %ext1, %ext2
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: ssubl2_2d:
+;CHECK: ssubl2.2d
+ %tmp1 = load <4 x i32>* %A
+ %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext1 = sext <2 x i32> %high1 to <2 x i64>
+
+ %tmp2 = load <4 x i32>* %B
+ %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext2 = sext <2 x i32> %high2 to <2 x i64>
+
+ %res = sub <2 x i64> %ext1, %ext2
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @usubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: usubl8h:
+;CHECK: usubl.8h
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = sub <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @usubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: usubl4s:
+;CHECK: usubl.4s
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = sub <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: usubl2d:
+;CHECK: usubl.2d
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = sub <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: usubl2_8h:
+;CHECK: usubl2.8h
+ %tmp1 = load <16 x i8>* %A
+ %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext1 = zext <8 x i8> %high1 to <8 x i16>
+
+ %tmp2 = load <16 x i8>* %B
+ %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext2 = zext <8 x i8> %high2 to <8 x i16>
+
+ %res = sub <8 x i16> %ext1, %ext2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: usubl2_4s:
+;CHECK: usubl2.4s
+ %tmp1 = load <8 x i16>* %A
+ %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext1 = zext <4 x i16> %high1 to <4 x i32>
+
+ %tmp2 = load <8 x i16>* %B
+ %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext2 = zext <4 x i16> %high2 to <4 x i32>
+
+ %res = sub <4 x i32> %ext1, %ext2
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: usubl2_2d:
+;CHECK: usubl2.2d
+ %tmp1 = load <4 x i32>* %A
+ %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext1 = zext <2 x i32> %high1 to <2 x i64>
+
+ %tmp2 = load <4 x i32>* %B
+ %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext2 = zext <2 x i32> %high2 to <2 x i64>
+
+ %res = sub <2 x i64> %ext1, %ext2
+ ret <2 x i64> %res
+}
+
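+; ssubw/usubw keep the first operand wide and extend only the second, so no
+; separate widening of the accumulator is needed.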
+define <8 x i16> @ssubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: ssubw8h:
+;CHECK: ssubw.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = sub <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @ssubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: ssubw4s:
+;CHECK: ssubw.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = sub <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: ssubw2d:
+;CHECK: ssubw.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = sub <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: ssubw2_8h:
+;CHECK: ssubw2.8h
+ %tmp1 = load <8 x i16>* %A
+
+ %tmp2 = load <16 x i8>* %B
+ %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext2 = sext <8 x i8> %high2 to <8 x i16>
+
+ %res = sub <8 x i16> %tmp1, %ext2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: ssubw2_4s:
+;CHECK: ssubw2.4s
+ %tmp1 = load <4 x i32>* %A
+
+ %tmp2 = load <8 x i16>* %B
+ %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext2 = sext <4 x i16> %high2 to <4 x i32>
+
+ %res = sub <4 x i32> %tmp1, %ext2
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: ssubw2_2d:
+;CHECK: ssubw2.2d
+ %tmp1 = load <2 x i64>* %A
+
+ %tmp2 = load <4 x i32>* %B
+ %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext2 = sext <2 x i32> %high2 to <2 x i64>
+
+ %res = sub <2 x i64> %tmp1, %ext2
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @usubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: usubw8h:
+;CHECK: usubw.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = sub <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @usubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: usubw4s:
+;CHECK: usubw.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = sub <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: usubw2d:
+;CHECK: usubw.2d
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = sub <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: usubw2_8h:
+;CHECK: usubw2.8h
+ %tmp1 = load <8 x i16>* %A
+
+ %tmp2 = load <16 x i8>* %B
+ %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %ext2 = zext <8 x i8> %high2 to <8 x i16>
+
+ %res = sub <8 x i16> %tmp1, %ext2
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: usubw2_4s:
+;CHECK: usubw2.4s
+ %tmp1 = load <4 x i32>* %A
+
+ %tmp2 = load <8 x i16>* %B
+ %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext2 = zext <4 x i16> %high2 to <4 x i32>
+
+ %res = sub <4 x i32> %tmp1, %ext2
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: usubw2_2d:
+;CHECK: usubw2.2d
+ %tmp1 = load <2 x i64>* %A
+
+ %tmp2 = load <4 x i32>* %B
+ %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %ext2 = zext <2 x i32> %high2 to <2 x i64>
+
+ %res = sub <2 x i64> %tmp1, %ext2
+ ret <2 x i64> %res
+}
diff --git a/test/CodeGen/ARM64/weak-reference.ll b/test/CodeGen/AArch64/arm64-weak-reference.ll
index b2135e0..b2135e0 100644
--- a/test/CodeGen/ARM64/weak-reference.ll
+++ b/test/CodeGen/AArch64/arm64-weak-reference.ll
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
new file mode 100644
index 0000000..6cffbde
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -0,0 +1,524 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+;
+; Get the actual value of the overflow bit.
+;
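+; Each with.overflow intrinsic returns a {value, i1} pair. For add/sub the
+; i1 maps directly onto the condition codes set by adds/subs (vs for signed
+; overflow, hs/lo for unsigned carry/borrow) and is read back with cset;
+; the multiply forms instead compare the high part of the full product
+; against the sign- or zero-extension of the low part.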
+define i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: saddo.i32
+; CHECK: adds w8, w0, w1
+; CHECK-NEXT: cset w0, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: saddo.i64
+; CHECK: adds x8, x0, x1
+; CHECK-NEXT: cset w0, vs
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: uaddo.i32
+; CHECK: adds w8, w0, w1
+; CHECK-NEXT: cset w0, hs
+ %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: uaddo.i64
+; CHECK: adds x8, x0, x1
+; CHECK-NEXT: cset w0, hs
+ %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: ssubo.i32
+; CHECK: subs w8, w0, w1
+; CHECK-NEXT: cset w0, vs
+ %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: ssubo.i64
+; CHECK: subs x8, x0, x1
+; CHECK-NEXT: cset w0, vs
+ %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: usubo.i32
+; CHECK: subs w8, w0, w1
+; CHECK-NEXT: cset w0, lo
+ %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: usubo.i64
+; CHECK: subs x8, x0, x1
+; CHECK-NEXT: cset w0, lo
+ %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: smulo.i32
+; CHECK: smull x8, w0, w1
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: cmp w9, w8, asr #31
+; CHECK-NEXT: cset w0, ne
+ %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: smulo.i64
+; CHECK: mul x8, x0, x1
+; CHECK-NEXT: smulh x9, x0, x1
+; CHECK-NEXT: cmp x9, x8, asr #63
+; CHECK-NEXT: cset w0, ne
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+define i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL: umulo.i32
+; CHECK: umull x8, w0, w1
+; CHECK-NEXT: cmp xzr, x8, lsr #32
+; CHECK-NEXT: cset w0, ne
+ %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ store i32 %val, i32* %res
+ ret i1 %obit
+}
+
+define i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: umulo.i64
+; CHECK: umulh x8, x0, x1
+; CHECK-NEXT: cmp xzr, x8
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: mul x9, x0, x1
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ store i64 %val, i64* %res
+ ret i1 %obit
+}
+
+
+;
+; Check the use of the overflow bit in combination with a select instruction.
+;
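+; When the only user of the overflow bit is a select, the flag-setting
+; cmn/cmp feeds csel directly and no intermediate cset is materialized.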
+define i32 @saddo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: saddo.select.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: csel w0, w0, w1, vs
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = select i1 %obit, i32 %v1, i32 %v2
+ ret i32 %ret
+}
+
+define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: saddo.select.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: csel x0, x0, x1, vs
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: uaddo.select.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: csel w0, w0, w1, hs
+ %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = select i1 %obit, i32 %v1, i32 %v2
+ ret i32 %ret
+}
+
+define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: uaddo.select.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: csel x0, x0, x1, hs
+ %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+define i32 @ssubo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: ssubo.select.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csel w0, w0, w1, vs
+ %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = select i1 %obit, i32 %v1, i32 %v2
+ ret i32 %ret
+}
+
+define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: ssubo.select.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csel x0, x0, x1, vs
+ %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: usubo.select.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csel w0, w0, w1, lo
+ %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = select i1 %obit, i32 %v1, i32 %v2
+ ret i32 %ret
+}
+
+define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: usubo.select.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csel x0, x0, x1, lo
+ %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: smulo.select.i32
+; CHECK: smull x8, w0, w1
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: cmp w9, w8, asr #31
+; CHECK-NEXT: csel w0, w0, w1, ne
+ %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = select i1 %obit, i32 %v1, i32 %v2
+ ret i32 %ret
+}
+
+define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: smulo.select.i64
+; CHECK: mul x8, x0, x1
+; CHECK-NEXT: smulh x9, x0, x1
+; CHECK-NEXT: cmp x9, x8, asr #63
+; CHECK-NEXT: csel x0, x0, x1, ne
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: umulo.select.i32
+; CHECK: umull x8, w0, w1
+; CHECK-NEXT: cmp xzr, x8, lsr #32
+; CHECK-NEXT: csel w0, w0, w1, ne
+ %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+ %obit = extractvalue {i32, i1} %t, 1
+ %ret = select i1 %obit, i32 %v1, i32 %v2
+ ret i32 %ret
+}
+
+define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: umulo.select.i64
+; CHECK: umulh x8, x0, x1
+; CHECK-NEXT: cmp xzr, x8
+; CHECK-NEXT: csel x0, x0, x1, ne
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+
+;
+; Check the use of the overflow bit in combination with a branch instruction.
+;
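+; When the only user is a branch, the flags feed a conditional branch on
+; the inverted condition (b.vc, b.lo, b.hs, b.eq), or a cbz of the umulh
+; result in the umulo.i64 case.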
+define i1 @saddo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: saddo.br.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: b.vc
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @saddo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: saddo.br.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: b.vc
+ %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: uaddo.br.i32
+; CHECK: cmn w0, w1
+; CHECK-NEXT: b.lo
+ %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: uaddo.br.i64
+; CHECK: cmn x0, x1
+; CHECK-NEXT: b.lo
+ %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @ssubo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: ssubo.br.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: b.vc
+ %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: ssubo.br.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: b.vc
+ %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @usubo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: usubo.br.i32
+; CHECK: cmp w0, w1
+; CHECK-NEXT: b.hs
+ %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @usubo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: usubo.br.i64
+; CHECK: cmp x0, x1
+; CHECK-NEXT: b.hs
+ %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @smulo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: smulo.br.i32
+; CHECK: smull x8, w0, w1
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: cmp w9, w8, asr #31
+; CHECK-NEXT: b.eq
+ %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @smulo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: smulo.br.i64
+; CHECK: mul x8, x0, x1
+; CHECK-NEXT: smulh x9, x0, x1
+; CHECK-NEXT: cmp x9, x8, asr #63
+; CHECK-NEXT: b.eq
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @umulo.br.i32(i32 %v1, i32 %v2) {
+entry:
+; CHECK-LABEL: umulo.br.i32
+; CHECK: umull x8, w0, w1
+; CHECK-NEXT: cmp xzr, x8, lsr #32
+; CHECK-NEXT: b.eq
+ %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+ %val = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+define i1 @umulo.br.i64(i64 %v1, i64 %v2) {
+entry:
+; CHECK-LABEL: umulo.br.i64
+; CHECK: umulh x8, x0, x1
+; CHECK-NEXT: cbz
+ %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
+ %val = extractvalue {i64, i1} %t, 0
+ %obit = extractvalue {i64, i1} %t, 1
+ br i1 %obit, label %overflow, label %continue
+
+overflow:
+ ret i1 false
+
+continue:
+ ret i1 true
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+
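For reference, the pattern every function in this file exercises: each *.with.overflow intrinsic returns a {value, overflow-bit} struct, the two extractvalues split it, and when the i1 only feeds a branch the backend folds the whole sequence into one flag-setting instruction plus a conditional branch (the b.vc/b.hs checks above). A minimal sketch of a typical use, not taken from the patch (the function name and the saturating fallback are invented for illustration):

declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)

define i32 @saturating_add(i32 %a, i32 %b) {
entry:
  %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %sum = extractvalue {i32, i1} %res, 0    ; the arithmetic result
  %ovf = extractvalue {i32, i1} %res, 1    ; the overflow flag
  br i1 %ovf, label %sat, label %done      ; should fold to adds + b.vs

sat:                                       ; clamp to INT32_MAX on overflow
  ret i32 2147483647

done:
  ret i32 %sum
}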
diff --git a/test/CodeGen/ARM64/zero-cycle-regmov.ll b/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll
index c56d607..c56d607 100644
--- a/test/CodeGen/ARM64/zero-cycle-regmov.ll
+++ b/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll
diff --git a/test/CodeGen/ARM64/zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index 349bb6f..349bb6f 100644
--- a/test/CodeGen/ARM64/zero-cycle-zeroing.ll
+++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
diff --git a/test/CodeGen/ARM64/zext.ll b/test/CodeGen/AArch64/arm64-zext.ll
index 8d9e5ea..8d9e5ea 100644
--- a/test/CodeGen/ARM64/zext.ll
+++ b/test/CodeGen/AArch64/arm64-zext.ll
diff --git a/test/CodeGen/ARM64/zextload-unscaled.ll b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
index c475dbd..c475dbd 100644
--- a/test/CodeGen/ARM64/zextload-unscaled.ll
+++ b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
diff --git a/test/CodeGen/AArch64/arm64-zip.ll b/test/CodeGen/AArch64/arm64-zip.ll
new file mode 100644
index 0000000..304b280
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-zip.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vzipi8:
+;CHECK: zip1.8b
+;CHECK: zip2.8b
+;CHECK-NEXT: add.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: vzipi16:
+;CHECK: zip1.4h
+;CHECK: zip2.4h
+;CHECK-NEXT: add.4h
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: vzipQi8:
+;CHECK: zip1.16b
+;CHECK: zip2.16b
+;CHECK-NEXT: add.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: vzipQi16:
+;CHECK: zip1.8h
+;CHECK: zip2.8h
+;CHECK-NEXT: add.8h
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: vzipQi32:
+;CHECK: zip1.4s
+;CHECK: zip2.4s
+;CHECK-NEXT: add.4s
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK-LABEL: vzipQf:
+;CHECK: zip1.4s
+;CHECK: zip2.4s
+;CHECK-NEXT: fadd.4s
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = fadd <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK-LABEL: vzipi8_undef:
+;CHECK: zip1.8b
+;CHECK: zip2.8b
+;CHECK-NEXT: add.8b
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LABEL: vzipQi8_undef:
+;CHECK: zip1.16b
+;CHECK: zip2.16b
+;CHECK-NEXT: add.16b
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
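The shufflevector masks in the new file above are exactly what the AArch64 backend matches to zip instructions: zip1 interleaves the low halves of its two inputs, zip2 the high halves, and (as the *_undef functions check) undef lanes may match anything. A minimal sketch of the zip1-only half, assuming the same llc invocation as the file above (the function name is invented):

define <8 x i8> @zip1_only(<8 x i8> %a, <8 x i8> %b) {
; Interleaving the low halves (a0,b0,a1,b1,...) should lower to one zip1.8b.
  %z = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i8> %z
}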
diff --git a/test/CodeGen/AArch64/asm-large-immediate.ll b/test/CodeGen/AArch64/asm-large-immediate.ll
new file mode 100644
index 0000000..05e4ddd
--- /dev/null
+++ b/test/CodeGen/AArch64/asm-large-immediate.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=aarch64 -no-integrated-as < %s | FileCheck %s
+
+define void @test() {
+entry:
+; CHECK: /* result: 68719476738 */
+ tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
+; CHECK: /* result: -68719476738 */
+ tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 )
+ ret void
+}
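The two operand modifiers exercised here are generic LLVM inline-asm template modifiers rather than AArch64-specific ones: ${0:c} prints an immediate operand without target punctuation (no leading '#') and ${0:n} prints its negation. A minimal sketch with an invented constant (clobber list dropped for brevity):

define void @print_imm() {
; Should emit the comment "/* 42 vs -42 */" into the assembly output.
  call void asm sideeffect "/* ${0:c} vs ${0:n} */", "i"(i32 42)
  ret void
}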
diff --git a/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/test/CodeGen/AArch64/assertion-rc-mismatch.ll
index 02b0c0e..bcf206e 100644
--- a/test/CodeGen/AArch64/assertion-rc-mismatch.ll
+++ b/test/CodeGen/AArch64/assertion-rc-mismatch.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
; Test case related to <rdar://problem/15633429>.
; CHECK-LABEL: small
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 5fe2936..58b5d1d 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,5 +1,11 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
+
+
+; The point of CHECK-REG is to make sure no UNPREDICTABLE instructions are
+; created (i.e. ones reusing a register for both status & data in a store-exclusive).
+; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], w[[NEW]], [x{{[0-9]+}}]
+; CHECK-REG-NOT: stlxrb w[[NEW:[0-9]+]], x[[NEW]], [x{{[0-9]+}}]
@var8 = global i8 0
@var16 = global i16 0
@@ -11,20 +17,18 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
%old = atomicrmw add i8* @var8, i8 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -33,20 +37,18 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
%old = atomicrmw add i16* @var16, i16 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -55,20 +57,18 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
%old = atomicrmw add i32* @var32, i32 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -77,15 +77,13 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
%old = atomicrmw add i64* @var64, i64 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-REG: add x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
-; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -99,20 +97,18 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
%old = atomicrmw sub i8* @var8, i8 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -121,20 +117,18 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
%old = atomicrmw sub i16* @var16, i16 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -143,20 +137,18 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
%old = atomicrmw sub i32* @var32, i32 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -165,15 +157,13 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
%old = atomicrmw sub i64* @var64, i64 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-REG: sub x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
-; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -187,20 +177,18 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
%old = atomicrmw and i8* @var8, i8 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -209,20 +197,18 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
%old = atomicrmw and i16* @var16, i16 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -231,20 +217,18 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
%old = atomicrmw and i32* @var32, i32 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -253,15 +237,13 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
%old = atomicrmw and i64* @var64, i64 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-REG: and x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
-; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -275,20 +257,18 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
%old = atomicrmw or i8* @var8, i8 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -297,20 +277,18 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
%old = atomicrmw or i16* @var16, i16 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -319,20 +297,18 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
%old = atomicrmw or i32* @var32, i32 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -341,15 +317,13 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
%old = atomicrmw or i64* @var64, i64 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-REG: orr x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
-; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -363,20 +337,18 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
%old = atomicrmw xor i8* @var8, i8 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -385,20 +357,18 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
%old = atomicrmw xor i16* @var16, i16 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -407,20 +377,18 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
%old = atomicrmw xor i32* @var32, i32 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -429,15 +397,13 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
%old = atomicrmw xor i64* @var64, i64 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-REG: eor x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
-; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -451,18 +417,17 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
%old = atomicrmw xchg i8* @var8, i8 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-REG-NOT: stxrb w0, w0, [x{{[0-9]+}}]
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -471,18 +436,17 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
%old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-REG-NOT: stlxrh w0, w0, [x{{[0-9]+}}]
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -491,18 +455,17 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
%old = atomicrmw xchg i32* @var32, i32 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-REG-NOT: stlxr w0, w0, [x{{[0-9]+}}]
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -511,13 +474,12 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
%old = atomicrmw xchg i64* @var64, i64 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-REG-NOT: stxr w0, x0, [x{{[0-9]+}}]
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -532,21 +494,22 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
%old = atomicrmw min i8* @var8, i8 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
-; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
+
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -555,21 +518,23 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
%old = atomicrmw min i16* @var16, i16 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], sxth
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
-; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
+
+
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -578,21 +543,22 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
%old = atomicrmw min i32* @var32, i32 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]]
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
-; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le
+
+
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -601,16 +567,17 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
%old = atomicrmw min i64* @var64, i64 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp x0, x[[OLD]]
-; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
-; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, gt
-; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le
+
+
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -624,21 +591,23 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
%old = atomicrmw max i8* @var8, i8 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
-; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+
+
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -647,21 +616,23 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
%old = atomicrmw max i16* @var16, i16 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], sxth
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]]
+; CHECK-NEXT: cmp w[[OLD_EXT]], w0, sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+
+
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -670,21 +641,22 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
%old = atomicrmw max i32* @var32, i32 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]]
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+
+
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -693,16 +665,17 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
%old = atomicrmw max i64* @var64, i64 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp x0, x[[OLD]]
-; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
-; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lt
-; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+
+
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -716,21 +689,22 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
%old = atomicrmw umin i8* @var8, i8 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0, uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
+
+
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -739,21 +713,22 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
%old = atomicrmw umin i16* @var16, i16 %offset acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], uxth
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0, uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
+
+
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -762,21 +737,22 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
%old = atomicrmw umin i32* @var32, i32 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]]
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls
+
+
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -785,16 +761,17 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
%old = atomicrmw umin i64* @var64, i64 %offset acq_rel
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp x0, x[[OLD]]
-; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
-; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, hi
-; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls
+
+
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -808,21 +785,22 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
%old = atomicrmw umax i8* @var8, i8 %offset acq_rel
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
-; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0, uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+
+
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -831,21 +809,22 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
%old = atomicrmw umax i16* @var16, i16 %offset monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]], uxth
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
-; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0, uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+
+
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -854,21 +833,22 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
%old = atomicrmw umax i32* @var32, i32 %offset seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp w0, w[[OLD]]
-; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
-; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+
+
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -877,16 +857,17 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
%old = atomicrmw umax i64* @var64, i64 %offset release
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; x0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp x0, x[[OLD]]
-; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
-; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lo
-; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
+
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+
+
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -900,21 +881,20 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
%old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
-; CHECK-REG-NOT: stxrb w1, w1, [x{{[0-9]+}}]
; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i8 %old
}
@@ -923,21 +903,20 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
%old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
-; CHECK-REG-NOT: stlxrh w1, w1, [x{{[0-9]+}}]
; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i16 %old
}
@@ -946,45 +925,44 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
%old = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp w[[OLD]], w0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
-; CHECK-REG-NOT: stlxr w1, w1, [x{{[0-9]+}}]
; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
-define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i64:
%old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
; w0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp x[[OLD]], x0
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; As above, w1 is a reasonable guess.
-; CHECK-REG-NOT: stxr w1, x1, [x{{[0-9]+}}]
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
; CHECK-NOT: dmb
-; CHECK: mov x0, x[[OLD]]
- ret i64 %old
+; CHECK: str x[[OLD]],
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_monotonic_i8() nounwind {
@@ -992,7 +970,7 @@ define i8 @test_atomic_load_monotonic_i8() nounwind {
%val = load atomic i8* @var8 monotonic, align 1
; CHECK-NOT: dmb
; CHECK: adrp x[[HIADDR:[0-9]+]], var8
-; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK: ldrb w0, [x[[HIADDR]], {{#?}}:lo12:var8]
; CHECK-NOT: dmb
ret i8 %val
@@ -1017,7 +995,7 @@ define i8 @test_atomic_load_acquire_i8() nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK-NOT: dmb
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK-NOT: dmb
; CHECK: ldarb w0, [x[[ADDR]]]
; CHECK-NOT: dmb
@@ -1030,7 +1008,7 @@ define i8 @test_atomic_load_seq_cst_i8() nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var8
; CHECK-NOT: dmb
-; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var8
; CHECK-NOT: dmb
; CHECK: ldarb w0, [x[[ADDR]]]
; CHECK-NOT: dmb
@@ -1043,7 +1021,7 @@ define i16 @test_atomic_load_monotonic_i16() nounwind {
; CHECK-NOT: dmb
; CHECK: adrp x[[HIADDR:[0-9]+]], var16
; CHECK-NOT: dmb
-; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
+; CHECK: ldrh w0, [x[[HIADDR]], {{#?}}:lo12:var16]
; CHECK-NOT: dmb
ret i16 %val
@@ -1068,7 +1046,7 @@ define i64 @test_atomic_load_seq_cst_i64() nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var64
; CHECK-NOT: dmb
-; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var64
; CHECK-NOT: dmb
; CHECK: ldar x0, [x[[ADDR]]]
; CHECK-NOT: dmb
@@ -1079,7 +1057,7 @@ define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
; CHECK-LABEL: test_atomic_store_monotonic_i8:
store atomic i8 %val, i8* @var8 monotonic, align 1
; CHECK: adrp x[[HIADDR:[0-9]+]], var8
-; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK: strb w0, [x[[HIADDR]], {{#?}}:lo12:var8]
ret void
}
@@ -1101,7 +1079,7 @@ define void @test_atomic_store_release_i8(i8 %val) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var8
; CHECK-NOT: dmb
-; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var8
; CHECK-NOT: dmb
; CHECK: stlrb w0, [x[[ADDR]]]
; CHECK-NOT: dmb
@@ -1114,7 +1092,7 @@ define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var8
; CHECK-NOT: dmb
-; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var8
; CHECK-NOT: dmb
; CHECK: stlrb w0, [x[[ADDR]]]
; CHECK-NOT: dmb
@@ -1128,7 +1106,7 @@ define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp x[[HIADDR:[0-9]+]], var16
; CHECK-NOT: dmb
-; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
+; CHECK: strh w0, [x[[HIADDR]], {{#?}}:lo12:var16]
; CHECK-NOT: dmb
ret void
}
@@ -1153,7 +1131,7 @@ define void @test_atomic_store_release_i64(i64 %val) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var64
; CHECK-NOT: dmb
-; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], {{#?}}:lo12:var64
; CHECK-NOT: dmb
; CHECK: stlr x0, [x[[ADDR]]]
; CHECK-NOT: dmb
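Background on the CHECK-REG-NOT patterns that this file's edits consolidate at the top: the status operand of a store-exclusive must be a register distinct from the data operand, otherwise the instruction is UNPREDICTABLE, so the test verifies the register allocator never reuses one register for both. A hedged sketch of the expansion shape in the same test-file idiom (the global name is invented and the register numbers in the comment are illustrative, not actual llc output):

@var8.sketch = global i8 0

define i8 @ll_sc_shape(i8 %offset) nounwind {
; atomicrmw expands to a load-exclusive/store-exclusive retry loop, roughly:
;   .LBB0_1:
;     ldaxrb  w8,  [x9]        // old value
;     add     w10, w8, w0      // new value in a fresh register
;     stlxrb  w11, w10, [x9]   // status w11 must differ from data w10
;     cbnz    w11, .LBB0_1     // retry if the exclusive store failed
  %old = atomicrmw add i8* @var8.sketch, i8 %offset seq_cst
  ret i8 %old
}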
diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll
index 682b7ba..62d41bc 100644
--- a/test/CodeGen/AArch64/basic-pic.ll
+++ b/test/CodeGen/AArch64/basic-pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
@var = global i32 0
@@ -7,7 +7,7 @@ define i32 @get_globalvar() {
%val = load i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
-; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var]
+; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], {{#?}}:got_lo12:var]
; CHECK: ldr w0, [x[[GOTLOC]]]
ret i32 %val
@@ -18,7 +18,7 @@ define i32* @get_globalvaraddr() {
%val = load i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
-; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var]
+; CHECK: ldr x0, [x[[GOTHI]], {{#?}}:got_lo12:var]
ret i32* @var
}
@@ -30,7 +30,7 @@ define i32 @get_hiddenvar() {
%val = load i32* @hiddenvar
; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
-; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar]
+; CHECK: ldr w0, [x[[HI]], {{#?}}:lo12:hiddenvar]
ret i32 %val
}
@@ -40,7 +40,7 @@ define i32* @get_hiddenvaraddr() {
%val = load i32* @hiddenvar
; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
-; CHECK: add x0, [[HI]], #:lo12:hiddenvar
+; CHECK: add x0, [[HI]], {{#?}}:lo12:hiddenvar
ret i32* @hiddenvar
}
@@ -50,5 +50,5 @@ define void()* @get_func() {
ret void()* bitcast(void()*()* @get_func to void()*)
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
-; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func]
+; CHECK: ldr x0, [x[[GOTHI]], {{#?}}:got_lo12:get_func]
}
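The {{#?}} substitution that recurs throughout this patch is a FileCheck regex making the '#' before a relocation specifier optional, so one pattern accepts both the old "#:lo12:" spelling and the bare ":lo12:" form printed after the ARM64 merge. A minimal sketch of the adrp/add idiom being checked (symbol and function names invented):

@somevar = global i32 0

define i32* @addr_of_somevar() {
; adrp materialises the 4KB page address of the symbol; the add supplies
; the low 12 bits. {{#?}} tolerates either immediate spelling:
; CHECK: adrp [[HI:x[0-9]+]], somevar
; CHECK: add x0, [[HI]], {{#?}}:lo12:somevar
  ret i32* @somevar
}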
diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll
index 37a18b7..da0ed8a 100644
--- a/test/CodeGen/AArch64/bitfield-insert-0.ll
+++ b/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s
; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one
; point, in the edge case where lsb = 0. Just make sure.
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
index 1f04608..2369a55 100644
--- a/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK
; First, a simple example from Clang. The registers could plausibly be
; different, but probably won't be.
@@ -7,8 +7,7 @@
define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
; CHECK-LABEL: from_clang:
-; CHECK: bfi w0, w1, #3, #4
-; CHECK-NEXT: ret
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
entry:
%f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
@@ -26,6 +25,7 @@ entry:
define void @test_whole32(i32* %existing, i32* %new) {
; CHECK-LABEL: test_whole32:
+
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
%oldval = load volatile i32* %existing
@@ -62,8 +62,10 @@ define void @test_whole64(i64* %existing, i64* %new) {
define void @test_whole32_from64(i64* %existing, i64* %new) {
; CHECK-LABEL: test_whole32_from64:
-; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16
-; CHECK-NOT: and
+
+
+; CHECK: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16
+
; CHECK: ret
%oldval = load volatile i64* %existing
@@ -80,8 +82,9 @@ define void @test_whole32_from64(i64* %existing, i64* %new) {
define void @test_32bit_masked(i32 *%existing, i32 *%new) {
; CHECK-LABEL: test_32bit_masked:
+
+; CHECK: and
; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
-; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff
%oldval = load volatile i32* %existing
%oldval_keep = and i32 %oldval, 135 ; = 0x87
@@ -98,8 +101,8 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) {
define void @test_64bit_masked(i64 *%existing, i64 *%new) {
; CHECK-LABEL: test_64bit_masked:
+; CHECK: and
; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
-; CHECK: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000
%oldval = load volatile i64* %existing
%oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000
@@ -117,8 +120,9 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) {
; Mask is too complicated for literal ANDwwi, make sure other avenues are tried.
define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
; CHECK-LABEL: test_32bit_complexmask:
+
+; CHECK: and
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
-; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
%oldval = load volatile i32* %existing
%oldval_keep = and i32 %oldval, 647 ; = 0x287
@@ -137,6 +141,7 @@ define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
; CHECK-LABEL: test_32bit_badmask:
; CHECK-NOT: bfi
+; CHECK-NOT: bfm
; CHECK: ret
%oldval = load volatile i32* %existing
@@ -156,6 +161,7 @@ define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
; CHECK-LABEL: test_64bit_badmask:
; CHECK-NOT: bfi
+; CHECK-NOT: bfm
; CHECK: ret
%oldval = load volatile i64* %existing
@@ -186,8 +192,7 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) {
%combined = or i32 %oldval_keep, %newval_masked
store volatile i32 %combined, i32* %existing
; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14
-; CHECK: bfi {{w[0-9]}}, [[BIT]], #26, #5
+; CHECK: bfi {{w[0-9]+}}, [[BIT]], #26, #5
ret void
}
-
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
index 1c84f5d..0e12653 100644
--- a/test/CodeGen/AArch64/bitfield.ll
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -1,5 +1,4 @@
-
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
@var32 = global i32 0
@var64 = global i64 0
@@ -24,7 +23,7 @@ define void @test_extendb(i8 %var) {
%uxt64 = zext i8 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}}
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
ret void
}
@@ -48,7 +47,7 @@ define void @test_extendh(i16 %var) {
%uxt64 = zext i16 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}}
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
ret void
}
@@ -61,7 +60,7 @@ define void @test_extendw(i32 %var) {
%uxt64 = zext i32 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32
+; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32
ret void
}
@@ -190,7 +189,6 @@ define i32 @test_ubfx32(i32* %addr) {
define i64 @test_ubfx64(i64* %addr) {
; CHECK-LABEL: test_ubfx64:
; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
-
%fields = load i64* %addr
%shifted = lshr i64 %fields, 25
%masked = and i64 %shifted, 1023
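
The uxtb/uxth checks above were replaced because the merged backend selects zero-extension to 64 bits as a 64-bit AND with an immediate mask rather than printing an extend mnemonic. A reduced sketch of the new expectation (function name illustrative):

    ; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
    define i64 @zext_byte(i8 %v) {
    ; CHECK-LABEL: zext_byte:
    ; Only the low 8 bits of the incoming register are defined, so the
    ; zext becomes a mask of the full 64-bit register.
    ; CHECK: and x0, {{x[0-9]+}}, #0xff
      %ext = zext i8 %v to i64
      ret i64 %ext
    }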
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index 8cda431..1eec4cc 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -9,7 +9,7 @@ define void @test_blockaddress() {
%val = load volatile i8** @addr
indirectbr i8* %val, [label %block]
; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
-; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]]
+; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], {{#?}}:lo12:[[DEST_LBL]]
; CHECK: str [[DEST]],
; CHECK: ldr [[NEWDEST:x[0-9]+]]
; CHECK: br [[NEWDEST]]
diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll
index 5c7640b..881aeaa 100644
--- a/test/CodeGen/AArch64/bool-loads.ll
+++ b/test/CodeGen/AArch64/bool-loads.ll
@@ -1,54 +1,54 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
@var = global i1 0
define i32 @test_sextloadi32() {
-; CHECK: test_sextloadi32
+; CHECK-LABEL: test_sextloadi32
%val = load i1* @var
%ret = sext i1 %val to i32
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
-; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
+; CHECK: {{sbfx x[0-9]+, x[0-9]+, #0, #1|sbfx w[0-9]+, w[0-9]+, #0, #1}}
ret i32 %ret
; CHECK: ret
}
define i64 @test_sextloadi64() {
-; CHECK: test_sextloadi64
+; CHECK-LABEL: test_sextloadi64
%val = load i1* @var
%ret = sext i1 %val to i64
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
-; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
+; CHECK: {{sbfx x[0-9]+, x[0-9]+, #0, #1}}
ret i64 %ret
; CHECK: ret
}
define i32 @test_zextloadi32() {
-; CHECK: test_zextloadi32
+; CHECK-LABEL: test_zextloadi32
; It's not actually necessary that "ret" is next, but as far as LLVM
; is concerned only 0 or 1 should be loadable so no extension is
; necessary.
%val = load i1* @var
%ret = zext i1 %val to i32
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
ret i32 %ret
; CHECK-NEXT: ret
}
define i64 @test_zextloadi64() {
-; CHECK: test_zextloadi64
+; CHECK-LABEL: test_zextloadi64
; It's not actually necessary that "ret" is next, but as far as LLVM
; is concerned only 0 or 1 should be loadable so no extension is
; necessary.
%val = load i1* @var
%ret = zext i1 %val to i64
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
ret i64 %ret
; CHECK-NEXT: ret
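
Switching these function-name checks from plain CHECK to CHECK-LABEL is more than cosmetic: FileCheck uses CHECK-LABEL directives to split the output into disjoint blocks, so a pattern can only match inside the function it belongs to, and a failure in one function cannot be papered over by similar text from another. A sketch of the difference (function bodies are illustrative):

    ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
    define i32 @first(i32 %a, i32 %b) {
    ; CHECK-LABEL: first:
    ; CHECK: add w0, {{w[0-9]+}}, {{w[0-9]+}}
      %sum = add i32 %a, %b
      ret i32 %sum
    }

    define i32 @second(i32 %a, i32 %b) {
    ; CHECK-LABEL: second:
    ; With plain CHECK, this could erroneously match an instruction emitted
    ; for @first; CHECK-LABEL confines it to @second's output.
    ; CHECK: sub w0, w0, w1
      %diff = sub i32 %a, %b
      ret i32 %diff
    }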
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
index 1ed5b9b..591f483 100644
--- a/test/CodeGen/AArch64/breg.ll
+++ b/test/CodeGen/AArch64/breg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
@stored_label = global i8* null
@@ -7,7 +7,7 @@ define void @foo() {
%lab = load i8** @stored_label
indirectbr i8* %lab, [label %otherlab, label %retlab]
; CHECK: adrp {{x[0-9]+}}, stored_label
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:stored_label]
; CHECK: br {{x[0-9]+}}
otherlab:
diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll
index 52243b0..046e6ce 100644
--- a/test/CodeGen/AArch64/callee-save.ll
+++ b/test/CodeGen/AArch64/callee-save.ll
@@ -1,14 +1,14 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
@var = global float 0.0
define void @foo() {
; CHECK-LABEL: foo:
-; CHECK: stp d14, d15, [sp
-; CHECK: stp d12, d13, [sp
-; CHECK: stp d10, d11, [sp
-; CHECK: stp d8, d9, [sp
+; CHECK: stp d15, d14, [sp
+; CHECK: stp d13, d12, [sp
+; CHECK: stp d11, d10, [sp
+; CHECK: stp d9, d8, [sp
; Create lots of live variables to exhaust the supply of
; caller-saved registers
@@ -78,9 +78,9 @@ define void @foo() {
store volatile float %val31, float* @var
store volatile float %val32, float* @var
-; CHECK: ldp d8, d9, [sp
-; CHECK: ldp d10, d11, [sp
-; CHECK: ldp d12, d13, [sp
-; CHECK: ldp d14, d15, [sp
+; CHECK: ldp d9, d8, [sp
+; CHECK: ldp d11, d10, [sp
+; CHECK: ldp d13, d12, [sp
+; CHECK: ldp d15, d14, [sp
ret void
}
diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll
index b387f28..ca92500 100644
--- a/test/CodeGen/AArch64/code-model-large-abs.ll
+++ b/test/CodeGen/AArch64/code-model-large-abs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -o - %s | FileCheck %s
@var8 = global i8 0
@var16 = global i16 0
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
index 75efd9d..a1a87cf 100644
--- a/test/CodeGen/AArch64/compare-branch.ll
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
@var32 = global i32 0
@var64 = global i64 0
diff --git a/test/CodeGen/AArch64/concatvector-bugs.ll b/test/CodeGen/AArch64/concatvector-bugs.ll
deleted file mode 100644
index 5889e22..0000000
--- a/test/CodeGen/AArch64/concatvector-bugs.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon
-; Bug: an i8 type in an FPR8 register, but not registered with the register class, caused a segmentation fault.
-; Fix: Removed i8 type from FPR8 register class.
-
-define void @test_concatvector_v8i8() {
-entry.split:
- br i1 undef, label %if.then, label %if.end
-
-if.then: ; preds = %entry.split
- unreachable
-
-if.end: ; preds = %entry.split
- br i1 undef, label %if.then9, label %if.end18
-
-if.then9: ; preds = %if.end
- unreachable
-
-if.end18: ; preds = %if.end
- br label %for.body
-
-for.body: ; preds = %for.inc, %if.end18
- br i1 false, label %if.then30, label %for.inc
-
-if.then30: ; preds = %for.body
- unreachable
-
-for.inc: ; preds = %for.body
- br i1 undef, label %for.end, label %for.body
-
-for.end: ; preds = %for.inc
- br label %for.body77
-
-for.body77: ; preds = %for.body77, %for.end
- br i1 undef, label %for.end106, label %for.body77
-
-for.end106: ; preds = %for.body77
- br i1 undef, label %for.body130.us.us, label %stmt.for.body130.us.us
-
-stmt.for.body130.us.us: ; preds = %stmt.for.body130.us.us, %for.end106
- %_p_splat.us = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
- store <8 x i8> %_p_splat.us, <8 x i8>* undef, align 1
- br label %stmt.for.body130.us.us
-
-for.body130.us.us: ; preds = %for.body130.us.us, %for.end106
- br label %for.body130.us.us
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
-
-define <8 x i16> @test_splat(i32 %l) nounwind {
-; CHECK-LABEL: test_splat:
-; CHECK: ret
- %lhs = insertelement <1 x i32> undef, i32 %l, i32 0
- %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
- %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %vec
-}
-
-
-define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind {
-; CHECK-LABEL: test_notsplat:
-; CHECK: ret
-entry:
- %lhs = insertelement <1 x i32> undef, i32 %l, i32 0
- %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
- %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>
- ret <8 x i16> %vec
-}
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
index 9c1dfeb..5f81cba 100644
--- a/test/CodeGen/AArch64/cond-sel.ll
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -1,25 +1,25 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
@var32 = global i32 0
@var64 = global i64 0
-define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
; CHECK-LABEL: test_csel:
%tst1 = icmp ugt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, i32 42, i32 52
store i32 %val1, i32* @var32
-; CHECK-DAG: movz [[W52:w[0-9]+]], #52
-; CHECK-DAG: movz [[W42:w[0-9]+]], #42
+; CHECK-DAG: movz [[W52:w[0-9]+]], #{{52|0x34}}
+; CHECK-DAG: movz [[W42:w[0-9]+]], #{{42|0x2a}}
; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi
%rhs64 = sext i32 %rhs32 to i64
%tst2 = icmp sle i64 %lhs64, %rhs64
%val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64
store i64 %val2, i64* @var64
-; CHECK-DAG: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw
-; CHECK-DAG: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]]
+; CHECK: sxtw [[EXT_RHS:x[0-9]+]], {{[wx]}}[[RHS:[0-9]+]]
+; CHECK: cmp [[LHS:x[0-9]+]], w[[RHS]], sxtw
; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le
ret void
@@ -34,8 +34,8 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r
; CHECK-NOFP-NOT: fcmp
%val1 = select i1 %tst1, i32 42, i32 52
store i32 %val1, i32* @var32
-; CHECK: movz [[W52:w[0-9]+]], #52
-; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: movz [[W52:w[0-9]+]], #{{52|0x34}}
+; CHECK: movz [[W42:w[0-9]+]], #{{42|0x2a}}
; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi
; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt
@@ -45,17 +45,17 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r
; CHECK-NOFP-NOT: fcmp
%val2 = select i1 %tst2, i64 9, i64 15
store i64 %val2, i64* @var64
-; CHECK: movz [[CONST15:x[0-9]+]], #15
-; CHECK: movz [[CONST9:x[0-9]+]], #9
-; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq
-; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs
+; CHECK: orr w[[CONST15:[0-9]+]], wzr, #0xf
+; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}}
+; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq
+; CHECK: csel {{x[0-9]+}}, x[[CONST9]], [[MAYBETRUE]], vs
ret void
; CHECK: ret
}
-define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
; CHECK-LABEL: test_csinc:
; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
@@ -95,7 +95,7 @@ define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
; CHECK: ret
}
-define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
; CHECK-LABEL: test_csinv:
; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
@@ -135,7 +135,7 @@ define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
; CHECK: ret
}
-define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) minsize {
; CHECK-LABEL: test_csneg:
; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
@@ -184,13 +184,13 @@ define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) {
%val1 = zext i1 %tst1 to i32
store i32 %val1, i32* @var32
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ne
+; CHECK: cset {{w[0-9]+}}, eq
%rhs64 = sext i32 %rhs to i64
%tst2 = icmp ule i64 %lhs64, %rhs64
%val2 = zext i1 %tst2 to i64
store i64 %val2, i64* @var64
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, hi
+; CHECK: cset {{w[0-9]+}}, ls
ret void
; CHECK: ret
@@ -203,13 +203,13 @@ define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) {
%val1 = sext i1 %tst1 to i32
store i32 %val1, i32* @var32
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: csinv {{w[0-9]+}}, wzr, wzr, ne
+; CHECK: csetm {{w[0-9]+}}, eq
%rhs64 = sext i32 %rhs to i64
%tst2 = icmp ule i64 %lhs64, %rhs64
%val2 = sext i1 %tst2 to i64
store i64 %val2, i64* @var64
-; CHECK: csinv {{x[0-9]+}}, xzr, xzr, hi
+; CHECK: csetm {{x[0-9]+}}, ls
ret void
; CHECK: ret
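
The #{{52|0x34}} alternations above accept an immediate printed in either decimal or hexadecimal, another point where the two printers diverge. A reduced sketch (function name illustrative):

    ; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
    define i32 @const42() {
    ; CHECK-LABEL: const42:
    ; Accept the immediate in whichever base the printer chooses.
    ; CHECK: movz w0, #{{42|0x2a}}
      ret i32 42
    }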
diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll
index f0b60f0..f0f36bd 100644
--- a/test/CodeGen/AArch64/cpus.ll
+++ b/test/CodeGen/AArch64/cpus.ll
@@ -1,9 +1,10 @@
; This tests that llc accepts all valid AArch64 CPUs
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
+
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
; CHECK-NOT: {{.*}} is not a recognized processor for this target
; INVALID: {{.*}} is not a recognized processor for this target
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
index 12c7b6a..1b51928 100644
--- a/test/CodeGen/AArch64/directcond.ll
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -1,11 +1,10 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
; CHECK-LABEL: test_select_i32:
%val = select i1 %bit, i32 %a, i32 %b
-; CHECK: movz [[ONE:w[0-9]+]], #1
-; CHECK: tst w0, [[ONE]]
+; CHECK: tst w0, #0x1
; CHECK-NEXT: csel w0, w1, w2, ne
ret i32 %val
@@ -14,8 +13,7 @@ define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
; CHECK-LABEL: test_select_i64:
%val = select i1 %bit, i64 %a, i64 %b
-; CHECK: movz [[ONE:w[0-9]+]], #1
-; CHECK: tst w0, [[ONE]]
+; CHECK: tst w0, #0x1
; CHECK-NEXT: csel x0, x1, x2, ne
ret i64 %val
@@ -24,8 +22,7 @@ define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
define float @test_select_float(i1 %bit, float %a, float %b) {
; CHECK-LABEL: test_select_float:
%val = select i1 %bit, float %a, float %b
-; CHECK: movz [[ONE:w[0-9]+]], #1
-; CHECK: tst w0, [[ONE]]
+; CHECK: tst w0, #0x1
; CHECK-NEXT: fcsel s0, s0, s1, ne
; CHECK-NOFP-NOT: fcsel
ret float %val
@@ -34,8 +31,7 @@ define float @test_select_float(i1 %bit, float %a, float %b) {
define double @test_select_double(i1 %bit, double %a, double %b) {
; CHECK-LABEL: test_select_double:
%val = select i1 %bit, double %a, double %b
-; CHECK: movz [[ONE:w[0-9]+]], #1
-; CHECK: tst w0, [[ONE]]
+; CHECK: tst w0, #0x1
; CHECK-NEXT: fcsel d0, d0, d1, ne
; CHECK-NOFP-NOT: fcsel
@@ -45,7 +41,7 @@ define double @test_select_double(i1 %bit, double %a, double %b) {
define i32 @test_brcond(i1 %bit) {
; CHECK-LABEL: test_brcond:
br i1 %bit, label %true, label %false
-; CHECK: tbz {{w[0-9]+}}, #0, .LBB
+; CHECK: tbz {{w[0-9]+}}, #0, {{.?LBB}}
true:
ret i32 0
@@ -57,7 +53,7 @@ define i1 @test_setcc_float(float %lhs, float %rhs) {
; CHECK: test_setcc_float
%val = fcmp oeq float %lhs, %rhs
; CHECK: fcmp s0, s1
-; CHECK: csinc w0, wzr, wzr, ne
+; CHECK: cset w0, eq
; CHECK-NOFP-NOT: fcmp
ret i1 %val
}
@@ -66,7 +62,7 @@ define i1 @test_setcc_double(double %lhs, double %rhs) {
; CHECK: test_setcc_double
%val = fcmp oeq double %lhs, %rhs
; CHECK: fcmp d0, d1
-; CHECK: csinc w0, wzr, wzr, ne
+; CHECK: cset w0, eq
; CHECK-NOFP-NOT: fcmp
ret i1 %val
}
@@ -75,7 +71,7 @@ define i1 @test_setcc_i32(i32 %lhs, i32 %rhs) {
; CHECK: test_setcc_i32
%val = icmp ugt i32 %lhs, %rhs
; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, ls
+; CHECK: cset w0, hi
ret i1 %val
}
@@ -83,6 +79,6 @@ define i1 @test_setcc_i64(i64 %lhs, i64 %rhs) {
; CHECK: test_setcc_i64
%val = icmp ne i64 %lhs, %rhs
; CHECK: cmp x0, x1
-; CHECK: csinc w0, wzr, wzr, eq
+; CHECK: cset w0, ne
ret i1 %val
}
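
The csinc/csinv rewrites here and in cond-sel.ll above are alias changes, not behavioural ones: cset Wd, cond is the preferred disassembly of csinc Wd, wzr, wzr, invert(cond), and csetm is likewise csinv with the condition inverted, so the new checks name the same encoding with the condition un-inverted. A reduced sketch (function name illustrative):

    ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
    define i1 @is_eq(i32 %a, i32 %b) {
    ; CHECK-LABEL: is_eq:
    ; CHECK: cmp w0, w1
    ; The older printer spelled this csinc w0, wzr, wzr, ne.
    ; CHECK: cset w0, eq
      %tst = icmp eq i32 %a, %b
      ret i1 %tst
    }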
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
index 81d9e15..22bd4a8 100644
--- a/test/CodeGen/AArch64/dp-3source.ll
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
; CHECK-LABEL: test_madd32:
diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll
index 6a8d55c..662b415 100644
--- a/test/CodeGen/AArch64/dp1.ll
+++ b/test/CodeGen/AArch64/dp1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
@var32 = global i32 0
@var64 = global i64 0
diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll
index 48b0701..71b3169 100644
--- a/test/CodeGen/AArch64/dp2.ll
+++ b/test/CodeGen/AArch64/dp2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64 | FileCheck %s
@var32_0 = global i32 0
@var32_1 = global i32 0
@@ -13,7 +13,7 @@ define void @rorv_i64() {
%val3_tmp = shl i64 %val0_tmp, %val2_tmp
%val4_tmp = lshr i64 %val0_tmp, %val1_tmp
%val5_tmp = or i64 %val3_tmp, %val4_tmp
-; CHECK: ror {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: {{ror|rorv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val5_tmp, i64* @var64_0
ret void
}
@@ -23,7 +23,7 @@ define void @asrv_i64() {
%val0_tmp = load i64* @var64_0
%val1_tmp = load i64* @var64_1
%val4_tmp = ashr i64 %val0_tmp, %val1_tmp
-; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: {{asr|asrv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_1
ret void
}
@@ -33,7 +33,7 @@ define void @lsrv_i64() {
%val0_tmp = load i64* @var64_0
%val1_tmp = load i64* @var64_1
%val4_tmp = lshr i64 %val0_tmp, %val1_tmp
-; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: {{lsr|lsrv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_0
ret void
}
@@ -43,7 +43,7 @@ define void @lslv_i64() {
%val0_tmp = load i64* @var64_0
%val1_tmp = load i64* @var64_1
%val4_tmp = shl i64 %val0_tmp, %val1_tmp
-; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: {{lsl|lslv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_1
ret void
}
@@ -75,7 +75,7 @@ define void @lsrv_i32() {
%val1_tmp = load i32* @var32_1
%val2_tmp = add i32 1, %val1_tmp
%val4_tmp = lshr i32 %val0_tmp, %val2_tmp
-; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{lsr|lsrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32_0
ret void
}
@@ -86,7 +86,7 @@ define void @lslv_i32() {
%val1_tmp = load i32* @var32_1
%val2_tmp = add i32 1, %val1_tmp
%val4_tmp = shl i32 %val0_tmp, %val2_tmp
-; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{lsl|lslv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32_1
ret void
}
@@ -100,7 +100,7 @@ define void @rorv_i32() {
%val3_tmp = shl i32 %val0_tmp, %val2_tmp
%val4_tmp = lshr i32 %val0_tmp, %val1_tmp
%val5_tmp = or i32 %val3_tmp, %val4_tmp
-; CHECK: ror {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{ror|rorv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val5_tmp, i32* @var32_0
ret void
}
@@ -111,7 +111,7 @@ define void @asrv_i32() {
%val1_tmp = load i32* @var32_1
%val2_tmp = add i32 1, %val1_tmp
%val4_tmp = ashr i32 %val0_tmp, %val2_tmp
-; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{asr|asrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32_1
ret void
}
@@ -143,7 +143,7 @@ define i32 @test_lsl32() {
%val = load i32* @var32_0
%ret = shl i32 1, %val
-; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{lsl|lslv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
ret i32 %ret
}
@@ -153,7 +153,7 @@ define i32 @test_lsr32() {
%val = load i32* @var32_0
%ret = lshr i32 1, %val
-; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{lsr|lsrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
ret i32 %ret
}
@@ -163,7 +163,7 @@ define i32 @test_asr32(i32 %in) {
%val = load i32* @var32_0
%ret = ashr i32 %in, %val
-; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: {{asr|asrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
ret i32 %ret
}
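
The {{ror|rorv}} alternations cover another alias split: the architectural names of the variable-shift instructions are LSLV, LSRV, ASRV and RORV, while LSL, LSR, ASR and ROR are the preferred aliases, and the two printers choose differently. A reduced sketch mirroring rorv_i64 above (function name illustrative):

    ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64 | FileCheck %s
    define i64 @rotr(i64 %val, i64 %amt) {
    ; CHECK-LABEL: rotr:
    ; Either spelling of the same encoding is acceptable.
    ; CHECK: {{ror|rorv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
      %inv = sub i64 64, %amt
      %hi = shl i64 %val, %inv
      %lo = lshr i64 %val, %amt
      %rot = or i64 %hi, %lo
      ret i64 %rot
    }

(The immediate rotate in extract.ll below is the same story: ror Xd, Xn, #sh is the alias of extr Xd, Xn, Xn, #sh.)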
diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll
new file mode 100644
index 0000000..ea86a08
--- /dev/null
+++ b/test/CodeGen/AArch64/eliminate-trunc.ll
@@ -0,0 +1,39 @@
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s
+
+; Check that the trunc i64 operation is translated as a subregister access,
+; eliminating a separate i32 induction variable.
+
+; CHECK-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
+; CHECK-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}}
+define void @test1_signed([8 x i8]* nocapture %a, i8* nocapture readonly %box, i8 %limit) minsize {
+entry:
+ %conv = zext i8 %limit to i32
+ %cmp223 = icmp eq i8 %limit, 0
+ br i1 %cmp223, label %for.end15, label %for.body4.lr.ph.us
+
+for.body4.us:
+ %indvars.iv = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
+ %arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %indvars.iv26, i64 %indvars.iv
+ %0 = load i8* %arrayidx6.us, align 1
+ %idxprom7.us = zext i8 %0 to i64
+ %arrayidx8.us = getelementptr inbounds i8* %box, i64 %idxprom7.us
+ %1 = load i8* %arrayidx8.us, align 1
+ store i8 %1, i8* %arrayidx6.us, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %2 = trunc i64 %indvars.iv.next to i32
+ %cmp2.us = icmp slt i32 %2, %conv
+ br i1 %cmp2.us, label %for.body4.us, label %for.cond1.for.inc13_crit_edge.us
+
+for.body4.lr.ph.us:
+ %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond1.for.inc13_crit_edge.us ], [ 0, %entry ]
+ br label %for.body4.us
+
+for.cond1.for.inc13_crit_edge.us:
+ %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
+ %exitcond28 = icmp eq i64 %indvars.iv26, 3
+ br i1 %exitcond28, label %for.end15, label %for.body4.lr.ph.us
+
+for.end15:
+ ret void
+}
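
The reason a subregister access suffices here: wN is architecturally the low 32 bits of xN, so trunc i64 to i32 costs no instruction, and once the exit comparison uses the 32-bit value the increment can be done on w registers, as the CHECK lines above insist. The degenerate case (standalone sketch, function name illustrative):

    ; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
    define i32 @low_half(i64 %v) {
    ; CHECK-LABEL: low_half:
    ; w0 already holds the low 32 bits of x0, so no masking is needed.
    ; CHECK-NOT: and
    ; CHECK: ret
      %t = trunc i64 %v to i32
      ret i32 %t
    }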
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
index 322b3f4..ce5c0f6 100644
--- a/test/CodeGen/AArch64/extern-weak.ll
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s
declare extern_weak i32 @var()
@@ -7,10 +7,10 @@ define i32()* @foo() {
; The usual ADRP/ADD pair can't be used for a weak reference because it must
; evaluate to 0 if the symbol is undefined. We use a GOT entry instead.
ret i32()* @var
-; CHECK: .LCPI0_0:
-; CHECK-NEXT: .xword var
-; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0]
+
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:var
+; CHECK: ldr x0, [x[[ADDRHI]], :got_lo12:var]
; In the large model, the usual relocations are absolute and can
; materialise 0.
@@ -25,27 +25,29 @@ define i32()* @foo() {
define i32* @bar() {
%addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5
-; CHECK: .LCPI1_0:
-; CHECK-NEXT: .xword arr_var
-; CHECK: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0]
+
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:arr_var
+; CHECK: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var]
; CHECK: add x0, [[BASE]], #20
+
ret i32* %addr
; In the large model, the usual relocations are absolute and can
; materialise 0.
-; CHECK-LARGE: movz x0, #:abs_g3:arr_var
-; CHECK-LARGE: movk x0, #:abs_g2_nc:arr_var
-; CHECK-LARGE: movk x0, #:abs_g1_nc:arr_var
-; CHECK-LARGE: movk x0, #:abs_g0_nc:arr_var
+; CHECK-LARGE: movz [[ADDR:x[0-9]+]], #:abs_g3:arr_var
+; CHECK-LARGE: movk [[ADDR]], #:abs_g2_nc:arr_var
+; CHECK-LARGE: movk [[ADDR]], #:abs_g1_nc:arr_var
+; CHECK-LARGE: movk [[ADDR]], #:abs_g0_nc:arr_var
}
@defined_weak_var = internal unnamed_addr global i32 0
define i32* @wibble() {
ret i32* @defined_weak_var
+
; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
-; CHECK: add x0, [[BASE]], #:lo12:defined_weak_var
+; CHECK: add x0, [[BASE]], :lo12:defined_weak_var
; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
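
The small-code-model change above swaps a literal-pool load for a GOT load: the GOT slot holds the linker-resolved address, which is naturally 0 when the weak symbol stays undefined. A reduced sketch (symbol and function names illustrative):

    ; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
    declare extern_weak i32 @maybe()

    define i32()* @take_addr() {
    ; CHECK: adrp x[[HI:[0-9]+]], :got:maybe
    ; CHECK: ldr x0, [x[[HI]], {{#?}}:got_lo12:maybe]
      ret i32()* @maybe
    }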
diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll
index 62d9ed2..1fc9387 100644
--- a/test/CodeGen/AArch64/extract.ll
+++ b/test/CodeGen/AArch64/extract.ll
@@ -1,11 +1,11 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i64 @ror_i64(i64 %in) {
; CHECK-LABEL: ror_i64:
%left = shl i64 %in, 19
%right = lshr i64 %in, 45
%val5 = or i64 %left, %right
-; CHECK: extr {{x[0-9]+}}, x0, x0, #45
+; CHECK: ror {{x[0-9]+}}, x0, #45
ret i64 %val5
}
@@ -14,7 +14,7 @@ define i32 @ror_i32(i32 %in) {
%left = shl i32 %in, 9
%right = lshr i32 %in, 23
%val5 = or i32 %left, %right
-; CHECK: extr {{w[0-9]+}}, w0, w0, #23
+; CHECK: ror {{w[0-9]+}}, w0, #23
ret i32 %val5
}
diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll
index c6c0505..a392619 100644
--- a/test/CodeGen/AArch64/fastcc-reserved.ll
+++ b/test/CodeGen/AArch64/fastcc-reserved.ll
@@ -12,8 +12,8 @@ define fastcc void @foo(i32 %in) {
%addr = alloca i8, i32 %in
; Normal frame setup stuff:
-; CHECK: sub sp, sp,
-; CHECK: stp x29, x30
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
; Reserve space for call-frame:
; CHECK: sub sp, sp, #16
@@ -26,8 +26,8 @@ define fastcc void @foo(i32 %in) {
; CHECK-NOT: sub sp, sp, #16
; CHECK-NOT: add sp, sp,
-; CHECK: ldp x29, x30
-; CHECK: add sp, sp,
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
ret void
}
@@ -38,8 +38,8 @@ define void @foo1(i32 %in) {
%addr = alloca i8, i32 %in
; Normal frame setup again
-; CHECK: sub sp, sp,
-; CHECK: stp x29, x30
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
; Reserve space for call-frame
; CHECK: sub sp, sp, #16
@@ -52,7 +52,7 @@ define void @foo1(i32 %in) {
; Check for epilogue (primarily to make sure sp spotted above wasn't
; part of it).
-; CHECK: ldp x29, x30
-; CHECK: add sp, sp,
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
ret void
}
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
index a4cd378..9917fcd 100644
--- a/test/CodeGen/AArch64/fastcc.ll
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -6,10 +6,13 @@
define fastcc void @func_stack0() {
; CHECK-LABEL: func_stack0:
-; CHECK: sub sp, sp, #48
+; CHECK: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #32
; CHECK-TAIL-LABEL: func_stack0:
-; CHECK-TAIL: sub sp, sp, #48
+; CHECK-TAIL: stp x29, x30, [sp, #-16]!
+; CHECK-TAIL-NEXT: mov x29, sp
+; CHECK-TAIL-NEXT: sub sp, sp, #32
call fastcc void @func_stack8([8 x i32] undef, i32 42)
@@ -24,6 +27,7 @@ define fastcc void @func_stack0() {
; CHECK: bl func_stack32
; CHECK-NOT: sub sp, sp,
+
; CHECK-TAIL: bl func_stack32
; CHECK-TAIL: sub sp, sp, #32
@@ -32,30 +36,39 @@ define fastcc void @func_stack0() {
; CHECK: bl func_stack0
; CHECK-NOT: sub sp, sp
+
; CHECK-TAIL: bl func_stack0
; CHECK-TAIL-NOT: sub sp, sp
ret void
-; CHECK: add sp, sp, #48
+; CHECK: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
-; CHECK-TAIL: add sp, sp, #48
-; CHECK-TAIL-NEXT: ret
+; CHECK-TAIL: mov sp, x29
+; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
+; CHECK-TAIL-NEXT: ret
}
define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK-LABEL: func_stack8:
-; CHECK: sub sp, sp, #48
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; CHECK: sub sp, sp, #32
+
; CHECK-TAIL-LABEL: func_stack8:
-; CHECK-TAIL: sub sp, sp, #48
+; CHECK-TAIL: stp x29, x30, [sp, #-16]!
+; CHECK-TAIL: mov x29, sp
+; CHECK-TAIL: sub sp, sp, #32
call fastcc void @func_stack8([8 x i32] undef, i32 42)
; CHECK: bl func_stack8
; CHECK-NOT: sub sp, sp,
+
; CHECK-TAIL: bl func_stack8
; CHECK-TAIL: sub sp, sp, #16
@@ -64,6 +77,7 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK: bl func_stack32
; CHECK-NOT: sub sp, sp,
+
; CHECK-TAIL: bl func_stack32
; CHECK-TAIL: sub sp, sp, #32
@@ -76,19 +90,22 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK-TAIL-NOT: sub sp, sp
ret void
-; CHECK: add sp, sp, #48
+; CHECK: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
-; CHECK-TAIL: add sp, sp, #64
+
+; CHECK-TAIL: mov sp, x29
+; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: ret
}
define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-LABEL: func_stack32:
-; CHECK: sub sp, sp, #48
+; CHECK: mov x29, sp
; CHECK-TAIL-LABEL: func_stack32:
-; CHECK-TAIL: sub sp, sp, #48
+; CHECK-TAIL: mov x29, sp
call fastcc void @func_stack8([8 x i32] undef, i32 42)
@@ -103,6 +120,7 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK: bl func_stack32
; CHECK-NOT: sub sp, sp,
+
; CHECK-TAIL: bl func_stack32
; CHECK-TAIL: sub sp, sp, #32
@@ -111,13 +129,16 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK: bl func_stack0
; CHECK-NOT: sub sp, sp
+
; CHECK-TAIL: bl func_stack0
; CHECK-TAIL-NOT: sub sp, sp
ret void
-; CHECK: add sp, sp, #48
+; CHECK: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
-; CHECK-TAIL: add sp, sp, #80
+; CHECK-TAIL: mov sp, x29
+; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: ret
}
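
These fastcc changes replace a fixed sub sp, sp, #48 frame with a frame-pointer-based layout. The shape the updated checks expect, restated as annotated assembly (a sketch, not verbatim llc output):

    stp x29, x30, [sp, #-16]!   ; prologue: push the frame record, pre-indexed
    mov x29, sp                 ; establish x29 as the frame pointer
    sub sp, sp, #32             ; reserve locals plus the outgoing call frame
    ; ... body; calls may move sp below x29 ...
    mov sp, x29                 ; epilogue: drop everything below the frame record
    ldp x29, x30, [sp], #16     ; pop the frame record, post-indexed

The payoff shows in the CHECK-TAIL lines: with tail-call optimization a fastcc callee pops its own stack arguments, so the amount to add back to sp varies by call site; restoring sp from x29 sidesteps that bookkeeping.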
diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll
index a9518ea..3c74508 100644
--- a/test/CodeGen/AArch64/fcmp.ll
+++ b/test/CodeGen/AArch64/fcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
declare void @bar(i32)
diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll
index 9d66da4..ccb3616 100644
--- a/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -1,4 +1,8 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0
+
+; (The O0 test is to make sure FastISel still constrains its operands properly
+; and the machine verifier doesn't fire.)
@var32 = global i32 0
@var64 = global i64 0
diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll
index 97427a7..d549c7e 100644
--- a/test/CodeGen/AArch64/fcvt-int.ll
+++ b/test/CodeGen/AArch64/fcvt-int.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i32 @test_floattoi32(float %in) {
; CHECK-LABEL: test_floattoi32:
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
index e99c728..c9b0b9f 100644
--- a/test/CodeGen/AArch64/flags-multiuse.ll
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
; LLVM should be able to cope with multiple uses of the same flag-setting
; instruction at different points of a routine. Either by rematerializing the
@@ -15,7 +15,7 @@ define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
%val = zext i1 %test to i32
-; CHECK: csinc {{[xw][0-9]+}}, {{xzr|wzr}}, {{xzr|wzr}}, eq
+; CHECK: cset {{[xw][0-9]+}}, ne
store i32 %val, i32* @var
diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll
index 3d7f8f0..8c02787 100644
--- a/test/CodeGen/AArch64/floatdp_1source.ll
+++ b/test/CodeGen/AArch64/floatdp_1source.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
@varhalf = global half 0.0
@varfloat = global float 0.0
diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll
index bb65528..2622717 100644
--- a/test/CodeGen/AArch64/floatdp_2source.ll
+++ b/test/CodeGen/AArch64/floatdp_2source.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mcpu=cyclone | FileCheck %s
@varfloat = global float 0.0
@vardouble = global double 0.0
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
index 572f42e..b4f4d77 100644
--- a/test/CodeGen/AArch64/fp-cond-sel.ll
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -1,25 +1,34 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK
@varfloat = global float 0.0
@vardouble = global double 0.0
+declare void @use_float(float)
+declare void @use_double(double)
+
define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
; CHECK-LABEL: test_csel:
%tst1 = icmp ugt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float 0.0, float 1.0
store float %val1, float* @varfloat
-; CHECK: ldr [[FLT0:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
-; CHECK: fmov [[FLT1:s[0-9]+]], #1.0
-; CHECK: fcsel {{s[0-9]+}}, [[FLT0]], [[FLT1]], hi
+; CHECK: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK: fmov s[[FLT1:[0-9]+]], #1.0
+; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
%rhs64 = sext i32 %rhs32 to i64
%tst2 = icmp sle i64 %lhs64, %rhs64
%val2 = select i1 %tst2, double 1.0, double 0.0
store double %val2, double* @vardouble
-; CHECK: ldr [[FLT0:d[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
-; CHECK: fmov [[FLT1:d[0-9]+]], #1.0
-; CHECK: fcsel {{d[0-9]+}}, [[FLT1]], [[FLT0]], le
+; FLT0 is reused from above on ARM64.
+; CHECK: fmov d[[FLT1:[0-9]+]], #1.0
+; CHECK: fcsel {{d[0-9]+}}, d[[FLT1]], d[[FLT0]], le
+
+ call void @use_float(float 0.0)
+ call void @use_float(float 1.0)
+
+ call void @use_double(double 0.0)
+ call void @use_double(double 1.0)
ret void
; CHECK: ret
diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll
index 2a6790e..10f88fd 100644
--- a/test/CodeGen/AArch64/fp-dp3.ll
+++ b/test/CodeGen/AArch64/fp-dp3.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -check-prefix=CHECK-NOFAST
declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)
diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll
index b1c560d..892b19c 100644
--- a/test/CodeGen/AArch64/fp128-folding.ll
+++ b/test/CodeGen/AArch64/fp128-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
declare void @bar(i8*, i8*, i32*)
; SelectionDAG used to try to fold some fp128 operations using the ppc128 type,
@@ -12,6 +12,6 @@ define fp128 @test_folding() {
%fpval = sitofp i32 %val to fp128
; If the value is loaded from a constant pool into an fp128, it's been folded
; successfully.
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI
ret fp128 %fpval
}
diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll
deleted file mode 100644
index c312bb1..0000000
--- a/test/CodeGen/AArch64/fp128.ll
+++ /dev/null
@@ -1,279 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-
-@lhs = global fp128 zeroinitializer
-@rhs = global fp128 zeroinitializer
-
-define fp128 @test_add() {
-; CHECK-LABEL: test_add:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
- %val = fadd fp128 %lhs, %rhs
-; CHECK: bl __addtf3
- ret fp128 %val
-}
-
-define fp128 @test_sub() {
-; CHECK-LABEL: test_sub:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
- %val = fsub fp128 %lhs, %rhs
-; CHECK: bl __subtf3
- ret fp128 %val
-}
-
-define fp128 @test_mul() {
-; CHECK-LABEL: test_mul:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
- %val = fmul fp128 %lhs, %rhs
-; CHECK: bl __multf3
- ret fp128 %val
-}
-
-define fp128 @test_div() {
-; CHECK-LABEL: test_div:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
- %val = fdiv fp128 %lhs, %rhs
-; CHECK: bl __divtf3
- ret fp128 %val
-}
-
-@var32 = global i32 0
-@var64 = global i64 0
-
-define void @test_fptosi() {
-; CHECK-LABEL: test_fptosi:
- %val = load fp128* @lhs
-
- %val32 = fptosi fp128 %val to i32
- store i32 %val32, i32* @var32
-; CHECK: bl __fixtfsi
-
- %val64 = fptosi fp128 %val to i64
- store i64 %val64, i64* @var64
-; CHECK: bl __fixtfdi
-
- ret void
-}
-
-define void @test_fptoui() {
-; CHECK-LABEL: test_fptoui:
- %val = load fp128* @lhs
-
- %val32 = fptoui fp128 %val to i32
- store i32 %val32, i32* @var32
-; CHECK: bl __fixunstfsi
-
- %val64 = fptoui fp128 %val to i64
- store i64 %val64, i64* @var64
-; CHECK: bl __fixunstfdi
-
- ret void
-}
-
-define void @test_sitofp() {
-; CHECK-LABEL: test_sitofp:
-
- %src32 = load i32* @var32
- %val32 = sitofp i32 %src32 to fp128
- store volatile fp128 %val32, fp128* @lhs
-; CHECK: bl __floatsitf
-
- %src64 = load i64* @var64
- %val64 = sitofp i64 %src64 to fp128
- store volatile fp128 %val64, fp128* @lhs
-; CHECK: bl __floatditf
-
- ret void
-}
-
-define void @test_uitofp() {
-; CHECK-LABEL: test_uitofp:
-
- %src32 = load i32* @var32
- %val32 = uitofp i32 %src32 to fp128
- store volatile fp128 %val32, fp128* @lhs
-; CHECK: bl __floatunsitf
-
- %src64 = load i64* @var64
- %val64 = uitofp i64 %src64 to fp128
- store volatile fp128 %val64, fp128* @lhs
-; CHECK: bl __floatunditf
-
- ret void
-}
-
-define i1 @test_setcc1() {
-; CHECK-LABEL: test_setcc1:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
-; Technically, everything after the call to __letf2 is redundant, but we'll let
-; LLVM have its fun for now.
- %val = fcmp ole fp128 %lhs, %rhs
-; CHECK: bl __letf2
-; CHECK: cmp w0, #0
-; CHECK: csinc w0, wzr, wzr, gt
-
- ret i1 %val
-; CHECK: ret
-}
-
-define i1 @test_setcc2() {
-; CHECK-LABEL: test_setcc2:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
-; Technically, everything after the call to __letf2 is redundant, but we'll let
-; LLVM have its fun for now.
- %val = fcmp ugt fp128 %lhs, %rhs
-; CHECK: bl __gttf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le
-
-; CHECK: bl __unordtf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
-
-; CHECK: orr w0, [[UNORDERED]], [[GT]]
-
- ret i1 %val
-; CHECK: ret
-}
-
-define i32 @test_br_cc() {
-; CHECK-LABEL: test_br_cc:
-
- %lhs = load fp128* @lhs
- %rhs = load fp128* @rhs
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
-
- ; olt == !uge, which is what LLVM unfortunately "optimizes" this to.
- %cond = fcmp olt fp128 %lhs, %rhs
-; CHECK: bl __getf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt
-
-; CHECK: bl __unordtf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
-
-; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
-; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
- br i1 %cond, label %iftrue, label %iffalse
-
-iftrue:
- ret i32 42
-; CHECK-NEXT: BB#
-; CHECK-NEXT: movz x0, #42
-; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
-
-iffalse:
- ret i32 29
-; CHECK: [[RET29]]:
-; CHECK-NEXT: movz x0, #29
-; CHECK-NEXT: [[REALRET]]:
-; CHECK: ret
-}
-
-define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
-; CHECK-LABEL: test_select:
-
- %val = select i1 %cond, fp128 %lhs, fp128 %rhs
- store fp128 %val, fp128* @lhs
-; CHECK: cmp w0, #0
-; CHECK: str q1, [sp]
-; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: BB#
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: [[IFFALSE]]:
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
- ret void
-; CHECK: ret
-}
-
-@varfloat = global float 0.0
-@vardouble = global double 0.0
-
-define void @test_round() {
-; CHECK-LABEL: test_round:
-
- %val = load fp128* @lhs
-
- %float = fptrunc fp128 %val to float
- store float %float, float* @varfloat
-; CHECK: bl __trunctfsf2
-; CHECK: str s0, [{{x[0-9]+}}, #:lo12:varfloat]
-
- %double = fptrunc fp128 %val to double
- store double %double, double* @vardouble
-; CHECK: bl __trunctfdf2
-; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
-
- ret void
-}
-
-define void @test_extend() {
-; CHECK-LABEL: test_extend:
-
- %val = load fp128* @lhs
-
- %float = load float* @varfloat
- %fromfloat = fpext float %float to fp128
- store volatile fp128 %fromfloat, fp128* @lhs
-; CHECK: bl __extendsftf2
-; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
-
- %double = load double* @vardouble
- %fromdouble = fpext double %double to fp128
- store volatile fp128 %fromdouble, fp128* @lhs
-; CHECK: bl __extenddftf2
-; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
-
- ret void
-; CHECK: ret
-}
-
-define fp128 @test_neg(fp128 %in) {
-; CHECK: [[MINUS0:.LCPI[0-9]+_0]]:
-; Make sure the weird hex constant below *is* -0.0
-; CHECK-NEXT: fp128 -0
-
-; CHECK-LABEL: test_neg:
-
- ; Could in principle be optimized to fneg which we can't select, this makes
- ; sure that doesn't happen.
- %ret = fsub fp128 0xL00000000000000008000000000000000, %in
-; CHECK: str q0, [sp, #-16]
-; CHECK-NEXT: ldr q1, [sp], #16
-; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:[[MINUS0]]]
-; CHECK: bl __subtf3
-
- ret fp128 %ret
-; CHECK: ret
-}
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index b8f7169..e59520c 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
@varf32 = global float 0.0
@varf64 = global double 0.0
@@ -13,7 +13,7 @@ define void @check_float() {
%newval2 = fadd float %val, 128.0
store volatile float %newval2, float* @varf32
-; CHECK-DAG: ldr [[HARD:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI0_0
+; CHECK-DAG: ldr [[HARD:s[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI0_0
; CHECK: ret
ret void
@@ -29,7 +29,7 @@ define void @check_double() {
%newval2 = fadd double %val, 128.0
store volatile double %newval2, double* @varf64
-; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0
+; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI1_0
; CHECK: ret
ret void
diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll
index 182704b..85d95e2 100644
--- a/test/CodeGen/AArch64/frameaddr.ll
+++ b/test/CodeGen/AArch64/frameaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i8* @t() nounwind {
entry:
@@ -12,7 +12,7 @@ define i8* @t2() nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[reg:[0-9]+]], [x29]
-; CHECK: ldr x[[reg]], [x[[reg]]]
+; CHECK: ldr {{x[0-9]+}}, [x[[reg]]]
%0 = call i8* @llvm.frameaddress(i32 2)
ret i8* %0
}
diff --git a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll
new file mode 100644
index 0000000..d69105e
--- /dev/null
+++ b/test/CodeGen/AArch64/free-zext.ll
@@ -0,0 +1,14 @@
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+
+define i64 @test_free_zext(i8* %a, i16* %b) {
+; CHECK-LABEL: test_free_zext
+; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0]
+; CHECK: ldrh w[[B:[0-9]+]], [x1]
+; CHECK: add x0, x[[B]], x[[A]]
+ %1 = load i8* %a, align 1
+ %conv = zext i8 %1 to i64
+ %2 = load i16* %b, align 2
+ %conv1 = zext i16 %2 to i64
+ %add = add nsw i64 %conv1, %conv
+ ret i64 %add
+}
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index f307686..abb732c 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -1,7 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
%myStruct = type { i64 , i8, i32 }
@@ -18,7 +16,7 @@ define void @take_i8s(i8 %val1, i8 %val2) {
store i8 %val2, i8* @var8
; Not using w1 may be technically allowed, but it would indicate a
; problem in itself.
-; CHECK: strb w1, [{{x[0-9]+}}, #:lo12:var8]
+; CHECK: strb w1, [{{x[0-9]+}}, {{#?}}:lo12:var8]
ret void
}
@@ -28,7 +26,7 @@ define void @add_floats(float %val1, float %val2) {
; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1
; CHECK-NOFP-NOT: fadd
store float %newval, float* @varfloat
-; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK: str [[ADDRES]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
ret void
}
@@ -43,12 +41,12 @@ define void @take_struct(%myStruct* byval %structval) {
; Some weird move means x0 is used for one access
; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
store volatile i32 %val0, i32* @var32
-; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
%val1 = load volatile i64* %addr1
; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
store volatile i64 %val1, i64* @var64
-; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+; CHECK: str [[REG64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
ret void
}
@@ -62,15 +60,14 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st
%val0 = load volatile i32* %addr0
; Some weird move means x0 is used for one access
-; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16
-; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12]
+; CHECK: ldr [[REG32:w[0-9]+]], [sp, #28]
store i32 %val0, i32* @var32
-; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
%val1 = load volatile i64* %addr1
; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
store i64 %val1, i64* @var64
-; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+; CHECK: str [[REG64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
ret void
}
@@ -79,7 +76,7 @@ define i32 @return_int() {
; CHECK-LABEL: return_int:
%val = load i32* @var32
ret i32 %val
-; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32]
+; CHECK: ldr w0, [{{x[0-9]+}}, {{#?}}:lo12:var32]
; Make sure epilogue follows
; CHECK-NEXT: ret
}
@@ -87,7 +84,7 @@ define i32 @return_int() {
define double @return_double() {
; CHECK-LABEL: return_double:
ret double 3.14
-; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: ldr d0, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI
; CHECK-NOFP-NOT: ldr d0,
}
@@ -99,10 +96,10 @@ define [2 x i64] @return_struct() {
%addr = bitcast %myStruct* @varstruct to [2 x i64]*
%val = load [2 x i64]* %addr
ret [2 x i64] %val
-; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:varstruct]
+; CHECK-DAG: ldr x0, [{{x[0-9]+}}, {{#?}}:lo12:varstruct]
; Odd register regex below disallows x0, which we want to be live now.
-; CHECK: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, #:lo12:varstruct
-; CHECK-NEXT: ldr x1, [{{x[1-9][0-9]*}}, #8]
+; CHECK-DAG: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, {{#?}}:lo12:varstruct
+; CHECK: ldr x1, [{{x[1-9][0-9]*}}, #8]
; Make sure epilogue immediately follows
; CHECK-NEXT: ret
}
@@ -139,17 +136,16 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
store volatile i64 %val64, i64* @var64
; Currently nothing on local stack, so struct should be at sp
; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
-; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64]
+; CHECK: str [[VAL64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
store volatile double %notstacked, double* @vardouble
; CHECK-NOT: ldr d0
-; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble
+; CHECK: str d0, [{{x[0-9]+}}, {{#?}}:lo12:vardouble
; CHECK-NOFP-NOT: str d0,
%retval = load volatile i32* %stacked
ret i32 %retval
; CHECK-LE: ldr w0, [sp, #16]
-; CHECK-BE: ldr w0, [sp, #20]
}
define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
@@ -159,36 +155,36 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
store float %var8, float* @varfloat
; Beware as above: the offset would be different on big-endian
; machines if the first ldr were changed to use s-registers.
-; CHECK: ldr d[[VALFLOAT:[0-9]+]], [sp]
-; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
ret void
}
; 128-bit integer types should be passed in xEVEN, xODD rather than
; the reverse. In this case x2 and x3. Nothing should use x1.
-define i32 @check_i128_regalign(i32 %val0, i128 %val1, i32 %val2) {
-; CHECK: check_i128_regalign
+define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
+; CHECK-LABEL: check_i128_regalign
store i128 %val1, i128* @var128
-; CHECK: str x2, [{{x[0-9]+}}, #:lo12:var128]
-; CHECK: str x3, [{{x[0-9]+}}, #8]
+; CHECK-DAG: str x2, [{{x[0-9]+}}, {{#?}}:lo12:var128]
+; CHECK-DAG: str x3, [{{x[0-9]+}}, #8]
- ret i32 %val2
+ ret i64 %val2
; CHECK: mov x0, x4
}
define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
i32 %val4, i32 %val5, i32 %val6, i32 %val7,
i32 %stack1, i128 %stack2) {
-; CHECK: check_i128_stackalign
+; CHECK-LABEL: check_i128_stackalign
store i128 %stack2, i128* @var128
; Nothing local on the stack in current codegen, so the first stack argument is 16 bytes away
; CHECK-LE: add x[[REG:[0-9]+]], sp, #16
; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8]
-; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24]
; The important point is that we address sp+24 for the second dword
-; CHECK: ldr {{x[0-9]+}}, [sp, #16]
+
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
ret void
}
@@ -200,3 +196,13 @@ define i32 @test_extern() {
; CHECK: bl memcpy
ret i32 0
}
+
+
+; On big-endian targets, a sub-i32 stack argument must be loaded with ldr{h,b},
+; not implicitly widened to a 32-bit load.
+define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+ i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+ i16 %stack1) {
+; CHECK-LABEL: stacked_i16
+ ret i16 %stack1
+}
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index f029bf2..422c576 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -1,7 +1,7 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK-NONEON %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s
%myStruct = type { i64 , i8, i32 }
@@ -24,15 +24,15 @@ define void @simple_args() {
%char1 = load i8* @var8
%char2 = load i8* @var8_2
call void @take_i8s(i8 %char1, i8 %char2)
-; CHECK-DAG: ldrb w0, [{{x[0-9]+}}, #:lo12:var8]
-; CHECK-DAG: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2]
+; CHECK-DAG: ldrb w0, [{{x[0-9]+}}, {{#?}}:lo12:var8]
+; CHECK-DAG: ldrb w1, [{{x[0-9]+}}, {{#?}}:lo12:var8_2]
; CHECK: bl take_i8s
%float1 = load float* @varfloat
%float2 = load float* @varfloat_2
call void @take_floats(float %float1, float %float2)
-; CHECK-DAG: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2]
-; CHECK-DAG: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK-DAG: ldr s1, [{{x[0-9]+}}, {{#?}}:lo12:varfloat_2]
+; CHECK-DAG: ldr s0, [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
; CHECK: bl take_floats
; CHECK-NOFP-NOT: ldr s1,
; CHECK-NOFP-NOT: ldr s0,
@@ -51,22 +51,22 @@ define void @simple_rets() {
%int = call i32 @return_int()
store i32 %int, i32* @var32
; CHECK: bl return_int
-; CHECK: str w0, [{{x[0-9]+}}, #:lo12:var32]
+; CHECK: str w0, [{{x[0-9]+}}, {{#?}}:lo12:var32]
%dbl = call double @return_double()
store double %dbl, double* @vardouble
; CHECK: bl return_double
-; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+; CHECK: str d0, [{{x[0-9]+}}, {{#?}}:lo12:vardouble]
; CHECK-NOFP-NOT: str d0,
%arr = call [2 x i64] @return_smallstruct()
store [2 x i64] %arr, [2 x i64]* @varsmallstruct
; CHECK: bl return_smallstruct
; CHECK: str x1, [{{x[0-9]+}}, #8]
-; CHECK: str x0, [{{x[0-9]+}}, #:lo12:varsmallstruct]
+; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct]
call void @return_large_struct(%myStruct* sret @varstruct)
-; CHECK: add x8, {{x[0-9]+}}, #:lo12:varstruct
+; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
; CHECK: bl return_large_struct
ret void
@@ -88,19 +88,28 @@ define void @check_stack_args() {
; We want to check that the final double is passed in registers and
; that varstruct is passed on the stack. This is rather dependent on
; how the memcpy gets created, but the following works for now.
-; CHECK: mov x[[SPREG:[0-9]+]], sp
-; CHECK-DAG: str {{w[0-9]+}}, [x[[SPREG]]]
-; CHECK-DAG: str {{w[0-9]+}}, [x[[SPREG]], #12]
-; CHECK-DAG: fmov d0,
+
+; CHECK-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
+; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b
+
+; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
+; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]]
+
; CHECK: bl struct_on_stack
; CHECK-NOFP-NOT: fmov
call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0,
float -2.0, float -8.0, float 16.0, float 1.0,
float 64.0)
-; CHECK: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
-; CHECK: mov x0, sp
-; CHECK: str d[[STACKEDREG]], [x0]
+
+; CHECK: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
+; CHECK: str [[SIXTY_FOUR]], [sp]
+
+; CHECK-NONEON: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16
+; CHECK-NONEON: str [[SIXTY_FOUR]], [sp]
+
; CHECK: bl stacked_fpu
ret void
}
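
A worked note on the constant checked above: #0x4280 shifted left by 16
is 0x42800000, the IEEE-754 single-precision encoding of 64.0, so the
ninth float argument is materialized with a plain integer movz and
stored straight to its stack slot at [sp], never touching an FP register.
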
@@ -119,18 +128,20 @@ define void @check_i128_align() {
call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
i32 4, i32 5, i32 6, i32 7,
i32 42, i128 %val)
-; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128]
; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
-; CHECK: mov x[[SPREG:[0-9]+]], sp
-; CHECK: str [[I128HI]], [x[[SPREG]], #24]
-; CHECK: str [[I128LO]], [x[[SPREG]], #16]
+; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
+
+; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
+; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
; CHECK: bl check_i128_stackalign
call void @check_i128_regalign(i32 0, i128 42)
; CHECK-NOT: mov x1
-; CHECK-LE: movz x2, #42
+; CHECK-LE: movz x2, #{{0x2a|42}}
; CHECK-LE: mov x3, xzr
-; CHECK-BE: movz x3, #42
+; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}}
; CHECK-BE: mov x2, xzr
; CHECK: bl check_i128_regalign
@@ -143,7 +154,7 @@ define void @check_indirect_call() {
; CHECK-LABEL: check_indirect_call:
%func = load void()** @fptr
call void %func()
-; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr]
+; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:fptr]
; CHECK: blr [[FPTR]]
ret void
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
index 56e5cba..451b9d6 100644
--- a/test/CodeGen/AArch64/global-alignment.ll
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -1,8 +1,9 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
@var32 = global [3 x i32] zeroinitializer
@var64 = global [3 x i64] zeroinitializer
@var32_align64 = global [3 x i32] zeroinitializer, align 8
+@alias = alias [3 x i32]* @var32_align64
define i64 @test_align32() {
; CHECK-LABEL: test_align32:
@@ -12,7 +13,7 @@ define i64 @test_align32() {
; emit an "LDR x0, [x0, #:lo12:var32]" instruction to implement this load.
%val = load i64* %addr
; CHECK: adrp [[HIBITS:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:var32
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], {{#?}}:lo12:var32
; CHECK: ldr x0, [x[[ADDR]]]
ret i64 %val
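
A sketch of why the separate add is needed (assuming standard AArch64
addressing rules): "ldr x0, [xN, #:lo12:sym]" encodes the low 12 bits
of sym's address as an offset that must be a multiple of 8 for an
8-byte load, and @var32 only guarantees 4-byte alignment, so the
address has to be completed with an add before loading. The align-8
globals below can fold the :lo12: offset directly into the ldr.
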
@@ -27,7 +28,7 @@ define i64 @test_align64() {
%val = load i64* %addr
; CHECK: adrp x[[HIBITS:[0-9]+]], var64
; CHECK-NOT: add x[[HIBITS]]
-; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var64]
+; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:var64]
ret i64 %val
}
@@ -41,7 +42,20 @@ define i64 @test_var32_align64() {
%val = load i64* %addr
; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64
; CHECK-NOT: add x[[HIBITS]]
-; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var32_align64]
+; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:var32_align64]
+
+ ret i64 %val
+}
+
+define i64 @test_var32_alias() {
+; CHECK-LABEL: test_var32_alias:
+ %addr = bitcast [3 x i32]* @alias to i64*
+
+ ; Test that we can find the alignment for aliases.
+ %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], alias
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:alias]
ret i64 %val
}
@@ -56,7 +70,7 @@ define i64 @test_yet_another_var() {
; so we can't fold the load.
%val = load i64* bitcast({i32, i32}* @yet_another_var to i64*)
; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var
-; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:yet_another_var
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], {{#?}}:lo12:yet_another_var
; CHECK: ldr x0, [x[[ADDR]]]
ret i64 %val
}
@@ -65,5 +79,5 @@ define i64()* @test_functions() {
; CHECK-LABEL: test_functions:
ret i64()* @test_yet_another_var
; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var
-; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var
+; CHECK: add x0, [[HIBITS]], {{#?}}:lo12:test_yet_another_var
}
diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll
index 8b06031..7a02b10 100644
--- a/test/CodeGen/AArch64/got-abuse.ll
+++ b/test/CodeGen/AArch64/got-abuse.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s
; LLVM gives well-defined semantics to this horrible construct (though C says
; it's undefined). Regardless, we shouldn't crash. The important feature here is
@@ -17,7 +17,7 @@ define void @foo() nounwind {
entry:
call void @consume(i32 ptrtoint (void ()* @func to i32))
; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func
-; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], #:got_lo12:func]
+; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], {{#?}}:got_lo12:func]
ret void
}
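
A sketch of the PIC sequence checked above (assuming a conventional ELF
GOT): under -relocation-model=pic the address of @func is loaded from
the global offset table, with "adrp" reaching the GOT page and the
":got_lo12:" ldr fetching the actual address, rather than being
computed directly with an adrp/add pair.
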
diff --git a/test/CodeGen/AArch64/i1-contents.ll b/test/CodeGen/AArch64/i1-contents.ll
new file mode 100644
index 0000000..7f133fc
--- /dev/null
+++ b/test/CodeGen/AArch64/i1-contents.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+%big = type i32
+
+@var = global %big 0
+
+; AAPCS: low 8 bits of %in (== w0) will be either 0 or 1. Need to extend to
+; 32 bits.
+define void @consume_i1_arg(i1 %in) {
+; CHECK-LABEL: consume_i1_arg:
+; CHECK: and [[BOOL32:w[0-9]+]], w0, #{{0x1|0xff}}
+; CHECK: str [[BOOL32]], [{{x[0-9]+}}, :lo12:var]
+ %val = zext i1 %in to %big
+ store %big %val, %big* @var
+ ret void
+}
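
A sketch of the contract being tested (per the AAPCS note above): the
caller guarantees only the low bits of w0, so zext'ing the i1 to 32
bits cannot be folded away and must lower to an explicit mask, e.g.
(register names illustrative):

;   and wN, w0, #0x1        ; #0xff would equally satisfy the test
;   str wN, [xM, :lo12:var]
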
+
+; AAPCS: low 8 bits of %val1 (== w0) will be either 0 or 1. Need to extend to
+; 32 bits (it doesn't really matter whether it's from 1 or 8 bits).
+define void @consume_i1_ret() {
+; CHECK-LABEL: consume_i1_ret:
+; CHECK: bl produce_i1_ret
+; CHECK: and [[BOOL32:w[0-9]+]], w0, #{{0x1|0xff}}
+; CHECK: str [[BOOL32]], [{{x[0-9]+}}, :lo12:var]
+ %val1 = call i1 @produce_i1_ret()
+ %val = zext i1 %val1 to %big
+ store %big %val, %big* @var
+ ret void
+}
+
+; AAPCS: low 8 bits of w0 must be either 0 or 1. Need to mask the rest off.
+define i1 @produce_i1_ret() {
+; CHECK-LABEL: produce_i1_ret:
+; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: and w0, [[VAR32]], #{{0x1|0xff}}
+ %val = load %big* @var
+ %val1 = trunc %big %val to i1
+ ret i1 %val1
+}
+
+define void @produce_i1_arg() {
+; CHECK-LABEL: produce_i1_arg:
+; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: and w0, [[VAR32]], #{{0x1|0xff}}
+; CHECK: bl consume_i1_arg
+ %val = load %big* @var
+ %val1 = trunc %big %val to i1
+ call void @consume_i1_arg(i1 %val1)
+ ret void
+}
+
+
+;define zeroext i1 @foo(i8 %in) {
+; %val = trunc i8 %in to i1
+; ret i1 %val
+;}
diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll
index 21ca7ed..a1b4d6f 100644
--- a/test/CodeGen/AArch64/i128-align.ll
+++ b/test/CodeGen/AArch64/i128-align.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios7.0 -verify-machineinstrs -o - %s | FileCheck %s
%struct = type { i32, i128, i8 }
@@ -13,7 +13,7 @@ define i64 @check_size() {
%diff = sub i64 %endi, %starti
ret i64 %diff
-; CHECK: movz x0, #48
+; CHECK: {{movz x0, #48|orr w0, wzr, #0x30}}
}
define i64 @check_field() {
@@ -25,5 +25,5 @@ define i64 @check_field() {
%diff = sub i64 %endi, %starti
ret i64 %diff
-; CHECK: movz x0, #16
+; CHECK: {{movz x0, #16|orr w0, wzr, #0x10}}
}
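
A worked layout behind the two constants, assuming 16-byte alignment
for i128: in %struct = { i32, i128, i8 } the i32 sits at offset 0,
padding runs to 16, the i128 occupies bytes 16-31 (hence check_field's
16), the i8 lands at 32, and tail padding rounds the size up to 48
(hence check_size's 48).
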
diff --git a/test/CodeGen/AArch64/i128-shift.ll b/test/CodeGen/AArch64/i128-shift.ll
deleted file mode 100644
index d786d44..0000000
--- a/test/CodeGen/AArch64/i128-shift.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-
-define i128 @test_i128_lsl(i128 %a, i32 %shift) {
-; CHECK-LABEL: test_i128_lsl:
-
- %sh_prom = zext i32 %shift to i128
- %shl = shl i128 %a, %sh_prom
-
-; CHECK: movz [[SIXTYFOUR:x[0-9]+]], #64
-; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw
-; CHECK-NEXT: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[REVSHAMT]]
-; CHECK: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[SHAMT:x[0-9]+]]
-; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]]
-; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64
-; CHECK-NEXT: lsl [[TMP3:x[0-9]+]], [[LO]], [[EXTRASHAMT]]
-; CHECK-NEXT: cmp [[EXTRASHAMT]], #0
-; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], [[TMP3]], [[FALSEVAL]], ge
-; CHECK-NEXT: lsl [[TMP4:x[0-9]+]], [[LO]], [[SHAMT]]
-; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], xzr, [[TMP4]], ge
-
- ret i128 %shl
-}
-
-define i128 @test_i128_shr(i128 %a, i32 %shift) {
-; CHECK-LABEL: test_i128_shr:
-
- %sh_prom = zext i32 %shift to i128
- %shr = lshr i128 %a, %sh_prom
-
-; CHECK: movz [[SIXTYFOUR]], #64
-; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw
-; CHECK-NEXT: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[REVSHAMT]]
-; CHECK: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[SHAMT:x[0-9]+]]
-; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]]
-; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64
-; CHECK-NEXT: lsr [[TRUEVAL:x[0-9]+]], [[HI]], [[EXTRASHAMT]]
-; CHECK-NEXT: cmp [[EXTRASHAMT]], #0
-; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], [[TRUEVAL]], [[FALSEVAL]], ge
-; CHECK-NEXT: lsr [[TMP3:x[0-9]+]], [[HI]], [[SHAMT]]
-; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], xzr, [[TMP3]], ge
-
- ret i128 %shr
-}
diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll
index 03c6d8d..9f7dd99 100644
--- a/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
@varfloat = global float 0.0
@vardouble = global double 0.0
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
index 076ae27..f47b490 100644
--- a/test/CodeGen/AArch64/init-array.ll
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
define internal void @_GLOBAL__I_a() section ".text.startup" {
ret void
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
index 61bbfc2..9d833d9 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
@@ -1,7 +1,7 @@
-; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s
define void @foo() {
; Out of range immediate for I.
- call void asm sideeffect "add x0, x0, $0", "I"(i32 4096)
+ call void asm sideeffect "add x0, x0, $0", "I"(i32 4097)
ret void
}
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
index 40746e1..6ffc05d 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+; RUN: not llc -mtriple=arm64-apple-ios7.0 -o - %s
define void @foo() {
; 32-bit bitpattern ending in 1101 can't be produced.
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
index 2c53381..1726013 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+; RUN: not llc -mtriple=aarch64-none-linux-gnu -o - %s
define void @foo() {
; 32-bit bitpattern ending in 1101 can't be produced.
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
index d82d5a2..3c2f60c 100644
--- a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+; RUN: not llc -mtriple=arm64-apple-ios7.0 -o - %s
define void @foo() {
; 32-bit bitpattern ending in 1101 can't be produced.
diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll
deleted file mode 100644
index 365453c..0000000
--- a/test/CodeGen/AArch64/inline-asm-constraints.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -no-integrated-as < %s | FileCheck %s
-
-define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
-; CHECK-LABEL: test_inline_constraint_r:
- %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset)
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
- ret i64 %val
-}
-
-define i16 @test_small_reg(i16 %lhs, i16 %rhs) {
-; CHECK-LABEL: test_small_reg:
- %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs)
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
- ret i16 %val
-}
-
-define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) {
-; CHECK-LABEL: test_inline_constraint_r_imm:
- %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12)
-; CHECK: movz [[FOUR:x[0-9]+]], #4
-; CHECK: movz [[TWELVE:w[0-9]+]], #12
-; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw
- ret i64 %val
-}
-
-; m is permitted to have a base/offset form. We don't currently do
-; that, though.
-define i32 @test_inline_constraint_m(i32 *%ptr) {
-; CHECK-LABEL: test_inline_constraint_m:
- %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr)
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
- ret i32 %val
-}
-
-@arr = global [8 x i32] zeroinitializer
-
-; Q should *never* have base/offset form even if given the chance.
-define i32 @test_inline_constraint_Q(i32 *%ptr) {
-; CHECK-LABEL: test_inline_constraint_Q:
- %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1))
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
- ret i32 %val
-}
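
A sketch of the m/Q distinction exercised by the two tests above
(assuming GCC-style constraint semantics): "m" may legally expand to a
base/offset operand such as "[x0, #4]", LLVM just never emits that form
yet, whereas "Q" must always expand to a bare "[reg]" operand, so both
tests currently expect only the plain "[x0]" addressing form.
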
-
-@dump = global fp128 zeroinitializer
-
-define void @test_inline_constraint_w(<8 x i8> %vec64, <4 x float> %vec128, half %hlf, float %flt, double %dbl, fp128 %quad) {
-; CHECK: test_inline_constraint_w:
- call <8 x i8> asm sideeffect "add $0.8b, $1.8b, $1.8b", "=w,w"(<8 x i8> %vec64)
- call <8 x i8> asm sideeffect "fadd $0.4s, $1.4s, $1.4s", "=w,w"(<4 x float> %vec128)
-; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-
- ; Arguably it's semantically dodgy to output "vN", but it's what GCC
- ; does, so purely for compatibility we want vector registers to be output.
- call float asm sideeffect "fcvt ${0:s}, ${1:h}", "=w,w"(half undef)
- call float asm sideeffect "fadd $0.2s, $0.2s, $0.2s", "=w,w"(float %flt)
- call double asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(double %dbl)
- call fp128 asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(fp128 %quad)
-; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
-; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- ret void
-}
-
-define void @test_inline_constraint_I() {
-; CHECK-LABEL: test_inline_constraint_I:
- call void asm sideeffect "add x0, x0, $0", "I"(i32 0)
- call void asm sideeffect "add x0, x0, $0", "I"(i64 4095)
-; CHECK: add x0, x0, #0
-; CHECK: add x0, x0, #4095
-
- ret void
-}
-
-; Skip J because it's useless
-
-define void @test_inline_constraint_K() {
-; CHECK-LABEL: test_inline_constraint_K:
- call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa
- call void asm sideeffect "and w0, w0, $0", "K"(i32 65535)
-; CHECK: and w0, w0, #-1431655766
-; CHECK: and w0, w0, #65535
-
- ret void
-}
-
-define void @test_inline_constraint_L() {
-; CHECK-LABEL: test_inline_constraint_L:
- call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0xaaaaaaaa
- call void asm sideeffect "and x0, x0, $0", "L"(i64 65535)
-; CHECK: and x0, x0, #4294967296
-; CHECK: and x0, x0, #65535
-
- ret void
-}
-
-; Skip M and N because we don't support MOV pseudo-instructions yet.
-
-@var = global i32 0
-
-define void @test_inline_constraint_S() {
-; CHECK-LABEL: test_inline_constraint_S:
- call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
- call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var)
- call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var)
-; CHECK: adrp x0, var
-; CHECK: adrp x0, var
-; CHECK: add x0, x0, #:lo12:var
- ret void
-}
-
-define i32 @test_inline_constraint_S_label(i1 %in) {
-; CHECK-LABEL: test_inline_constraint_S_label:
- call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
-; CHECK: adr x0, .Ltmp{{[0-9]+}}
- br i1 %in, label %loc, label %loc2
-loc:
- ret i32 0
-loc2:
- ret i32 42
-}
-
-define void @test_inline_constraint_Y() {
-; CHECK-LABEL: test_inline_constraint_Y:
- call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0)
-; CHECK: fcmp s0, #0.0
- ret void
-}
-
-define void @test_inline_constraint_Z() {
-; CHECK-LABEL: test_inline_constraint_Z:
- call void asm sideeffect "cmp w0, $0", "Z"(i32 0)
-; CHECK: cmp w0, #0
- ret void
-}
diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll
deleted file mode 100644
index cb66335..0000000
--- a/test/CodeGen/AArch64/inline-asm-modifiers.ll
+++ /dev/null
@@ -1,147 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -no-integrated-as < %s | FileCheck %s
-
-@var_simple = hidden global i32 0
-@var_got = global i32 0
-@var_tlsgd = thread_local global i32 0
-@var_tlsld = thread_local(localdynamic) global i32 0
-@var_tlsie = thread_local(initialexec) global i32 0
-@var_tlsle = thread_local(localexec) global i32 0
-
-define void @test_inline_modifier_L() nounwind {
-; CHECK-LABEL: test_inline_modifier_L:
- call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple)
- call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got)
- call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd)
- call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld)
- call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie)
- call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle)
-; CHECK: add x0, x0, #:lo12:var_simple
-; CHECK: ldr x0, [x0, #:got_lo12:var_got]
-; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd
-; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld
-; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie]
-; CHECK: add x0, x0, #:tprel_lo12:var_tlsle
-
- call void asm sideeffect "add x0, x0, ${0:L}", "Si,~{x0}"(i32 64)
- call void asm sideeffect "ldr x0, [x0, ${0:L}]", "Si,~{x0}"(i32 64)
-; CHECK: add x0, x0, #64
-; CHECK: ldr x0, [x0, #64]
-
- ret void
-}
-
-define void @test_inline_modifier_G() nounwind {
-; CHECK-LABEL: test_inline_modifier_G:
- call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld)
- call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle)
-; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
-; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
-
- call void asm sideeffect "add x0, x0, ${0:G}", "Si,~{x0}"(i32 42)
-; CHECK: add x0, x0, #42
- ret void
-}
-
-define void @test_inline_modifier_A() nounwind {
-; CHECK-LABEL: test_inline_modifier_A:
- call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple)
- call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got)
- call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd)
- call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie)
- ; N.b. All tprel and dtprel relocs are modified: lo12 or granules.
-; CHECK: adrp x0, var_simple
-; CHECK: adrp x0, :got:var_got
-; CHECK: adrp x0, :tlsdesc:var_tlsgd
-; CHECK: adrp x0, :gottprel:var_tlsie
-
- call void asm sideeffect "adrp x0, ${0:A}", "Si,~{x0}"(i32 40)
-; CHECK: adrp x0, #40
-
- ret void
-}
-
-define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind {
-; CHECK-LABEL: test_inline_modifier_wx:
- call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small)
- call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small)
- call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small)
-; CHECK: //APP
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-
- call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big)
- call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big)
- call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big)
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-
- call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0)
- call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0)
-; CHECK: add {{w[0-9]+}}, wzr, wzr
-; CHECK: add {{x[0-9]+}}, xzr, xzr
-
- call i32 asm sideeffect "add ${0:w}, ${0:w}, ${1:w}", "=r,Ir,0"(i32 123, i32 %small)
- call i64 asm sideeffect "add ${0:x}, ${0:x}, ${1:x}", "=r,Ir,0"(i32 456, i64 %big)
-; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #123
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #456
-
- ret void
-}
-
-define void @test_inline_modifier_bhsdq() nounwind {
-; CHECK-LABEL: test_inline_modifier_bhsdq:
- call float asm sideeffect "ldr ${0:b}, [sp]", "=w"()
- call float asm sideeffect "ldr ${0:h}, [sp]", "=w"()
- call float asm sideeffect "ldr ${0:s}, [sp]", "=w"()
- call float asm sideeffect "ldr ${0:d}, [sp]", "=w"()
- call float asm sideeffect "ldr ${0:q}, [sp]", "=w"()
-; CHECK: ldr b0, [sp]
-; CHECK: ldr h0, [sp]
-; CHECK: ldr s0, [sp]
-; CHECK: ldr d0, [sp]
-; CHECK: ldr q0, [sp]
-
- call double asm sideeffect "ldr ${0:b}, [sp]", "=w"()
- call double asm sideeffect "ldr ${0:h}, [sp]", "=w"()
- call double asm sideeffect "ldr ${0:s}, [sp]", "=w"()
- call double asm sideeffect "ldr ${0:d}, [sp]", "=w"()
- call double asm sideeffect "ldr ${0:q}, [sp]", "=w"()
-; CHECK: ldr b0, [sp]
-; CHECK: ldr h0, [sp]
-; CHECK: ldr s0, [sp]
-; CHECK: ldr d0, [sp]
-; CHECK: ldr q0, [sp]
-
- call void asm sideeffect "fcmp b0, ${0:b}", "Yw"(float 0.0)
- call void asm sideeffect "fcmp h0, ${0:h}", "Yw"(float 0.0)
- call void asm sideeffect "fcmp s0, ${0:s}", "Yw"(float 0.0)
- call void asm sideeffect "fcmp d0, ${0:d}", "Yw"(float 0.0)
- call void asm sideeffect "fcmp q0, ${0:q}", "Yw"(float 0.0)
-; CHECK: fcmp b0, #0
-; CHECK: fcmp h0, #0
-; CHECK: fcmp s0, #0
-; CHECK: fcmp d0, #0
-; CHECK: fcmp q0, #0
-
- ret void
-}
-
-define void @test_inline_modifier_c() nounwind {
-; CHECK-LABEL: test_inline_modifier_c:
- call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3)
-; CHECK: adr x0, 3
-
- ret void
-}
-
-define void @test_inline_modifier_a() nounwind {
-; CHECK-LABEL: test_inline_modifier_a:
- call void asm sideeffect "prfm pldl1keep, ${0:a}", "r"(i32* @var_simple)
-; CHECK: adrp [[VARHI:x[0-9]+]], var_simple
-; CHECK: add x[[VARADDR:[0-9]+]], [[VARHI]], #:lo12:var_simple
-; CHECK: prfm pldl1keep, [x[[VARADDR]]]
- ret void
-}
-
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index 94717f5..1dfb789 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic <%s | FileCheck --check-prefix=CHECK-PIC %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s
define i32 @test_jumptable(i32 %in) {
; CHECK: test_jumptable
@@ -12,7 +12,7 @@ define i32 @test_jumptable(i32 %in) {
i32 4, label %lbl4
]
; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
-; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0
+; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0
; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3]
; CHECK: br [[DEST]]
@@ -24,7 +24,7 @@ define i32 @test_jumptable(i32 %in) {
; CHECK-LARGE: br [[DEST]]
; CHECK-PIC: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
-; CHECK-PIC: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0
+; CHECK-PIC: add x[[JT:[0-9]+]], [[JTPAGE]], {{#?}}:lo12:.LJTI0_0
; CHECK-PIC: ldrsw [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #2]
; CHECK-PIC: add [[TABLE:x[0-9]+]], [[DEST]], x[[JT]]
; CHECK-PIC: br [[TABLE]]
diff --git a/test/CodeGen/AArch64/large-consts.ll b/test/CodeGen/AArch64/large-consts.ll
index 1b769c6..6bf85e8 100644
--- a/test/CodeGen/AArch64/large-consts.ll
+++ b/test/CodeGen/AArch64/large-consts.ll
@@ -4,10 +4,11 @@
; it's not the linker's job to put it there.
define double @foo() {
-; CHECK: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [A,A,0xe0'A',0xd2'A']
-; CHECK: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [A,A,0xc0'A',0xf2'A']
-; CHECK: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [A,A,0xa0'A',0xf2'A']
-; CHECK: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [A,A,0x80'A',0xf2'A']
+
+; CHECK: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2]
+; CHECK: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2]
+; CHECK: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b101AAAAA,0xf2]
+; CHECK: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b100AAAAA,0xf2]
ret double 3.14159
}
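
A sketch of how the sequence above builds the address (assuming
standard MOVZ/MOVK semantics): the ":abs_g3:" movz sets bits 48-63 and
clears everything else, then each ":abs_gN_nc:" movk patches bits 16*N
through 16*N+15 in place, so four instructions materialize the complete
64-bit address of .LCPI0_0 without relying on adrp's limited range.
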
diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll
deleted file mode 100644
index fde3036..0000000
--- a/test/CodeGen/AArch64/large-frame.ll
+++ /dev/null
@@ -1,119 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
-declare void @use_addr(i8*)
-
-@addr = global i8* null
-
-define void @test_bigframe() {
-; CHECK-LABEL: test_bigframe:
-; CHECK: .cfi_startproc
-
- %var1 = alloca i8, i32 20000000
- %var2 = alloca i8, i32 16
- %var3 = alloca i8, i32 20000000
-; CHECK: sub sp, sp, #496
-; CHECK: .cfi_def_cfa sp, 496
-; CHECK: str x30, [sp, #488]
- ; Total adjust is 39999536
-; CHECK: movz [[SUBCONST:x[0-9]+]], #22576
-; CHECK: movk [[SUBCONST]], #610, lsl #16
-; CHECK: sub sp, sp, [[SUBCONST]]
-; CHECK: .cfi_def_cfa sp, 40000032
-; CHECK: .cfi_offset x30, -8
-
- ; Total offset is 20000024
-; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544
-; CHECK: movk [[VAR1OFFSET]], #305, lsl #16
-; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]]
- store volatile i8* %var1, i8** @addr
-
- %var1plus2 = getelementptr i8* %var1, i32 2
- store volatile i8* %var1plus2, i8** @addr
-
-; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528
-; CHECK: movk [[VAR2OFFSET]], #305, lsl #16
-; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]]
- store volatile i8* %var2, i8** @addr
-
- %var2plus2 = getelementptr i8* %var2, i32 2
- store volatile i8* %var2plus2, i8** @addr
-
- store volatile i8* %var3, i8** @addr
-
- %var3plus2 = getelementptr i8* %var3, i32 2
- store volatile i8* %var3plus2, i8** @addr
-
-; CHECK: movz [[ADDCONST:x[0-9]+]], #22576
-; CHECK: movk [[ADDCONST]], #610, lsl #16
-; CHECK: add sp, sp, [[ADDCONST]]
-; CHECK: .cfi_endproc
- ret void
-}
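
A worked check of the constants above: movz #22576 plus movk #610,
lsl #16 gives 610*65536 + 22576 = 39999536, and adding the initial
496-byte frame reproduces the 40000032 recorded by .cfi_def_cfa,
consistent with the two 20000000-byte allocas plus padding. Likewise
305*65536 + 11544 = 20000024 for var1's offset.
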
-
-define void @test_mediumframe() {
-; CHECK-LABEL: test_mediumframe:
- %var1 = alloca i8, i32 1000000
- %var2 = alloca i8, i32 16
- %var3 = alloca i8, i32 1000000
-; CHECK: sub sp, sp, #496
-; CHECK: str x30, [sp, #488]
-; CHECK: sub sp, sp, #688
-; CHECK-NEXT: sub sp, sp, #488, lsl #12
-
- store volatile i8* %var1, i8** @addr
-; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600
-; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12
-
- %var1plus2 = getelementptr i8* %var1, i32 2
- store volatile i8* %var1plus2, i8** @addr
-; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
-
- store volatile i8* %var2, i8** @addr
-; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584
-; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12
-
- %var2plus2 = getelementptr i8* %var2, i32 2
- store volatile i8* %var2plus2, i8** @addr
-; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
-
- store volatile i8* %var3, i8** @addr
-
- %var3plus2 = getelementptr i8* %var3, i32 2
- store volatile i8* %var3plus2, i8** @addr
-
-; CHECK: add sp, sp, #688
-; CHECK: add sp, sp, #488, lsl #12
-; CHECK: ldr x30, [sp, #488]
-; CHECK: add sp, sp, #496
- ret void
-}
-
-
-@bigspace = global [8 x i64] zeroinitializer
-
-; If temporary registers are allocated for adjustment, they should *not* clobber
-; argument registers.
-define void @test_tempallocation([8 x i64] %val) nounwind {
-; CHECK-LABEL: test_tempallocation:
- %var = alloca i8, i32 1000000
-; CHECK: sub sp, sp,
-
-; Make sure the prologue is reasonably efficient
-; CHECK-NEXT: stp x29, x30, [sp,
-; CHECK-NEXT: stp x25, x26, [sp,
-; CHECK-NEXT: stp x23, x24, [sp,
-; CHECK-NEXT: stp x21, x22, [sp,
-; CHECK-NEXT: stp x19, x20, [sp,
-
-; Make sure we don't trash an argument register
-; CHECK-NOT: movz {{x[0-7],}}
-; CHECK: sub sp, sp,
-
-; CHECK-NOT: movz {{x[0-7],}}
-
-; CHECK: bl use_addr
- call void @use_addr(i8* %var)
-
- store [8 x i64] %val, [8 x i64]* @bigspace
- ret void
-; CHECK: ret
-}
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
new file mode 100644
index 0000000..1ce5c95
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -0,0 +1,301 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+
+; This file contains tests for the AArch64 load/store optimizer.
+
+%padding = type { i8*, i8*, i8*, i8* }
+%s.word = type { i32, i32 }
+%s.doubleword = type { i64, i32 }
+%s.quadword = type { fp128, i32 }
+%s.float = type { float, i32 }
+%s.double = type { double, i32 }
+%struct.word = type { %padding, %s.word }
+%struct.doubleword = type { %padding, %s.doubleword }
+%struct.quadword = type { %padding, %s.quadword }
+%struct.float = type { %padding, %s.float }
+%struct.double = type { %padding, %s.double }
+
+; Check the following transform:
+;
+; (ldr|str) X, [x0, #32]
+; ...
+; add x0, x0, #32
+; ->
+; (ldr|str) X, [x0, #32]!
+;
+; with X being either w1, x1, s0, d0 or q0.
+
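
A concrete instance of the pre-indexed transform, with illustrative
register choices:

;   ldr w1, [x0, #32]            ldr w1, [x0, #32]!
;   ...                     ->   ...
;   add x0, x0, #32              (the add is folded into the
;                                 load's write-back)
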
+declare void @bar_word(%s.word*, i32)
+
+define void @load-pre-indexed-word(%struct.word* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-word
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1, i32 0
+ %add = load i32* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1
+ tail call void @bar_word(%s.word* %c, i32 %add)
+ ret void
+}
+
+define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-word
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1, i32 0
+ store i32 %val, i32* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1
+ tail call void @bar_word(%s.word* %c, i32 %val)
+ ret void
+}
+
+declare void @bar_doubleword(%s.doubleword*, i64)
+
+define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-doubleword
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1, i32 0
+ %add = load i64* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1
+ tail call void @bar_doubleword(%s.doubleword* %c, i64 %add)
+ ret void
+}
+
+define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-doubleword
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1, i32 0
+ store i64 %val, i64* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1
+ tail call void @bar_doubleword(%s.doubleword* %c, i64 %val)
+ ret void
+}
+
+declare void @bar_quadword(%s.quadword*, fp128)
+
+define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-quadword
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1, i32 0
+ %add = load fp128* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1
+ tail call void @bar_quadword(%s.quadword* %c, fp128 %add)
+ ret void
+}
+
+define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-quadword
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1, i32 0
+ store fp128 %val, fp128* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1
+ tail call void @bar_quadword(%s.quadword* %c, fp128 %val)
+ ret void
+}
+
+declare void @bar_float(%s.float*, float)
+
+define void @load-pre-indexed-float(%struct.float* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-float
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1, i32 0
+ %add = load float* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1
+ tail call void @bar_float(%s.float* %c, float %add)
+ ret void
+}
+
+define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-float
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1, i32 0
+ store float %val, float* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1
+ tail call void @bar_float(%s.float* %c, float %val)
+ ret void
+}
+
+declare void @bar_double(%s.double*, double)
+
+define void @load-pre-indexed-double(%struct.double* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-double
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1, i32 0
+ %add = load double* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1
+ tail call void @bar_double(%s.double* %c, double %add)
+ ret void
+}
+
+define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-double
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1, i32 0
+ store double %val, double* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1
+ tail call void @bar_double(%s.double* %c, double %val)
+ ret void
+}
+
+; Check the following transform:
+;
+; ldr X, [x20]
+; ...
+; add x20, x20, #32
+; ->
+; ldr X, [x20], #32
+;
+; with X being either w0, x0, s0, d0 or q0.
+
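
A concrete instance of the post-indexed transform, with illustrative
register choices:

;   ldr x1, [x20]                ldr x1, [x20], #32
;   ...                     ->   ...
;   add x20, x20, #32            (the add becomes the load's
;                                 post-increment)
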
+define void @load-post-indexed-word(i32* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-word
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16
+entry:
+ %gep1 = getelementptr i32* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i32* %iv2, i64 -1
+ %load = load i32* %gep2
+ call void @use-word(i32 %load)
+ %load2 = load i32* %iv2
+ call void @use-word(i32 %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i32* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-doubleword
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32
+entry:
+ %gep1 = getelementptr i64* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i64* %iv2, i64 -1
+ %load = load i64* %gep2
+ call void @use-doubleword(i64 %load)
+ %load2 = load i64* %iv2
+ call void @use-doubleword(i64 %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i64* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-quadword
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64
+entry:
+ %gep1 = getelementptr <2 x i64>* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr <2 x i64>* %iv2, i64 -1
+ %load = load <2 x i64>* %gep2
+ call void @use-quadword(<2 x i64> %load)
+ %load2 = load <2 x i64>* %iv2
+ call void @use-quadword(<2 x i64> %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr <2 x i64>* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @load-post-indexed-float(float* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-float
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16
+entry:
+ %gep1 = getelementptr float* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr float* %iv2, i64 -1
+ %load = load float* %gep2
+ call void @use-float(float %load)
+ %load2 = load float* %iv2
+ call void @use-float(float %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr float* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @load-post-indexed-double(double* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-double
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32
+entry:
+ %gep1 = getelementptr double* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr double* %iv2, i64 -1
+ %load = load double* %gep2
+ call void @use-double(double %load)
+ %load2 = load double* %iv2
+ call void @use-double(double %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr double* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+declare void @use-word(i32)
+declare void @use-doubleword(i64)
+declare void @use-quadword(<2 x i64>)
+declare void @use-float(float)
+declare void @use-double(double)
diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll
index db30fd9..e2fa08b 100644
--- a/test/CodeGen/AArch64/ldst-regoffset.ll
+++ b/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
@var_8bit = global i8 0
@@ -9,14 +9,14 @@
@var_float = global float 0.0
@var_double = global double 0.0
-define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) {
+define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_8bit:
%addr8_sxtw = getelementptr i8* %base, i32 %off32
%val8_sxtw = load volatile i8* %addr8_sxtw
%val32_signed = sext i8 %val8_sxtw to i32
store volatile i32 %val32_signed, i32* @var_32bit
-; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{[wx][0-9]+}}, sxtw]
%addr_lsl = getelementptr i8* %base, i64 %off64
%val8_lsl = load volatile i8* %addr_lsl
@@ -31,20 +31,20 @@ define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) {
%val8_uxtw = load volatile i8* %addr_uxtw
%newval8 = add i8 %val8_uxtw, 1
store volatile i8 %newval8, i8* @var_8bit
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
ret void
}
-define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
+define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_16bit:
%addr8_sxtwN = getelementptr i16* %base, i32 %off32
%val8_sxtwN = load volatile i16* %addr8_sxtwN
%val32_signed = sext i16 %val8_sxtwN to i32
store volatile i32 %val32_signed, i32* @var_32bit
-; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #1]
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #1]
%addr_lslN = getelementptr i16* %base, i64 %off64
%val8_lslN = load volatile i16* %addr_lslN
@@ -59,7 +59,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
%val8_uxtw = load volatile i16* %addr_uxtw
%newval8 = add i16 %val8_uxtw, 1
store volatile i16 %newval8, i16* @var_16bit
-; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
%base_sxtw = ptrtoint i16* %base to i64
%offset_sxtw = sext i32 %off32 to i64
@@ -68,7 +68,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
%val16_sxtw = load volatile i16* %addr_sxtw
%val64_signed = sext i16 %val16_sxtw to i64
store volatile i64 %val64_signed, i64* @var_64bit
-; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{[wx][0-9]+}}, sxtw]
%base_lsl = ptrtoint i16* %base to i64
@@ -87,17 +87,17 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
%val32 = load volatile i32* @var_32bit
%val16_trunc32 = trunc i32 %val32 to i16
store volatile i16 %val16_trunc32, i16* %addr_uxtwN
-; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #1]
ret void
}
-define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
+define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_32bit:
%addr_sxtwN = getelementptr i32* %base, i32 %off32
%val_sxtwN = load volatile i32* %addr_sxtwN
store volatile i32 %val_sxtwN, i32* @var_32bit
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #2]
%addr_lslN = getelementptr i32* %base, i64 %off64
%val_lslN = load volatile i32* %addr_lslN
@@ -111,7 +111,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
%val_uxtw = load volatile i32* %addr_uxtw
%newval8 = add i32 %val_uxtw, 1
store volatile i32 %newval8, i32* @var_32bit
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
%base_sxtw = ptrtoint i32* %base to i64
@@ -121,7 +121,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
%val16_sxtw = load volatile i32* %addr_sxtw
%val64_signed = sext i32 %val16_sxtw to i64
store volatile i64 %val64_signed, i64* @var_64bit
-; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
%base_lsl = ptrtoint i32* %base to i64
@@ -139,17 +139,17 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
%val32 = load volatile i32* @var_32bit
store volatile i32 %val32, i32* %addr_uxtwN
-; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #2]
ret void
}
-define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
+define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_64bit:
%addr_sxtwN = getelementptr i64* %base, i32 %off32
%val_sxtwN = load volatile i64* %addr_sxtwN
store volatile i64 %val_sxtwN, i64* @var_64bit
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #3]
%addr_lslN = getelementptr i64* %base, i64 %off64
%val_lslN = load volatile i64* %addr_lslN
@@ -163,7 +163,7 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
%val8_uxtw = load volatile i64* %addr_uxtw
%newval8 = add i64 %val8_uxtw, 1
store volatile i64 %newval8, i64* @var_64bit
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
%base_sxtw = ptrtoint i64* %base to i64
%offset_sxtw = sext i32 %off32 to i64
@@ -171,7 +171,7 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
%addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
%val64_sxtw = load volatile i64* %addr_sxtw
store volatile i64 %val64_sxtw, i64* @var_64bit
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
%base_lsl = ptrtoint i64* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
@@ -187,17 +187,17 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
%val64 = load volatile i64* @var_64bit
store volatile i64 %val64, i64* %addr_uxtwN
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #3]
ret void
}
-define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
+define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_float:
%addr_sxtwN = getelementptr float* %base, i32 %off32
%val_sxtwN = load volatile float* %addr_sxtwN
store volatile float %val_sxtwN, float* @var_float
-; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #2]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
%addr_lslN = getelementptr float* %base, i64 %off64
@@ -212,7 +212,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
%addr_uxtw = inttoptr i64 %addrint1_uxtw to float*
%val_uxtw = load volatile float* %addr_uxtw
store volatile float %val_uxtw, float* @var_float
-; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
%base_sxtw = ptrtoint float* %base to i64
@@ -221,7 +221,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
%addr_sxtw = inttoptr i64 %addrint_sxtw to float*
%val64_sxtw = load volatile float* %addr_sxtw
store volatile float %val64_sxtw, float* @var_float
-; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
%base_lsl = ptrtoint float* %base to i64
@@ -239,18 +239,18 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to float*
%val64 = load volatile float* @var_float
store volatile float %val64, float* %addr_uxtwN
-; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #2]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
ret void
}
-define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
+define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_double:
%addr_sxtwN = getelementptr double* %base, i32 %off32
%val_sxtwN = load volatile double* %addr_sxtwN
store volatile double %val_sxtwN, double* @var_double
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #3]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
%addr_lslN = getelementptr double* %base, i64 %off64
@@ -265,7 +265,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
%addr_uxtw = inttoptr i64 %addrint1_uxtw to double*
%val_uxtw = load volatile double* %addr_uxtw
store volatile double %val_uxtw, double* @var_double
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
%base_sxtw = ptrtoint double* %base to i64
@@ -274,7 +274,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
%addr_sxtw = inttoptr i64 %addrint_sxtw to double*
%val64_sxtw = load volatile double* %addr_sxtw
store volatile double %val64_sxtw, double* @var_double
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
%base_lsl = ptrtoint double* %base to i64
@@ -292,26 +292,26 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to double*
%val64 = load volatile double* @var_double
store volatile double %val64, double* %addr_uxtwN
-; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #3]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
ret void
}
-define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
+define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_128bit:
%addr_sxtwN = getelementptr fp128* %base, i32 %off32
%val_sxtwN = load volatile fp128* %addr_sxtwN
store volatile fp128 %val_sxtwN, fp128* %base
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
-; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
%addr_lslN = getelementptr fp128* %base, i64 %off64
%val_lslN = load volatile fp128* %addr_lslN
store volatile fp128 %val_lslN, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
-; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
%addrint_uxtw = ptrtoint fp128* %base to i64
%offset_uxtw = zext i32 %off32 to i64
@@ -319,8 +319,8 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
%addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128*
%val_uxtw = load volatile fp128* %addr_uxtw
store volatile fp128 %val_uxtw, fp128* %base
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
-; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
%base_sxtw = ptrtoint fp128* %base to i64
%offset_sxtw = sext i32 %off32 to i64
@@ -328,8 +328,8 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
%addr_sxtw = inttoptr i64 %addrint_sxtw to fp128*
%val64_sxtw = load volatile fp128* %addr_sxtw
store volatile fp128 %val64_sxtw, fp128* %base
-; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
-; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
%base_lsl = ptrtoint fp128* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
@@ -337,7 +337,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
%val64_lsl = load volatile fp128* %addr_lsl
store volatile fp128 %val64_lsl, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
-; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
%base_uxtwN = ptrtoint fp128* %base to i64
%offset_uxtwN = zext i32 %off32 to i64
@@ -346,7 +346,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128*
%val64 = load volatile fp128* %base
store volatile fp128 %val64, fp128* %addr_uxtwN
-; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4]
-; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #4]
+; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
ret void
}
diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll
index bea5bb5..1de8443 100644
--- a/test/CodeGen/AArch64/ldst-unscaledimm.ll
+++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
@var_8bit = global i8 0
@@ -160,7 +160,7 @@ define void @ldst_32bit() {
%val64_unsigned = zext i32 %val32_zext to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Sign-extension to 64-bits
%addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12
@@ -169,7 +169,7 @@ define void @ldst_32bit() {
%val64_signed = sext i32 %val32_sext to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12]
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Truncation from 64-bits
%addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255
diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll
index 44c1586..e171d22 100644
--- a/test/CodeGen/AArch64/ldst-unsignedimm.ll
+++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
@var_8bit = global i8 0
@@ -20,25 +20,25 @@ define void @ldst_8bit() {
%val32_signed = sext i8 %val8_sext32 to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_8bit
-; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match a zero-extending load volatile 8-bit -> 32-bit
%val8_zext32 = load volatile i8* @var_8bit
%val32_unsigned = zext i8 %val8_zext32 to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match an any-extending load volatile 8-bit -> 32-bit
%val8_anyext = load volatile i8* @var_8bit
%newval8 = add i8 %val8_anyext, 1
store volatile i8 %newval8, i8* @var_8bit
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match a sign-extending load volatile 8-bit -> 64-bit
%val8_sext64 = load volatile i8* @var_8bit
%val64_signed = sext i8 %val8_sext64 to i64
store volatile i64 %val64_signed, i64* @var_64bit
-; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match a zero-extending load volatile 8-bit -> 64-bit.
; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
@@ -46,19 +46,19 @@ define void @ldst_8bit() {
%val8_zext64 = load volatile i8* @var_8bit
%val64_unsigned = zext i8 %val8_zext64 to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
-; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; truncating store volatile 32-bits to 8-bits
%val32 = load volatile i32* @var_32bit
%val8_trunc32 = trunc i32 %val32 to i8
store volatile i8 %val8_trunc32, i8* @var_8bit
-; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; truncating store volatile 64-bits to 8-bits
%val64 = load volatile i64* @var_64bit
%val8_trunc64 = trunc i64 %val64 to i8
store volatile i8 %val8_trunc64, i8* @var_8bit
-; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
ret void
}
@@ -74,25 +74,25 @@ define void @ldst_16bit() {
%val32_signed = sext i16 %val16_sext32 to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_16bit
-; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match a zero-extending load volatile 16-bit -> 32-bit
%val16_zext32 = load volatile i16* @var_16bit
%val32_unsigned = zext i16 %val16_zext32 to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
-; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match an any-extending load volatile 16-bit -> 32-bit
%val16_anyext = load volatile i16* @var_16bit
%newval16 = add i16 %val16_anyext, 1
store volatile i16 %newval16, i16* @var_16bit
-; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match a sign-extending load volatile 16-bit -> 64-bit
%val16_sext64 = load volatile i16* @var_16bit
%val64_signed = sext i16 %val16_sext64 to i64
store volatile i64 %val64_signed, i64* @var_64bit
-; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match a zero-extending load volatile 16-bit -> 64-bit.
; This uses the fact that ldrh w0, [x0] will zero out the high 32 bits

@@ -100,19 +100,19 @@ define void @ldst_16bit() {
%val16_zext64 = load volatile i16* @var_16bit
%val64_unsigned = zext i16 %val16_zext64 to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
-; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; truncating store volatile 32-bits to 16-bits
%val32 = load volatile i32* @var_32bit
%val16_trunc32 = trunc i32 %val32 to i16
store volatile i16 %val16_trunc32, i16* @var_16bit
-; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; truncating store volatile 64-bits to 16-bits
%val64 = load volatile i64* @var_64bit
%val16_trunc64 = trunc i64 %val64 to i16
store volatile i16 %val16_trunc64, i16* @var_16bit
-; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
ret void
}
@@ -124,29 +124,29 @@ define void @ldst_32bit() {
%val32_noext = load volatile i32* @var_32bit
store volatile i32 %val32_noext, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_32bit
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
-; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; Zero-extension to 64-bits
%val32_zext = load volatile i32* @var_32bit
%val64_unsigned = zext i32 %val32_zext to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Sign-extension to 64-bits
%val32_sext = load volatile i32* @var_32bit
%val64_signed = sext i32 %val32_sext to i64
store volatile i64 %val64_signed, i64* @var_64bit
-; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Truncation from 64-bits
%val64_trunc = load volatile i64* @var_64bit
%val32_trunc = trunc i64 %val64_trunc to i32
store volatile i32 %val32_trunc, i32* @var_32bit
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
-; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
ret void
}
@@ -165,7 +165,7 @@ define void @ldst_complex_offsets() {
; CHECK: ldst_complex_offsets
%arr8_addr = load volatile i8** @arr8
; CHECK: adrp {{x[0-9]+}}, arr8
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr8]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr8]
%arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1
%arr8_sub1 = load volatile i8* %arr8_sub1_addr
@@ -180,7 +180,7 @@ define void @ldst_complex_offsets() {
%arr16_addr = load volatile i16** @arr16
; CHECK: adrp {{x[0-9]+}}, arr16
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr16]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr16]
%arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1
%arr16_sub1 = load volatile i16* %arr16_sub1_addr
@@ -195,7 +195,7 @@ define void @ldst_complex_offsets() {
%arr32_addr = load volatile i32** @arr32
; CHECK: adrp {{x[0-9]+}}, arr32
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr32]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr32]
%arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1
%arr32_sub1 = load volatile i32* %arr32_sub1_addr
@@ -210,7 +210,7 @@ define void @ldst_complex_offsets() {
%arr64_addr = load volatile i64** @arr64
; CHECK: adrp {{x[0-9]+}}, arr64
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr64]
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr64]
%arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1
%arr64_sub1 = load volatile i64* %arr64_sub1_addr
@@ -230,11 +230,11 @@ define void @ldst_float() {
%valfp = load volatile float* @var_float
; CHECK: adrp {{x[0-9]+}}, var_float
-; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_float]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
store volatile float %valfp, float* @var_float
-; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_float]
; CHECK-NOFP-NOT: str {{s[0-9]+}},
ret void
@@ -245,11 +245,11 @@ define void @ldst_double() {
%valfp = load volatile double* @var_double
; CHECK: adrp {{x[0-9]+}}, var_double
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_double]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
store volatile double %valfp, double* @var_double
-; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_double]
; CHECK-NOFP-NOT: str {{d[0-9]+}},
ret void
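As a reminder of the negative-check idiom running through this file: under -mattr=-fp-armv8 the CHECK-NOFP-NOT lines assert that no FP-register load or store appears at all. A minimal sketch, with an illustrative register:

;   CHECK-NOFP-NOT: ldr {{d[0-9]+}},
;   the -fp-armv8 run fails if the output contains, e.g., "ldr d0, [x0]"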
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index 9a66a00..77493d8 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -1,4 +1,11 @@
+import re
+
+config.suffixes = ['.ll']
+
targets = set(config.root.targets_to_build.split())
if not 'AArch64' in targets:
config.unsupported = True
+# For now we don't test arm64-win32.
+if re.search(r'cygwin|mingw32|win32', config.target_triple):
+ config.unsupported = True
diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll
deleted file mode 100644
index fc33aee..0000000
--- a/test/CodeGen/AArch64/literal_pools.ll
+++ /dev/null
@@ -1,103 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
-
-@var32 = global i32 0
-@var64 = global i64 0
-
-define void @foo() {
-; CHECK-LABEL: foo:
- %val32 = load i32* @var32
- %val64 = load i64* @var64
-
- %val32_lit32 = and i32 %val32, 123456785
- store volatile i32 %val32_lit32, i32* @var32
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]]
-
- %val64_lit32 = and i64 %val64, 305402420
- store volatile i64 %val64_lit32, i64* @var64
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]]
-
- %val64_lit32signed = and i64 %val64, -12345678
- store volatile i64 %val64_lit32signed, i64* @var64
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldrsw {{x[0-9]+}}, [x[[LITADDR]]]
-
- %val64_lit64 = and i64 %val64, 1234567898765432
- store volatile i64 %val64_lit64, i64* @var64
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
-; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{x[0-9]+}}, [x[[LITADDR]]]
-
- ret void
-}
-
-@varfloat = global float 0.0
-@vardouble = global double 0.0
-
-define void @floating_lits() {
-; CHECK-LABEL: floating_lits:
-
- %floatval = load float* @varfloat
- %newfloat = fadd float %floatval, 128.0
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
-; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], #:lo12:[[CURLIT]]]
-; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]]
-; CHECK-LARGE: fadd
-; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}},
-; CHECK-NOFP-LARGE-NOT: fadd
-
- store float %newfloat, float* @varfloat
-
- %doubleval = load double* @vardouble
- %newdouble = fadd double %doubleval, 129.0
-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
-; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], #:lo12:[[CURLIT]]]
-; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, [[LIT128]]
-; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, [[LIT129]]
-; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
-; CHECK-NOFP-NOT: fadd
-
-; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
-; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
-; CHECK-LARGE: ldr {{d[0-9]+}}, [x[[LITADDR]]]
-; CHECK-NOFP-LARGE-NOT: ldr {{d[0-9]+}},
-
- store double %newdouble, double* @vardouble
-
- ret void
-}
diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll
new file mode 100644
index 0000000..e53b8b6
--- /dev/null
+++ b/test/CodeGen/AArch64/literal_pools_float.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=cyclone | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @floating_lits() {
+; CHECK-LABEL: floating_lits:
+
+ %floatval = load float* @varfloat
+ %newfloat = fadd float %floatval, 128.0
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
+; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
+
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]]
+; CHECK-LARGE: fadd
+; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}},
+; CHECK-NOFP-LARGE-NOT: fadd
+
+ store float %newfloat, float* @varfloat
+
+ %doubleval = load double* @vardouble
+ %newdouble = fadd double %doubleval, 129.0
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
+; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
+; CHECK-NOFP-NOT: fadd
+
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{d[0-9]+}}, [x[[LITADDR]]]
+; CHECK-NOFP-LARGE-NOT: ldr {{d[0-9]+}},
+
+ store double %newdouble, double* @vardouble
+
+ ret void
+}
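The CHECK-LARGE sequence reflects how the large code model materializes a full 64-bit literal-pool address 16 bits at a time: movz sets bits [63:48] (abs_g3) and each movk patches the next lower half-word without disturbing the rest. An illustrative expansion for a hypothetical address 0x0123456789abcdef:

;   movz x0, #0x0123, lsl #48   ; abs_g3 half-word
;   movk x0, #0x4567, lsl #32   ; abs_g2_nc
;   movk x0, #0x89ab, lsl #16   ; abs_g1_nc
;   movk x0, #0xcdef            ; abs_g0_nc
;   ldr  s0, [x0]               ; load the literal through the assembled address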
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
index b5cef85..2f5b9f2 100644
--- a/test/CodeGen/AArch64/local_vars.ll
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s
; Make sure a reasonably sane prologue and epilogue are
; generated. This test is not robust in the face of a frame-handling
@@ -16,7 +16,7 @@
declare void @foo()
define void @trivial_func() nounwind {
-; CHECK: trivial_func: // @trivial_func
+; CHECK-LABEL: trivial_func: // @trivial_func
; CHECK-NEXT: // BB#0
; CHECK-NEXT: ret
@@ -24,11 +24,14 @@ define void @trivial_func() nounwind {
}
define void @trivial_fp_func() {
-; CHECK-WITHFP-LABEL: trivial_fp_func:
+; CHECK-WITHFP-AARCH64-LABEL: trivial_fp_func:
+; CHECK-WITHFP-AARCH64: sub sp, sp, #16
+; CHECK-WITHFP-AARCH64: stp x29, x30, [sp]
+; CHECK-WITHFP-AARCH64-NEXT: mov x29, sp
-; CHECK-WITHFP: sub sp, sp, #16
-; CHECK-WITHFP: stp x29, x30, [sp]
-; CHECK-WITHFP-NEXT: mov x29, sp
+; CHECK-WITHFP-ARM64-LABEL: trivial_fp_func:
+; CHECK-WITHFP-ARM64: stp x29, x30, [sp, #-16]!
+; CHECK-WITHFP-ARM64-NEXT: mov x29, sp
; Don't really care, but it would be a Bad Thing if this came after the epilogue.
; CHECK: bl foo
@@ -48,10 +51,10 @@ define void @stack_local() {
%val = load i64* @var
store i64 %val, i64* %local_var
-; CHECK: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
+; CHECK-DAG: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
store i64* %local_var, i64** @local_addr
-; CHECK: add {{x[0-9]+}}, sp, #{{[0-9]+}}
+; CHECK-DAG: add {{x[0-9]+}}, sp, #{{[0-9]+}}
ret void
}
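Switching these two directives from CHECK to CHECK-DAG lets the str and the add match in either order, since nothing forces the scheduler to emit them in source order. A sketch of output the CHECK-DAG pair accepts but plain CHECK would reject (offsets illustrative):

;   add x1, sp, #8       ; address taken first...
;   str x0, [sp, #8]     ; ...store second: still matches with -DAG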
diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll
index e04bb51..a5e4a99 100644
--- a/test/CodeGen/AArch64/logical-imm.ll
+++ b/test/CodeGen/AArch64/logical-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
@var32 = global i32 0
@var64 = global i64 0
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
index a08ba20..b249d72 100644
--- a/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
@var1_32 = global i32 0
@var2_32 = global i32 0
@@ -6,7 +6,7 @@
@var1_64 = global i64 0
@var2_64 = global i64 0
-define void @logical_32bit() {
+define void @logical_32bit() minsize {
; CHECK-LABEL: logical_32bit:
%val1 = load i32* @var1_32
%val2 = load i32* @var2_32
@@ -96,7 +96,7 @@ define void @logical_32bit() {
ret void
}
-define void @logical_64bit() {
+define void @logical_64bit() minsize {
; CHECK-LABEL: logical_64bit:
%val1 = load i64* @var1_64
%val2 = load i64* @var2_64
diff --git a/test/CodeGen/AArch64/mature-mc-support.ll b/test/CodeGen/AArch64/mature-mc-support.ll
index 06e3cc7..276c54d 100644
--- a/test/CodeGen/AArch64/mature-mc-support.ll
+++ b/test/CodeGen/AArch64/mature-mc-support.ll
@@ -1,11 +1,11 @@
; Test that inline assembly is parsed by the MC layer when MC support is mature
; (even when the output is assembly).
-; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t1
-; RUN: FileCheck %s < %t1
+; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t3
+; RUN: FileCheck %s < %t3
-; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t2
-; RUN: FileCheck %s < %t2
+; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t4
+; RUN: FileCheck %s < %t4
module asm " .this_directive_is_very_unlikely_to_exist"
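The RUN lines above follow the standard expected-failure idiom: `not llc` inverts the exit status, stdout is discarded, and stderr is captured into a temp file that FileCheck then scans for the diagnostic. A minimal sketch of the shape (the CHECK text below is illustrative, not part of this test):

; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t
; RUN: FileCheck %s < %t
; CHECK: error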
diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll
deleted file mode 100644
index 1555c48..0000000
--- a/test/CodeGen/AArch64/misched-basic-A53.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
-;
-; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
-; much higher than the ADD instructions in order to hide latency. When not
-; specifying a subtarget, the MADD will remain near the end of the block.
-;
-; CHECK: ********** MI Scheduling **********
-; CHECK: main
-; CHECK: *** Final schedule for BB#2 ***
-; CHECK: SU(13)
-; CHECK: MADDwwww
-; CHECK: SU(4)
-; CHECK: ADDwwi_lsl0_s
-; CHECK: ********** INTERVALS **********
-@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
-@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
-
-; Function Attrs: nounwind
-define i32 @main() #0 {
-entry:
- %retval = alloca i32, align 4
- %x = alloca [8 x i32], align 4
- %y = alloca [8 x i32], align 4
- %i = alloca i32, align 4
- %xx = alloca i32, align 4
- %yy = alloca i32, align 4
- store i32 0, i32* %retval
- %0 = bitcast [8 x i32]* %x to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false)
- %1 = bitcast [8 x i32]* %y to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false)
- store i32 0, i32* %xx, align 4
- store i32 0, i32* %yy, align 4
- store i32 0, i32* %i, align 4
- br label %for.cond
-
-for.cond: ; preds = %for.inc, %entry
- %2 = load i32* %i, align 4
- %cmp = icmp slt i32 %2, 8
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %3 = load i32* %i, align 4
- %idxprom = sext i32 %3 to i64
- %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
- %4 = load i32* %arrayidx, align 4
- %add = add nsw i32 %4, 1
- store i32 %add, i32* %xx, align 4
- %5 = load i32* %xx, align 4
- %add1 = add nsw i32 %5, 12
- store i32 %add1, i32* %xx, align 4
- %6 = load i32* %xx, align 4
- %add2 = add nsw i32 %6, 23
- store i32 %add2, i32* %xx, align 4
- %7 = load i32* %xx, align 4
- %add3 = add nsw i32 %7, 34
- store i32 %add3, i32* %xx, align 4
- %8 = load i32* %i, align 4
- %idxprom4 = sext i32 %8 to i64
- %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
- %10 = load i32* %yy, align 4
- %mul = mul nsw i32 %10, %9
- store i32 %mul, i32* %yy, align 4
- br label %for.inc
-
-for.inc: ; preds = %for.body
- %11 = load i32* %i, align 4
- %inc = add nsw i32 %11, 1
- store i32 %inc, i32* %i, align 4
- br label %for.cond
-
-for.end: ; preds = %for.cond
- %12 = load i32* %xx, align 4
- %13 = load i32* %yy, align 4
- %add6 = add nsw i32 %12, %13
- ret i32 %add6
-}
-
-
-; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to
-; hide latency. Whereas normally there would only be a single FADDvvv_4S
-; after it, this test checks to make sure there is more than one.
-;
-; CHECK: ********** MI Scheduling **********
-; CHECK: neon4xfloat:BB#0
-; CHECK: *** Final schedule for BB#0 ***
-; CHECK: FDIVvvv_4S
-; CHECK: FADDvvv_4S
-; CHECK: FADDvvv_4S
-; CHECK: ********** INTERVALS **********
-define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
- %tmp1 = fadd <4 x float> %A, %B;
- %tmp2 = fadd <4 x float> %A, %tmp1;
- %tmp3 = fadd <4 x float> %A, %tmp2;
- %tmp4 = fadd <4 x float> %A, %tmp3;
- %tmp5 = fadd <4 x float> %A, %tmp4;
- %tmp6 = fadd <4 x float> %A, %tmp5;
- %tmp7 = fadd <4 x float> %A, %tmp6;
- %tmp8 = fadd <4 x float> %A, %tmp7;
- %tmp9 = fdiv <4 x float> %A, %B;
- %tmp10 = fadd <4 x float> %tmp8, %tmp9;
-
- ret <4 x float> %tmp10
-}
-
-; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }
diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll
index 38e37db..93c1812 100644
--- a/test/CodeGen/AArch64/movw-consts.ll
+++ b/test/CodeGen/AArch64/movw-consts.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK
define i64 @test0() {
; CHECK-LABEL: test0:
@@ -9,43 +9,43 @@ define i64 @test0() {
define i64 @test1() {
; CHECK-LABEL: test1:
-; CHECK: movz x0, #1
+; CHECK: orr w0, wzr, #0x1
ret i64 1
}
define i64 @test2() {
; CHECK-LABEL: test2:
-; CHECK: movz x0, #65535
+; CHECK: orr w0, wzr, #0xffff
ret i64 65535
}
define i64 @test3() {
; CHECK-LABEL: test3:
-; CHECK: movz x0, #1, lsl #16
+; CHECK: orr w0, wzr, #0x10000
ret i64 65536
}
define i64 @test4() {
; CHECK-LABEL: test4:
-; CHECK: movz x0, #65535, lsl #16
+; CHECK: orr w0, wzr, #0xffff0000
ret i64 4294901760
}
define i64 @test5() {
; CHECK-LABEL: test5:
-; CHECK: movz x0, #1, lsl #32
+; CHECK: orr x0, xzr, #0x100000000
ret i64 4294967296
}
define i64 @test6() {
; CHECK-LABEL: test6:
-; CHECK: movz x0, #65535, lsl #32
+; CHECK: orr x0, xzr, #0xffff00000000
ret i64 281470681743360
}
define i64 @test7() {
; CHECK-LABEL: test7:
-; CHECK: movz x0, #1, lsl #48
+; CHECK: orr x0, xzr, #0x1000000000000
ret i64 281474976710656
}
@@ -53,7 +53,7 @@ define i64 @test7() {
; couldn't. Useful even for i64
define i64 @test8() {
; CHECK-LABEL: test8:
-; CHECK: movn w0, #60875
+; CHECK: movn w0, #{{60875|0xedcb}}
ret i64 4294906420
}
@@ -65,7 +65,7 @@ define i64 @test9() {
define i64 @test10() {
; CHECK-LABEL: test10:
-; CHECK: movn x0, #60875, lsl #16
+; CHECK: movn x0, #{{60875|0xedcb}}, lsl #16
ret i64 18446744069720047615
}
@@ -75,35 +75,35 @@ define i64 @test10() {
define void @test11() {
; CHECK-LABEL: test11:
-; CHECK: mov {{w[0-9]+}}, wzr
+; CHECK: str wzr
store i32 0, i32* @var32
ret void
}
define void @test12() {
; CHECK-LABEL: test12:
-; CHECK: movz {{w[0-9]+}}, #1
+; CHECK: orr {{w[0-9]+}}, wzr, #0x1
store i32 1, i32* @var32
ret void
}
define void @test13() {
; CHECK-LABEL: test13:
-; CHECK: movz {{w[0-9]+}}, #65535
+; CHECK: orr {{w[0-9]+}}, wzr, #0xffff
store i32 65535, i32* @var32
ret void
}
define void @test14() {
; CHECK-LABEL: test14:
-; CHECK: movz {{w[0-9]+}}, #1, lsl #16
+; CHECK: orr {{w[0-9]+}}, wzr, #0x10000
store i32 65536, i32* @var32
ret void
}
define void @test15() {
; CHECK-LABEL: test15:
-; CHECK: movz {{w[0-9]+}}, #65535, lsl #16
+; CHECK: orr {{w[0-9]+}}, wzr, #0xffff0000
store i32 4294901760, i32* @var32
ret void
}
@@ -119,6 +119,6 @@ define i64 @test17() {
; CHECK-LABEL: test17:
; Mustn't MOVN w0 here.
-; CHECK: movn x0, #2
+; CHECK: orr x0, xzr, #0xfffffffffffffffd
ret i64 -3
}
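These new checks lean on AArch64's logical-immediate form: orr from the zero register can materialize any constant whose bit pattern is a replicated, rotated run of contiguous ones, and the backend prefers it over movz/movk where it applies. Contrasting cases from the values above:

;   orr w0, wzr, #0xffff0000   ; sixteen contiguous ones, rotated: a valid logical immediate
;   orr x0, xzr, #0x100000000  ; a single one bit is also a (length-1) run: valid
;   movn w0, #0xedcb           ; 0xffff1234 is no rotated run of ones, so movn survives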
diff --git a/test/CodeGen/AArch64/movw-shift-encoding.ll b/test/CodeGen/AArch64/movw-shift-encoding.ll
index ec133bd..178fccc 100644
--- a/test/CodeGen/AArch64/movw-shift-encoding.ll
+++ b/test/CodeGen/AArch64/movw-shift-encoding.ll
@@ -7,8 +7,9 @@
define i32* @get_var() {
ret i32* @var
-; CHECK: movz x0, #:abs_g3:var // encoding: [A,A,0xe0'A',0xd2'A']
-; CHECK: movk x0, #:abs_g2_nc:var // encoding: [A,A,0xc0'A',0xf2'A']
-; CHECK: movk x0, #:abs_g1_nc:var // encoding: [A,A,0xa0'A',0xf2'A']
-; CHECK: movk x0, #:abs_g0_nc:var // encoding: [A,A,0x80'A',0xf2'A']
+
+; CHECK: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2]
+; CHECK: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2]
+; CHECK: movk x0, #:abs_g1_nc:var // encoding: [0bAAA00000,A,0b101AAAAA,0xf2]
+; CHECK: movk x0, #:abs_g0_nc:var // encoding: [0bAAA00000,A,0b100AAAAA,0xf2]
}
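In the rewritten encoding checks, 'A' marks a bit left for the relocation fixup to fill in, so 0bAAA00000 mixes literal opcode bits with fixup bits inside one byte; the old all-byte form [A,A,0xe0'A',0xd2'A'] could not express that split. An illustrative decode of the first updated line:

;   movz x0, #:abs_g3:var  // encoding: [0bAAA00000,A,0b111AAAAA,0xd2]
;   bits marked A      -> the imm16 field, filled in by the :abs_g3: fixup at link time
;   0b111... and 0xd2  -> the fixed MOVZ opcode plus the hw=3 (lsl #48) shift field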
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index f58c598..0689fbd 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
+; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
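For reference while reading this test: a 128x128->128-bit multiply needs no library call on AArch64. Splitting lhs into (hi_l:lo_l) and rhs into (hi_r:lo_r), the low result word is lo_l*lo_r and the high word folds in the cross terms. A plausible lowering sketch under the AAPCS register assignment (x0:x1 = lhs, x2:x3 = rhs), not necessarily the exact schedule the test expects:

;   mul   x8, x0, x2    ; low 64 bits of lo_l * lo_r
;   umulh x9, x0, x2    ; high 64 bits of lo_l * lo_r (carry into the top word)
;   madd  x9, x0, x3, x9   ; + lo_l * hi_r (low 64 bits only)
;   madd  x1, x1, x2, x9   ; + hi_l * lo_r  ==> high result word in x1
;   mov   x0, x8           ; low result word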
diff --git a/test/CodeGen/AArch64/neon-2velem-high.ll b/test/CodeGen/AArch64/neon-2velem-high.ll
deleted file mode 100644
index 97031d9..0000000
--- a/test/CodeGen/AArch64/neon-2velem-high.ll
+++ /dev/null
@@ -1,331 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
-; CHECK: test_vmull_high_n_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
- %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- ret <4 x i32> %vmull15.i.i
-}
-
-define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
-; CHECK: test_vmull_high_n_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
- %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- ret <2 x i64> %vmull9.i.i
-}
-
-define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
-; CHECK: test_vmull_high_n_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
- %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- ret <4 x i32> %vmull15.i.i
-}
-
-define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
-; CHECK: test_vmull_high_n_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
- %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- ret <2 x i64> %vmull9.i.i
-}
-
-define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
-; CHECK: test_vqdmull_high_n_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
- %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- ret <4 x i32> %vqdmull15.i.i
-}
-
-define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
-; CHECK: test_vqdmull_high_n_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
- %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- ret <2 x i64> %vqdmull9.i.i
-}
-
-define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlal_high_n_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
- ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlal_high_n_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
- ret <2 x i64> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlal_high_n_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
- ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlal_high_n_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
- ret <2 x i64> %add.i.i
-}
-
-define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vqdmlal_high_n_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
- %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
- ret <4 x i32> %vqdmlal17.i.i
-}
-
-define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vqdmlal_high_n_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
- %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
- ret <2 x i64> %vqdmlal11.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlsl_high_n_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
- ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlsl_high_n_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
- ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vmlsl_high_n_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
- ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vmlsl_high_n_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
- ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
-; CHECK: test_vqdmlsl_high_n_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
- %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
- %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
- %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
- %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
- %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
- ret <4 x i32> %vqdmlsl17.i.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
-; CHECK: test_vqdmlsl_high_n_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
- %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
- %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
- %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
- ret <2 x i64> %vqdmlsl11.i.i
-}
-
-define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
-; CHECK: test_vmul_n_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-entry:
- %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
- %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
- %mul.i = fmul <2 x float> %vecinit1.i, %a
- ret <2 x float> %mul.i
-}
-
-define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
-; CHECK: test_vmulq_n_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-entry:
- %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
- %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
- %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
- %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
- %mul.i = fmul <4 x float> %vecinit3.i, %a
- ret <4 x float> %mul.i
-}
-
-define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
-; CHECK: test_vmulq_n_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-entry:
- %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
- %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
- %mul.i = fmul <2 x double> %vecinit1.i, %a
- ret <2 x double> %mul.i
-}
-
-define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
-; CHECK: test_vfma_n_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
- %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
-; CHECK: test_vfmaq_n_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
- %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
- %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
- %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
-; CHECK: test_vfms_n_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
- %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
- %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
- %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
- ret <2 x float> %1
-}
-
-define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
-; CHECK: test_vfmsq_n_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
-entry:
- %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
- %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
- %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
- %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
- %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
- %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
- ret <4 x float> %1
-}
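The _high_n tests above all follow one pattern: a shufflevector with indices 4..7 (or 2..3) extracts the top half of the input vector, an insertelement chain splats the scalar, and the combination selects the "2" (high-half) form of the instruction with a lane operand. A compact sketch of the correspondence, using names from the deleted tests:

;   %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
;   ; %dup = four-lane splat of the scalar %b via insertelement
;   ; smull on (%hi, %dup)  ==>  smull2 v0.4s, v0.8h, v1.h[0]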
diff --git a/test/CodeGen/AArch64/neon-2velem.ll b/test/CodeGen/AArch64/neon-2velem.ll
deleted file mode 100644
index acffb14..0000000
--- a/test/CodeGen/AArch64/neon-2velem.ll
+++ /dev/null
@@ -1,2853 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
-
-declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
-
-declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
-
-declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
-
-declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
-
-declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmla_lane_s16:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i16> %shuffle, %b
- %add = add <4 x i16> %mul, %a
- ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlaq_lane_s16:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %mul = mul <8 x i16> %shuffle, %b
- %add = add <8 x i16> %mul, %a
- ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmla_lane_s32:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %mul = mul <2 x i32> %shuffle, %b
- %add = add <2 x i32> %mul, %a
- ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlaq_lane_s32:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %mul = mul <4 x i32> %shuffle, %b
- %add = add <4 x i32> %mul, %a
- ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmla_laneq_s16:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %mul = mul <4 x i16> %shuffle, %b
- %add = add <4 x i16> %mul, %a
- ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlaq_laneq_s16:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
- %mul = mul <8 x i16> %shuffle, %b
- %add = add <8 x i16> %mul, %a
- ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmla_laneq_s32:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %mul = mul <2 x i32> %shuffle, %b
- %add = add <2 x i32> %mul, %a
- ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlaq_laneq_s32:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i32> %shuffle, %b
- %add = add <4 x i32> %mul, %a
- ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmls_lane_s16:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i16> %shuffle, %b
- %sub = sub <4 x i16> %a, %mul
- ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsq_lane_s16:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %mul = mul <8 x i16> %shuffle, %b
- %sub = sub <8 x i16> %a, %mul
- ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmls_lane_s32:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %mul = mul <2 x i32> %shuffle, %b
- %sub = sub <2 x i32> %a, %mul
- ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsq_lane_s32:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %mul = mul <4 x i32> %shuffle, %b
- %sub = sub <4 x i32> %a, %mul
- ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmls_laneq_s16:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %mul = mul <4 x i16> %shuffle, %b
- %sub = sub <4 x i16> %a, %mul
- ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsq_laneq_s16:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
- %mul = mul <8 x i16> %shuffle, %b
- %sub = sub <8 x i16> %a, %mul
- ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmls_laneq_s32:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %mul = mul <2 x i32> %shuffle, %b
- %sub = sub <2 x i32> %a, %mul
- ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsq_laneq_s32:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i32> %shuffle, %b
- %sub = sub <4 x i32> %a, %mul
- ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_s16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_s16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_s32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_s32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_u16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_u16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_u32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_u32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_s16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_s16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_s32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_s32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_u16:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_u16:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_u32:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_u32:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfma_lane_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmaq_lane_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfma_laneq_f32:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmaq_laneq_f32:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfms_lane_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmsq_lane_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfms_laneq_f32:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmsq_laneq_f32:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
-; CHECK: test_vfmaq_lane_f64:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
- ret <2 x double> %0
-}
-
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmaq_laneq_f64:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
- ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
-; CHECK: test_vfmsq_lane_f64:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <1 x double> <double -0.000000e+00>, %v
- %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
- ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmsq_laneq_f64:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
- %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
- ret <2 x double> %0
-}
-
-define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmas_laneq_f32
-; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %extract = extractelement <4 x float> %v, i32 3
- %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
- ret float %0
-}
-
-declare float @llvm.fma.f32(float, float, float)
-
-define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
-; CHECK-LABEL: test_vfmsd_lane_f64
-; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %extract.rhs = extractelement <1 x double> %v, i32 0
- %extract = fsub double -0.000000e+00, %extract.rhs
- %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
- ret double %0
-}
-
-declare double @llvm.fma.f64(double, double, double)
-
-define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
-; CHECK-LABEL: test_vfmss_laneq_f32
-; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %extract.rhs = extractelement <4 x float> %v, i32 3
- %extract = fsub float -0.000000e+00, %extract.rhs
- %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
- ret float %0
-}
-
-define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
-; CHECK-LABEL: test_vfmsd_laneq_f64
-; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
- %extract.rhs = extractelement <2 x double> %v, i32 1
- %extract = fsub double -0.000000e+00, %extract.rhs
- %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
- ret double %0
-}
-
-define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_s16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_s32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_s16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_s32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_s16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_s32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_s16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_s32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_s16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_s32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_s16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_s32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_s16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_s32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_s16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_s32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_u16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_u32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_u16:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_u32:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_u16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_u32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_u16:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_u32:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_u16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_u32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_u16:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_u32:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_u16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_u32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_u16:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_u32:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_s16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_s32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_u16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_u32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_s16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_s32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_u16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_u32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_s16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_s32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_u16:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_u32:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_s16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_s32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_u16:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_u32:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_lane_s16:
-; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
- ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_lane_s32:
-; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
- ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_high_lane_s16:
-; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
- ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_high_lane_s32:
-; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
- ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_lane_s16:
-; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
- ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_lane_s32:
-; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
- ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_high_lane_s16:
-; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
- ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_high_lane_s32:
-; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
- ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_lane_s16:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_lane_s32:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_laneq_s16:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_laneq_s32:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_high_lane_s16:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_high_lane_s32:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_high_laneq_s16:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_high_laneq_s32:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulh_lane_s16:
-; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i16> %vqdmulh2.i
-}
-
-define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulhq_lane_s16:
-; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
- ret <8 x i16> %vqdmulh2.i
-}
-
-define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulh_lane_s32:
-; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i32> %vqdmulh2.i
-}
-
-define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulhq_lane_s32:
-; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
- ret <4 x i32> %vqdmulh2.i
-}
-
-define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulh_lane_s16:
-; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i16> %vqrdmulh2.i
-}
-
-define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulhq_lane_s16:
-; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
- ret <8 x i16> %vqrdmulh2.i
-}
-
-define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulh_lane_s32:
-; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i32> %vqrdmulh2.i
-}
-
-define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulhq_lane_s32:
-; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
- ret <4 x i32> %vqrdmulh2.i
-}
-
-define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmul_lane_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
- %mul = fmul <2 x float> %shuffle, %a
- ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
-; CHECK: test_vmul_lane_f64:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <1 x double> %a to <8 x i8>
- %1 = bitcast <8 x i8> %0 to double
- %extract = extractelement <1 x double> %v, i32 0
- %2 = fmul double %1, %extract
- %3 = insertelement <1 x double> undef, double %2, i32 0
- ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulq_lane_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %mul = fmul <4 x float> %shuffle, %a
- ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK: test_vmulq_lane_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
- %mul = fmul <2 x double> %shuffle, %a
- ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmul_laneq_f32:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
- %mul = fmul <2 x float> %shuffle, %a
- ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
-; CHECK: test_vmul_laneq_f64:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <1 x double> %a to <8 x i8>
- %1 = bitcast <8 x i8> %0 to double
- %extract = extractelement <2 x double> %v, i32 1
- %2 = fmul double %1, %extract
- %3 = insertelement <1 x double> undef, double %2, i32 0
- ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulq_laneq_f32:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %mul = fmul <4 x float> %shuffle, %a
- ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulq_laneq_f64:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- %mul = fmul <2 x double> %shuffle, %a
- ret <2 x double> %mul
-}
-
-define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulx_lane_f32:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
- %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
- ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulxq_lane_f32:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
- ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
-; CHECK: test_vmulxq_lane_f64:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
- %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
- ret <2 x double> %vmulx2.i
-}
-
-define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulx_laneq_f32:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
- %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
- ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulxq_laneq_f32:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
- ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulxq_laneq_f64:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
- ret <2 x double> %vmulx2.i
-}
-
-define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmla_lane_s16_0:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %b
- %add = add <4 x i16> %mul, %a
- ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlaq_lane_s16_0:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %b
- %add = add <8 x i16> %mul, %a
- ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmla_lane_s32_0:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %b
- %add = add <2 x i32> %mul, %a
- ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlaq_lane_s32_0:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %b
- %add = add <4 x i32> %mul, %a
- ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmla_laneq_s16_0:
-; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %b
- %add = add <4 x i16> %mul, %a
- ret <4 x i16> %add
-}
-
-define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlaq_laneq_s16_0:
-; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %b
- %add = add <8 x i16> %mul, %a
- ret <8 x i16> %add
-}
-
-define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmla_laneq_s32_0:
-; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %b
- %add = add <2 x i32> %mul, %a
- ret <2 x i32> %add
-}
-
-define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlaq_laneq_s32_0:
-; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %b
- %add = add <4 x i32> %mul, %a
- ret <4 x i32> %add
-}
-
-define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmls_lane_s16_0:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %b
- %sub = sub <4 x i16> %a, %mul
- ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsq_lane_s16_0:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %b
- %sub = sub <8 x i16> %a, %mul
- ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmls_lane_s32_0:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %b
- %sub = sub <2 x i32> %a, %mul
- ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsq_lane_s32_0:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %b
- %sub = sub <4 x i32> %a, %mul
- ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmls_laneq_s16_0:
-; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %b
- %sub = sub <4 x i16> %a, %mul
- ret <4 x i16> %sub
-}
-
-define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsq_laneq_s16_0:
-; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %b
- %sub = sub <8 x i16> %a, %mul
- ret <8 x i16> %sub
-}
-
-define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmls_laneq_s32_0:
-; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %b
- %sub = sub <2 x i32> %a, %mul
- ret <2 x i32> %sub
-}
-
-define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsq_laneq_s32_0:
-; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %b
- %sub = sub <4 x i32> %a, %mul
- ret <4 x i32> %sub
-}
-
-define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_s16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_s16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_s32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_s32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmul_lane_u16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmulq_lane_u16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmul_lane_u32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmulq_lane_u32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_s16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_s16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_s32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_s32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
-define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmul_laneq_u16_0:
-; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i16> %shuffle, %a
- ret <4 x i16> %mul
-}
-
-define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmulq_laneq_u16_0:
-; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
- %mul = mul <8 x i16> %shuffle, %a
- ret <8 x i16> %mul
-}
-
-define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmul_laneq_u32_0:
-; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %mul = mul <2 x i32> %shuffle, %a
- ret <2 x i32> %mul
-}
-
-define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmulq_laneq_u32_0:
-; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
- %mul = mul <4 x i32> %shuffle, %a
- ret <4 x i32> %mul
-}
-
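-; Floating-point fused multiply-accumulate by lane: the lane is broadcast with
-; a shufflevector and passed to @llvm.fma, and the CHECK lines expect the
-; by-element fmla/fmls forms (fmls via negating the lane source with an fsub
-; from -0.0).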
-define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfma_lane_f32_0:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmaq_lane_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfma_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmaq_laneq_f32_0:
-; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
-; CHECK: test_vfms_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
-; CHECK: test_vfmsq_lane_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
-; CHECK: test_vfms_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
- ret <2 x float> %0
-}
-
-define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
-; CHECK: test_vfmsq_laneq_f32_0:
-; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
- %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
- %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
- ret <4 x float> %0
-}
-
-define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmaq_laneq_f64_0:
-; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
- ret <2 x double> %0
-}
-
-define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
-; CHECK: test_vfmsq_laneq_f64_0:
-; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
- %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
- %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
- ret <2 x double> %0
-}
-
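-; Widening multiply-accumulate by lane: the broadcast lane is widened through
-; the vmulls/vmullu intrinsics and then added to (mlal) or subtracted from
-; (mlsl) the accumulator. The CHECK patterns match the shared instruction
-; suffix, so they cover both the signed and unsigned encodings.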
-define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_s16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_s32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_s16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_s32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_s16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_s32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_s16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_s32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_s16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_s32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_s16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_s32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_lane_u16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_lane_u32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_laneq_u16_0:
-; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_laneq_u32_0:
-; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlal_high_lane_u16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlal_high_lane_u32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlal_high_laneq_u16_0:
-; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %add = add <4 x i32> %vmull2.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlal_high_laneq_u32_0:
-; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %add = add <2 x i64> %vmull2.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_lane_u16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_lane_u32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_laneq_u16_0:
-; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_laneq_u32_0:
-; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vmlsl_high_lane_u16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vmlsl_high_lane_u32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
-define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
-; CHECK: test_vmlsl_high_laneq_u16_0:
-; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %sub = sub <4 x i32> %a, %vmull2.i
- ret <4 x i32> %sub
-}
-
-define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
-; CHECK: test_vmlsl_high_laneq_u32_0:
-; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %sub = sub <2 x i64> %a, %vmull2.i
- ret <2 x i64> %sub
-}
-
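-; Plain widening multiply by lane, with no accumulator: smull/umull and their
-; high-half (*2) variants, again matched through the shared 'mull' suffix.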
-define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_s16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_s32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_lane_u16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_lane_u32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_s16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_s32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vmull_high_lane_u16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vmull_high_lane_u32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_s16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_s32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_laneq_u16_0:
-; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_laneq_u32_0:
-; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_s16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_s32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vmull_high_laneq_u16_0:
-; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vmull_high_laneq_u32_0:
-; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vmull2.i
-}
-
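-; Saturating doubling multiply-accumulate: a vqdmull of the broadcast lane
-; followed by a saturating add (vqadds) or subtract (vqsubs) into the
-; accumulator, expected to fuse into a single by-element sqdmlal/sqdmlsl.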
-define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_lane_s16_0:
-; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
- ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_lane_s32_0:
-; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
- ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlal_high_lane_s16_0:
-; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
- ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlal_high_lane_s32_0:
-; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
- ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_lane_s16_0:
-; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
- %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
- ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_lane_s32_0:
-; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
- %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
- ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
-; CHECK: test_vqdmlsl_high_lane_s16_0:
-; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
- ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
-; CHECK: test_vqdmlsl_high_lane_s32_0:
-; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
- ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_lane_s16_0:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_lane_s32_0:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_laneq_s16_0:
-; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_laneq_s32_0:
-; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmull_high_lane_s16_0:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmull_high_lane_s32_0:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
-; CHECK: test_vqdmull_high_laneq_s16_0:
-; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
-; CHECK: test_vqdmull_high_laneq_s32_0:
-; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
- ret <2 x i64> %vqdmull2.i
-}
-
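-; Saturating doubling multiply returning the high half: sqdmulh and its
-; rounding variant sqrdmulh against a broadcast lane.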
-define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulh_lane_s16_0:
-; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i16> %vqdmulh2.i
-}
-
-define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqdmulhq_lane_s16_0:
-; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
- %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
- ret <8 x i16> %vqdmulh2.i
-}
-
-define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulh_lane_s32_0:
-; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i32> %vqdmulh2.i
-}
-
-define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqdmulhq_lane_s32_0:
-; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
- %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
- ret <4 x i32> %vqdmulh2.i
-}
-
-define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulh_lane_s16_0:
-; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
- %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
- ret <4 x i16> %vqrdmulh2.i
-}
-
-define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
-; CHECK: test_vqrdmulhq_lane_s16_0:
-; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
- %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
- ret <8 x i16> %vqrdmulh2.i
-}
-
-define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulh_lane_s32_0:
-; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
- %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
- ret <2 x i32> %vqrdmulh2.i
-}
-
-define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
-; CHECK: test_vqrdmulhq_lane_s32_0:
-; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
- %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
- ret <4 x i32> %vqrdmulh2.i
-}
-
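-; Unfused floating-point multiply by lane, written as a plain IR fmul of the
-; broadcast lane and expected to select the by-element fmul.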
-define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmul_lane_f32_0:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
- %mul = fmul <2 x float> %shuffle, %a
- ret <2 x float> %mul
-}
-
-define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulq_lane_f32_0:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
- %mul = fmul <4 x float> %shuffle, %a
- ret <4 x float> %mul
-}
-
-define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmul_laneq_f32_0:
-; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
- %mul = fmul <2 x float> %shuffle, %a
- ret <2 x float> %mul
-}
-
-define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
-; CHECK: test_vmul_laneq_f64_0:
-; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <1 x double> %a to <8 x i8>
- %1 = bitcast <8 x i8> %0 to double
- %extract = extractelement <2 x double> %v, i32 0
- %2 = fmul double %1, %extract
- %3 = insertelement <1 x double> undef, double %2, i32 0
- ret <1 x double> %3
-}
-
-define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulq_laneq_f32_0:
-; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
- %mul = fmul <4 x float> %shuffle, %a
- ret <4 x float> %mul
-}
-
-define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulq_laneq_f64_0:
-; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
- %mul = fmul <2 x double> %shuffle, %a
- ret <2 x double> %mul
-}
-
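-; Multiply-extended (fmulx) by lane, via the AArch64-specific vmulx
-; intrinsics.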
-define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulx_lane_f32_0:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
- %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
- ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
-; CHECK: test_vmulxq_lane_f32_0:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
- %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
- ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
-; CHECK: test_vmulxq_lane_f64_0:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
- %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
- ret <2 x double> %vmulx2.i
-}
-
-define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulx_laneq_f32_0:
-; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
- %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
- ret <2 x float> %vmulx2.i
-}
-
-define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
-; CHECK: test_vmulxq_laneq_f32_0:
-; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
- %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
- ret <4 x float> %vmulx2.i
-}
-
-define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
-; CHECK: test_vmulxq_laneq_f64_0:
-; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-; CHECK-NEXT: ret
-entry:
- %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
- %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
- ret <2 x double> %vmulx2.i
-}
-
diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll
deleted file mode 100644
index 96400eb..0000000
--- a/test/CodeGen/AArch64/neon-3vdiff.ll
+++ /dev/null
@@ -1,1833 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
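-; Intrinsic declarations shared by the widening/narrowing tests below.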
-declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>)
-
-declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>)
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>)
-
-declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
-
-declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
-
-declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
-
-declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>)
-
-declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>)
-
-declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>)
-
-declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>)
-
-declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>)
-
-declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>)
-
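-; Long add: an IR sext/zext of each operand followed by an add is expected to
-; select saddl/uaddl.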
-define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vaddl_s8:
-; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
- %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vaddl_s16:
-; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
- %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vaddl_s32:
-; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
- %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vaddl_u8:
-; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
- %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vaddl_u16:
-; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
- %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vaddl_u32:
-; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
- %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
- ret <2 x i64> %add.i
-}
-
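-; The *_high variants extract the top half of each source with a
-; shufflevector before extending, selecting the saddl2/uaddl2 forms.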
-define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vaddl_high_s8:
-; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
- %add.i = add <8 x i16> %0, %1
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddl_high_s16:
-; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
- %add.i = add <4 x i32> %0, %1
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddl_high_s32:
-; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
- %add.i = add <2 x i64> %0, %1
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vaddl_high_u8:
-; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
- %add.i = add <8 x i16> %0, %1
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddl_high_u16:
-; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
- %add.i = add <4 x i32> %0, %1
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddl_high_u32:
-; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
- %add.i = add <2 x i64> %0, %1
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vaddw_s8:
-; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vaddw_s16:
-; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vaddw_s32:
-; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vaddw_u8:
-; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vaddw_u16:
-; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vaddw_u32:
-; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vaddw_high_s8:
-; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %add.i = add <8 x i16> %0, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vaddw_high_s16:
-; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %add.i = add <4 x i32> %0, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vaddw_high_s32:
-; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %add.i = add <2 x i64> %0, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vaddw_high_u8:
-; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %add.i = add <8 x i16> %0, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vaddw_high_u16:
-; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %add.i = add <4 x i32> %0, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vaddw_high_u32:
-; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %add.i = add <2 x i64> %0, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsubl_s8:
-; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
- %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
- %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsubl_s16:
-; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
- %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
- %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsubl_s32:
-; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
- %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
- %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsubl_u8:
-; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
- %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
- %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsubl_u16:
-; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
- %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
- %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsubl_u32:
-; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
- %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
- %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsubl_high_s8:
-; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
- %sub.i = sub <8 x i16> %0, %1
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubl_high_s16:
-; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
- %sub.i = sub <4 x i32> %0, %1
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubl_high_s32:
-; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
- %sub.i = sub <2 x i64> %0, %1
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsubl_high_u8:
-; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
- %sub.i = sub <8 x i16> %0, %1
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubl_high_u16:
-; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
- %sub.i = sub <4 x i32> %0, %1
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubl_high_u32:
-; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
- %sub.i = sub <2 x i64> %0, %1
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vsubw_s8:
-; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
- %sub.i = sub <8 x i16> %a, %vmovl.i.i
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vsubw_s16:
-; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
- %sub.i = sub <4 x i32> %a, %vmovl.i.i
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vsubw_s32:
-; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
- %sub.i = sub <2 x i64> %a, %vmovl.i.i
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK: test_vsubw_u8:
-; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b
-entry:
- %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
- %sub.i = sub <8 x i16> %a, %vmovl.i.i
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
-; CHECK: test_vsubw_u16:
-; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h
-entry:
- %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
- %sub.i = sub <4 x i32> %a, %vmovl.i.i
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
-; CHECK: test_vsubw_u32:
-; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s
-entry:
- %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
- %sub.i = sub <2 x i64> %a, %vmovl.i.i
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vsubw_high_s8:
-; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %sub.i = sub <8 x i16> %a, %0
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vsubw_high_s16:
-; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %sub.i = sub <4 x i32> %a, %0
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vsubw_high_s32:
-; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %sub.i = sub <2 x i64> %a, %0
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK: test_vsubw_high_u8:
-; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
- %sub.i = sub <8 x i16> %a, %0
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
-; CHECK: test_vsubw_high_u16:
-; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
- %sub.i = sub <4 x i32> %a, %0
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
-; CHECK: test_vsubw_high_u32:
-; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
- %sub.i = sub <2 x i64> %a, %0
- ret <2 x i64> %sub.i
-}
-
-define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_s16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vaddhn.i = add <8 x i16> %a, %b
- %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
- ret <8 x i8> %vaddhn2.i
-}
-
-define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_s32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vaddhn.i = add <4 x i32> %a, %b
- %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
- %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
- ret <4 x i16> %vaddhn2.i
-}
-
-define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_s64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vaddhn.i = add <2 x i64> %a, %b
- %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
- %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
- ret <2 x i32> %vaddhn2.i
-}
-
-define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_u16:
-; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vaddhn.i = add <8 x i16> %a, %b
- %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8>
- ret <8 x i8> %vaddhn2.i
-}
-
-define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_u32:
-; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vaddhn.i = add <4 x i32> %a, %b
- %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
- %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16>
- ret <4 x i16> %vaddhn2.i
-}
-
-define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_u64:
-; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vaddhn.i = add <2 x i64> %a, %b
- %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
- %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32>
- ret <2 x i32> %vaddhn2.i
-}
-
-define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_high_s16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vaddhn.i.i = add <8 x i16> %a, %b
- %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_high_s32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vaddhn.i.i = add <4 x i32> %a, %b
- %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
- %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_high_s64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vaddhn.i.i = add <2 x i64> %a, %b
- %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
- %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vaddhn_high_u16:
-; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vaddhn.i.i = add <8 x i16> %a, %b
- %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8>
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vaddhn_high_u32:
-; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vaddhn.i.i = add <4 x i32> %a, %b
- %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
- %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16>
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vaddhn_high_u64:
-; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vaddhn.i.i = add <2 x i64> %a, %b
- %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
- %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32>
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_s16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- ret <8 x i8> %vraddhn2.i
-}
-
-define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_s32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- ret <4 x i16> %vraddhn2.i
-}
-
-define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_s64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i32> %vraddhn2.i
-}
-
-define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_u16:
-; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- ret <8 x i8> %vraddhn2.i
-}
-
-define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_u32:
-; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- ret <4 x i16> %vraddhn2.i
-}
-
-define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_u64:
-; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i32> %vraddhn2.i
-}
-
-define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_high_s16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_high_s32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_high_s64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vraddhn_high_u16:
-; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vraddhn_high_u32:
-; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vraddhn_high_u64:
-; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_s16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vsubhn.i = sub <8 x i16> %a, %b
- %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
- ret <8 x i8> %vsubhn2.i
-}
-
-define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_s32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vsubhn.i = sub <4 x i32> %a, %b
- %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
- %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
- ret <4 x i16> %vsubhn2.i
-}
-
-define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_s64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vsubhn.i = sub <2 x i64> %a, %b
- %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
- %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
- ret <2 x i32> %vsubhn2.i
-}
-
-define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_u16:
-; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vsubhn.i = sub <8 x i16> %a, %b
- %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8>
- ret <8 x i8> %vsubhn2.i
-}
-
-define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_u32:
-; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vsubhn.i = sub <4 x i32> %a, %b
- %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
- %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16>
- ret <4 x i16> %vsubhn2.i
-}
-
-define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_u64:
-; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vsubhn.i = sub <2 x i64> %a, %b
- %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
- %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32>
- ret <2 x i32> %vsubhn2.i
-}
-
-define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_high_s16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vsubhn.i.i = sub <8 x i16> %a, %b
- %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_high_s32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vsubhn.i.i = sub <4 x i32> %a, %b
- %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
- %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_high_s64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vsubhn.i.i = sub <2 x i64> %a, %b
- %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
- %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsubhn_high_u16:
-; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vsubhn.i.i = sub <8 x i16> %a, %b
- %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8>
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsubhn_high_u32:
-; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vsubhn.i.i = sub <4 x i32> %a, %b
- %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
- %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16>
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsubhn_high_u64:
-; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vsubhn.i.i = sub <2 x i64> %a, %b
- %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
- %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32>
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_s16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- ret <8 x i8> %vrsubhn2.i
-}
-
-define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_s32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- ret <4 x i16> %vrsubhn2.i
-}
-
-define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_s64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i32> %vrsubhn2.i
-}
-
-define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_u16:
-; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- ret <8 x i8> %vrsubhn2.i
-}
-
-define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_u32:
-; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- ret <4 x i16> %vrsubhn2.i
-}
-
-define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_u64:
-; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i32> %vrsubhn2.i
-}
-
-define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_high_s16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_high_s32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_high_s64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsubhn_high_u16:
-; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
- %0 = bitcast <8 x i8> %r to <1 x i64>
- %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8>
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsubhn_high_u32:
-; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
- %0 = bitcast <4 x i16> %r to <1 x i64>
- %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsubhn_high_u64:
-; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
- %0 = bitcast <2 x i32> %r to <1 x i64>
- %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64>
- %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32>
- ret <4 x i32> %2
-}
-
-define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vabdl_s8:
-; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
- %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
- ret <8 x i16> %vmovl.i.i
-}
-
-define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vabdl_s16:
-; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
- %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
- ret <4 x i32> %vmovl.i.i
-}
-
-define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vabdl_s32:
-; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
- %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
- ret <2 x i64> %vmovl.i.i
-}
-
-define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vabdl_u8:
-; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
- %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16>
- ret <8 x i16> %vmovl.i.i
-}
-
-define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vabdl_u16:
-; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
- %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32>
- ret <4 x i32> %vmovl.i.i
-}
-
-define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vabdl_u32:
-; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
- %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64>
- ret <2 x i64> %vmovl.i.i
-}
-
-define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vabal_s8:
-; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
- %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vabal_s16:
-; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
- %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vabal_s32:
-; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
- %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vabal_u8:
-; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
- %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vabal_u16:
-; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
- %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vabal_u32:
-; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
- %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vabdl_high_s8:
-; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
- ret <8 x i16> %vmovl.i.i.i
-}
-
-define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vabdl_high_s16:
-; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
- ret <4 x i32> %vmovl.i.i.i
-}
-
-define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vabdl_high_s32:
-; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
- ret <2 x i64> %vmovl.i.i.i
-}
-
-define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vabdl_high_u8:
-; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
- ret <8 x i16> %vmovl.i.i.i
-}
-
-define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vabdl_high_u16:
-; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32>
- ret <4 x i32> %vmovl.i.i.i
-}
-
-define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vabdl_high_u32:
-; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
- ret <2 x i64> %vmovl.i.i.i
-}
-
-define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vabal_high_s8:
-; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
- %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
- ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vabal_high_s16:
-; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
- %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
- ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vabal_high_s32:
-; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
- %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
- ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vabal_high_u8:
-; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
- %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
- ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vabal_high_u16:
-; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32>
- %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a
- ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vabal_high_u32:
-; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64>
- %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a
- ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_s8:
-; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
- ret <8 x i16> %vmull.i
-}
-
-define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vmull_s16:
-; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vmull_s32:
-; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
- ret <2 x i64> %vmull2.i
-}
-
-define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_u8:
-; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
- ret <8 x i16> %vmull.i
-}
-
-define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vmull_u16:
-; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vmull_u32:
-; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
- ret <2 x i64> %vmull2.i
-}
-
-define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_s8:
-; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vmull_high_s16:
-; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vmull_high_s32:
-; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_u8:
-; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vmull_high_u16:
-; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vmull_high_u32:
-; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlal_s8:
-; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
- %add.i = add <8 x i16> %vmull.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlal_s16:
-; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
- %add.i = add <4 x i32> %vmull2.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlal_s32:
-; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
- %add.i = add <2 x i64> %vmull2.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlal_u8:
-; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
- %add.i = add <8 x i16> %vmull.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlal_u16:
-; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
- %add.i = add <4 x i32> %vmull2.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlal_u32:
-; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
- %add.i = add <2 x i64> %vmull2.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlal_high_s8:
-; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %add.i.i = add <8 x i16> %vmull.i.i.i, %a
- ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlal_high_s16:
-; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
- ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlal_high_s32:
-; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
- ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlal_high_u8:
-; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %add.i.i = add <8 x i16> %vmull.i.i.i, %a
- ret <8 x i16> %add.i.i
-}
-
-define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlal_high_u16:
-; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
- ret <4 x i32> %add.i.i
-}
-
-define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlal_high_u32:
-; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
- ret <2 x i64> %add.i.i
-}
-
-define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlsl_s8:
-; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
- %sub.i = sub <8 x i16> %a, %vmull.i.i
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlsl_s16:
-; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
- %sub.i = sub <4 x i32> %a, %vmull2.i.i
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlsl_s32:
-; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
- %sub.i = sub <2 x i64> %a, %vmull2.i.i
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vmlsl_u8:
-; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
- %sub.i = sub <8 x i16> %a, %vmull.i.i
- ret <8 x i16> %sub.i
-}
-
-define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vmlsl_u16:
-; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
- %sub.i = sub <4 x i32> %a, %vmull2.i.i
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vmlsl_u32:
-; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
- %sub.i = sub <2 x i64> %a, %vmull2.i.i
- ret <2 x i64> %sub.i
-}
-
-define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlsl_high_s8:
-; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
- ret <8 x i16> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlsl_high_s16:
-; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
- ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlsl_high_s32:
-; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
- ret <2 x i64> %sub.i.i
-}
-
-define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vmlsl_high_u8:
-; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i
- ret <8 x i16> %sub.i.i
-}
-
-define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vmlsl_high_u16:
-; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
- ret <4 x i32> %sub.i.i
-}
-
-define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vmlsl_high_u32:
-; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
- ret <2 x i64> %sub.i.i
-}
-
-define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vqdmull_s16:
-; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
- ret <4 x i32> %vqdmull2.i
-}
-
-define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vqdmull_s32:
-; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
- ret <2 x i64> %vqdmull2.i
-}
-
-define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vqdmlal_s16:
-; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
- %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
- ret <4 x i32> %vqdmlal4.i
-}
-
-define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vqdmlal_s32:
-; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
- %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
- ret <2 x i64> %vqdmlal4.i
-}
-
-define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
-; CHECK: test_vqdmlsl_s16:
-; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-entry:
- %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
- %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
- ret <4 x i32> %vqdmlsl4.i
-}
-
-define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
-; CHECK: test_vqdmlsl_s32:
-; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
- %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
- ret <2 x i64> %vqdmlsl4.i
-}
-
-define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vqdmull_high_s16:
-; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- ret <4 x i32> %vqdmull2.i.i
-}
-
-define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vqdmull_high_s32:
-; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- ret <2 x i64> %vqdmull2.i.i
-}
-
-define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vqdmlal_high_s16:
-; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i)
- ret <4 x i32> %vqdmlal4.i.i
-}
-
-define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vqdmlal_high_s32:
-; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i)
- ret <2 x i64> %vqdmlal4.i.i
-}
-
-define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
-; CHECK: test_vqdmlsl_high_s16:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-entry:
- %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i)
- %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i)
- ret <4 x i32> %vqdmlsl4.i.i
-}
-
-define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
-; CHECK: test_vqdmlsl_high_s32:
-; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i)
- %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i)
- ret <2 x i64> %vqdmlsl4.i.i
-}
-
-define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vmull_p8:
-; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
- ret <8 x i16> %vmull.i
-}
-
-define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vmull_high_p8:
-; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
- ret <8 x i16> %vmull.i.i
-}
-
-define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
-; CHECK: test_vmull_p64
-; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
-entry:
- %vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1
- %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
- ret i128 %vmull3.i
-}
-
-define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
-; CHECK: test_vmull_high_p64
-; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-entry:
- %0 = extractelement <2 x i64> %a, i32 1
- %1 = extractelement <2 x i64> %b, i32 1
- %vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0
- %vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0
- %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1
- %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
- ret i128 %vmull3.i.i
-}
-
-declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5
-
-
diff --git a/test/CodeGen/AArch64/neon-aba-abd.ll b/test/CodeGen/AArch64/neon-aba-abd.ll
deleted file mode 100644
index 5400984..0000000
--- a/test/CodeGen/AArch64/neon-aba-abd.ll
+++ /dev/null
@@ -1,236 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uabd_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uabd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uaba_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
- %aba = add <8 x i8> %lhs, %abd
-; CHECK: uaba v0.8b, v0.8b, v1.8b
- ret <8 x i8> %aba
-}
-
-define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sabd_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sabd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %abd
-}
-
-define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_saba_v8i8:
- %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
- %aba = add <8 x i8> %lhs, %abd
-; CHECK: saba v0.8b, v0.8b, v1.8b
- ret <8 x i8> %aba
-}
-
-declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uabd_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uabd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uaba_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
- %aba = add <16 x i8> %lhs, %abd
-; CHECK: uaba v0.16b, v0.16b, v1.16b
- ret <16 x i8> %aba
-}
-
-define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sabd_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sabd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %abd
-}
-
-define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_saba_v16i8:
- %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
- %aba = add <16 x i8> %lhs, %abd
-; CHECK: saba v0.16b, v0.16b, v1.16b
- ret <16 x i8> %aba
-}
-
-declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uabd_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uabd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uaba_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
- %aba = add <4 x i16> %lhs, %abd
-; CHECK: uaba v0.4h, v0.4h, v1.4h
- ret <4 x i16> %aba
-}
-
-define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sabd_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sabd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %abd
-}
-
-define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_saba_v4i16:
- %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
- %aba = add <4 x i16> %lhs, %abd
-; CHECK: saba v0.4h, v0.4h, v1.4h
- ret <4 x i16> %aba
-}
-
-declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uabd_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uabd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uaba_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
- %aba = add <8 x i16> %lhs, %abd
-; CHECK: uaba v0.8h, v0.8h, v1.8h
- ret <8 x i16> %aba
-}
-
-define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sabd_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sabd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %abd
-}
-
-define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_saba_v8i16:
- %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
- %aba = add <8 x i16> %lhs, %abd
-; CHECK: saba v0.8h, v0.8h, v1.8h
- ret <8 x i16> %aba
-}
-
-declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uabd_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uabd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uaba_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
- %aba = add <2 x i32> %lhs, %abd
-; CHECK: uaba v0.2s, v0.2s, v1.2s
- ret <2 x i32> %aba
-}
-
-define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sabd_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sabd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %abd
-}
-
-define <2 x i32> @test_sabd_v2i32_const() {
-; CHECK: test_sabd_v2i32_const:
-; CHECK: movi d1, #0xffffffff0000
-; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
- %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(
- <2 x i32> <i32 -2147483648, i32 2147450880>,
- <2 x i32> <i32 -65536, i32 65535>)
- ret <2 x i32> %1
-}
-
-define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_saba_v2i32:
- %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
- %aba = add <2 x i32> %lhs, %abd
-; CHECK: saba v0.2s, v0.2s, v1.2s
- ret <2 x i32> %aba
-}
-
-declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uabd_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uabd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uaba_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
- %aba = add <4 x i32> %lhs, %abd
-; CHECK: uaba v0.4s, v0.4s, v1.4s
- ret <4 x i32> %aba
-}
-
-define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sabd_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sabd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %abd
-}
-
-define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_saba_v4i32:
- %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
- %aba = add <4 x i32> %lhs, %abd
-; CHECK: saba v0.4s, v0.4s, v1.4s
- ret <4 x i32> %aba
-}
-
-declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>)
-
-define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fabd_v2f32:
- %abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fabd v0.2s, v0.2s, v1.2s
- ret <2 x float> %abd
-}
-
-declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>)
-
-define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fabd_v4f32:
- %abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fabd v0.4s, v0.4s, v1.4s
- ret <4 x float> %abd
-}
-
-declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>)
-
-define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fabd_v2f64:
- %abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fabd v0.2d, v0.2d, v1.2d
- ret <2 x double> %abd
-}
diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll
deleted file mode 100644
index 6d30c95..0000000
--- a/test/CodeGen/AArch64/neon-across.ll
+++ /dev/null
@@ -1,472 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare float @llvm.aarch64.neon.vminnmv(<4 x float>)
-
-declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>)
-
-declare float @llvm.aarch64.neon.vminv(<4 x float>)
-
-declare float @llvm.aarch64.neon.vmaxv(<4 x float>)
-
-declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>)
-
-declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>)
-
-declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>)
-
-declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>)
-
-declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>)
-
-declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>)
-
-declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>)
-
-declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>)
-
-declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>)
-
-define i16 @test_vaddlv_s8(<8 x i8> %a) {
-; CHECK: test_vaddlv_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i16> %saddlv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vaddlv_s16(<4 x i16> %a) {
-; CHECK: test_vaddlv_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i32> %saddlv.i, i32 0
- ret i32 %0
-}
-
-define i16 @test_vaddlv_u8(<8 x i8> %a) {
-; CHECK: test_vaddlv_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i16> %uaddlv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vaddlv_u16(<4 x i16> %a) {
-; CHECK: test_vaddlv_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i32> %uaddlv.i, i32 0
- ret i32 %0
-}
-
-define i16 @test_vaddlvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_s8:
-; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i16> %saddlv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vaddlvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_s16:
-; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i32> %saddlv.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vaddlvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_s32:
-; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i64> %saddlv.i, i32 0
- ret i64 %0
-}
-
-define i16 @test_vaddlvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddlvq_u8:
-; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i16> %uaddlv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vaddlvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddlvq_u16:
-; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i32> %uaddlv.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vaddlvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddlvq_u32:
-; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i64> %uaddlv.i, i32 0
- ret i64 %0
-}
-
-define i8 @test_vmaxv_s8(<8 x i8> %a) {
-; CHECK: test_vmaxv_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i8> %smaxv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vmaxv_s16(<4 x i16> %a) {
-; CHECK: test_vmaxv_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i16> %smaxv.i, i32 0
- ret i16 %0
-}
-
-define i8 @test_vmaxv_u8(<8 x i8> %a) {
-; CHECK: test_vmaxv_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i8> %umaxv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vmaxv_u16(<4 x i16> %a) {
-; CHECK: test_vmaxv_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i16> %umaxv.i, i32 0
- ret i16 %0
-}
-
-define i8 @test_vmaxvq_s8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_s8:
-; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i8> %smaxv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vmaxvq_s16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_s16:
-; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i16> %smaxv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vmaxvq_s32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_s32:
-; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i32> %smaxv.i, i32 0
- ret i32 %0
-}
-
-define i8 @test_vmaxvq_u8(<16 x i8> %a) {
-; CHECK: test_vmaxvq_u8:
-; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i8> %umaxv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vmaxvq_u16(<8 x i16> %a) {
-; CHECK: test_vmaxvq_u16:
-; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i16> %umaxv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vmaxvq_u32(<4 x i32> %a) {
-; CHECK: test_vmaxvq_u32:
-; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i32> %umaxv.i, i32 0
- ret i32 %0
-}
-
-define i8 @test_vminv_s8(<8 x i8> %a) {
-; CHECK: test_vminv_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i8> %sminv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vminv_s16(<4 x i16> %a) {
-; CHECK: test_vminv_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i16> %sminv.i, i32 0
- ret i16 %0
-}
-
-define i8 @test_vminv_u8(<8 x i8> %a) {
-; CHECK: test_vminv_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i8> %uminv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vminv_u16(<4 x i16> %a) {
-; CHECK: test_vminv_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i16> %uminv.i, i32 0
- ret i16 %0
-}
-
-define i8 @test_vminvq_s8(<16 x i8> %a) {
-; CHECK: test_vminvq_s8:
-; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i8> %sminv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vminvq_s16(<8 x i16> %a) {
-; CHECK: test_vminvq_s16:
-; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i16> %sminv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vminvq_s32(<4 x i32> %a) {
-; CHECK: test_vminvq_s32:
-; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i32> %sminv.i, i32 0
- ret i32 %0
-}
-
-define i8 @test_vminvq_u8(<16 x i8> %a) {
-; CHECK: test_vminvq_u8:
-; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i8> %uminv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vminvq_u16(<8 x i16> %a) {
-; CHECK: test_vminvq_u16:
-; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i16> %uminv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vminvq_u32(<4 x i32> %a) {
-; CHECK: test_vminvq_u32:
-; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i32> %uminv.i, i32 0
- ret i32 %0
-}
-
-define i8 @test_vaddv_s8(<8 x i8> %a) {
-; CHECK: test_vaddv_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i8> %vaddv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vaddv_s16(<4 x i16> %a) {
-; CHECK: test_vaddv_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i16> %vaddv.i, i32 0
- ret i16 %0
-}
-
-define i8 @test_vaddv_u8(<8 x i8> %a) {
-; CHECK: test_vaddv_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
-entry:
- %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a)
- %0 = extractelement <1 x i8> %vaddv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vaddv_u16(<4 x i16> %a) {
-; CHECK: test_vaddv_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
-entry:
- %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a)
- %0 = extractelement <1 x i16> %vaddv.i, i32 0
- ret i16 %0
-}
-
-define i8 @test_vaddvq_s8(<16 x i8> %a) {
-; CHECK: test_vaddvq_s8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i8> %vaddv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vaddvq_s16(<8 x i16> %a) {
-; CHECK: test_vaddvq_s16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i16> %vaddv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vaddvq_s32(<4 x i32> %a) {
-; CHECK: test_vaddvq_s32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i32> %vaddv.i, i32 0
- ret i32 %0
-}
-
-define i8 @test_vaddvq_u8(<16 x i8> %a) {
-; CHECK: test_vaddvq_u8:
-; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
-entry:
- %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a)
- %0 = extractelement <1 x i8> %vaddv.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vaddvq_u16(<8 x i16> %a) {
-; CHECK: test_vaddvq_u16:
-; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
-entry:
- %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a)
- %0 = extractelement <1 x i16> %vaddv.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vaddvq_u32(<4 x i32> %a) {
-; CHECK: test_vaddvq_u32:
-; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a)
- %0 = extractelement <1 x i32> %vaddv.i, i32 0
- ret i32 %0
-}
-
-define float @test_vmaxvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxvq_f32:
-; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a)
- ret float %0
-}
-
-define float @test_vminvq_f32(<4 x float> %a) {
-; CHECK: test_vminvq_f32:
-; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a)
- ret float %0
-}
-
-define float @test_vmaxnmvq_f32(<4 x float> %a) {
-; CHECK: test_vmaxnmvq_f32:
-; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a)
- ret float %0
-}
-
-define float @test_vminnmvq_f32(<4 x float> %a) {
-; CHECK: test_vminnmvq_f32:
-; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a)
- ret float %0
-}
-
diff --git a/test/CodeGen/AArch64/neon-add-pairwise.ll b/test/CodeGen/AArch64/neon-add-pairwise.ll
deleted file mode 100644
index 32d8222..0000000
--- a/test/CodeGen/AArch64/neon-add-pairwise.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_addp_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: addp v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_addp_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: addp v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_addp_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: addp v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_addp_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: addp v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_addp_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: addp v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_addp_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: addp v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-
-declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_addp_v2i64:
- %val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: addp v0.2d, v0.2d, v1.2d
- ret <2 x i64> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_faddp_v2f32:
- %val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: faddp v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_faddp_v4f32:
- %val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: faddp v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_faddp_v2f64:
- %val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: faddp v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-define i32 @test_vaddv.v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddv.v2i32
-; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i32> %1, i32 0
- ret i32 %2
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll
deleted file mode 100644
index 9015237..0000000
--- a/test/CodeGen/AArch64/neon-add-sub.ll
+++ /dev/null
@@ -1,279 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = add <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = add <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = add <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = add <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = add <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = add <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = add <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
-define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = fadd <2 x float> %A, %B;
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = fadd <4 x float> %A, %B;
- ret <4 x float> %tmp3
-}
-define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = fadd <2 x double> %A, %B;
- ret <2 x double> %tmp3
-}
-
-define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = sub <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = sub <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = sub <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = sub <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = sub <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = sub <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = sub <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
-define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = fsub <2 x float> %A, %B;
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = fsub <4 x float> %A, %B;
- ret <4 x float> %tmp3
-}
-define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = fsub <2 x double> %A, %B;
- ret <2 x double> %tmp3
-}
-
-define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vadd_f64
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fadd <1 x double> %a, %b
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmul_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fmul <1 x double> %a, %b
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vdiv_f64
-; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fdiv <1 x double> %a, %b
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmla_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fmul <1 x double> %b, %c
- %2 = fadd <1 x double> %1, %a
- ret <1 x double> %2
-}
-
-define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vmls_f64
-; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fmul <1 x double> %b, %c
- %2 = fsub <1 x double> %a, %1
- ret <1 x double> %2
-}
-
-define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfms_f64
-; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fsub <1 x double> <double -0.000000e+00>, %b
- %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
- ret <1 x double> %2
-}
-
-define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
-; CHECK-LABEL: test_vfma_f64
-; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vsub_f64
-; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fsub <1 x double> %a, %b
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vabd_f64
-; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmax_f64
-; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmin_f64
-; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vmaxnm_f64
-; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vminnm_f64
-; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vabs_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vabs_f64
-; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vneg_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vneg_f64
-; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fsub <1 x double> <double -0.000000e+00>, %a
- ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
-declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
-
-define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) {
-;CHECK-LABEL: test_add_v1i8:
-;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %c = add <1 x i8> %a, %b
- ret <1 x i8> %c
-}
-
-define <1 x i16> @test_add_v1i16(<1 x i16> %a, <1 x i16> %b) {
-;CHECK-LABEL: test_add_v1i16:
-;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %c = add <1 x i16> %a, %b
- ret <1 x i16> %c
-}
-
-define <1 x i32> @test_add_v1i32(<1 x i32> %a, <1 x i32> %b) {
-;CHECK-LABEL: test_add_v1i32:
-;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %c = add <1 x i32> %a, %b
- ret <1 x i32> %c
-}
-
-define <1 x i8> @test_sub_v1i8(<1 x i8> %a, <1 x i8> %b) {
-;CHECK-LABEL: test_sub_v1i8:
-;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %c = sub <1 x i8> %a, %b
- ret <1 x i8> %c
-}
-
-define <1 x i16> @test_sub_v1i16(<1 x i16> %a, <1 x i16> %b) {
-;CHECK-LABEL: test_sub_v1i16:
-;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %c = sub <1 x i16> %a, %b
- ret <1 x i16> %c
-}
-
-define <1 x i32> @test_sub_v1i32(<1 x i32> %a, <1 x i32> %b) {
-;CHECK-LABEL: test_sub_v1i32:
-;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %c = sub <1 x i32> %a, %b
- ret <1 x i32> %c
-}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 7e5b693..6497856 100644
--- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1,45 +1,52 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: and8xi8:
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <8 x i8> %a, %b;
ret <8 x i8> %tmp1
}
define <16 x i8> @and16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: and16xi8:
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <16 x i8> %a, %b;
ret <16 x i8> %tmp1
}
define <8 x i8> @orr8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orr8xi8:
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = or <8 x i8> %a, %b;
ret <8 x i8> %tmp1
}
define <16 x i8> @orr16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orr16xi8:
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = or <16 x i8> %a, %b;
ret <16 x i8> %tmp1
}
define <8 x i8> @xor8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: xor8xi8:
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <8 x i8> %a, %b;
ret <8 x i8> %tmp1
}
define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: xor16xi8:
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <16 x i8> %a, %b;
ret <16 x i8> %tmp1
}
define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl8xi8_const:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
%tmp3 = or <8 x i8> %tmp1, %tmp2
@@ -47,7 +54,8 @@ define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
}
define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl16xi8_const:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp3 = or <16 x i8> %tmp1, %tmp2
@@ -55,398 +63,461 @@ define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
}
define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orn8xi8:
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = or <8 x i8> %a, %tmp1
ret <8 x i8> %tmp2
}
define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orn16xi8:
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = or <16 x i8> %a, %tmp1
ret <16 x i8> %tmp2
}
define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bic8xi8:
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = and <8 x i8> %a, %tmp1
ret <8 x i8> %tmp2
}
define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bic16xi8:
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp2 = and <16 x i8> %a, %tmp1
ret <16 x i8> %tmp2
}
define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: orrimm2s_lsl0:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = or <2 x i32> %a, < i32 255, i32 255>
ret <2 x i32> %tmp1
}
define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: orrimm2s_lsl8:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = or <2 x i32> %a, < i32 65280, i32 65280>
ret <2 x i32> %tmp1
}
define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: orrimm2s_lsl16:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680>
ret <2 x i32> %tmp1
}
define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24
+; CHECK-LABEL: orrimm2s_lsl24:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24
%tmp1 = or <2 x i32> %a, < i32 4278190080, i32 4278190080>
ret <2 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: orrimm4s_lsl0:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255>
ret <4 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: orrimm4s_lsl8:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280>
ret <4 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: orrimm4s_lsl16:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 16711680, i32 16711680>
ret <4 x i32> %tmp1
}
define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24
+; CHECK-LABEL: orrimm4s_lsl24:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24
%tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080>
ret <4 x i32> %tmp1
}
define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: orrimm4h_lsl0:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 >
ret <4 x i16> %tmp1
}
define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: orrimm4h_lsl8:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
ret <4 x i16> %tmp1
}
define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: orrimm8h_lsl0:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
ret <8 x i16> %tmp1
}
define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: orrimm8h_lsl8:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
ret <8 x i16> %tmp1
}
define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0x10
+; CHECK-LABEL: bicimm2s_lsl0:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}
%tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #8
+; CHECK-LABEL: bicimm2s_lsl8:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #8
%tmp1 = and <2 x i32> %a, < i32 4294963199, i32 4294963199 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #16
+; CHECK-LABEL: bicimm2s_lsl16:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #16
%tmp1 = and <2 x i32> %a, < i32 4293918719, i32 4293918719 >
ret <2 x i32> %tmp1
}
define <2 x i32> @bicimm2s_lsl24(<2 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #24
+; CHECK-LABEL: bicimm2s_lsl24:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #24
%tmp1 = and <2 x i32> %a, < i32 4026531839, i32 4026531839>
ret <2 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0x10
+; CHECK-LABEL: bicimm4s_lsl0:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}
%tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #8
+; CHECK-LABEL: bicimm4s_lsl8:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #8
%tmp1 = and <4 x i32> %a, < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #16
+; CHECK-LABEL: bicimm4s_lsl16:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #16
%tmp1 = and <4 x i32> %a, < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
ret <4 x i32> %tmp1
}
define <4 x i32> @bicimm4s_lsl24(<4 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #24
+; CHECK-LABEL: bicimm4s_lsl24:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #24
%tmp1 = and <4 x i32> %a, < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839>
ret <4 x i32> %tmp1
}
define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0x10
+; CHECK-LABEL: bicimm4h_lsl0_a:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0x10|16}}
%tmp1 = and <4 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 >
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: bicimm4h_lsl0_b:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0x10, lsl #8
+; CHECK-LABEL: bicimm4h_lsl8_a:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0x10|16}}, lsl #8
%tmp1 = and <4 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199>
ret <4 x i16> %tmp1
}
define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: bicimm4h_lsl8_b:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255>
ret <4 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0x10
+; CHECK-LABEL: bicimm8h_lsl0_a:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0x10|16}}
%tmp1 = and <8 x i16> %a, < i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279,
i16 4294967279, i16 4294967279, i16 4294967279, i16 4294967279 >
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: bicimm8h_lsl0_b:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0x10, lsl #8
+; CHECK-LABEL: bicimm8h_lsl8_a:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0x10|16}}, lsl #8
%tmp1 = and <8 x i16> %a, < i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199,
i16 4294963199, i16 4294963199, i16 4294963199, i16 4294963199>
ret <8 x i16> %tmp1
}
define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: bicimm8h_lsl8_b:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
ret <8 x i16> %tmp1
}
define <2 x i32> @and2xi32(<2 x i32> %a, <2 x i32> %b) {
-;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: and2xi32:
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <2 x i32> %a, %b;
ret <2 x i32> %tmp1
}
define <4 x i16> @and4xi16(<4 x i16> %a, <4 x i16> %b) {
-;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: and4xi16:
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <4 x i16> %a, %b;
ret <4 x i16> %tmp1
}
define <1 x i64> @and1xi64(<1 x i64> %a, <1 x i64> %b) {
-;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: and1xi64:
+; CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <1 x i64> %a, %b;
ret <1 x i64> %tmp1
}
define <4 x i32> @and4xi32(<4 x i32> %a, <4 x i32> %b) {
-;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: and4xi32:
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <4 x i32> %a, %b;
ret <4 x i32> %tmp1
}
define <8 x i16> @and8xi16(<8 x i16> %a, <8 x i16> %b) {
-;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: and8xi16:
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <8 x i16> %a, %b;
ret <8 x i16> %tmp1
}
define <2 x i64> @and2xi64(<2 x i64> %a, <2 x i64> %b) {
-;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: and2xi64:
+; CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <2 x i64> %a, %b;
ret <2 x i64> %tmp1
}
define <2 x i32> @orr2xi32(<2 x i32> %a, <2 x i32> %b) {
-;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orr2xi32:
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = or <2 x i32> %a, %b;
ret <2 x i32> %tmp1
}
define <4 x i16> @orr4xi16(<4 x i16> %a, <4 x i16> %b) {
-;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orr4xi16:
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = or <4 x i16> %a, %b;
ret <4 x i16> %tmp1
}
define <1 x i64> @orr1xi64(<1 x i64> %a, <1 x i64> %b) {
-;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orr1xi64:
+; CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = or <1 x i64> %a, %b;
ret <1 x i64> %tmp1
}
define <4 x i32> @orr4xi32(<4 x i32> %a, <4 x i32> %b) {
-;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orr4xi32:
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = or <4 x i32> %a, %b;
ret <4 x i32> %tmp1
}
define <8 x i16> @orr8xi16(<8 x i16> %a, <8 x i16> %b) {
-;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orr8xi16:
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = or <8 x i16> %a, %b;
ret <8 x i16> %tmp1
}
define <2 x i64> @orr2xi64(<2 x i64> %a, <2 x i64> %b) {
-;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orr2xi64:
+; CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = or <2 x i64> %a, %b;
ret <2 x i64> %tmp1
}
define <2 x i32> @eor2xi32(<2 x i32> %a, <2 x i32> %b) {
-;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: eor2xi32:
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <2 x i32> %a, %b;
ret <2 x i32> %tmp1
}
define <4 x i16> @eor4xi16(<4 x i16> %a, <4 x i16> %b) {
-;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: eor4xi16:
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <4 x i16> %a, %b;
ret <4 x i16> %tmp1
}
define <1 x i64> @eor1xi64(<1 x i64> %a, <1 x i64> %b) {
-;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: eor1xi64:
+; CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <1 x i64> %a, %b;
ret <1 x i64> %tmp1
}
define <4 x i32> @eor4xi32(<4 x i32> %a, <4 x i32> %b) {
-;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: eor4xi32:
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <4 x i32> %a, %b;
ret <4 x i32> %tmp1
}
define <8 x i16> @eor8xi16(<8 x i16> %a, <8 x i16> %b) {
-;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: eor8xi16:
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <8 x i16> %a, %b;
ret <8 x i16> %tmp1
}
define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) {
-;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: eor2xi64:
+; CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <2 x i64> %a, %b;
ret <2 x i64> %tmp1
}
define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) {
-;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bic2xi32:
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 >
%tmp2 = and <2 x i32> %a, %tmp1
ret <2 x i32> %tmp2
}
define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) {
-;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bic4xi16:
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = and <4 x i16> %a, %tmp1
ret <4 x i16> %tmp2
}
define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) {
-;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bic1xi64:
+; CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <1 x i64> %b, < i64 -1>
%tmp2 = and <1 x i64> %a, %tmp1
ret <1 x i64> %tmp2
}
define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) {
-;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bic4xi32:
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = and <4 x i32> %a, %tmp1
ret <4 x i32> %tmp2
}
define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) {
-;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bic8xi16:
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = and <8 x i16> %a, %tmp1
ret <8 x i16> %tmp2
}
define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) {
-;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bic2xi64:
+; CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1>
%tmp2 = and <2 x i64> %a, %tmp1
ret <2 x i64> %tmp2
}
define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) {
-;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orn2xi32:
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 >
%tmp2 = or <2 x i32> %a, %tmp1
ret <2 x i32> %tmp2
}
define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) {
-;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orn4xi16:
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = or <4 x i16> %a, %tmp1
ret <4 x i16> %tmp2
}
define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) {
-;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: orn1xi64:
+; CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = xor <1 x i64> %b, < i64 -1>
%tmp2 = or <1 x i64> %a, %tmp1
ret <1 x i64> %tmp2
}
define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) {
-;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orn4xi32:
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1>
%tmp2 = or <4 x i32> %a, %tmp1
ret <4 x i32> %tmp2
}
define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) {
-;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orn8xi16:
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp2 = or <8 x i16> %a, %tmp1
ret <8 x i16> %tmp2
}
define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) {
-;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: orn2xi64:
+; CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1>
%tmp2 = or <2 x i64> %a, %tmp1
ret <2 x i64> %tmp2
}
define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl2xi32_const:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
%tmp2 = and <2 x i32> %b, < i32 0, i32 -1 >
%tmp3 = or <2 x i32> %tmp1, %tmp2
@@ -455,7 +526,8 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl4xi16_const:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1, i16 0 >
%tmp2 = and <4 x i16> %b, < i16 0, i16 -1, i16 0, i16 -1 >
%tmp3 = or <4 x i16> %tmp1, %tmp2
@@ -463,7 +535,8 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
}
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl1xi64_const:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp1 = and <1 x i64> %a, < i64 -16 >
%tmp2 = and <1 x i64> %b, < i64 15 >
%tmp3 = or <1 x i64> %tmp1, %tmp2
@@ -471,7 +544,8 @@ define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
}
define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl4xi32_const:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
%tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
%tmp3 = or <4 x i32> %tmp1, %tmp2
@@ -479,7 +553,8 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
}
define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl8xi16_const:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0 >
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
%tmp3 = or <8 x i16> %tmp1, %tmp2
@@ -487,7 +562,8 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
}
define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl2xi64_const:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
%tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
%tmp3 = or <2 x i64> %tmp1, %tmp2
@@ -496,7 +572,8 @@ define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl8xi8:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%1 = and <8 x i8> %v1, %v2
%2 = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <8 x i8> %2, %v3
@@ -505,7 +582,8 @@ define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
}
define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl4xi16:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%1 = and <4 x i16> %v1, %v2
%2 = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <4 x i16> %2, %v3
@@ -514,7 +592,8 @@ define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
}
define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl2xi32:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%1 = and <2 x i32> %v1, %v2
%2 = xor <2 x i32> %v1, <i32 -1, i32 -1>
%3 = and <2 x i32> %2, %v3
@@ -523,7 +602,8 @@ define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
}
define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: bsl1xi64:
+; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%1 = and <1 x i64> %v1, %v2
%2 = xor <1 x i64> %v1, <i64 -1>
%3 = and <1 x i64> %2, %v3
@@ -532,7 +612,8 @@ define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
}
define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl16xi8:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%1 = and <16 x i8> %v1, %v2
%2 = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%3 = and <16 x i8> %2, %v3
@@ -541,7 +622,8 @@ define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
}
define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl8xi16:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%1 = and <8 x i16> %v1, %v2
%2 = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%3 = and <8 x i16> %2, %v3
@@ -550,7 +632,8 @@ define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
}
define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl4xi32:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%1 = and <4 x i32> %v1, %v2
%2 = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
%3 = and <4 x i32> %2, %v3
@@ -559,56 +642,63 @@ define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
}
define <8 x i8> @vselect_v8i8(<8 x i8> %a) {
-;CHECK: movi {{d[0-9]+}}, #0xffff
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_v8i8:
+; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff
+; CHECK-NEXT: {{bsl v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b|and v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b}}
%b = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> <i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
ret <8 x i8> %b
}
define <4 x i16> @vselect_v4i16(<4 x i16> %a) {
-;CHECK: movi {{d[0-9]+}}, #0xffff
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_v4i16:
+; CHECK: movi {{d[0-9]+}}, #0x{{0*}}ffff
+; CHECK-NEXT: {{bsl v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b|and v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b}}
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i16> %a, <4 x i16> <i16 undef, i16 0, i16 0, i16 0>
ret <4 x i16> %b
}
define <8 x i8> @vselect_cmp_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_cmp_ne:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%cmp = icmp ne <8 x i8> %a, %b
%d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
ret <8 x i8> %d
}
define <8 x i8> @vselect_cmp_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_cmp_eq:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%cmp = icmp eq <8 x i8> %a, %b
%d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
ret <8 x i8> %d
}
define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_cmpz_ne:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%cmp = icmp ne <8 x i8> %a, zeroinitializer
%d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
ret <8 x i8> %d
}
define <8 x i8> @vselect_cmpz_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_cmpz_eq:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%cmp = icmp eq <8 x i8> %a, zeroinitializer
%d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
ret <8 x i8> %d
}
define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-;CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: vselect_tst:
+; CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = and <8 x i8> %a, %b
%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
%d = select <8 x i1> %tmp4, <8 x i8> %b, <8 x i8> %c
@@ -616,7 +706,8 @@ define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
}
define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
-;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: bsl2xi64:
+; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%1 = and <2 x i64> %v1, %v2
%2 = xor <2 x i64> %v1, <i64 -1, i64 -1>
%3 = and <2 x i64> %2, %v3
@@ -625,458 +716,534 @@ define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
}
define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: orrimm8b_as_orrimm4h_lsl0:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}
%val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <8 x i8> %val
}
define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: orrimm8b_as_orimm4h_lsl8:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <8 x i8> %val
}
define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: orimm16b_as_orrimm8h_lsl0:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}
%val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <16 x i8> %val
}
define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: orimm16b_as_orrimm8h_lsl8:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <16 x i8> %val
}
define <8 x i8> @and8imm2s_lsl0(<8 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: and8imm2s_lsl0:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255>
ret <8 x i8> %tmp1
}
define <8 x i8> @and8imm2s_lsl8(<8 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: and8imm2s_lsl8:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255>
ret <8 x i8> %tmp1
}
define <8 x i8> @and8imm2s_lsl16(<8 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: and8imm2s_lsl16:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255>
ret <8 x i8> %tmp1
}
define <8 x i8> @and8imm2s_lsl24(<8 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xfe, lsl #24
+; CHECK-LABEL: and8imm2s_lsl24:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24
%tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1>
ret <8 x i8> %tmp1
}
define <4 x i16> @and16imm2s_lsl0(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: and16imm2s_lsl0:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = and <4 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535>
ret <4 x i16> %tmp1
}
define <4 x i16> @and16imm2s_lsl8(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: and16imm2s_lsl8:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = and <4 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535>
ret <4 x i16> %tmp1
}
define <4 x i16> @and16imm2s_lsl16(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: and16imm2s_lsl16:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = and <4 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280>
ret <4 x i16> %tmp1
}
define <4 x i16> @and16imm2s_lsl24(<4 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xfe, lsl #24
+; CHECK-LABEL: and16imm2s_lsl24:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24
%tmp1 = and <4 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511>
ret <4 x i16> %tmp1
}
define <1 x i64> @and64imm2s_lsl0(<1 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: and64imm2s_lsl0:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = and <1 x i64> %a, < i64 -1095216660736>
ret <1 x i64> %tmp1
}
define <1 x i64> @and64imm2s_lsl8(<1 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: and64imm2s_lsl8:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = and <1 x i64> %a, < i64 -280375465148161>
ret <1 x i64> %tmp1
}
define <1 x i64> @and64imm2s_lsl16(<1 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: and64imm2s_lsl16:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = and <1 x i64> %a, < i64 -71776119077928961>
ret <1 x i64> %tmp1
}
define <1 x i64> @and64imm2s_lsl24(<1 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.2s, #0xfe, lsl #24
+; CHECK-LABEL: and64imm2s_lsl24:
+; CHECK: bic {{v[0-9]+}}.2s, #{{0xfe|254}}, lsl #24
%tmp1 = and <1 x i64> %a, < i64 144115183814443007>
ret <1 x i64> %tmp1
}
define <16 x i8> @and8imm4s_lsl0(<16 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: and8imm4s_lsl0:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255>
ret <16 x i8> %tmp1
}
define <16 x i8> @and8imm4s_lsl8(<16 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: and8imm4s_lsl8:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = and <16 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255>
ret <16 x i8> %tmp1
}
define <16 x i8> @and8imm4s_lsl16(<16 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: and8imm4s_lsl16:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255>
ret <16 x i8> %tmp1
}
define <16 x i8> @and8imm4s_lsl24(<16 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xfe, lsl #24
+; CHECK-LABEL: and8imm4s_lsl24:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24
%tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1>
ret <16 x i8> %tmp1
}
define <8 x i16> @and16imm4s_lsl0(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: and16imm4s_lsl0:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = and <8 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535>
ret <8 x i16> %tmp1
}
define <8 x i16> @and16imm4s_lsl8(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: and16imm4s_lsl8:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = and <8 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535>
ret <8 x i16> %tmp1
}
define <8 x i16> @and16imm4s_lsl16(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: and16imm4s_lsl16:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = and <8 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280>
ret <8 x i16> %tmp1
}
define <8 x i16> @and16imm4s_lsl24(<8 x i16> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xfe, lsl #24
+; CHECK-LABEL: and16imm4s_lsl24:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24
%tmp1 = and <8 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511>
ret <8 x i16> %tmp1
}
define <2 x i64> @and64imm4s_lsl0(<2 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: and64imm4s_lsl0:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = and <2 x i64> %a, < i64 -1095216660736, i64 -1095216660736>
ret <2 x i64> %tmp1
}
define <2 x i64> @and64imm4s_lsl8(<2 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: and64imm4s_lsl8:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = and <2 x i64> %a, < i64 -280375465148161, i64 -280375465148161>
ret <2 x i64> %tmp1
}
define <2 x i64> @and64imm4s_lsl16(<2 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: and64imm4s_lsl16:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = and <2 x i64> %a, < i64 -71776119077928961, i64 -71776119077928961>
ret <2 x i64> %tmp1
}
define <2 x i64> @and64imm4s_lsl24(<2 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.4s, #0xfe, lsl #24
+; CHECK-LABEL: and64imm4s_lsl24:
+; CHECK: bic {{v[0-9]+}}.4s, #{{0xfe|254}}, lsl #24
%tmp1 = and <2 x i64> %a, < i64 144115183814443007, i64 144115183814443007>
ret <2 x i64> %tmp1
}
define <8 x i8> @and8imm4h_lsl0(<8 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: and8imm4h_lsl0:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <8 x i8> %tmp1
}
define <8 x i8> @and8imm4h_lsl8(<8 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: and8imm4h_lsl8:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <8 x i8> %tmp1
}
define <2 x i32> @and16imm4h_lsl0(<2 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: and16imm4h_lsl0:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = and <2 x i32> %a, < i32 4278255360, i32 4278255360>
ret <2 x i32> %tmp1
}
define <2 x i32> @and16imm4h_lsl8(<2 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: and16imm4h_lsl8:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = and <2 x i32> %a, < i32 16711935, i32 16711935>
ret <2 x i32> %tmp1
}
define <1 x i64> @and64imm4h_lsl0(<1 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: and64imm4h_lsl0:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = and <1 x i64> %a, < i64 -71777214294589696>
ret <1 x i64> %tmp1
}
define <1 x i64> @and64imm4h_lsl8(<1 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: and64imm4h_lsl8:
+; CHECK: bic {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = and <1 x i64> %a, < i64 71777214294589695>
ret <1 x i64> %tmp1
}
define <16 x i8> @and8imm8h_lsl0(<16 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: and8imm8h_lsl0:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255 >
ret <16 x i8> %tmp1
}
define <16 x i8> @and8imm8h_lsl8(<16 x i8> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: and8imm8h_lsl8:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = and <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0 >
ret <16 x i8> %tmp1
}
define <4 x i32> @and16imm8h_lsl0(<4 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: and16imm8h_lsl0:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = and <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360>
ret <4 x i32> %tmp1
}
define <4 x i32> @and16imm8h_lsl8(<4 x i32> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: and16imm8h_lsl8:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = and <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935>
ret <4 x i32> %tmp1
}
define <2 x i64> @and64imm8h_lsl0(<2 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: and64imm8h_lsl0:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = and <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696>
ret <2 x i64> %tmp1
}
define <2 x i64> @and64imm8h_lsl8(<2 x i64> %a) {
-;CHECK: bic {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: and64imm8h_lsl8:
+; CHECK: bic {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = and <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695>
ret <2 x i64> %tmp1
}
define <8 x i8> @orr8imm2s_lsl0(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: orr8imm2s_lsl0:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0>
ret <8 x i8> %tmp1
}
define <8 x i8> @orr8imm2s_lsl8(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: orr8imm2s_lsl8:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0>
ret <8 x i8> %tmp1
}
define <8 x i8> @orr8imm2s_lsl16(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: orr8imm2s_lsl16:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0>
ret <8 x i8> %tmp1
}
define <8 x i8> @orr8imm2s_lsl24(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24
+; CHECK-LABEL: orr8imm2s_lsl24:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24
%tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255>
ret <8 x i8> %tmp1
}
define <4 x i16> @orr16imm2s_lsl0(<4 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: orr16imm2s_lsl0:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = or <4 x i16> %a, < i16 255, i16 0, i16 255, i16 0>
ret <4 x i16> %tmp1
}
define <4 x i16> @orr16imm2s_lsl8(<4 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: orr16imm2s_lsl8:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = or <4 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0>
ret <4 x i16> %tmp1
}
define <4 x i16> @orr16imm2s_lsl16(<4 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: orr16imm2s_lsl16:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = or <4 x i16> %a, < i16 0, i16 255, i16 0, i16 255>
ret <4 x i16> %tmp1
}
define <4 x i16> @orr16imm2s_lsl24(<4 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24
+; CHECK-LABEL: orr16imm2s_lsl24:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24
%tmp1 = or <4 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280>
ret <4 x i16> %tmp1
}
define <1 x i64> @orr64imm2s_lsl0(<1 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: orr64imm2s_lsl0:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}
%tmp1 = or <1 x i64> %a, < i64 1095216660735>
ret <1 x i64> %tmp1
}
define <1 x i64> @orr64imm2s_lsl8(<1 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: orr64imm2s_lsl8:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #8
%tmp1 = or <1 x i64> %a, < i64 280375465148160>
ret <1 x i64> %tmp1
}
define <1 x i64> @orr64imm2s_lsl16(<1 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: orr64imm2s_lsl16:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #16
%tmp1 = or <1 x i64> %a, < i64 71776119077928960>
ret <1 x i64> %tmp1
}
define <1 x i64> @orr64imm2s_lsl24(<1 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24
+; CHECK-LABEL: orr64imm2s_lsl24:
+; CHECK: orr {{v[0-9]+}}.2s, #{{0xff|255}}, lsl #24
%tmp1 = or <1 x i64> %a, < i64 -72057589759737856>
ret <1 x i64> %tmp1
}
define <16 x i8> @orr8imm4s_lsl0(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: orr8imm4s_lsl0:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0>
ret <16 x i8> %tmp1
}
define <16 x i8> @orr8imm4s_lsl8(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: orr8imm4s_lsl8:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0>
ret <16 x i8> %tmp1
}
define <16 x i8> @orr8imm4s_lsl16(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: orr8imm4s_lsl16:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0>
ret <16 x i8> %tmp1
}
define <16 x i8> @orr8imm4s_lsl24(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24
+; CHECK-LABEL: orr8imm4s_lsl24:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24
%tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255>
ret <16 x i8> %tmp1
}
define <8 x i16> @orr16imm4s_lsl0(<8 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: orr16imm4s_lsl0:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = or <8 x i16> %a, < i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0>
ret <8 x i16> %tmp1
}
define <8 x i16> @orr16imm4s_lsl8(<8 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: orr16imm4s_lsl8:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = or <8 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0>
ret <8 x i16> %tmp1
}
define <8 x i16> @orr16imm4s_lsl16(<8 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: orr16imm4s_lsl16:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = or <8 x i16> %a, < i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255>
ret <8 x i16> %tmp1
}
define <8 x i16> @orr16imm4s_lsl24(<8 x i16> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24
+; CHECK-LABEL: orr16imm4s_lsl24:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24
%tmp1 = or <8 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280>
ret <8 x i16> %tmp1
}
define <2 x i64> @orr64imm4s_lsl0(<2 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: orr64imm4s_lsl0:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}
%tmp1 = or <2 x i64> %a, < i64 1095216660735, i64 1095216660735>
ret <2 x i64> %tmp1
}
define <2 x i64> @orr64imm4s_lsl8(<2 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: orr64imm4s_lsl8:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #8
%tmp1 = or <2 x i64> %a, < i64 280375465148160, i64 280375465148160>
ret <2 x i64> %tmp1
}
define <2 x i64> @orr64imm4s_lsl16(<2 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: orr64imm4s_lsl16:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #16
%tmp1 = or <2 x i64> %a, < i64 71776119077928960, i64 71776119077928960>
ret <2 x i64> %tmp1
}
define <2 x i64> @orr64imm4s_lsl24(<2 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24
+; CHECK-LABEL: orr64imm4s_lsl24:
+; CHECK: orr {{v[0-9]+}}.4s, #{{0xff|255}}, lsl #24
%tmp1 = or <2 x i64> %a, < i64 -72057589759737856, i64 -72057589759737856>
ret <2 x i64> %tmp1
}
define <8 x i8> @orr8imm4h_lsl0(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: orr8imm4h_lsl0:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <8 x i8> %tmp1
}
define <8 x i8> @orr8imm4h_lsl8(<8 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: orr8imm4h_lsl8:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <8 x i8> %tmp1
}
define <2 x i32> @orr16imm4h_lsl0(<2 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: orr16imm4h_lsl0:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = or <2 x i32> %a, < i32 16711935, i32 16711935>
ret <2 x i32> %tmp1
}
define <2 x i32> @orr16imm4h_lsl8(<2 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: orr16imm4h_lsl8:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = or <2 x i32> %a, < i32 4278255360, i32 4278255360>
ret <2 x i32> %tmp1
}
define <1 x i64> @orr64imm4h_lsl0(<1 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: orr64imm4h_lsl0:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}
%tmp1 = or <1 x i64> %a, < i64 71777214294589695>
ret <1 x i64> %tmp1
}
define <1 x i64> @orr64imm4h_lsl8(<1 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: orr64imm4h_lsl8:
+; CHECK: orr {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8
%tmp1 = or <1 x i64> %a, < i64 -71777214294589696>
ret <1 x i64> %tmp1
}
define <16 x i8> @orr8imm8h_lsl0(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: orr8imm8h_lsl0:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
ret <16 x i8> %tmp1
}
define <16 x i8> @orr8imm8h_lsl8(<16 x i8> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: orr8imm8h_lsl8:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <16 x i8> %tmp1
}
define <4 x i32> @orr16imm8h_lsl0(<4 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: orr16imm8h_lsl0:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = or <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935>
ret <4 x i32> %tmp1
}
define <4 x i32> @orr16imm8h_lsl8(<4 x i32> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: orr16imm8h_lsl8:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = or <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360>
ret <4 x i32> %tmp1
}
define <2 x i64> @orr64imm8h_lsl0(<2 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: orr64imm8h_lsl0:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}
%tmp1 = or <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695>
ret <2 x i64> %tmp1
}
define <2 x i64> @orr64imm8h_lsl8(<2 x i64> %a) {
-;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: orr64imm8h_lsl8:
+; CHECK: orr {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8
%tmp1 = or <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696>
ret <2 x i64> %tmp1
}
diff --git a/test/CodeGen/AArch64/neon-bsl.ll b/test/CodeGen/AArch64/neon-bsl.ll
deleted file mode 100644
index c55fd01..0000000
--- a/test/CodeGen/AArch64/neon-bsl.ll
+++ /dev/null
@@ -1,235 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
-
-declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-
-declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
-
-declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
-
-declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>)
-
-declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>)
-
-declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>)
-
-declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
-
-define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-; CHECK-LABEL: test_vbsl_s8:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-; CHECK-LABEL: test_vbsl_s16:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
- %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8>
- ret <8 x i8> %0
-}
-
-define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
-; CHECK-LABEL: test_vbsl_s32:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
- ret <2 x i32> %vbsl3.i
-}
-
-define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_vbsl_s64:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
- ret <1 x i64> %vbsl3.i
-}
-
-define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-; CHECK-LABEL: test_vbsl_u8:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
- ret <8 x i8> %vbsl.i
-}
-
-define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-; CHECK-LABEL: test_vbsl_u16:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
- ret <4 x i16> %vbsl3.i
-}
-
-define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
-; CHECK-LABEL: test_vbsl_u32:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
- ret <2 x i32> %vbsl3.i
-}
-
-define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_vbsl_u64:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
- ret <1 x i64> %vbsl3.i
-}
-
-define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) {
-; CHECK-LABEL: test_vbsl_f32:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3)
- ret <2 x float> %vbsl3.i
-}
-
-define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) {
-; CHECK-LABEL: test_vbsl_f64:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl.i = bitcast <1 x i64> %v1 to <1 x double>
- %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3)
- ret <1 x double> %vbsl3.i
-}
-
-define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
-; CHECK-LABEL: test_vbsl_p8:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
- ret <8 x i8> %vbsl.i
-}
-
-define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
-; CHECK-LABEL: test_vbsl_p16:
-; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-entry:
- %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
- ret <4 x i16> %vbsl3.i
-}
-
-define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-; CHECK-LABEL: test_vbslq_s8:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
- ret <16 x i8> %vbsl.i
-}
-
-define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-; CHECK-LABEL: test_vbslq_s16:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
- ret <8 x i16> %vbsl3.i
-}
-
-define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-; CHECK-LABEL: test_vbslq_s32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
- ret <4 x i32> %vbsl3.i
-}
-
-define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
-; CHECK-LABEL: test_vbslq_s64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
- ret <2 x i64> %vbsl3.i
-}
-
-define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-; CHECK-LABEL: test_vbslq_u8:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
- ret <16 x i8> %vbsl.i
-}
-
-define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-; CHECK-LABEL: test_vbslq_u16:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
- ret <8 x i16> %vbsl3.i
-}
-
-define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-; CHECK-LABEL: test_vbslq_u32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
- ret <4 x i32> %vbsl3.i
-}
-
-define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
-; CHECK-LABEL: test_vbslq_u64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
- ret <2 x i64> %vbsl3.i
-}
-
-define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) {
-; CHECK-LABEL: test_vbslq_f32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl.i = bitcast <4 x i32> %v1 to <4 x float>
- %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3)
- ret <4 x float> %vbsl3.i
-}
-
-define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
-; CHECK-LABEL: test_vbslq_p8:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
- ret <16 x i8> %vbsl.i
-}
-
-define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
-; CHECK-LABEL: test_vbslq_p16:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
- ret <8 x i16> %vbsl3.i
-}
-
-define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) {
-; CHECK-LABEL: test_vbslq_f64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %vbsl.i = bitcast <2 x i64> %v1 to <2 x double>
- %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3)
- ret <2 x double> %vbsl3.i
-}
-
-define <2 x double> @test_bsl_v2f64(<2 x i1> %v1, <2 x double> %v2, <2 x double> %v3) {
-; CHECK-LABEL: test_bsl_v2f64:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %1 = select <2 x i1> %v1, <2 x double> %v2, <2 x double> %v3
- ret <2 x double> %1
-}
-
-define <4 x float> @test_bsl_v4f32(<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3) {
-; CHECK-LABEL: test_bsl_v4f32:
-; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %1 = select <4 x i1> %v1, <4 x float> %v2, <4 x float> %v3
- ret <4 x float> %1
-}
diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll
index 68f0342..6d89dfb 100644
--- a/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1,560 +1,631 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmeq8xi8:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp eq <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmeq16xi8:
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp eq <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmeq4xi16:
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp eq <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmeq8xi16:
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp eq <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmeq2xi32:
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp eq <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmeq4xi32:
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp eq <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmeq2xi64:
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp eq <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmne8xi8:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ne <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmne16xi8:
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmne4xi16:
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ne <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmne8xi16:
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmne2xi32:
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ne <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmne4xi32:
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmne2xi64:
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmgt8xi8:
+; CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp sgt <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmgt16xi8:
+; CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp sgt <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmgt4xi16:
+; CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp sgt <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmgt8xi16:
+; CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp sgt <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmgt2xi32:
+; CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp sgt <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmgt4xi32:
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp sgt <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmgt2xi64:
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp sgt <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmlt8xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
+; CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
%tmp3 = icmp slt <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmlt16xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
+; CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp slt <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmlt4xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
+; CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp slt <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmlt8xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
+; CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp slt <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmlt2xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp slt <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmlt4xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = icmp slt <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmlt2xi64:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = icmp slt <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmge8xi8:
+; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp sge <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmge16xi8:
+; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp sge <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmge4xi16:
+; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp sge <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmge8xi16:
+; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp sge <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmge2xi32:
+; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp sge <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmge4xi32:
+; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp sge <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmge2xi64:
+; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp sge <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmle8xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
+; CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
%tmp3 = icmp sle <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmle16xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
+; CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp sle <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmle4xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
+; CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp sle <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmle8xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
+; CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp sle <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmle2xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp sle <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmle4xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = icmp sle <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmle2xi64:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = icmp sle <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmhi8xi8:
+; CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ugt <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmhi16xi8:
+; CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ugt <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmhi4xi16:
+; CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp ugt <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmhi8xi16:
+; CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp ugt <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmhi2xi32:
+; CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp ugt <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmhi4xi32:
+; CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp ugt <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmhi2xi64:
+; CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp ugt <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmlo8xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
+; CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
%tmp3 = icmp ult <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmlo16xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+; CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp ult <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmlo4xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+; CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp ult <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmlo8xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+; CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp ult <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmlo2xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp ult <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmlo4xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = icmp ult <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmlo2xi64:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = icmp ult <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmhs8xi8:
+; CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp uge <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmhs16xi8:
+; CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp uge <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmhs4xi16:
+; CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp uge <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmhs8xi16:
+; CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp uge <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmhs2xi32:
+; CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp uge <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmhs4xi32:
+; CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp uge <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmhs2xi64:
+; CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp uge <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) {
+; CHECK-LABEL: cmls8xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+; CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
%tmp3 = icmp ule <8 x i8> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: cmls16xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+; CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp ule <16 x i8> %A, %B;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) {
+; CHECK-LABEL: cmls4xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+; CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp ule <4 x i16> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) {
+; CHECK-LABEL: cmls8xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+; CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp ule <8 x i16> %A, %B;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: cmls2xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp ule <2 x i32> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: cmls4xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = icmp ule <4 x i32> %A, %B;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: cmls2xi64:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = icmp ule <2 x i64> %A, %B;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmtst8xi8:
+; CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = and <8 x i8> %A, %B
%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
%tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
@@ -562,7 +633,8 @@ define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) {
}
define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmtst16xi8:
+; CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = and <16 x i8> %A, %B
%tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
%tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
@@ -570,7 +642,8 @@ define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) {
}
define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmtst4xi16:
+; CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = and <4 x i16> %A, %B
%tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
%tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
@@ -578,7 +651,8 @@ define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) {
}
define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmtst8xi16:
+; CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = and <8 x i16> %A, %B
%tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
%tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
@@ -586,7 +660,8 @@ define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) {
}
define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmtst2xi32:
+; CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = and <2 x i32> %A, %B
%tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
%tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
@@ -594,7 +669,8 @@ define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) {
}
define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmtst4xi32:
+; CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = and <4 x i32> %A, %B
%tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
%tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
@@ -602,7 +678,8 @@ define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) {
}
define <2 x i64> @cmtst2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmtst2xi64:
+; CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = and <2 x i64> %A, %B
%tmp4 = icmp ne <2 x i64> %tmp3, zeroinitializer
%tmp5 = sext <2 x i1> %tmp4 to <2 x i64>
@@ -612,49 +689,56 @@ define <2 x i64> @cmtst2xi64(<2 x i64> %A, <2 x i64> %B) {
define <8 x i8> @cmeqz8xi8(<8 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+; CHECK-LABEL: cmeqz8xi8:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
%tmp3 = icmp eq <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmeqz16xi8(<16 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+; CHECK-LABEL: cmeqz16xi8:
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
%tmp3 = icmp eq <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmeqz4xi16(<4 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+; CHECK-LABEL: cmeqz4xi16:
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
%tmp3 = icmp eq <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmeqz8xi16(<8 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+; CHECK-LABEL: cmeqz8xi16:
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
%tmp3 = icmp eq <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmeqz2xi32(<2 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+; CHECK-LABEL: cmeqz2xi32:
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
%tmp3 = icmp eq <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmeqz4xi32(<4 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+; CHECK-LABEL: cmeqz4xi32:
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
%tmp3 = icmp eq <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+; CHECK-LABEL: cmeqz2xi64:
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
%tmp3 = icmp eq <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -662,49 +746,56 @@ define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
define <8 x i8> @cmgez8xi8(<8 x i8> %A) {
-;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+; CHECK-LABEL: cmgez8xi8:
+; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
%tmp3 = icmp sge <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmgez16xi8(<16 x i8> %A) {
-;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+; CHECK-LABEL: cmgez16xi8:
+; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
%tmp3 = icmp sge <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmgez4xi16(<4 x i16> %A) {
-;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+; CHECK-LABEL: cmgez4xi16:
+; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
%tmp3 = icmp sge <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmgez8xi16(<8 x i16> %A) {
-;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+; CHECK-LABEL: cmgez8xi16:
+; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
%tmp3 = icmp sge <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmgez2xi32(<2 x i32> %A) {
-;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+; CHECK-LABEL: cmgez2xi32:
+; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
%tmp3 = icmp sge <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmgez4xi32(<4 x i32> %A) {
-;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+; CHECK-LABEL: cmgez4xi32:
+; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
%tmp3 = icmp sge <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
-;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+; CHECK-LABEL: cmgez2xi64:
+; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
%tmp3 = icmp sge <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -712,259 +803,294 @@ define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
-;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+; CHECK-LABEL: cmgtz8xi8:
+; CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
%tmp3 = icmp sgt <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmgtz16xi8(<16 x i8> %A) {
-;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+; CHECK-LABEL: cmgtz16xi8:
+; CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
%tmp3 = icmp sgt <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmgtz4xi16(<4 x i16> %A) {
-;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+; CHECK-LABEL: cmgtz4xi16:
+; CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
%tmp3 = icmp sgt <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmgtz8xi16(<8 x i16> %A) {
-;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+; CHECK-LABEL: cmgtz8xi16:
+; CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
%tmp3 = icmp sgt <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmgtz2xi32(<2 x i32> %A) {
-;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+; CHECK-LABEL: cmgtz2xi32:
+; CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
%tmp3 = icmp sgt <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmgtz4xi32(<4 x i32> %A) {
-;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+; CHECK-LABEL: cmgtz4xi32:
+; CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
%tmp3 = icmp sgt <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmgtz2xi64(<2 x i64> %A) {
-;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+; CHECK-LABEL: cmgtz2xi64:
+; CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
%tmp3 = icmp sgt <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmlez8xi8(<8 x i8> %A) {
-;CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+; CHECK-LABEL: cmlez8xi8:
+; CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
%tmp3 = icmp sle <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmlez16xi8(<16 x i8> %A) {
-;CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+; CHECK-LABEL: cmlez16xi8:
+; CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
%tmp3 = icmp sle <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmlez4xi16(<4 x i16> %A) {
-;CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+; CHECK-LABEL: cmlez4xi16:
+; CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
%tmp3 = icmp sle <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmlez8xi16(<8 x i16> %A) {
-;CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+; CHECK-LABEL: cmlez8xi16:
+; CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
%tmp3 = icmp sle <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmlez2xi32(<2 x i32> %A) {
-;CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+; CHECK-LABEL: cmlez2xi32:
+; CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
%tmp3 = icmp sle <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmlez4xi32(<4 x i32> %A) {
-;CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+; CHECK-LABEL: cmlez4xi32:
+; CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
%tmp3 = icmp sle <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmlez2xi64(<2 x i64> %A) {
-;CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+; CHECK-LABEL: cmlez2xi64:
+; CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
%tmp3 = icmp sle <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmltz8xi8(<8 x i8> %A) {
-;CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
+; CHECK-LABEL: cmltz8xi8:
+; CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
%tmp3 = icmp slt <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmltz16xi8(<16 x i8> %A) {
-;CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
+; CHECK-LABEL: cmltz16xi8:
+; CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
%tmp3 = icmp slt <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmltz4xi16(<4 x i16> %A) {
-;CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
+; CHECK-LABEL: cmltz4xi16:
+; CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
%tmp3 = icmp slt <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmltz8xi16(<8 x i16> %A) {
-;CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
+; CHECK-LABEL: cmltz8xi16:
+; CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
%tmp3 = icmp slt <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmltz2xi32(<2 x i32> %A) {
-;CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
+; CHECK-LABEL: cmltz2xi32:
+; CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
%tmp3 = icmp slt <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmltz4xi32(<4 x i32> %A) {
-;CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
+; CHECK-LABEL: cmltz4xi32:
+; CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
%tmp3 = icmp slt <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
-;CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
+; CHECK-LABEL: cmltz2xi64:
+; CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
%tmp3 = icmp slt <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmneqz8xi8:
+; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ne <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmneqz16xi8:
+; CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmneqz4xi16:
+; CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ne <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmneqz8xi16:
+; CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmneqz2xi32:
+; CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ne <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmneqz4xi32:
+; CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0x0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmneqz2xi64:
+; CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ne <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
-;CHECK: movi {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmhsz8xi8:
+; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp uge <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmhsz16xi8:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp uge <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
-;CHECK: movi {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmhsz4xi16:
+; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp uge <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmhsz8xi16:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp uge <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
-;CHECK: movi {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmhsz2xi32:
+; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp uge <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmhsz4xi32:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmhsz2xi64:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -972,196 +1098,217 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
-;CHECK: movi {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-LABEL: cmhiz8xi8:
+; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: cmhiz16xi8:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
-;CHECK: movi {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-LABEL: cmhiz4xi16:
+; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
%tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK-LABEL: cmhiz8xi16:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
%tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
-;CHECK: movi {{v[0-9]+}}.8b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: cmhiz2xi32:
+; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: cmhiz4xi32:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
-;CHECK: movi {{v[0-9]+}}.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: cmhiz2xi64:
+; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
+; CHECK-LABEL: cmlsz8xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.8b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
%tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
+; CHECK-LABEL: cmlsz16xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
+; CHECK-LABEL: cmlsz4xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.8b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
+; CHECK-LABEL: cmlsz8xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
+; CHECK-LABEL: cmlsz2xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.8b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
+; CHECK-LABEL: cmlsz4xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = icmp ule <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
+; CHECK-LABEL: cmlsz2xi64:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LS implemented as HS, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = icmp ule <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
+; CHECK-LABEL: cmloz8xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.8b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v1.8b, {{v[0-9]+}}.8b
+; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v1.8b, {{v[0-9]+}}.8b
%tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
+; CHECK-LABEL: cmloz16xi8:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
+; CHECK-LABEL: cmloz4xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.8b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
+; CHECK-LABEL: cmloz8xi16:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
+; CHECK-LABEL: cmloz2xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.8b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
+; CHECK-LABEL: cmloz4xi32:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
+; CHECK-LABEL: cmloz2xi64:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; LO implemented as HI, so check reversed operands.
-;CHECK: movi v1.16b, #0x0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -1169,144 +1316,162 @@ define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
define <2 x i32> @fcmoeq2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: fcmoeq2xfloat:
+; CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = fcmp oeq <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmoeq4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: fcmoeq4xfloat:
+; CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = fcmp oeq <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmoeq2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: fcmoeq2xdouble:
+; CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = fcmp oeq <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmoge2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: fcmoge2xfloat:
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = fcmp oge <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmoge4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: fcmoge4xfloat:
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = fcmp oge <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmoge2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: fcmoge2xdouble:
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = fcmp oge <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmogt2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK-LABEL: fcmogt2xfloat:
+; CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
%tmp3 = fcmp ogt <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmogt4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-LABEL: fcmogt4xfloat:
+; CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
%tmp3 = fcmp ogt <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmogt2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+; CHECK-LABEL: fcmogt2xdouble:
+; CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp3 = fcmp ogt <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmole2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmole2xfloat:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; OLE implemented as OGE, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = fcmp ole <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmole4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmole4xfloat:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; OLE implemented as OGE, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = fcmp ole <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmole2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmole2xdouble:
 ; Using registers other than v0 and v1 is possible, but would be odd.
; OLE implemented as OGE, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = fcmp ole <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmolt2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmolt2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = fcmp olt <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmolt4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmolt4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
%tmp3 = fcmp olt <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmolt2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmolt2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
%tmp3 = fcmp olt <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmone2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmone2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ONE = OGT | OLT, OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp one <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmone4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmone4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ONE = OGT | OLT, OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp one <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmone2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmone2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; ONE = OGT | OLT, OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp one <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -1315,11 +1480,12 @@ define <2 x i64> @fcmone2xdouble(<2 x double> %A, <2 x double> %B) {
define <2 x i32> @fcmord2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmord2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ORD = OGE | OLT, OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ord <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -1327,22 +1493,24 @@ define <2 x i32> @fcmord2xfloat(<2 x float> %A, <2 x float> %B) {
define <4 x i32> @fcmord4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmord4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ORD = OGE | OLT, OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ord <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmord2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; ORD = OGE | OLT, OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ord <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -1350,236 +1518,260 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) {
define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmuno2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp uno <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
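; A sketch of the identity behind the UNO checks above, assuming IEEE
; semantics: for ordered operands exactly one of a >= b or a < b holds, and
; OLT(a, b) is realised as OGT(b, a), so
;   OGE(a, b) | OGT(b, a) == ORD(a, b)
; Inverting that mask (the {{mvn|not}} line) therefore yields UNO. If either
; input of a lane is NaN, both compares produce 0 for that lane and the
; inversion sets it to all-ones, i.e. unordered.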
define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmuno4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uno <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmuno2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uno <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmueq2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ueq <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmueq4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ueq <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmueq2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT, so check reversed operands.
-;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ueq <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmuge2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmuge2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UGE = ULE with swapped operands, ULE implemented as !OGT.
-;CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp uge <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmuge4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmuge4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UGE = ULE with swapped operands, ULE implemented as !OGT.
-;CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uge <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmuge2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmuge2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; UGE = ULE with swapped operands, ULE implemented as !OGT.
-;CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uge <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmugt2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmugt2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UGT = ULT with swapped operands, ULT implemented as !OGE.
-;CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ugt <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmugt4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmugt4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UGT = ULT with swapped operands, ULT implemented as !OGE.
-;CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ugt <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmugt2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: fcmugt2xdouble:
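; Using registers other than v0, v1 is possible, but would be odd.
; UGT = ULT with swapped operands, ULT implemented as !OGE.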
+; CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ugt <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmule2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmule2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ULE implemented as !OGT.
-;CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ule <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmule4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmule4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ULE implemented as !OGT.
-;CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ule <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmule2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmule2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; ULE implemented as !OGT.
-;CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ule <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmult2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmult2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ULT implemented as !OGE.
-;CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ult <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmult4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmult4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; ULT implemented as !OGE.
-;CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ult <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmult2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmult2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; ULT implemented as !OGE.
-;CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ult <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmune2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmune2xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UNE = !OEQ.
-;CHECK: fcmeq {{v[0-9]+}}.2s, v0.2s, v1.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmeq {{v[0-9]+}}.2s, v0.2s, v1.2s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp une <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmune4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fcmune4xfloat:
; Using registers other than v0, v1 is possible, but would be odd.
; UNE = !OEQ.
-;CHECK: fcmeq {{v[0-9]+}}.4s, v0.4s, v1.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmeq {{v[0-9]+}}.4s, v0.4s, v1.4s
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp une <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmune2xdouble:
; Using registers other than v0, v1 is possible, but would be odd.
; UNE = !OEQ.
-;CHECK: fcmeq {{v[0-9]+}}.2d, v0.2d, v1.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmeq {{v[0-9]+}}.2d, v0.2d, v1.2d
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp une <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
-;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-LABEL: fcmoeqz2xfloat:
+; CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
%tmp3 = fcmp oeq <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
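; The #{{0.0|0}} immediate pattern is deliberately loose: depending on the
; instruction printer, the zero operand of the compare-against-zero forms may
; be rendered as "#0.0" or "#0", and the test should accept either spelling.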
define <4 x i32> @fcmoeqz4xfloat(<4 x float> %A) {
-;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-LABEL: fcmoeqz4xfloat:
+; CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
%tmp3 = fcmp oeq <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmoeqz2xdouble(<2 x double> %A) {
-;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-LABEL: fcmoeqz2xdouble:
+; CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
%tmp3 = fcmp oeq <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -1587,250 +1779,280 @@ define <2 x i64> @fcmoeqz2xdouble(<2 x double> %A) {
define <2 x i32> @fcmogez2xfloat(<2 x float> %A) {
-;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-LABEL: fcmogez2xfloat:
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
%tmp3 = fcmp oge <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmogez4xfloat(<4 x float> %A) {
-;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-LABEL: fcmogez4xfloat:
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
%tmp3 = fcmp oge <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmogez2xdouble(<2 x double> %A) {
-;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-LABEL: fcmogez2xdouble:
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
%tmp3 = fcmp oge <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmogtz2xfloat(<2 x float> %A) {
-;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-LABEL: fcmogtz2xfloat:
+; CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
%tmp3 = fcmp ogt <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmogtz4xfloat(<4 x float> %A) {
-;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-LABEL: fcmogtz4xfloat:
+; CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
%tmp3 = fcmp ogt <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmogtz2xdouble(<2 x double> %A) {
-;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-LABEL: fcmogtz2xdouble:
+; CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
%tmp3 = fcmp ogt <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmoltz2xfloat(<2 x float> %A) {
-;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-LABEL: fcmoltz2xfloat:
+; CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
%tmp3 = fcmp olt <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmoltz4xfloat(<4 x float> %A) {
-;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-LABEL: fcmoltz4xfloat:
+; CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
%tmp3 = fcmp olt <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmoltz2xdouble(<2 x double> %A) {
-;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-LABEL: fcmoltz2xdouble:
+; CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
%tmp3 = fcmp olt <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmolez2xfloat(<2 x float> %A) {
-;CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
+; CHECK-LABEL: fcmolez2xfloat:
+; CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
%tmp3 = fcmp ole <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmolez4xfloat(<4 x float> %A) {
-;CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
+; CHECK-LABEL: fcmolez4xfloat:
+; CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
%tmp3 = fcmp ole <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmolez2xdouble(<2 x double> %A) {
-;CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
+; CHECK-LABEL: fcmolez2xdouble:
+; CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
%tmp3 = fcmp ole <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmonez2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmonez2xfloat:
; ONE with zero = OLT | OGT
-;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp one <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmonez4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmonez4xfloat:
; ONE with zero = OLT | OGT
-;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp one <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmonez2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmonez2xdouble:
; ONE with zero = OLT | OGT
-;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp one <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
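; Unlike the register-register ONE tests above, no operand reversal is needed
; here: the compare-against-zero encodings include fcmlt as well as fcmgt, so
; OLT can be emitted directly instead of as OGT with swapped operands.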
define <2 x i32> @fcmordz2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmordz2xfloat:
; ORD with zero = OLT | OGE
-;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ord <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmordz4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmordz4xfloat:
; ORD with zero = OLT | OGE
-;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ord <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmordz2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmordz2xdouble:
; ORD with zero = OLT | OGE
-;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ord <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmueqz2xfloat:
; UEQ with zero = !ONE = !(OLT | OGT)
-;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ueq <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmueqz4xfloat:
; UEQ with zero = !ONE = !(OLT | OGT)
-;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ueq <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmueqz2xdouble:
; UEQ with zero = !ONE = !(OLT | OGT)
-;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ueq <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmugez2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmugez2xfloat:
; UGE with zero = !OLT
-;CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp uge <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmugez4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmugez4xfloat:
; UGE with zero = !OLT
-;CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uge <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmugez2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmugez2xdouble:
; UGE with zero = !OLT
-;CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uge <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmugtz2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmugtz2xfloat:
; UGT with zero = !OLE
-;CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ugt <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmugtz4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmugtz4xfloat:
; UGT with zero = !OLE
-;CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ugt <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmugtz2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmugtz2xdouble:
; UGT with zero = !OLE
-;CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ugt <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmultz2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmultz2xfloat:
; ULT with zero = !OGE
-;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ult <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmultz4xfloat(<4 x float> %A) {
-;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: fcmultz4xfloat:
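; ULT with zero = !OGE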
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ult <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmultz2xdouble(<2 x double> %A) {
-;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: fcmultz2xdouble:
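; ULT with zero = !OGE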
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ult <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -1838,53 +2060,59 @@ define <2 x i64> @fcmultz2xdouble(<2 x double> %A) {
define <2 x i32> @fcmulez2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmulez2xfloat:
; ULE with zero = !OGT
-;CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp ule <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmulez4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmulez4xfloat:
; ULE with zero = !OGT
-;CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ule <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmulez2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmulez2xdouble:
; ULE with zero = !OGT
-;CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp ule <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
}
define <2 x i32> @fcmunez2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmunez2xfloat:
; UNE with zero = !OEQ with zero
-;CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp une <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmunez4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmunez4xfloat:
; UNE with zero = !OEQ with zero
-;CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp une <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmunez2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmunez2xdouble:
; UNE with zero = !OEQ with zero
-;CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp une <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -1892,33 +2120,36 @@ define <2 x i64> @fcmunez2xdouble(<2 x double> %A) {
define <2 x i32> @fcmunoz2xfloat(<2 x float> %A) {
+; CHECK-LABEL: fcmunoz2xfloat:
; UNO with zero = !ORD = !(OLT | OGE)
-;CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
%tmp3 = fcmp uno <2 x float> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @fcmunoz4xfloat(<4 x float> %A) {
+; CHECK-LABEL: fcmunoz4xfloat:
; UNO with zero = !ORD = !(OLT | OGE)
-;CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uno <4 x float> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @fcmunoz2xdouble(<2 x double> %A) {
+; CHECK-LABEL: fcmunoz2xdouble:
; UNO with zero = !ORD = !(OLT | OGE)
-;CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0.0
-;CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: fcmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0.0|0}}
+; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
%tmp3 = fcmp uno <2 x double> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
deleted file mode 100644
index b4d55df..0000000
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ /dev/null
@@ -1,1402 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-
-define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
- %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
- %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
- %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
- %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
- %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
- %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
- %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
- %tmp3 = extractelement <16 x i8> %tmp1, i32 2
- %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
- %tmp3 = extractelement <4 x i32> %tmp1, i32 2
- %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <2 x i64> %tmp1, i32 0
- %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
- ret <2 x i64> %tmp4
-}
-
-define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
- %tmp3 = extractelement <4 x float> %tmp1, i32 2
- %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
- ret <4 x float> %tmp4
-}
-
-define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <2 x double> %tmp1, i32 0
- %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
- ret <2 x double> %tmp4
-}
-
-define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
- %tmp3 = extractelement <8 x i8> %tmp1, i32 2
- %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <4 x i16> %tmp1, i32 2
- %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
- %tmp3 = extractelement <2 x i32> %tmp1, i32 1
- %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <1 x i64> %tmp1, i32 0
- %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
- ret <2 x i64> %tmp4
-}
-
-define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
- %tmp3 = extractelement <2 x float> %tmp1, i32 1
- %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
- ret <4 x float> %tmp4
-}
-
-define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <1 x double> %tmp1, i32 0
- %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
- ret <2 x double> %tmp4
-}
-
-define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
- %tmp3 = extractelement <16 x i8> %tmp1, i32 2
- %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
- ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
- %tmp3 = extractelement <4 x i32> %tmp1, i32 2
- %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
- ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <2 x i64> %tmp1, i32 0
- %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
- ret <1 x i64> %tmp4
-}
-
-define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
- %tmp3 = extractelement <4 x float> %tmp1, i32 2
- %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
- ret <2 x float> %tmp4
-}
-
-define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <2 x double> %tmp1, i32 0
- %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
- ret <1 x double> %tmp4
-}
-
-define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
- %tmp3 = extractelement <8 x i8> %tmp1, i32 2
- %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
- ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <4 x i16> %tmp1, i32 2
- %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
- %tmp3 = extractelement <2 x i32> %tmp1, i32 0
- %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
- ret <2 x i32> %tmp4
-}
-
-define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <1 x i64> %tmp1, i32 0
- %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
- ret <1 x i64> %tmp4
-}
-
-define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
- %tmp3 = extractelement <2 x float> %tmp1, i32 0
- %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
- ret <2 x float> %tmp4
-}
-
-define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
-;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <1 x double> %tmp1, i32 0
- %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
- ret <1 x double> %tmp4
-}
-
-define i32 @umovw16b(<16 x i8> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
- %tmp3 = extractelement <16 x i8> %tmp1, i32 8
- %tmp4 = zext i8 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i32 @umovw8h(<8 x i16> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = zext i16 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i32 @umovw4s(<4 x i32> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
- %tmp3 = extractelement <4 x i32> %tmp1, i32 2
- ret i32 %tmp3
-}
-
-define i64 @umovx2d(<2 x i64> %tmp1) {
-;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0]
- %tmp3 = extractelement <2 x i64> %tmp1, i32 0
- ret i64 %tmp3
-}
-
-define i32 @umovw8b(<8 x i8> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
- %tmp3 = extractelement <8 x i8> %tmp1, i32 7
- %tmp4 = zext i8 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i32 @umovw4h(<4 x i16> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <4 x i16> %tmp1, i32 2
- %tmp4 = zext i16 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i32 @umovw2s(<2 x i32> %tmp1) {
-;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
- %tmp3 = extractelement <2 x i32> %tmp1, i32 1
- ret i32 %tmp3
-}
-
-define i64 @umovx1d(<1 x i64> %tmp1) {
-;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- %tmp3 = extractelement <1 x i64> %tmp1, i32 0
- ret i64 %tmp3
-}
-
-define i32 @smovw16b(<16 x i8> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
- %tmp3 = extractelement <16 x i8> %tmp1, i32 8
- %tmp4 = sext i8 %tmp3 to i32
- %tmp5 = add i32 5, %tmp4
- ret i32 %tmp5
-}
-
-define i32 @smovw8h(<8 x i16> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- %tmp5 = add i32 5, %tmp4
- ret i32 %tmp5
-}
-
-define i32 @smovx16b(<16 x i8> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
- %tmp3 = extractelement <16 x i8> %tmp1, i32 8
- %tmp4 = sext i8 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i32 @smovx8h(<8 x i16> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i64 @smovx4s(<4 x i32> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
- %tmp3 = extractelement <4 x i32> %tmp1, i32 2
- %tmp4 = sext i32 %tmp3 to i64
- ret i64 %tmp4
-}
-
-define i32 @smovw8b(<8 x i8> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
- %tmp3 = extractelement <8 x i8> %tmp1, i32 4
- %tmp4 = sext i8 %tmp3 to i32
- %tmp5 = add i32 5, %tmp4
- ret i32 %tmp5
-}
-
-define i32 @smovw4h(<4 x i16> %tmp1) {
-;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <4 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- %tmp5 = add i32 5, %tmp4
- ret i32 %tmp5
-}
-
-define i32 @smovx8b(<8 x i8> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6]
- %tmp3 = extractelement <8 x i8> %tmp1, i32 6
- %tmp4 = sext i8 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i32 @smovx4h(<4 x i16> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
- %tmp3 = extractelement <4 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- ret i32 %tmp4
-}
-
-define i64 @smovx2s(<2 x i32> %tmp1) {
-;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
- %tmp3 = extractelement <2 x i32> %tmp1, i32 1
- %tmp4 = sext i32 %tmp3 to i64
- ret i64 %tmp4
-}
-
-define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
-;CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
- %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
- ret <8 x i8> %vset_lane
-}
-
-define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
-;CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
- %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
- ret <16 x i8> %vset_lane
-}
-
-define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
-;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
- %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
- ret <8 x i8> %vset_lane
-}
-
-define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
-;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
- %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- ret <16 x i8> %vset_lane
-}
-
-define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
- %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
- %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
- %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
- %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
- %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
- %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
- %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
- %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
- ret <8 x i8> %vecinit7.i
-}
-
-define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
- %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
- ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
- %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
- %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
- ret <2 x i32> %vecinit1.i
-}
-
-define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
-;CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
- ret <1 x i64> %vecinit.i
-}
-
-define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
- %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
- %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
- %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
- %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
- %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
- %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
- %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
- %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
- %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
- %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
- %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
- %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
- %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
- %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
- %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
- %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
- ret <16 x i8> %vecinit15.i
-}
-
-define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
- %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
- %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
- %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
- %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
- %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
- %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
- %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
- %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
- ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
- %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
- %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
- %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
- %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
- ret <4 x i32> %vecinit3.i
-}
-
-define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
- %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
- %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
- ret <2 x i64> %vecinit1.i
-}
-
-define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
- %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- ret <8 x i8> %shuffle
-}
-
-define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
- %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- ret <4 x i16> %shuffle
-}
-
-define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
- %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- ret <2 x i32> %shuffle
-}
-
-define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
-;CHECK: {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
- %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- ret <16 x i8> %shuffle
-}
-
-define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
-;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
- %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- ret <8 x i16> %shuffle
-}
-
-define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
-;CHECK: {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
- %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %shuffle
-}
-
-define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
-;CHECK: {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
- %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
- ret <2 x i64> %shuffle
-}
-
-define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
- %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- ret <8 x i8> %shuffle
-}
-
-define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
- %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- ret <4 x i16> %shuffle
-}
-
-define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
- %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
- ret <2 x i32> %shuffle
-}
-
-define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
- %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- ret <16 x i8> %shuffle
-}
-
-define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
-;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
- %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- ret <8 x i16> %shuffle
-}
-
-define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
- %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %shuffle
-}
-
-define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
-;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
- %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
- ret <2 x i64> %shuffle
-}
-
-define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
-; CHECK-LABEL: test_bitcastv8i8toi64:
- %res = bitcast <8 x i8> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- ret i64 %res
-}
-
-define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
-; CHECK-LABEL: test_bitcastv4i16toi64:
- %res = bitcast <4 x i16> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- ret i64 %res
-}
-
-define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
-; CHECK-LABEL: test_bitcastv2i32toi64:
- %res = bitcast <2 x i32> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- ret i64 %res
-}
-
-define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
-; CHECK-LABEL: test_bitcastv2f32toi64:
- %res = bitcast <2 x float> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- ret i64 %res
-}
-
-define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
-; CHECK-LABEL: test_bitcastv1i64toi64:
- %res = bitcast <1 x i64> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- ret i64 %res
-}
-
-define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
-; CHECK-LABEL: test_bitcastv1f64toi64:
- %res = bitcast <1 x double> %in to i64
-; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
- ret i64 %res
-}
-
-define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov8i8:
- %res = bitcast i64 %in to <8 x i8>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- ret <8 x i8> %res
-}
-
-define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov4i16:
- %res = bitcast i64 %in to <4 x i16>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- ret <4 x i16> %res
-}
-
-define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov2i32:
- %res = bitcast i64 %in to <2 x i32>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- ret <2 x i32> %res
-}
-
-define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov2f32:
- %res = bitcast i64 %in to <2 x float>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- ret <2 x float> %res
-}
-
-define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov1i64:
- %res = bitcast i64 %in to <1 x i64>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- ret <1 x i64> %res
-}
-
-define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
-; CHECK-LABEL: test_bitcasti64tov1f64:
- %res = bitcast i64 %in to <1 x double>
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
- ret <1 x double> %res
-}
-
-define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
-; CHECK-LABEL: test_bitcastv8i8tov1f64:
-; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
- %sub.i = sub <8 x i8> zeroinitializer, %a
- %1 = bitcast <8 x i8> %sub.i to <1 x double>
- %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
- ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
-; CHECK-LABEL: test_bitcastv4i16tov1f64:
-; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
- %sub.i = sub <4 x i16> zeroinitializer, %a
- %1 = bitcast <4 x i16> %sub.i to <1 x double>
- %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
- ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
-; CHECK-LABEL: test_bitcastv2i32tov1f64:
-; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
- %sub.i = sub <2 x i32> zeroinitializer, %a
- %1 = bitcast <2 x i32> %sub.i to <1 x double>
- %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
- ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1i64tov1f64:
-; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
- %sub.i = sub <1 x i64> zeroinitializer, %a
- %1 = bitcast <1 x i64> %sub.i to <1 x double>
- %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
- ret <1 x i64> %vcvt.i
-}
-
-define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
-; CHECK-LABEL: test_bitcastv2f32tov1f64:
-; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}
- %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
- %1 = bitcast <2 x float> %sub.i to <1 x double>
- %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
- ret <1 x i64> %vcvt.i
-}
-
-define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov8i8:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %vcvt.i = sitofp <1 x i64> %a to <1 x double>
- %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
- %sub.i = sub <8 x i8> zeroinitializer, %1
- ret <8 x i8> %sub.i
-}
-
-define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov4i16:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %vcvt.i = sitofp <1 x i64> %a to <1 x double>
- %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
- %sub.i = sub <4 x i16> zeroinitializer, %1
- ret <4 x i16> %sub.i
-}
-
-define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2i32:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %vcvt.i = sitofp <1 x i64> %a to <1 x double>
- %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
- %sub.i = sub <2 x i32> zeroinitializer, %1
- ret <2 x i32> %sub.i
-}
-
-define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov1i64:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
- %vcvt.i = sitofp <1 x i64> %a to <1 x double>
- %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
- %sub.i = sub <1 x i64> zeroinitializer, %1
- ret <1 x i64> %sub.i
-}
-
-define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2f32:
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %vcvt.i = sitofp <1 x i64> %a to <1 x double>
- %1 = bitcast <1 x double> %vcvt.i to <2 x float>
- %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
- ret <2 x float> %sub.i
-}
-
-; Test inserting an element into an undef vector.
-define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
-; CHECK-LABEL: scalar_to_vector.v8i8:
-; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}}
- %b = insertelement <8 x i8> undef, i8 %a, i32 0
- ret <8 x i8> %b
-}
-
-define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
-; CHECK-LABEL: scalar_to_vector.v16i8:
-; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}}
- %b = insertelement <16 x i8> undef, i8 %a, i32 0
- ret <16 x i8> %b
-}
-
-define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
-; CHECK-LABEL: scalar_to_vector.v4i16:
-; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}}
- %b = insertelement <4 x i16> undef, i16 %a, i32 0
- ret <4 x i16> %b
-}
-
-define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
-; CHECK-LABEL: scalar_to_vector.v8i16:
-; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}}
- %b = insertelement <8 x i16> undef, i16 %a, i32 0
- ret <8 x i16> %b
-}
-
-define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
-; CHECK-LABEL: scalar_to_vector.v2i32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}}
- %b = insertelement <2 x i32> undef, i32 %a, i32 0
- ret <2 x i32> %b
-}
-
-define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
-; CHECK-LABEL: scalar_to_vector.v4i32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}}
- %b = insertelement <4 x i32> undef, i32 %a, i32 0
- ret <4 x i32> %b
-}
-
-define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
-; CHECK-LABEL: scalar_to_vector.v2i64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}}
- %b = insertelement <2 x i64> undef, i64 %a, i32 0
- ret <2 x i64> %b
-}
-
-define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
-; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
- %b = extractelement <1 x i8> %a, i32 0
- %c = insertelement <8 x i8> undef, i8 %b, i32 0
- %d = insertelement <8 x i8> %c, i8 %b, i32 1
- %e = insertelement <8 x i8> %d, i8 %b, i32 2
- %f = insertelement <8 x i8> %e, i8 %b, i32 3
- %g = insertelement <8 x i8> %f, i8 %b, i32 4
- %h = insertelement <8 x i8> %g, i8 %b, i32 5
- %i = insertelement <8 x i8> %h, i8 %b, i32 6
- %j = insertelement <8 x i8> %i, i8 %b, i32 7
- ret <8 x i8> %j
-}
-
-define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
-; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
- %b = extractelement <1 x i16> %a, i32 0
- %c = insertelement <8 x i16> undef, i16 %b, i32 0
- %d = insertelement <8 x i16> %c, i16 %b, i32 1
- %e = insertelement <8 x i16> %d, i16 %b, i32 2
- %f = insertelement <8 x i16> %e, i16 %b, i32 3
- %g = insertelement <8 x i16> %f, i16 %b, i32 4
- %h = insertelement <8 x i16> %g, i16 %b, i32 5
- %i = insertelement <8 x i16> %h, i16 %b, i32 6
- %j = insertelement <8 x i16> %i, i16 %b, i32 7
- ret <8 x i16> %j
-}
-
-define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
-; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
- %b = extractelement <1 x i32> %a, i32 0
- %c = insertelement <4 x i32> undef, i32 %b, i32 0
- %d = insertelement <4 x i32> %c, i32 %b, i32 1
- %e = insertelement <4 x i32> %d, i32 %b, i32 2
- %f = insertelement <4 x i32> %e, i32 %b, i32 3
- ret <4 x i32> %f
-}
-
-define <8 x i8> @getl(<16 x i8> %x) #0 {
-; CHECK-LABEL: getl:
-; CHECK: ret
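-; Extracting elements 0-7 of a <16 x i8> is just the low half of the Q
-; register, which already lives in the corresponding D register, so no
-; instruction beyond the return is expected here.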
- %vecext = extractelement <16 x i8> %x, i32 0
- %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
- %vecext1 = extractelement <16 x i8> %x, i32 1
- %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
- %vecext3 = extractelement <16 x i8> %x, i32 2
- %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
- %vecext5 = extractelement <16 x i8> %x, i32 3
- %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
- %vecext7 = extractelement <16 x i8> %x, i32 4
- %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
- %vecext9 = extractelement <16 x i8> %x, i32 5
- %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
- %vecext11 = extractelement <16 x i8> %x, i32 6
- %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
- %vecext13 = extractelement <16 x i8> %x, i32 7
- %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
- ret <8 x i8> %vecinit14
-}
-
-define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
-; CHECK-LABEL: test_dup_v2i32_v4i16:
-; CHECK: dup v0.4h, v0.h[2]
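-; Element 1 of the <2 x i32> occupies halfword lanes 2 and 3; truncating to
-; i16 keeps the low halfword, so the broadcast folds to a dup from lane 2.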
-entry:
- %x = extractelement <2 x i32> %a, i32 1
- %vget_lane = trunc i32 %x to i16
- %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- ret <4 x i16> %vecinit3.i
-}
-
-define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
-; CHECK-LABEL: test_dup_v4i32_v8i16:
-; CHECK: dup v0.8h, v0.h[6]
-entry:
- %x = extractelement <4 x i32> %a, i32 3
- %vget_lane = trunc i32 %x to i16
- %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
- %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
- %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
- %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
- ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
-; CHECK-LABEL: test_dup_v1i64_v4i16:
-; CHECK: dup v0.4h, v0.h[0]
-entry:
- %x = extractelement <1 x i64> %a, i32 0
- %vget_lane = trunc i64 %x to i16
- %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
-; CHECK-LABEL: test_dup_v1i64_v2i32:
-; CHECK: dup v0.2s, v0.s[0]
-entry:
- %x = extractelement <1 x i64> %a, i32 0
- %vget_lane = trunc i64 %x to i32
- %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
- %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
- ret <2 x i32> %vecinit1.i
-}
-
-define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v8i16:
-; CHECK: dup v0.8h, v0.h[4]
-entry:
- %x = extractelement <2 x i64> %a, i32 1
- %vget_lane = trunc i64 %x to i16
- %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
- %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
- %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
- %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
- ret <8 x i16> %vecinit7.i
-}
-
-define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v4i32:
-; CHECK: dup v0.4s, v0.s[2]
-entry:
- %x = extractelement <2 x i64> %a, i32 1
- %vget_lane = trunc i64 %x to i32
- %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
- ret <4 x i32> %vecinit3.i
-}
-
-define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
-; CHECK-LABEL: test_dup_v4i32_v4i16:
-; CHECK: dup v0.4h, v0.h[2]
-entry:
- %x = extractelement <4 x i32> %a, i32 1
- %vget_lane = trunc i32 %x to i16
- %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- ret <4 x i16> %vecinit3.i
-}
-
-define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v4i16:
-; CHECK: dup v0.4h, v0.h[0]
-entry:
- %x = extractelement <2 x i64> %a, i32 0
- %vget_lane = trunc i64 %x to i16
- %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- ret <4 x i16> %vecinit3.i
-}
-
-define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
-; CHECK-LABEL: test_dup_v2i64_v2i32:
-; CHECK: dup v0.2s, v0.s[0]
-entry:
- %x = extractelement <2 x i64> %a, i32 0
- %vget_lane = trunc i64 %x to i32
- %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
- %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
- ret <2 x i32> %vecinit1.i
-}
-
-
-define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
-; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
-; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
-; CHECK-NEXT: ret
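-; The round-trip through the <1 x float> and the final insert into lane 0
-; are expected to fold away, leaving only the pairwise max.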
-entry:
- %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
- %1 = insertelement <1 x float> undef, float %0, i32 0
- %2 = extractelement <1 x float> %1, i32 0
- %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
- ret <2 x float> %vecinit1.i
-}
-
-define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
-; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
-; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
-; CHECK-NEXT: ret
-entry:
- %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
- %1 = insertelement <1 x float> undef, float %0, i32 0
- %2 = extractelement <1 x float> %1, i32 0
- %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
- ret <4 x float> %vecinit1.i
-}
-
-declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>)
-
-define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_undef_v1i32:
-; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0]
-entry:
- %0 = extractelement <1 x i32> %a, i32 0
- %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
- ret <2 x i32> %vecinit1.i
-}
-
-declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) #4
-
-define <2 x i32> @test_concat_v1i32_undef(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_v1i32_undef:
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: ret
-entry:
- %b = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a)
- %0 = extractelement <1 x i32> %b, i32 0
- %vecinit.i432 = insertelement <2 x i32> undef, i32 %0, i32 0
- ret <2 x i32> %vecinit.i432
-}
-
-define <2 x i32> @test_concat_same_v1i32_v1i32(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_same_v1i32_v1i32:
-; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-entry:
- %0 = extractelement <1 x i32> %a, i32 0
- %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
- %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
- ret <2 x i32> %vecinit1.i
-}
-
-define <2 x i32> @test_concat_diff_v1i32_v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
-; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: ins v0.s[1], v1.s[0]
-entry:
- %c = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a)
- %d = extractelement <1 x i32> %c, i32 0
- %e = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %b)
- %f = extractelement <1 x i32> %e, i32 0
- %h = shufflevector <1 x i32> %c, <1 x i32> %e, <2 x i32> <i32 0, i32 1>
- ret <2 x i32> %h
-}
-
-define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
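-; Concatenating the low halves of two vectors is expected to lower to a
-; single ins moving the second operand's low doubleword into the high half.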
-entry:
- %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
- ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <8 x i8> %x, i32 0
- %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
- %vecext1 = extractelement <8 x i8> %x, i32 1
- %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
- %vecext3 = extractelement <8 x i8> %x, i32 2
- %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
- %vecext5 = extractelement <8 x i8> %x, i32 3
- %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
- %vecext7 = extractelement <8 x i8> %x, i32 4
- %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
- %vecext9 = extractelement <8 x i8> %x, i32 5
- %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
- %vecext11 = extractelement <8 x i8> %x, i32 6
- %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
- %vecext13 = extractelement <8 x i8> %x, i32 7
- %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
- %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
- ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <16 x i8> %x, i32 0
- %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
- %vecext1 = extractelement <16 x i8> %x, i32 1
- %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
- %vecext3 = extractelement <16 x i8> %x, i32 2
- %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
- %vecext5 = extractelement <16 x i8> %x, i32 3
- %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
- %vecext7 = extractelement <16 x i8> %x, i32 4
- %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
- %vecext9 = extractelement <16 x i8> %x, i32 5
- %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
- %vecext11 = extractelement <16 x i8> %x, i32 6
- %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
- %vecext13 = extractelement <16 x i8> %x, i32 7
- %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
- %vecext15 = extractelement <8 x i8> %y, i32 0
- %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
- %vecext17 = extractelement <8 x i8> %y, i32 1
- %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
- %vecext19 = extractelement <8 x i8> %y, i32 2
- %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
- %vecext21 = extractelement <8 x i8> %y, i32 3
- %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
- %vecext23 = extractelement <8 x i8> %y, i32 4
- %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
- %vecext25 = extractelement <8 x i8> %y, i32 5
- %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
- %vecext27 = extractelement <8 x i8> %y, i32 6
- %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
- %vecext29 = extractelement <8 x i8> %y, i32 7
- %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
- ret <16 x i8> %vecinit30
-}
-
-define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <8 x i8> %x, i32 0
- %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
- %vecext1 = extractelement <8 x i8> %x, i32 1
- %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
- %vecext3 = extractelement <8 x i8> %x, i32 2
- %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
- %vecext5 = extractelement <8 x i8> %x, i32 3
- %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
- %vecext7 = extractelement <8 x i8> %x, i32 4
- %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
- %vecext9 = extractelement <8 x i8> %x, i32 5
- %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
- %vecext11 = extractelement <8 x i8> %x, i32 6
- %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
- %vecext13 = extractelement <8 x i8> %x, i32 7
- %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
- %vecext15 = extractelement <8 x i8> %y, i32 0
- %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
- %vecext17 = extractelement <8 x i8> %y, i32 1
- %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
- %vecext19 = extractelement <8 x i8> %y, i32 2
- %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
- %vecext21 = extractelement <8 x i8> %y, i32 3
- %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
- %vecext23 = extractelement <8 x i8> %y, i32 4
- %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
- %vecext25 = extractelement <8 x i8> %y, i32 5
- %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
- %vecext27 = extractelement <8 x i8> %y, i32 6
- %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
- %vecext29 = extractelement <8 x i8> %y, i32 7
- %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
- ret <16 x i8> %vecinit30
-}
-
-define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <4 x i16> %x, i32 0
- %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
- %vecext1 = extractelement <4 x i16> %x, i32 1
- %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
- %vecext3 = extractelement <4 x i16> %x, i32 2
- %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
- %vecext5 = extractelement <4 x i16> %x, i32 3
- %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
- %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <8 x i16> %x, i32 0
- %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
- %vecext1 = extractelement <8 x i16> %x, i32 1
- %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
- %vecext3 = extractelement <8 x i16> %x, i32 2
- %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
- %vecext5 = extractelement <8 x i16> %x, i32 3
- %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
- %vecext7 = extractelement <4 x i16> %y, i32 0
- %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
- %vecext9 = extractelement <4 x i16> %y, i32 1
- %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
- %vecext11 = extractelement <4 x i16> %y, i32 2
- %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
- %vecext13 = extractelement <4 x i16> %y, i32 3
- %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
- ret <8 x i16> %vecinit14
-}
-
-define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <4 x i16> %x, i32 0
- %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
- %vecext1 = extractelement <4 x i16> %x, i32 1
- %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
- %vecext3 = extractelement <4 x i16> %x, i32 2
- %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
- %vecext5 = extractelement <4 x i16> %x, i32 3
- %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
- %vecext7 = extractelement <4 x i16> %y, i32 0
- %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
- %vecext9 = extractelement <4 x i16> %y, i32 1
- %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
- %vecext11 = extractelement <4 x i16> %y, i32 2
- %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
- %vecext13 = extractelement <4 x i16> %y, i32 3
- %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
- ret <8 x i16> %vecinit14
-}
-
-define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <2 x i32> %x, i32 0
- %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
- %vecext1 = extractelement <2 x i32> %x, i32 1
- %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
- %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <4 x i32> %x, i32 0
- %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
- %vecext1 = extractelement <4 x i32> %x, i32 1
- %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
- %vecext3 = extractelement <2 x i32> %y, i32 0
- %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
- %vecext5 = extractelement <2 x i32> %y, i32 1
- %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
- ret <4 x i32> %vecinit6
-}
-
-define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
-; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <2 x i32> %x, i32 0
- %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
- %vecext1 = extractelement <2 x i32> %x, i32 1
- %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
- %vecext3 = extractelement <2 x i32> %y, i32 0
- %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
- %vecext5 = extractelement <2 x i32> %y, i32 1
- %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
- ret <4 x i32> %vecinit6
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
- ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <1 x i64> %x, i32 0
- %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
- %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
- ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <2 x i64> %x, i32 0
- %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
- %vecext1 = extractelement <1 x i64> %y, i32 0
- %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
- ret <2 x i64> %vecinit2
-}
-
-define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
-; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
-entry:
- %vecext = extractelement <1 x i64> %x, i32 0
- %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
- %vecext1 = extractelement <1 x i64> %y, i32 0
- %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
- ret <2 x i64> %vecinit2
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
-
-; This case tests a copy between two FPR8 registers, which is implemented as
-; an fmov between the corresponding FPR32 registers.
-define <1 x i8> @test_copy_FPR8_FPR8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: test_copy_FPR8_FPR8:
-; CHECK: usqadd b1, b0
-; CHECK-NEXT: fmov s0, s1
-entry:
- %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %b, <1 x i8> %a)
- ret <1 x i8> %vsqadd2.i
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: test_copy_FPR16_FPR16:
-; CHECK: usqadd h1, h0
-; CHECK-NEXT: fmov s0, s1
-entry:
- %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a)
- ret <1 x i16> %vsqadd2.i
-}
-
-define <4 x i16> @concat_vector_v4i16_const() {
-; CHECK-LABEL: concat_vector_v4i16_const:
-; CHECK: dup {{v[0-9]+}}.4h, wzr
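-; A zero splat is expected to broadcast directly from wzr instead of
-; materializing the constant in a register first.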
- %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i16> @concat_vector_v4i16_const_one() {
-; CHECK-LABEL: concat_vector_v4i16_const_one:
-; CHECK: movz {{w[0-9]+}}, #1
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
- %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i32> @concat_vector_v4i32_const() {
-; CHECK-LABEL: concat_vector_v4i32_const:
-; CHECK: dup {{v[0-9]+}}.4s, wzr
- %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %r
-}
-
-define <8 x i8> @concat_vector_v8i8_const() {
-; CHECK-LABEL: concat_vector_v8i8_const:
-; CHECK: dup {{v[0-9]+}}.8b, wzr
- %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %r
-}
-
-define <8 x i16> @concat_vector_v8i16_const() {
-; CHECK-LABEL: concat_vector_v8i16_const:
-; CHECK: dup {{v[0-9]+}}.8h, wzr
- %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <8 x i16> @concat_vector_v8i16_const_one() {
-; CHECK-LABEL: concat_vector_v8i16_const_one:
-; CHECK: movz {{w[0-9]+}}, #1
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
- %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <16 x i8> @concat_vector_v16i8_const() {
-; CHECK-LABEL: concat_vector_v16i8_const:
-; CHECK: dup {{v[0-9]+}}.16b, wzr
- %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %r
-}
-
-define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
-; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
- %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %r
-}
-
-define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
-; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
- %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %r
-}
-
-define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
-; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0]
- %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %r
-}
-
-define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
-; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
- %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %r
-}
-
-define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
-; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0]
- %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %r
-}
diff --git a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
deleted file mode 100644
index 4dffcd1..0000000
--- a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
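-; Copying a tuple of Q registers is expected to expand into one orr move per
-; register before the lane load consumes the tuple.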
-define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) {
-; CHECK-LABEL: copyTuple.QPair:
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
- %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i32 0, i32 4)
- %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
- %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 1, i32 4)
- %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
- ret <4 x i32> %vld1.fca.0.extract
-}
-
-define <4 x i32> @copyTuple.QTriple(i8* %a, i8* %b, <4 x i32> %c) {
-; CHECK-LABEL: copyTuple.QTriple:
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
- %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4)
- %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
- %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i32 1, i32 4)
- %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
- ret <4 x i32> %vld1.fca.0.extract
-}
-
-define <4 x i32> @copyTuple.QQuad(i8* %a, i8* %b, <4 x i32> %c) {
-; CHECK-LABEL: copyTuple.QQuad:
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x{{[0-9]+|sp}}]
-entry:
- %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4)
- %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
- %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i32 1, i32 4)
- %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
- ret <4 x i32> %vld1.fca.0.extract
-}
-
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll
deleted file mode 100644
index c0014fa..0000000
--- a/test/CodeGen/AArch64/neon-crypto.ll
+++ /dev/null
@@ -1,144 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
-; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
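-; The first RUN line checks that the crypto instructions are selected when
-; +crypto is enabled; the second checks that selection fails with a
-; diagnostic when the crypto feature is missing.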
-
-declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1
-
-declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1
-
-declare i32 @llvm.arm.neon.sha1h(i32) #1
-
-declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1
-
-declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1
-
-declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1
-
-declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1
-
-define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) {
-; CHECK: test_vaeseq_u8:
-; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese
-entry:
- %aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
- ret <16 x i8> %aese.i
-}
-
-define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) {
-; CHECK: test_vaesdq_u8:
-; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
- ret <16 x i8> %aesd.i
-}
-
-define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) {
-; CHECK: test_vaesmcq_u8:
-; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data)
- ret <16 x i8> %aesmc.i
-}
-
-define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) {
-; CHECK: test_vaesimcq_u8:
-; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-entry:
- %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data)
- ret <16 x i8> %aesimc.i
-}
-
-define i32 @test_vsha1h_u32(i32 %hash_e) {
-; CHECK: test_vsha1h_u32:
-; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e)
- ret i32 %sha1h1.i
-}
-
-define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) {
-; CHECK: test_vsha1su1q_u32:
-; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15)
- ret <4 x i32> %sha1su12.i
-}
-
-define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) {
-; CHECK: test_vsha256su0q_u32:
-; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
- ret <4 x i32> %sha256su02.i
-}
-
-define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK: test_vsha1cq_u32:
-; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- ret <4 x i32> %sha1c1.i
-}
-
-define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK: test_vsha1pq_u32:
-; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- ret <4 x i32> %sha1p1.i
-}
-
-define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK: test_vsha1mq_u32:
-; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- ret <4 x i32> %sha1m1.i
-}
-
-define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) {
-; CHECK: test_vsha1su0q_u32:
-; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
- ret <4 x i32> %sha1su03.i
-}
-
-define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) {
-; CHECK: test_vsha256hq_u32:
-; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
- ret <4 x i32> %sha256h3.i
-}
-
-define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) {
-; CHECK: test_vsha256h2q_u32:
-; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
-entry:
- %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
- ret <4 x i32> %sha256h23.i
-}
-
-define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) {
-; CHECK: test_vsha256su1q_u32:
-; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
- ret <4 x i32> %sha256su13.i
-}
-
diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll
index f546aa7..099b685 100644
--- a/test/CodeGen/AArch64/neon-diagnostics.ll
+++ b/test/CodeGen/AArch64/neon-diagnostics.ll
@@ -21,4 +21,4 @@ define <4 x i32> @test_vshrn_not_match(<2 x i32> %a, <2 x i64> %b) {
%shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
%4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
ret <4 x i32> %4
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll
index cddc226..f270b54 100644
--- a/test/CodeGen/AArch64/neon-extract.ll
+++ b/test/CodeGen/AArch64/neon-extract.ll
@@ -1,221 +1,221 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vext_s8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
entry:
%vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
ret <8 x i8> %vext
}
define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vext_s16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+; CHECK-LABEL: test_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
entry:
%vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i16> %vext
}
define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vext_s32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_vext_s32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
entry:
%vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %vext
}
define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK: test_vext_s64:
+; CHECK-LABEL: test_vext_s64:
entry:
%vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
ret <1 x i64> %vext
}
define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vextq_s8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+; CHECK-LABEL: test_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
entry:
%vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
ret <16 x i8> %vext
}
define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vextq_s16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
entry:
%vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %vext
}
define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vextq_s32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+; CHECK-LABEL: test_vextq_s32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
entry:
%vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %vext
}
define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vextq_s64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: test_vextq_s64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
entry:
%vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %vext
}
define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vext_u8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_vext_u8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
entry:
%vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
ret <8 x i8> %vext
}
define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vext_u16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+; CHECK-LABEL: test_vext_u16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
entry:
%vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i16> %vext
}
define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vext_u32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_vext_u32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
entry:
%vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %vext
}
define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK: test_vext_u64:
+; CHECK-LABEL: test_vext_u64:
entry:
%vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
ret <1 x i64> %vext
}
define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vextq_u8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+; CHECK-LABEL: test_vextq_u8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
entry:
%vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
ret <16 x i8> %vext
}
define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vextq_u16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_vextq_u16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
entry:
%vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %vext
}
define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vextq_u32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+; CHECK-LABEL: test_vextq_u32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
entry:
%vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %vext
}
define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vextq_u64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: test_vextq_u64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
entry:
%vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %vext
}
define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vext_f32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_vext_f32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
entry:
%vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 2>
ret <2 x float> %vext
}
define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK: test_vext_f64:
+; CHECK-LABEL: test_vext_f64:
entry:
%vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> <i32 0>
ret <1 x double> %vext
}
define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vextq_f32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+; CHECK-LABEL: test_vextq_f32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
entry:
%vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x float> %vext
}
define <2 x double> @test_vextq_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vextq_f64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: test_vextq_f64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
entry:
%vext = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
ret <2 x double> %vext
}
define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vext_p8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_vext_p8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
entry:
%vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
ret <8 x i8> %vext
}
define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vext_p16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+; CHECK-LABEL: test_vext_p16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
entry:
%vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i16> %vext
}
define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vextq_p8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+; CHECK-LABEL: test_vextq_p8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
entry:
%vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
ret <16 x i8> %vext
}
define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vextq_p16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_vextq_p16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
entry:
%vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %vext
}
define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) {
-; CHECK: test_undef_vext_s8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_undef_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
entry:
%vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 10, i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
ret <8 x i8> %vext
}
define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) {
-; CHECK: test_undef_vextq_s8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_undef_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
entry:
%vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 20, i32 20, i32 20, i32 20, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 20, i32 20, i32 20, i32 20, i32 20>
ret <16 x i8> %vext
}
define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
-; CHECK: test_undef_vext_s16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_undef_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
entry:
%vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i16> %vext
}
define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) {
-; CHECK: test_undef_vextq_s16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_undef_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
entry:
%vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %vext
diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll
deleted file mode 100644
index 28e8212..0000000
--- a/test/CodeGen/AArch64/neon-facge-facgt.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>)
-
-define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facge_from_intr_v2i32:
- %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
-; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- ret <2 x i32> %val
-}
-define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facge_from_intr_v4i32:
- %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
-; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- ret <4 x i32> %val
-}
-
-define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facge_from_intr_v2i64:
- %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
-; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- ret <2 x i64> %val
-}
-
-declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>)
-declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>)
-declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>)
-
-define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facgt_from_intr_v2i32:
- %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B)
-; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- ret <2 x i32> %val
-}
-define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facgt_from_intr_v4i32:
- %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B)
-; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- ret <4 x i32> %val
-}
-
-define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: facgt_from_intr_v2i64:
- %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B)
-; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- ret <2 x i64> %val
-}
-
diff --git a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
deleted file mode 100644
index 46fe25d..0000000
--- a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-; Tests for when the vrsqrts/vrecps intrinsics are used directly.
-
-declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frsqrts v0.2s, v0.2s, v1.2s
- %val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs)
- ret <2 x float> %val
-}
-
-define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frsqrts v0.4s, v0.4s, v1.4s
- %val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs)
- ret <4 x float> %val
-}
-
-define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frsqrts v0.2d, v0.2d, v1.2d
- %val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs)
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frecps v0.2s, v0.2s, v1.2s
- %val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs)
- ret <2 x float> %val
-}
-
-define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frecps v0.4s, v0.4s, v1.4s
- %val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs)
- ret <4 x float> %val
-}
-
-define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: frecps v0.2d, v0.2d, v1.2d
- %val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs)
- ret <2 x double> %val
-}
-
diff --git a/test/CodeGen/AArch64/neon-halving-add-sub.ll b/test/CodeGen/AArch64/neon-halving-add-sub.ll
deleted file mode 100644
index a8f59db..0000000
--- a/test/CodeGen/AArch64/neon-halving-add-sub.ll
+++ /dev/null
@@ -1,207 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uhadd_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uhadd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_shadd_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: shadd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uhadd_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uhadd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_shadd_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: shadd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uhadd_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uhadd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_shadd_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: shadd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uhadd_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uhadd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_shadd_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: shadd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uhadd_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uhadd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_shadd_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: shadd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uhadd_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uhadd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_shadd_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: shadd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-
-declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uhsub_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uhsub v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_shsub_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: shsub v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uhsub_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uhsub v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_shsub_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: shsub v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uhsub_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uhsub v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_shsub_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: shsub v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uhsub_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uhsub v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_shsub_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: shsub v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uhsub_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uhsub v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_shsub_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: shsub v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uhsub_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uhsub v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_shsub_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: shsub v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll
new file mode 100644
index 0000000..de402c4
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-idiv.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %a) {
+ %rem = srem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %rem
+; CHECK-LABEL: test1:
+; FIXME: Can we lower this more efficiently?
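+; NEON has no vector integer division instruction, so the srem by a constant
+; is expanded into a multiply-by-magic-constant sequence; that expansion is
+; where the multiplies checked below come from.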
+; CHECK: mul
+; CHECK: mul
+; CHECK: mul
+; CHECK: mul
+}
+
diff --git a/test/CodeGen/AArch64/neon-load-store-v1i32.ll b/test/CodeGen/AArch64/neon-load-store-v1i32.ll
deleted file mode 100644
index 92f704d..0000000
--- a/test/CodeGen/AArch64/neon-load-store-v1i32.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-; Test that load/store of v1i8, v1i16 and v1i32 types can be selected correctly
-define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) {
-; CHECK-LABEL: load.store.v1i8:
-; CHECK: ldr b{{[0-9]+}}, [x{{[0-9]+|sp}}]
-; CHECK: str b{{[0-9]+}}, [x{{[0-9]+|sp}}]
- %a = load <1 x i8>* %ptr
- store <1 x i8> %a, <1 x i8>* %ptr2
- ret void
-}
-
-define void @load.store.v1i16(<1 x i16>* %ptr, <1 x i16>* %ptr2) {
-; CHECK-LABEL: load.store.v1i16:
-; CHECK: ldr h{{[0-9]+}}, [x{{[0-9]+|sp}}]
-; CHECK: str h{{[0-9]+}}, [x{{[0-9]+|sp}}]
- %a = load <1 x i16>* %ptr
- store <1 x i16> %a, <1 x i16>* %ptr2
- ret void
-}
-
-define void @load.store.v1i32(<1 x i32>* %ptr, <1 x i32>* %ptr2) {
-; CHECK-LABEL: load.store.v1i32:
-; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+|sp}}]
-; CHECK: str s{{[0-9]+}}, [x{{[0-9]+|sp}}]
- %a = load <1 x i32>* %ptr
- store <1 x i32> %a, <1 x i32>* %ptr2
- ret void
-}
diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll
deleted file mode 100644
index 3e18077..0000000
--- a/test/CodeGen/AArch64/neon-max-min-pairwise.ll
+++ /dev/null
@@ -1,346 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_smaxp_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: smaxp v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
- %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: umaxp v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_smaxp_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: smaxp v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_umaxp_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: umaxp v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_smaxp_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: smaxp v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_umaxp_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: umaxp v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_smaxp_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: smaxp v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_umaxp_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: umaxp v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_smaxp_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: smaxp v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_umaxp_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: umaxp v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_smaxp_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: smaxp v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_umaxp_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: umaxp v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_sminp_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sminp v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
- %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uminp v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sminp_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sminp v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uminp_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uminp v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sminp_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sminp v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uminp_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uminp v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sminp_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sminp v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uminp_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uminp v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sminp_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sminp v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uminp_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uminp v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sminp_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sminp v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uminp_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uminp v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmaxp_v2f32:
- %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmaxp v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmaxp_v4f32:
- %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmaxp v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmaxp_v2f64:
- %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmaxp v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fminp_v2f32:
- %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fminp v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fminp_v4f32:
- %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fminp v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fminp_v2f64:
- %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fminp v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmaxnmp_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmaxnmp_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmaxnmp_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fminnmp_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fminnmp v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fminnmp_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fminnmp v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fminnmp_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fminnmp v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-define i32 @test_vminv_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vminv_s32
-; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i32> %1, i32 0
- ret i32 %2
-}
-
-define i32 @test_vminv_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vminv_u32
-; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i32> %1, i32 0
- ret i32 %2
-}
-
-define i32 @test_vmaxv_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vmaxv_s32
-; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i32> %1, i32 0
- ret i32 %2
-}
-
-define i32 @test_vmaxv_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vmaxv_u32
-; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i32> %1, i32 0
- ret i32 %2
-}
-
-declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-max-min.ll b/test/CodeGen/AArch64/neon-max-min.ll
deleted file mode 100644
index 7889c77..0000000
--- a/test/CodeGen/AArch64/neon-max-min.ll
+++ /dev/null
@@ -1,310 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_smax_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: smax v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
- %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: umax v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_smax_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: smax v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_umax_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: umax v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_smax_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: smax v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_umax_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: umax v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_smax_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: smax v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_umax_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: umax v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_smax_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: smax v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_umax_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: umax v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_smax_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: smax v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_umax_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: umax v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; CHECK: test_smin_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: smin v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
- %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: umin v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_smin_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: smin v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_umin_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: umin v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_smin_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: smin v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_umin_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: umin v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-
-declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_smin_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: smin v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_umin_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: umin v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-
-declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_smin_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: smin v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_umin_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: umin v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_smin_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: smin v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_umin_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: umin v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmax_v2f32:
- %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmax v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmax_v4f32:
- %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmax v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmax_v2f64:
- %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmax v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmin_v2f32:
- %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmin v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmin_v4f32:
- %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmin v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmin_v2f64:
- %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmin v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-
-declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fmaxnm_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fmaxnm_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fmaxnm_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
-
-declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; CHECK: test_fminnm_v2f32:
- %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
-; CHECK: fminnm v0.2s, v0.2s, v1.2s
- ret <2 x float> %val
-}
-
-define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; CHECK: test_fminnm_v4f32:
- %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
-; CHECK: fminnm v0.4s, v0.4s, v1.4s
- ret <4 x float> %val
-}
-
-define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; CHECK: test_fminnm_v2f64:
- %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
-; CHECK: fminnm v0.2d, v0.2d, v1.2d
- ret <2 x double> %val
-}
diff --git a/test/CodeGen/AArch64/neon-misc-scalar.ll b/test/CodeGen/AArch64/neon-misc-scalar.ll
deleted file mode 100644
index cca8deb..0000000
--- a/test/CodeGen/AArch64/neon-misc-scalar.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)
-
-declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)
-
-declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>)
-
-declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>)
-
-declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) {
-entry:
- ; CHECK: test_vuqadd_s64
- %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
- ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}}
- ret <1 x i64> %vuqadd2.i
-}
-
-define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) {
-entry:
- ; CHECK: test_vsqadd_u64
- %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
- ; CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}}
- ret <1 x i64> %vsqadd2.i
-}
-
-define <1 x i64> @test_vabs_s64(<1 x i64> %a) {
- ; CHECK: test_vabs_s64
-entry:
- %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a)
- ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}}
- ret <1 x i64> %vabs1.i
-}
-
-define <1 x i64> @test_vqabs_s64(<1 x i64> %a) {
- ; CHECK: test_vqabs_s64
-entry:
- %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a)
- ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}}
- ret <1 x i64> %vqabs1.i
-}
-
-define <1 x i64> @test_vqneg_s64(<1 x i64> %a) {
- ; CHECK: test_vqneg_s64
-entry:
- %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a)
- ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}}
- ret <1 x i64> %vqneg1.i
-}
-
-define <1 x i64> @test_vneg_s64(<1 x i64> %a) {
- ; CHECK: test_vneg_s64
-entry:
- %sub.i = sub <1 x i64> zeroinitializer, %a
- ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
- ret <1 x i64> %sub.i
-}
-
diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll
deleted file mode 100644
index 7ec36c2..0000000
--- a/test/CodeGen/AArch64/neon-misc.ll
+++ /dev/null
@@ -1,2014 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-
-define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
-; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- ret <8 x i8> %shuffle.i
-}
-
-define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
-; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- ret <4 x i16> %shuffle.i
-}
-
-define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
-; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- ret <8 x i16> %shuffle.i
-}
-
-define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
- ret <2 x i32> %shuffle.i
-}
-
-define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0>
- ret <2 x float> %shuffle.i
-}
-
-define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- ret <4 x i32> %shuffle.i
-}
-
-define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
-; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- ret <4 x float> %shuffle.i
-}
-
-define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
- %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4
- ret <4 x i16> %vpaddl.i
-}
-
-define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
- %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4
- ret <2 x i32> %vpaddl1.i
-}
-
-define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
- %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4
- ret <1 x i64> %vpaddl1.i
-}
-
-define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
- %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4
- ret <4 x i16> %vpaddl.i
-}
-
-define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
- %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4
- ret <2 x i32> %vpaddl1.i
-}
-
-define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
- %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4
- ret <1 x i64> %vpaddl1.i
-}
-
-define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
- %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4
- ret <8 x i16> %vpaddl.i
-}
-
-define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
- %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4
- ret <4 x i32> %vpaddl1.i
-}
-
-define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
-; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
- %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4
- ret <2 x i64> %vpaddl1.i
-}
-
-define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
- %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4
- ret <8 x i16> %vpaddl.i
-}
-
-define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
- %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4
- ret <4 x i32> %vpaddl1.i
-}
-
-define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
-; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
- %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4
- ret <2 x i64> %vpaddl1.i
-}
-
-define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
- %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
- ret <4 x i16> %vpadal1.i
-}
-
-define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
- %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
- ret <2 x i32> %vpadal2.i
-}
-
-define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
- %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
- ret <1 x i64> %vpadal2.i
-}
-
-define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
- %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4
- ret <4 x i16> %vpadal1.i
-}
-
-define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
- %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4
- ret <2 x i32> %vpadal2.i
-}
-
-define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
- %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4
- ret <1 x i64> %vpadal2.i
-}
-
-define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
- %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
- ret <8 x i16> %vpadal1.i
-}
-
-define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
- %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
- ret <4 x i32> %vpadal2.i
-}
-
-define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
-; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
- %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
- ret <2 x i64> %vpadal2.i
-}
-
-define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
- %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4
- ret <8 x i16> %vpadal1.i
-}
-
-define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
- %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4
- ret <4 x i32> %vpadal2.i
-}
-
-define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
-; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
- %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4
- ret <2 x i64> %vpadal2.i
-}
-
-define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vqabs.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4
- ret <8 x i8> %vqabs.i
-}
-
-define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vqabs.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4
- ret <16 x i8> %vqabs.i
-}
-
-define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %vqabs1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4
- ret <4 x i16> %vqabs1.i
-}
-
-define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %vqabs1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4
- ret <8 x i16> %vqabs1.i
-}
-
-define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vqabs1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4
- ret <2 x i32> %vqabs1.i
-}
-
-define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vqabs1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4
- ret <4 x i32> %vqabs1.i
-}
-
-define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
-; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vqabs1.i = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4
- ret <2 x i64> %vqabs1.i
-}
-
-define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vqneg.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4
- ret <8 x i8> %vqneg.i
-}
-
-define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vqneg.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4
- ret <16 x i8> %vqneg.i
-}
-
-define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %vqneg1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4
- ret <4 x i16> %vqneg1.i
-}
-
-define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %vqneg1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4
- ret <8 x i16> %vqneg1.i
-}
-
-define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vqneg1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4
- ret <2 x i32> %vqneg1.i
-}
-
-define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vqneg1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4
- ret <4 x i32> %vqneg1.i
-}
-
-define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
-; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vqneg1.i = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4
- ret <2 x i64> %vqneg1.i
-}
-
-define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %sub.i = sub <8 x i8> zeroinitializer, %a
- ret <8 x i8> %sub.i
-}
-
-define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %sub.i = sub <16 x i8> zeroinitializer, %a
- ret <16 x i8> %sub.i
-}
-
-define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %sub.i = sub <4 x i16> zeroinitializer, %a
- ret <4 x i16> %sub.i
-}
-
-define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %sub.i = sub <8 x i16> zeroinitializer, %a
- ret <8 x i16> %sub.i
-}
-
-define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %sub.i = sub <2 x i32> zeroinitializer, %a
- ret <2 x i32> %sub.i
-}
-
-define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %sub.i = sub <4 x i32> zeroinitializer, %a
- ret <4 x i32> %sub.i
-}
-
-define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
-; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %sub.i = sub <2 x i64> zeroinitializer, %a
- ret <2 x i64> %sub.i
-}
-
-define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
-; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
- ret <2 x float> %sub.i
-}
-
-define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
-; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
- ret <4 x float> %sub.i
-}
-
-define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
-; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
- ret <2 x double> %sub.i
-}
-
-define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4
- ret <8 x i8> %vabs.i
-}
-
-define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4
- ret <16 x i8> %vabs.i
-}
-
-define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4
- ret <4 x i16> %vabs1.i
-}
-
-define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4
- ret <8 x i16> %vabs1.i
-}
-
-define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4
- ret <2 x i32> %vabs1.i
-}
-
-define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4
- ret <4 x i32> %vabs1.i
-}
-
-define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
-; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vabs1.i = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4
- ret <2 x i64> %vabs1.i
-}
-
-define <2 x float> @test_vabs_f32(<2 x float> %a) #1 {
-; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4
- ret <2 x float> %vabs1.i
-}
-
-define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 {
-; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4
- ret <4 x float> %vabs1.i
-}
-
-define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 {
-; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4
- ret <2 x double> %vabs1.i
-}
-
-define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vuqadd.i = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
- ret <8 x i8> %vuqadd.i
-}
-
-define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vuqadd.i = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
- ret <16 x i8> %vuqadd.i
-}
-
-define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %vuqadd2.i = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
- ret <4 x i16> %vuqadd2.i
-}
-
-define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %vuqadd2.i = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
- ret <8 x i16> %vuqadd2.i
-}
-
-define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vuqadd2.i = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
- ret <2 x i32> %vuqadd2.i
-}
-
-define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vuqadd2.i = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
- ret <4 x i32> %vuqadd2.i
-}
-
-define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
-; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vuqadd2.i = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
- ret <2 x i64> %vuqadd2.i
-}
-
-define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vcls.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4
- ret <8 x i8> %vcls.i
-}
-
-define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vcls.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4
- ret <16 x i8> %vcls.i
-}
-
-define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %vcls1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4
- ret <4 x i16> %vcls1.i
-}
-
-define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %vcls1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4
- ret <8 x i16> %vcls1.i
-}
-
-define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcls1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4
- ret <2 x i32> %vcls1.i
-}
-
-define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
-; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcls1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4
- ret <4 x i32> %vcls1.i
-}
-
-define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
- ret <8 x i8> %vclz.i
-}
-
-define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
- ret <16 x i8> %vclz.i
-}
-
-define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4
- ret <4 x i16> %vclz1.i
-}
-
-define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4
- ret <8 x i16> %vclz1.i
-}
-
-define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4
- ret <2 x i32> %vclz1.i
-}
-
-define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
-; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4
- ret <4 x i32> %vclz1.i
-}
-
-define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
-; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
- ret <8 x i8> %vctpop.i
-}
-
-define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
-; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
- ret <16 x i8> %vctpop.i
-}
-
-define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
-; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
- ret <8 x i8> %neg.i
-}
-
-define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
-; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
- ret <16 x i8> %neg.i
-}
-
-define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
-; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
- ret <4 x i16> %neg.i
-}
-
-define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
-; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
- ret <8 x i16> %neg.i
-}
-
-define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
-; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1>
- ret <2 x i32> %neg.i
-}
-
-define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
-; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
- ret <4 x i32> %neg.i
-}
-
-define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
-; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %vrbit.i = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4
- ret <8 x i8> %vrbit.i
-}
-
-define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
-; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %vrbit.i = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4
- ret <16 x i8> %vrbit.i
-}
-
-define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
-; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
- %vmovn.i = trunc <8 x i16> %a to <8 x i8>
- ret <8 x i8> %vmovn.i
-}
-
-define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
-; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vmovn.i = trunc <4 x i32> %a to <4 x i16>
- ret <4 x i16> %vmovn.i
-}
-
-define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
-; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vmovn.i = trunc <2 x i64> %a to <2 x i32>
- ret <2 x i32> %vmovn.i
-}
-
-define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
- %vmovn.i.i = trunc <8 x i16> %b to <8 x i8>
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
- %vmovn.i.i = trunc <4 x i32> %b to <4 x i16>
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
- %vmovn.i.i = trunc <2 x i64> %b to <2 x i32>
- %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %shuffle.i
-}
-
-define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
-; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
- %vqdmull1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4
- ret <8 x i8> %vqdmull1.i
-}
-
-define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
-; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vqdmull1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4
- ret <4 x i16> %vqdmull1.i
-}
-
-define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
-; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vqdmull1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4
- ret <2 x i32> %vqdmull1.i
-}
-
-define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
- %vqdmull1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
- %vqdmull1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
- %vqdmull1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4
- %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %shuffle.i
-}
-
-define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
-; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
- %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4
- ret <8 x i8> %vqmovn1.i
-}
-
-define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
-; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4
- ret <4 x i16> %vqmovn1.i
-}
-
-define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
-; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4
- ret <2 x i32> %vqmovn1.i
-}
-
-define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
- %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: test_vqmovn_high_s32
- %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: test_vqmovn_high_s64
- %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4
- %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %shuffle.i
-}
-
-define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
-; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
- %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4
- ret <8 x i8> %vqmovn1.i
-}
-
-define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
-; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4
- ret <4 x i16> %vqmovn1.i
-}
-
-define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
-; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4
- ret <2 x i32> %vqmovn1.i
-}
-
-define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
-; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
- %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4
- %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %shuffle.i
-}
-
-define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
-; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
- %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %shuffle.i
-}
-
-define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
-; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
- %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4
- %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %shuffle.i
-}
-
-define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
- %1 = sext <8 x i8> %a to <8 x i16>
- %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
- %1 = sext <4 x i16> %a to <4 x i32>
- %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
- ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
- %1 = sext <2 x i32> %a to <2 x i64>
- %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
- ret <2 x i64> %vshll_n
-}
-
-define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
- %1 = zext <8 x i8> %a to <8 x i16>
- %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
- %1 = zext <4 x i16> %a to <4 x i32>
- %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
- ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
-; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
- %1 = zext <2 x i32> %a to <2 x i64>
- %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
- ret <2 x i64> %vshll_n
-}
-
-define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %1 = sext <8 x i8> %shuffle.i to <8 x i16>
- %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %1 = sext <4 x i16> %shuffle.i to <4 x i32>
- %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
- ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %1 = sext <2 x i32> %shuffle.i to <2 x i64>
- %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
- ret <2 x i64> %vshll_n
-}
-
-define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %1 = zext <8 x i8> %shuffle.i to <8 x i16>
- %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- ret <8 x i16> %vshll_n
-}
-
-define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %1 = zext <4 x i16> %shuffle.i to <4 x i32>
- %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
- ret <4 x i32> %vshll_n
-}
-
-define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
-; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %1 = zext <2 x i32> %shuffle.i to <2 x i64>
- %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
- ret <2 x i64> %vshll_n
-}
-
-define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 {
-; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4
- ret <4 x i16> %vcvt1.i
-}
-
-define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 {
-; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
- %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4
- %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %shuffle.i
-}
-
-define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h
- %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4
- ret <4 x float> %vcvt1.i
-}
-
-define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 {
-; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
- %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4
- ret <4 x float> %vcvt1.i.i
-}
-
-define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vcvt.i = fptrunc <2 x double> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
-; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
- %vcvt.i.i = fptrunc <2 x double> %b to <2 x float>
- %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x float> %shuffle.i
-}
-
-define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4
- ret <2 x float> %vcvtx_f32_f641.i
-}
-
-define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
-; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
- %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4
- %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x float> %shuffle.i
-}
-
-define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
- %vcvt.i = fpext <2 x float> %a to <2 x double>
- ret <2 x double> %vcvt.i
-}
-
-define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
-; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
- %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
- %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double>
- ret <2 x double> %vcvt.i.i
-}
-
-define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
-; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrndn1.i
-}
-
-define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
-; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrndn1.i
-}
-
-define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
-; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrndn1.i
-}
-
-define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
-; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrnda1.i
-}
-
-define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
-; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrnda1.i
-}
-
-define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
-; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrnda1.i
-}
-
-define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
-; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrndp1.i
-}
-
-define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
-; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrndp1.i
-}
-
-define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
-; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrndp1.i
-}
-
-define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
-; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrndm1.i
-}
-
-define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
-; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrndm1.i
-}
-
-define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 {
-; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrndm1.i
-}
-
-define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
-; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrndx1.i
-}
-
-define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
-; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrndx1.i
-}
-
-define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
-; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrndx1.i
-}
-
-define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
-; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrnd1.i
-}
-
-define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
-; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrnd1.i
-}
-
-define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
-; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrnd1.i
-}
-
-define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
-; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrndi1.i
-}
-
-define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
-; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrndi1.i
-}
-
-define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
-; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrndi1.i
-}
-
-define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvt.i = fptosi <2 x float> %a to <2 x i32>
- ret <2 x i32> %vcvt.i
-}
-
-define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvt.i = fptosi <4 x float> %a to <4 x i32>
- ret <4 x i32> %vcvt.i
-}
-
-define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = fptosi <2 x double> %a to <2 x i64>
- ret <2 x i64> %vcvt.i
-}
-
-define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvt.i = fptoui <2 x float> %a to <2 x i32>
- ret <2 x i32> %vcvt.i
-}
-
-define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvt.i = fptoui <4 x float> %a to <4 x i32>
- ret <4 x i32> %vcvt.i
-}
-
-define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = fptoui <2 x double> %a to <2 x i64>
- ret <2 x i64> %vcvt.i
-}
-
-define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
-; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = fptosi <2 x float> %a to <2 x i64>
- ret <2 x i64> %vcvt.i
-}
-
-define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 {
-; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
-; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = fptoui <2 x float> %a to <2 x i64>
- ret <2 x i64> %vcvt.i
-}
-
-define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vcvt.i = fptosi <4 x float> %a to <4 x i16>
- ret <4 x i16> %vcvt.i
-}
-
-define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
- %vcvt.i = fptoui <4 x float> %a to <4 x i16>
- ret <4 x i16> %vcvt.i
-}
-
-define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vcvt.i = fptosi <2 x double> %a to <2 x i32>
- ret <2 x i32> %vcvt.i
-}
-
-define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 {
-; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vcvt.i = fptoui <2 x double> %a to <2 x i32>
- ret <2 x i32> %vcvt.i
-}
-
-define <1 x i8> @test_vcvt_s8_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}}
- %vcvt.i = fptosi <1 x double> %a to <1 x i8>
- ret <1 x i8> %vcvt.i
-}
-
-define <1 x i8> @test_vcvt_u8_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}}
- %vcvt.i = fptoui <1 x double> %a to <1 x i8>
- ret <1 x i8> %vcvt.i
-}
-
-define <1 x i16> @test_vcvt_s16_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}}
- %vcvt.i = fptosi <1 x double> %a to <1 x i16>
- ret <1 x i16> %vcvt.i
-}
-
-define <1 x i16> @test_vcvt_u16_f64(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}}
- %vcvt.i = fptoui <1 x double> %a to <1 x i16>
- ret <1 x i16> %vcvt.i
-}
-
-define <1 x i32> @test_vcvt_s32_f64_v1(<1 x double> %a) #0 {
-; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = fptosi <1 x double> %a to <1 x i32>
- ret <1 x i32> %vcvt.i
-}
-
-define <1 x i32> @test_vcvt_u32_f64_v1(<1 x double> %a) #0 {
-; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = fptoui <1 x double> %a to <1 x i32>
- ret <1 x i32> %vcvt.i
-}
-
-define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtn_s32_f32
-; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtns_f321.i
-}
-
-define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtnq_s32_f32
-; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtns_f321.i
-}
-
-define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtnq_s64_f64
-; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtns_f641.i
-}
-
-define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtn_u32_f32
-; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtnu_f321.i
-}
-
-define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtnq_u32_f32
-; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtnu_f321.i
-}
-
-define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtnq_u64_f64
-; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtnu_f641.i
-}
-
-define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtp_s32_f32
-; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtps_f321.i
-}
-
-define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtpq_s32_f32
-; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtps_f321.i
-}
-
-define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtpq_s64_f64
-; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtps_f641.i
-}
-
-define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtp_u32_f32
-; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtpu_f321.i
-}
-
-define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtpq_u32_f32
-; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtpu_f321.i
-}
-
-define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtpq_u64_f64
-; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtpu_f641.i
-}
-
-define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtm_s32_f32
-; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtms_f321.i
-}
-
-define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtmq_s32_f32
-; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtms_f321.i
-}
-
-define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtmq_s64_f64
-; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtms_f641.i
-}
-
-define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvtm_u32_f32
-; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtmu_f321.i
-}
-
-define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtmq_u32_f32
-; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtmu_f321.i
-}
-
-define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtmq_u64_f64
-; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtmu_f641.i
-}
-
-define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvta_s32_f32
-; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtas_f321.i
-}
-
-define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtaq_s32_f32
-; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtas_f321.i
-}
-
-define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtaq_s64_f64
-; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtas_f641.i
-}
-
-define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vcvta_u32_f32
-; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a)
- ret <2 x i32> %vcvtau_f321.i
-}
-
-define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vcvtaq_u32_f32
-; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a)
- ret <4 x i32> %vcvtau_f321.i
-}
-
-define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vcvtaq_u64_f64
-; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a)
- ret <2 x i64> %vcvtau_f641.i
-}
-
-define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
-; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrsqrte1.i
-}
-
-define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
-; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrsqrte1.i
-}
-
-define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
-; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrsqrte1.i
-}
-
-define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
-; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4
- ret <2 x float> %vrecpe1.i
-}
-
-define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
-; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4
- ret <4 x float> %vrecpe1.i
-}
-
-define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
-; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double> %a) #4
- ret <2 x double> %vrecpe1.i
-}
-
-define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
-; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4
- ret <2 x i32> %vrecpe1.i
-}
-
-define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
-; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4
- ret <4 x i32> %vrecpe1.i
-}
-
-define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
-; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
- ret <2 x float> %vsqrt1.i
-}
-
-define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
-; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
- ret <4 x float> %vsqrt1.i
-}
-
-define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
-; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
- ret <2 x double> %vsqrt1.i
-}
-
-define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvt.i = sitofp <2 x i32> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %vcvt.i = uitofp <2 x i32> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvt.i = sitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvt.i = uitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = sitofp <2 x i64> %a to <2 x double>
- ret <2 x double> %vcvt.i
-}
-
-define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = uitofp <2 x i64> %a to <2 x double>
- ret <2 x double> %vcvt.i
-}
-
-define <2 x float> @test_vcvt_f32_s64(<2 x i64> %a) #0 {
-; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vcvt.i = sitofp <2 x i64> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <2 x float> @test_vcvt_f32_u64(<2 x i64> %a) #0 {
-; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
- %vcvt.i = uitofp <2 x i64> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 {
-; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
-; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvt.i = sitofp <4 x i16> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 {
-; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
-; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %vcvt.i = uitofp <4 x i16> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 {
-; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
-; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = sitofp <2 x i32> %a to <2 x double>
- ret <2 x double> %vcvt.i
-}
-
-define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 {
-; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
-; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %vcvt.i = uitofp <2 x i32> %a to <2 x double>
- ret <2 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_s8(<1 x i8> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0]
-; CHECK: sxtb w{{[0-9]+}}, w{{[0-9]+}}
-; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = sitofp <1 x i8> %a to <1 x double>
- ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_u8(<1 x i8> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0]
-; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xff
-; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = uitofp <1 x i8> %a to <1 x double>
- ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_s16(<1 x i16> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0]
-; CHECK: sxth w{{[0-9]+}}, w{{[0-9]+}}
-; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = sitofp <1 x i16> %a to <1 x double>
- ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_u16(<1 x i16> %a) #0 {
-; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0]
-; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xffff
-; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = uitofp <1 x i16> %a to <1 x double>
- ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_s32_v1(<1 x i32> %a) #0 {
-; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}}
-; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = sitofp <1 x i32> %a to <1 x double>
- ret <1 x double> %vcvt.i
-}
-
-define <1 x double> @test_vcvt_f64_u32_v1(<1 x i32> %a) #0 {
-; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}}
-; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
- %vcvt.i = uitofp <1 x i32> %a to <1 x double>
- ret <1 x double> %vcvt.i
-}
-
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
-
-declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2
-
-declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2
-
-declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2
-
-declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>)
-
-declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>)
-
-declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>)
-
-declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>)
-
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.round.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.round.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.round.v2f32(<2 x float>) #3
-
-declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2
-
-declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2
-
-declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
-
-declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2
-
-declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2
-
-declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2
-
-declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2
-
-declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2
-
-declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2
-
-declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2
-
-declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2
-
-declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2
-
-declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2
-
-declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2
-
-declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2
-
-declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2
-
-declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2
-
-declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2
-
-declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2
-
-declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2
-
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3
-
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3
-
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3
-
-declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2
-
-declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2
-
-declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2
-
-declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2
-
-declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2
-
-declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2
-
-declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2
-
-declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2
-
-declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2
-
-declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2
-
-declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2
-
-declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2
-
-declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2
-
-declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2
-
-
-define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_s64_f64
-; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fptosi <1 x double> %a to <1 x i64>
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_u64_f64
-; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fptoui <1 x double> %a to <1 x i64>
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtn_s64_f64
-; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtn_u64_f64
-; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtp_s64_f64
-; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtp_u64_f64
-; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtm_s64_f64
-; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvtm_u64_f64
-; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvta_s64_f64
-; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvta_u64_f64
-; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
- %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a)
- ret <1 x i64> %1
-}
-
-define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_f64_s64
-; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
- %1 = sitofp <1 x i64> %a to <1 x double>
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_f64_u64
-; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
- %1 = uitofp <1 x i64> %a to <1 x double>
- ret <1 x double> %1
-}
-
-declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>)
-
-define <1 x double> @test_vrndn_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndn_f64
-; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrnda_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrnda_f64
-; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndp_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndp_f64
-; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndm_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndm_f64
-; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndx_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndx_f64
-; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrnd_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrnd_f64
-; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrndi_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrndi_f64
-; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
-declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
-declare <1 x double> @llvm.rint.v1f64(<1 x double>)
-declare <1 x double> @llvm.floor.v1f64(<1 x double>)
-declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
-declare <1 x double> @llvm.round.v1f64(<1 x double>)
-declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>)
-
-define <1 x double> @test_vrsqrte_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrsqrte_f64
-; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrecpe_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vrecpe_f64
-; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vsqrt_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vsqrt_f64
-; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vrecps_f64
-; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK-LABEL: test_vrsqrts_f64
-; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b)
- ret <1 x double> %1
-}
-
-declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
-declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
-declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
-
-define i64 @test_vaddlv_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddlv_s32
-; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
- %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i64> %1, i32 0
- ret i64 %2
-}
-
-define i64 @test_vaddlv_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vaddlv_u32
-; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
- %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a)
- %2 = extractelement <1 x i64> %1, i32 0
- ret i64 %2
-}
-
-declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>)
-declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll
index 4035b91..40649ae 100644
--- a/test/CodeGen/AArch64/neon-mov.ll
+++ b/test/CodeGen/AArch64/neon-mov.ll
@@ -1,218 +1,259 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
define <8 x i8> @movi8b() {
-;CHECK: movi {{v[0-9]+}}.8b, #0x8
+; CHECK-LABEL: movi8b:
+; CHECK: movi {{v[0-9]+}}.8b, #{{0x8|8}}
ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <16 x i8> @movi16b() {
-;CHECK: movi {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: movi16b:
+; CHECK: movi {{v[0-9]+}}.16b, #{{0x8|8}}
ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define <2 x i32> @movi2s_lsl0() {
-;CHECK: movi {{v[0-9]+}}.2s, #0xff
+; CHECK-LABEL: movi2s_lsl0:
+; CHECK: movi {{d[0-9]+}}, #0x0000ff000000ff
ret <2 x i32> < i32 255, i32 255 >
}
define <2 x i32> @movi2s_lsl8() {
-;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #8
+; CHECK-LABEL: movi2s_lsl8:
+; CHECK: movi {{d[0-9]+}}, #0x00ff000000ff00
ret <2 x i32> < i32 65280, i32 65280 >
}
define <2 x i32> @movi2s_lsl16() {
-;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #16
+; CHECK-LABEL: movi2s_lsl16:
+; CHECK: movi {{d[0-9]+}}, #0xff000000ff0000
ret <2 x i32> < i32 16711680, i32 16711680 >
}
define <2 x i32> @movi2s_lsl24() {
-;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #24
+; CHECK-LABEL: movi2s_lsl24:
+; CHECK: movi {{d[0-9]+}}, #0xff000000ff000000
ret <2 x i32> < i32 4278190080, i32 4278190080 >
}
define <4 x i32> @movi4s_lsl0() {
-;CHECK: movi {{v[0-9]+}}.4s, #0xff
+; CHECK-LABEL: movi4s_lsl0:
+; CHECK: movi {{v[0-9]+}}.2d, #0x0000ff000000ff
ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 >
}
define <4 x i32> @movi4s_lsl8() {
-;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #8
+; CHECK-LABEL: movi4s_lsl8:
+; CHECK: movi {{v[0-9]+}}.2d, #0x00ff000000ff00
ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 >
}
define <4 x i32> @movi4s_lsl16() {
-;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #16
+; CHECK-LABEL: movi4s_lsl16:
+; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff0000
ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 >
}
define <4 x i32> @movi4s_lsl24() {
-;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #24
+; CHECK-LABEL: movi4s_lsl24:
+; CHECK: movi {{v[0-9]+}}.2d, #0xff000000ff000000
ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 >
}
define <4 x i16> @movi4h_lsl0() {
-;CHECK: movi {{v[0-9]+}}.4h, #0xff
+; CHECK-LABEL: movi4h_lsl0:
+; CHECK: movi {{d[0-9]+}}, #0xff00ff00ff00ff
ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 >
}
define <4 x i16> @movi4h_lsl8() {
-;CHECK: movi {{v[0-9]+}}.4h, #0xff, lsl #8
+; CHECK-LABEL: movi4h_lsl8:
+; CHECK: movi d0, #0xff00ff00ff00ff00
ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 >
}
define <8 x i16> @movi8h_lsl0() {
-;CHECK: movi {{v[0-9]+}}.8h, #0xff
+; CHECK-LABEL: movi8h_lsl0:
+; CHECK: movi v0.2d, #0xff00ff00ff00ff
ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
}
define <8 x i16> @movi8h_lsl8() {
-;CHECK: movi {{v[0-9]+}}.8h, #0xff, lsl #8
+; CHECK-LABEL: movi8h_lsl8:
+; CHECK: movi v0.2d, #0xff00ff00ff00ff00
ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
}
define <2 x i32> @mvni2s_lsl0() {
-;CHECK: mvni {{v[0-9]+}}.2s, #0x10
+; CHECK-LABEL: mvni2s_lsl0:
+; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}
ret <2 x i32> < i32 4294967279, i32 4294967279 >
}
define <2 x i32> @mvni2s_lsl8() {
-;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #8
+; CHECK-LABEL: mvni2s_lsl8:
+; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #8
ret <2 x i32> < i32 4294963199, i32 4294963199 >
}
define <2 x i32> @mvni2s_lsl16() {
-;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #16
+; CHECK-LABEL: mvni2s_lsl16:
+; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #16
ret <2 x i32> < i32 4293918719, i32 4293918719 >
}
define <2 x i32> @mvni2s_lsl24() {
-;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #24
+; CHECK-LABEL: mvni2s_lsl24:
+; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, lsl #24
ret <2 x i32> < i32 4026531839, i32 4026531839 >
}
define <4 x i32> @mvni4s_lsl0() {
-;CHECK: mvni {{v[0-9]+}}.4s, #0x10
+; CHECK-LABEL: mvni4s_lsl0:
+; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}
ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
}
define <4 x i32> @mvni4s_lsl8() {
-;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #8
+; CHECK-LABEL: mvni4s_lsl8:
+; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #8
ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 >
}
define <4 x i32> @mvni4s_lsl16() {
-;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #16
+; CHECK-LABEL: mvni4s_lsl16:
+; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #16
ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
}
define <4 x i32> @mvni4s_lsl24() {
-;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #24
+; CHECK-LABEL: mvni4s_lsl24:
+; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, lsl #24
ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 >
}
define <4 x i16> @mvni4h_lsl0() {
-;CHECK: mvni {{v[0-9]+}}.4h, #0x10
+; CHECK-LABEL: mvni4h_lsl0:
+; CHECK: mvni {{v[0-9]+}}.4h, #{{0x10|16}}
ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}
define <4 x i16> @mvni4h_lsl8() {
-;CHECK: mvni {{v[0-9]+}}.4h, #0x10, lsl #8
+; CHECK-LABEL: mvni4h_lsl8:
+; CHECK: mvni {{v[0-9]+}}.4h, #{{0x10|16}}, lsl #8
ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}
define <8 x i16> @mvni8h_lsl0() {
-;CHECK: mvni {{v[0-9]+}}.8h, #0x10
+; CHECK-LABEL: mvni8h_lsl0:
+; CHECK: mvni {{v[0-9]+}}.8h, #{{0x10|16}}
ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 >
}
define <8 x i16> @mvni8h_lsl8() {
-;CHECK: mvni {{v[0-9]+}}.8h, #0x10, lsl #8
+; CHECK-LABEL: mvni8h_lsl8:
+; CHECK: mvni {{v[0-9]+}}.8h, #{{0x10|16}}, lsl #8
ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 >
}
define <2 x i32> @movi2s_msl8(<2 x i32> %a) {
-;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #8
+; CHECK-LABEL: movi2s_msl8:
+; CHECK: movi {{d[0-9]+}}, #0x00ffff0000ffff
ret <2 x i32> < i32 65535, i32 65535 >
}
define <2 x i32> @movi2s_msl16() {
-;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #16
+; CHECK-LABEL: movi2s_msl16:
+; CHECK: movi d0, #0xffffff00ffffff
ret <2 x i32> < i32 16777215, i32 16777215 >
}
define <4 x i32> @movi4s_msl8() {
-;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #8
+; CHECK-LABEL: movi4s_msl8:
+; CHECK: movi v0.2d, #0x00ffff0000ffff
ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 >
}
define <4 x i32> @movi4s_msl16() {
-;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #16
+; CHECK-LABEL: movi4s_msl16:
+; CHECK: movi v0.2d, #0xffffff00ffffff
ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 >
}
define <2 x i32> @mvni2s_msl8() {
-;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #8
+; CHECK-LABEL: mvni2s_msl8:
+; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, msl #8
ret <2 x i32> < i32 18446744073709547264, i32 18446744073709547264>
}
define <2 x i32> @mvni2s_msl16() {
-;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #16
+; CHECK-LABEL: mvni2s_msl16:
+; CHECK: mvni {{v[0-9]+}}.2s, #{{0x10|16}}, msl #16
ret <2 x i32> < i32 18446744073708437504, i32 18446744073708437504>
}
define <4 x i32> @mvni4s_msl8() {
-;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #8
+; CHECK-LABEL: mvni4s_msl8:
+; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, msl #8
ret <4 x i32> < i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264>
}
define <4 x i32> @mvni4s_msl16() {
-;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #16
+; CHECK-LABEL: mvni4s_msl16:
+; CHECK: mvni {{v[0-9]+}}.4s, #{{0x10|16}}, msl #16
ret <4 x i32> < i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504>
}
define <2 x i64> @movi2d() {
-;CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff
+; CHECK-LABEL: movi2d:
+; CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
define <1 x i64> @movid() {
-;CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff
+; CHECK-LABEL: movid:
+; CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff
ret <1 x i64> < i64 18374687574888349695 >
}
define <2 x float> @fmov2s() {
-;CHECK: fmov {{v[0-9]+}}.2s, #-12.00000000
+; CHECK-LABEL: fmov2s:
+; CHECK: fmov {{v[0-9]+}}.2s, #{{-12.00000000|-1.200000e\+01}}
ret <2 x float> < float -1.2e1, float -1.2e1>
}
define <4 x float> @fmov4s() {
-;CHECK: fmov {{v[0-9]+}}.4s, #-12.00000000
+; CHECK-LABEL: fmov4s:
+; CHECK: fmov {{v[0-9]+}}.4s, #{{-12.00000000|-1.200000e\+01}}
ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1>
}
define <2 x double> @fmov2d() {
-;CHECK: fmov {{v[0-9]+}}.2d, #-12.00000000
+; CHECK-LABEL: fmov2d:
+; CHECK: fmov {{v[0-9]+}}.2d, #{{-12.00000000|-1.200000e\+01}}
ret <2 x double> < double -1.2e1, double -1.2e1>
}
define <2 x i32> @movi1d_1() {
-; CHECK: movi d0, #0xffffffff0000
+; CHECK-LABEL: movi1d_1:
+; CHECK: movi d0, #0x{{0*}}ffffffff0000
ret <2 x i32> < i32 -65536, i32 65535>
}
declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>)
define <2 x i32> @movi1d() {
+; CHECK-LABEL: movi1d:
; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-; CHECK-NEXT: movi d1, #0xffffffff0000
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
+; CHECK-NEXT: movi d1, #0x{{0*}}ffffffff0000
%1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
ret <2 x i32> %1
}
diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll
deleted file mode 100644
index da22ce8..0000000
--- a/test/CodeGen/AArch64/neon-mul-div.ll
+++ /dev/null
@@ -1,754 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-
-define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = mul <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = mul <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = mul <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = mul <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = mul <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = mul <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK-LABEL: mul1xi64:
-;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
- %tmp3 = mul <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK-LABEL: mul2xi64:
-;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
-;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
- %tmp3 = mul <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
- define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = fmul <2 x float> %A, %B;
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = fmul <4 x float> %A, %B;
- ret <4 x float> %tmp3
-}
-define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = fmul <2 x double> %A, %B;
- ret <2 x double> %tmp3
-}
-
-
- define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = fdiv <2 x float> %A, %B;
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = fdiv <4 x float> %A, %B;
- ret <4 x float> %tmp3
-}
-define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = fdiv <2 x double> %A, %B;
- ret <2 x double> %tmp3
-}
-
-define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <1 x i8> %A, %B;
- ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <1 x i16> %A, %B;
- ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <1 x i32> %A, %B;
- ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = sdiv <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = sdiv <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = sdiv <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <1 x i8> %A, %B;
- ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <1 x i16> %A, %B;
- ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <1 x i32> %A, %B;
- ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = udiv <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = udiv <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = udiv <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <1 x i8> %A, %B;
- ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <1 x i16> %A, %B;
- ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <1 x i32> %A, %B;
- ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = srem <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = srem <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = srem <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
-define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <1 x i8> %A, %B;
- ret <1 x i8> %tmp3
-}
-
-define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <8 x i8> %A, %B;
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <16 x i8> %A, %B;
- ret <16 x i8> %tmp3
-}
-
-define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <1 x i16> %A, %B;
- ret <1 x i16> %tmp3
-}
-
-define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <4 x i16> %A, %B;
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <8 x i16> %A, %B;
- ret <8 x i16> %tmp3
-}
-
-define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <1 x i32> %A, %B;
- ret <1 x i32> %tmp3
-}
-
-define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <2 x i32> %A, %B;
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
-;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
- %tmp3 = urem <4 x i32> %A, %B;
- ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = urem <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
-;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
- %tmp3 = urem <2 x i64> %A, %B;
- ret <2 x i64> %tmp3
-}
-
-define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
-; CHECK: bl fmodf
-; CHECK: bl fmodf
- %tmp3 = frem <2 x float> %A, %B;
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-; CHECK: bl fmodf
-; CHECK: bl fmodf
- %tmp3 = frem <4 x float> %A, %B;
- ret <4 x float> %tmp3
-}
-
-define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) {
-; CHECK: bl fmod
- %tmp3 = frem <1 x double> %A, %B;
- ret <1 x double> %tmp3
-}
-
-define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
-; CHECK: bl fmod
-; CHECK: bl fmod
- %tmp3 = frem <2 x double> %A, %B;
- ret <2 x double> %tmp3
-}
-
-declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>)
-declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>)
-
-define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: poly_mulv8i8:
- %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: pmul v0.8b, v0.8b, v1.8b
- ret <8 x i8> %prod
-}
-
-define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: poly_mulv16i8:
- %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: pmul v0.16b, v0.16b, v1.16b
- ret <16 x i8> %prod
-}
-
-declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
-declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqdmulh_v4i16:
- %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
- ret <4 x i16> %prod
-}
-
-define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqdmulh_v8i16:
- %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
- ret <8 x i16> %prod
-}
-
-define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqdmulh_v2i32:
- %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
- ret <2 x i32> %prod
-}
-
-define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqdmulh_v4i32:
- %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
- ret <4 x i32> %prod
-}
-
-declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqrdmulh_v4i16:
- %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
- ret <4 x i16> %prod
-}
-
-define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqrdmulh_v8i16:
- %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
- ret <8 x i16> %prod
-}
-
-define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqrdmulh_v2i32:
- %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
- ret <2 x i32> %prod
-}
-
-define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqrdmulh_v4i32:
- %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
- ret <4 x i32> %prod
-}
-
-declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
-declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
-declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
-
-define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.2s, v0.2s, v1.2s
- %val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
- ret <2 x float> %val
-}
-
-define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.4s, v0.4s, v1.4s
- %val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
- ret <4 x float> %val
-}
-
-define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
-; Using registers other than v0, v1 and v2 is possible, but would be odd.
-; CHECK: fmulx v0.2d, v0.2d, v1.2d
- %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
- ret <2 x double> %val
-}
-
-define <1 x i8> @test_mul_v1i8(<1 x i8> %a, <1 x i8> %b) {
-;CHECK-LABEL: test_mul_v1i8:
-;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %c = mul <1 x i8> %a, %b
- ret <1 x i8> %c
-}
-
-define <1 x i16> @test_mul_v1i16(<1 x i16> %a, <1 x i16> %b) {
-;CHECK-LABEL: test_mul_v1i16:
-;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %c = mul <1 x i16> %a, %b
- ret <1 x i16> %c
-}
-
-define <1 x i32> @test_mul_v1i32(<1 x i32> %a, <1 x i32> %b) {
-;CHECK-LABEL: test_mul_v1i32:
-;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %c = mul <1 x i32> %a, %b
- ret <1 x i32> %c
-}
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
index a0b17e1..4f8571d 100644
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
%struct.int16x4x2_t = type { [2 x <4 x i16>] }
@@ -20,7 +20,7 @@
%struct.poly16x8x2_t = type { [2 x <8 x i16>] }
define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp1_s8:
+; CHECK-LABEL: test_vuzp1_s8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -28,7 +28,7 @@ entry:
}
define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzp1q_s8:
+; CHECK-LABEL: test_vuzp1q_s8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -36,7 +36,7 @@ entry:
}
define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp1_s16:
+; CHECK-LABEL: test_vuzp1_s16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -44,7 +44,7 @@ entry:
}
define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzp1q_s16:
+; CHECK-LABEL: test_vuzp1q_s16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -52,15 +52,15 @@ entry:
}
define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vuzp1_s32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vuzp1_s32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vuzp1q_s32:
+; CHECK-LABEL: test_vuzp1q_s32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -68,15 +68,15 @@ entry:
}
define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vuzp1q_s64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vuzp1q_s64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle.i
}
define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp1_u8:
+; CHECK-LABEL: test_vuzp1_u8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -84,7 +84,7 @@ entry:
}
define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzp1q_u8:
+; CHECK-LABEL: test_vuzp1q_u8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -92,7 +92,7 @@ entry:
}
define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp1_u16:
+; CHECK-LABEL: test_vuzp1_u16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -100,7 +100,7 @@ entry:
}
define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzp1q_u16:
+; CHECK-LABEL: test_vuzp1q_u16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -108,15 +108,15 @@ entry:
}
define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vuzp1_u32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vuzp1_u32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vuzp1q_u32:
+; CHECK-LABEL: test_vuzp1q_u32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -124,23 +124,23 @@ entry:
}
define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vuzp1q_u64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vuzp1q_u64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle.i
}
define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vuzp1_f32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vuzp1_f32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
ret <2 x float> %shuffle.i
}
define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vuzp1q_f32:
+; CHECK-LABEL: test_vuzp1q_f32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -148,15 +148,15 @@ entry:
}
define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vuzp1q_f64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vuzp1q_f64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %shuffle.i
}
define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp1_p8:
+; CHECK-LABEL: test_vuzp1_p8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -164,7 +164,7 @@ entry:
}
define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzp1q_p8:
+; CHECK-LABEL: test_vuzp1q_p8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -172,7 +172,7 @@ entry:
}
define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp1_p16:
+; CHECK-LABEL: test_vuzp1_p16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -180,7 +180,7 @@ entry:
}
define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzp1q_p16:
+; CHECK-LABEL: test_vuzp1q_p16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -188,7 +188,7 @@ entry:
}
define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp2_s8:
+; CHECK-LABEL: test_vuzp2_s8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -196,7 +196,7 @@ entry:
}
define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzp2q_s8:
+; CHECK-LABEL: test_vuzp2q_s8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -204,7 +204,7 @@ entry:
}
define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp2_s16:
+; CHECK-LABEL: test_vuzp2_s16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -212,7 +212,7 @@ entry:
}
define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzp2q_s16:
+; CHECK-LABEL: test_vuzp2q_s16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -220,15 +220,15 @@ entry:
}
define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vuzp2_s32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vuzp2_s32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vuzp2q_s32:
+; CHECK-LABEL: test_vuzp2q_s32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -236,16 +236,15 @@ entry:
}
define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vuzp2q_s64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vuzp2q_s64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
}
define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp2_u8:
+; CHECK-LABEL: test_vuzp2_u8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -253,7 +252,7 @@ entry:
}
define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzp2q_u8:
+; CHECK-LABEL: test_vuzp2q_u8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -261,7 +260,7 @@ entry:
}
define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp2_u16:
+; CHECK-LABEL: test_vuzp2_u16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -269,7 +268,7 @@ entry:
}
define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzp2q_u16:
+; CHECK-LABEL: test_vuzp2q_u16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -277,15 +276,15 @@ entry:
}
define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vuzp2_u32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vuzp2_u32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vuzp2q_u32:
+; CHECK-LABEL: test_vuzp2q_u32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -293,24 +292,23 @@ entry:
}
define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vuzp2q_u64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vuzp2q_u64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
}
define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vuzp2_f32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vuzp2_f32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
ret <2 x float> %shuffle.i
}
define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vuzp2q_f32:
+; CHECK-LABEL: test_vuzp2q_f32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -318,16 +316,15 @@ entry:
}
define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vuzp2q_f64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
-; CHECK-NEXT: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+; CHECK-LABEL: test_vuzp2q_f64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle.i
}
define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp2_p8:
+; CHECK-LABEL: test_vuzp2_p8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -335,7 +332,7 @@ entry:
}
define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzp2q_p8:
+; CHECK-LABEL: test_vuzp2q_p8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -343,7 +340,7 @@ entry:
}
define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp2_p16:
+; CHECK-LABEL: test_vuzp2_p16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -351,7 +348,7 @@ entry:
}
define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzp2q_p16:
+; CHECK-LABEL: test_vuzp2q_p16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -359,7 +356,7 @@ entry:
}
define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip1_s8:
+; CHECK-LABEL: test_vzip1_s8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -367,7 +364,7 @@ entry:
}
define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzip1q_s8:
+; CHECK-LABEL: test_vzip1q_s8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -375,7 +372,7 @@ entry:
}
define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip1_s16:
+; CHECK-LABEL: test_vzip1_s16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -383,7 +380,7 @@ entry:
}
define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzip1q_s16:
+; CHECK-LABEL: test_vzip1q_s16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -391,15 +388,15 @@ entry:
}
define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vzip1_s32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vzip1_s32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vzip1q_s32:
+; CHECK-LABEL: test_vzip1q_s32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -407,15 +404,15 @@ entry:
}
define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vzip1q_s64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vzip1q_s64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle.i
}
define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip1_u8:
+; CHECK-LABEL: test_vzip1_u8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -423,7 +420,7 @@ entry:
}
define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzip1q_u8:
+; CHECK-LABEL: test_vzip1q_u8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -431,7 +428,7 @@ entry:
}
define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip1_u16:
+; CHECK-LABEL: test_vzip1_u16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -439,7 +436,7 @@ entry:
}
define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzip1q_u16:
+; CHECK-LABEL: test_vzip1q_u16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -447,15 +444,15 @@ entry:
}
define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vzip1_u32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vzip1_u32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vzip1q_u32:
+; CHECK-LABEL: test_vzip1q_u32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -463,23 +460,23 @@ entry:
}
define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vzip1q_u64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vzip1q_u64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle.i
}
define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vzip1_f32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vzip1_f32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
ret <2 x float> %shuffle.i
}
define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vzip1q_f32:
+; CHECK-LABEL: test_vzip1q_f32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -487,15 +484,15 @@ entry:
}
define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vzip1q_f64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vzip1q_f64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %shuffle.i
}
define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip1_p8:
+; CHECK-LABEL: test_vzip1_p8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -503,7 +500,7 @@ entry:
}
define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzip1q_p8:
+; CHECK-LABEL: test_vzip1q_p8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -511,7 +508,7 @@ entry:
}
define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip1_p16:
+; CHECK-LABEL: test_vzip1_p16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -519,7 +516,7 @@ entry:
}
define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzip1q_p16:
+; CHECK-LABEL: test_vzip1q_p16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -527,7 +524,7 @@ entry:
}
define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip2_s8:
+; CHECK-LABEL: test_vzip2_s8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -535,7 +532,7 @@ entry:
}
define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzip2q_s8:
+; CHECK-LABEL: test_vzip2q_s8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -543,7 +540,7 @@ entry:
}
define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip2_s16:
+; CHECK-LABEL: test_vzip2_s16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -551,7 +548,7 @@ entry:
}
define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzip2q_s16:
+; CHECK-LABEL: test_vzip2q_s16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -559,15 +556,15 @@ entry:
}
define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vzip2_s32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vzip2_s32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vzip2q_s32:
+; CHECK-LABEL: test_vzip2q_s32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -575,15 +572,15 @@ entry:
}
define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vzip2q_s64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_vzip2q_s64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
}
define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip2_u8:
+; CHECK-LABEL: test_vzip2_u8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -591,7 +588,7 @@ entry:
}
define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzip2q_u8:
+; CHECK-LABEL: test_vzip2q_u8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -599,7 +596,7 @@ entry:
}
define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip2_u16:
+; CHECK-LABEL: test_vzip2_u16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -607,7 +604,7 @@ entry:
}
define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzip2q_u16:
+; CHECK-LABEL: test_vzip2q_u16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -615,15 +612,15 @@ entry:
}
define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vzip2_u32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vzip2_u32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vzip2q_u32:
+; CHECK-LABEL: test_vzip2q_u32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -631,23 +628,23 @@ entry:
}
define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vzip2q_u64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_vzip2q_u64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
}
define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vzip2_f32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vzip2_f32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
ret <2 x float> %shuffle.i
}
define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vzip2q_f32:
+; CHECK-LABEL: test_vzip2q_f32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -655,15 +652,15 @@ entry:
}
define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vzip2q_f64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_vzip2q_f64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle.i
}
define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip2_p8:
+; CHECK-LABEL: test_vzip2_p8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -671,7 +668,7 @@ entry:
}
define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzip2q_p8:
+; CHECK-LABEL: test_vzip2q_p8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -679,7 +676,7 @@ entry:
}
define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip2_p16:
+; CHECK-LABEL: test_vzip2_p16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -687,7 +684,7 @@ entry:
}
define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzip2q_p16:
+; CHECK-LABEL: test_vzip2q_p16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -695,7 +692,7 @@ entry:
}
define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn1_s8:
+; CHECK-LABEL: test_vtrn1_s8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -703,7 +700,7 @@ entry:
}
define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrn1q_s8:
+; CHECK-LABEL: test_vtrn1q_s8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -711,7 +708,7 @@ entry:
}
define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn1_s16:
+; CHECK-LABEL: test_vtrn1_s16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -719,7 +716,7 @@ entry:
}
define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrn1q_s16:
+; CHECK-LABEL: test_vtrn1q_s16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -727,15 +724,15 @@ entry:
}
define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vtrn1_s32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vtrn1_s32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vtrn1q_s32:
+; CHECK-LABEL: test_vtrn1q_s32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -743,15 +740,15 @@ entry:
}
define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vtrn1q_s64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vtrn1q_s64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle.i
}
define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn1_u8:
+; CHECK-LABEL: test_vtrn1_u8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -759,7 +756,7 @@ entry:
}
define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrn1q_u8:
+; CHECK-LABEL: test_vtrn1q_u8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -767,7 +764,7 @@ entry:
}
define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn1_u16:
+; CHECK-LABEL: test_vtrn1_u16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -775,7 +772,7 @@ entry:
}
define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrn1q_u16:
+; CHECK-LABEL: test_vtrn1q_u16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -783,15 +780,15 @@ entry:
}
define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vtrn1_u32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vtrn1_u32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vtrn1q_u32:
+; CHECK-LABEL: test_vtrn1q_u32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -799,23 +796,23 @@ entry:
}
define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vtrn1q_u64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vtrn1q_u64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle.i
}
define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vtrn1_f32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
+; CHECK-LABEL: test_vtrn1_f32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
ret <2 x float> %shuffle.i
}
define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vtrn1q_f32:
+; CHECK-LABEL: test_vtrn1q_f32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -823,15 +820,15 @@ entry:
}
define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vtrn1q_f64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK-LABEL: test_vtrn1q_f64:
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %shuffle.i
}
define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn1_p8:
+; CHECK-LABEL: test_vtrn1_p8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -839,7 +836,7 @@ entry:
}
define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrn1q_p8:
+; CHECK-LABEL: test_vtrn1q_p8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -847,7 +844,7 @@ entry:
}
define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn1_p16:
+; CHECK-LABEL: test_vtrn1_p16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -855,7 +852,7 @@ entry:
}
define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrn1q_p16:
+; CHECK-LABEL: test_vtrn1q_p16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -863,7 +860,7 @@ entry:
}
define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn2_s8:
+; CHECK-LABEL: test_vtrn2_s8:
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -871,7 +868,7 @@ entry:
}
define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrn2q_s8:
+; CHECK-LABEL: test_vtrn2q_s8:
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -879,7 +876,7 @@ entry:
}
define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn2_s16:
+; CHECK-LABEL: test_vtrn2_s16:
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -887,7 +884,7 @@ entry:
}
define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrn2q_s16:
+; CHECK-LABEL: test_vtrn2q_s16:
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -895,15 +892,15 @@ entry:
}
define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vtrn2_s32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vtrn2_s32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vtrn2q_s32:
+; CHECK-LABEL: test_vtrn2q_s32:
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -911,15 +908,15 @@ entry:
}
define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vtrn2q_s64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_vtrn2q_s64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
}
define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn2_u8:
+; CHECK-LABEL: test_vtrn2_u8:
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -927,7 +924,7 @@ entry:
}
define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrn2q_u8:
+; CHECK-LABEL: test_vtrn2q_u8:
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -935,7 +932,7 @@ entry:
}
define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn2_u16:
+; CHECK-LABEL: test_vtrn2_u16:
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -943,7 +940,7 @@ entry:
}
define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrn2q_u16:
+; CHECK-LABEL: test_vtrn2q_u16:
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -951,15 +948,15 @@ entry:
}
define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vtrn2_u32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vtrn2_u32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %shuffle.i
}
define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vtrn2q_u32:
+; CHECK-LABEL: test_vtrn2q_u32:
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -967,23 +964,23 @@ entry:
}
define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vtrn2q_u64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_vtrn2q_u64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle.i
}
define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vtrn2_f32:
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vtrn2_f32:
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
ret <2 x float> %shuffle.i
}
define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vtrn2q_f32:
+; CHECK-LABEL: test_vtrn2q_f32:
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -991,15 +988,15 @@ entry:
}
define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vtrn2q_f64:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK-LABEL: test_vtrn2q_f64:
+; CHECK: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle.i
}
define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn2_p8:
+; CHECK-LABEL: test_vtrn2_p8:
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1007,7 +1004,7 @@ entry:
}
define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrn2q_p8:
+; CHECK-LABEL: test_vtrn2q_p8:
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -1015,7 +1012,7 @@ entry:
}
define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn2_p16:
+; CHECK-LABEL: test_vtrn2_p16:
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1023,7 +1020,7 @@ entry:
}
define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrn2q_p16:
+; CHECK-LABEL: test_vtrn2q_p16:
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1031,7 +1028,7 @@ entry:
}
define <8 x i8> @test_same_vuzp1_s8(<8 x i8> %a) {
-; CHECK: test_same_vuzp1_s8:
+; CHECK-LABEL: test_same_vuzp1_s8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1039,7 +1036,7 @@ entry:
}
define <16 x i8> @test_same_vuzp1q_s8(<16 x i8> %a) {
-; CHECK: test_same_vuzp1q_s8:
+; CHECK-LABEL: test_same_vuzp1q_s8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1047,7 +1044,7 @@ entry:
}
define <4 x i16> @test_same_vuzp1_s16(<4 x i16> %a) {
-; CHECK: test_same_vuzp1_s16:
+; CHECK-LABEL: test_same_vuzp1_s16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1055,7 +1052,7 @@ entry:
}
define <8 x i16> @test_same_vuzp1q_s16(<8 x i16> %a) {
-; CHECK: test_same_vuzp1q_s16:
+; CHECK-LABEL: test_same_vuzp1q_s16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1063,7 +1060,7 @@ entry:
}
define <4 x i32> @test_same_vuzp1q_s32(<4 x i32> %a) {
-; CHECK: test_same_vuzp1q_s32:
+; CHECK-LABEL: test_same_vuzp1q_s32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1071,7 +1068,7 @@ entry:
}
define <8 x i8> @test_same_vuzp1_u8(<8 x i8> %a) {
-; CHECK: test_same_vuzp1_u8:
+; CHECK-LABEL: test_same_vuzp1_u8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1079,7 +1076,7 @@ entry:
}
define <16 x i8> @test_same_vuzp1q_u8(<16 x i8> %a) {
-; CHECK: test_same_vuzp1q_u8:
+; CHECK-LABEL: test_same_vuzp1q_u8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1087,7 +1084,7 @@ entry:
}
define <4 x i16> @test_same_vuzp1_u16(<4 x i16> %a) {
-; CHECK: test_same_vuzp1_u16:
+; CHECK-LABEL: test_same_vuzp1_u16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1095,7 +1092,7 @@ entry:
}
define <8 x i16> @test_same_vuzp1q_u16(<8 x i16> %a) {
-; CHECK: test_same_vuzp1q_u16:
+; CHECK-LABEL: test_same_vuzp1q_u16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1103,7 +1100,7 @@ entry:
}
define <4 x i32> @test_same_vuzp1q_u32(<4 x i32> %a) {
-; CHECK: test_same_vuzp1q_u32:
+; CHECK-LABEL: test_same_vuzp1q_u32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1111,7 +1108,7 @@ entry:
}
define <4 x float> @test_same_vuzp1q_f32(<4 x float> %a) {
-; CHECK: test_same_vuzp1q_f32:
+; CHECK-LABEL: test_same_vuzp1q_f32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1119,7 +1116,7 @@ entry:
}
define <8 x i8> @test_same_vuzp1_p8(<8 x i8> %a) {
-; CHECK: test_same_vuzp1_p8:
+; CHECK-LABEL: test_same_vuzp1_p8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1127,7 +1124,7 @@ entry:
}
define <16 x i8> @test_same_vuzp1q_p8(<16 x i8> %a) {
-; CHECK: test_same_vuzp1q_p8:
+; CHECK-LABEL: test_same_vuzp1q_p8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1135,7 +1132,7 @@ entry:
}
define <4 x i16> @test_same_vuzp1_p16(<4 x i16> %a) {
-; CHECK: test_same_vuzp1_p16:
+; CHECK-LABEL: test_same_vuzp1_p16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1143,7 +1140,7 @@ entry:
}
define <8 x i16> @test_same_vuzp1q_p16(<8 x i16> %a) {
-; CHECK: test_same_vuzp1q_p16:
+; CHECK-LABEL: test_same_vuzp1q_p16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1151,7 +1148,7 @@ entry:
}
define <8 x i8> @test_same_vuzp2_s8(<8 x i8> %a) {
-; CHECK: test_same_vuzp2_s8:
+; CHECK-LABEL: test_same_vuzp2_s8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1159,7 +1156,7 @@ entry:
}
define <16 x i8> @test_same_vuzp2q_s8(<16 x i8> %a) {
-; CHECK: test_same_vuzp2q_s8:
+; CHECK-LABEL: test_same_vuzp2q_s8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -1167,7 +1164,7 @@ entry:
}
define <4 x i16> @test_same_vuzp2_s16(<4 x i16> %a) {
-; CHECK: test_same_vuzp2_s16:
+; CHECK-LABEL: test_same_vuzp2_s16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1175,7 +1172,7 @@ entry:
}
define <8 x i16> @test_same_vuzp2q_s16(<8 x i16> %a) {
-; CHECK: test_same_vuzp2q_s16:
+; CHECK-LABEL: test_same_vuzp2q_s16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1183,7 +1180,7 @@ entry:
}
define <4 x i32> @test_same_vuzp2q_s32(<4 x i32> %a) {
-; CHECK: test_same_vuzp2q_s32:
+; CHECK-LABEL: test_same_vuzp2q_s32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1191,7 +1188,7 @@ entry:
}
define <8 x i8> @test_same_vuzp2_u8(<8 x i8> %a) {
-; CHECK: test_same_vuzp2_u8:
+; CHECK-LABEL: test_same_vuzp2_u8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1199,7 +1196,7 @@ entry:
}
define <16 x i8> @test_same_vuzp2q_u8(<16 x i8> %a) {
-; CHECK: test_same_vuzp2q_u8:
+; CHECK-LABEL: test_same_vuzp2q_u8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -1207,7 +1204,7 @@ entry:
}
define <4 x i16> @test_same_vuzp2_u16(<4 x i16> %a) {
-; CHECK: test_same_vuzp2_u16:
+; CHECK-LABEL: test_same_vuzp2_u16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1215,7 +1212,7 @@ entry:
}
define <8 x i16> @test_same_vuzp2q_u16(<8 x i16> %a) {
-; CHECK: test_same_vuzp2q_u16:
+; CHECK-LABEL: test_same_vuzp2q_u16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1223,7 +1220,7 @@ entry:
}
define <4 x i32> @test_same_vuzp2q_u32(<4 x i32> %a) {
-; CHECK: test_same_vuzp2q_u32:
+; CHECK-LABEL: test_same_vuzp2q_u32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1231,7 +1228,7 @@ entry:
}
define <4 x float> @test_same_vuzp2q_f32(<4 x float> %a) {
-; CHECK: test_same_vuzp2q_f32:
+; CHECK-LABEL: test_same_vuzp2q_f32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1239,7 +1236,7 @@ entry:
}
define <8 x i8> @test_same_vuzp2_p8(<8 x i8> %a) {
-; CHECK: test_same_vuzp2_p8:
+; CHECK-LABEL: test_same_vuzp2_p8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1247,7 +1244,7 @@ entry:
}
define <16 x i8> @test_same_vuzp2q_p8(<16 x i8> %a) {
-; CHECK: test_same_vuzp2q_p8:
+; CHECK-LABEL: test_same_vuzp2q_p8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -1255,7 +1252,7 @@ entry:
}
define <4 x i16> @test_same_vuzp2_p16(<4 x i16> %a) {
-; CHECK: test_same_vuzp2_p16:
+; CHECK-LABEL: test_same_vuzp2_p16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1263,7 +1260,7 @@ entry:
}
define <8 x i16> @test_same_vuzp2q_p16(<8 x i16> %a) {
-; CHECK: test_same_vuzp2q_p16:
+; CHECK-LABEL: test_same_vuzp2q_p16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1271,7 +1268,7 @@ entry:
}
define <8 x i8> @test_same_vzip1_s8(<8 x i8> %a) {
-; CHECK: test_same_vzip1_s8:
+; CHECK-LABEL: test_same_vzip1_s8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1279,7 +1276,7 @@ entry:
}
define <16 x i8> @test_same_vzip1q_s8(<16 x i8> %a) {
-; CHECK: test_same_vzip1q_s8:
+; CHECK-LABEL: test_same_vzip1q_s8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -1287,7 +1284,7 @@ entry:
}
define <4 x i16> @test_same_vzip1_s16(<4 x i16> %a) {
-; CHECK: test_same_vzip1_s16:
+; CHECK-LABEL: test_same_vzip1_s16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -1295,7 +1292,7 @@ entry:
}
define <8 x i16> @test_same_vzip1q_s16(<8 x i16> %a) {
-; CHECK: test_same_vzip1q_s16:
+; CHECK-LABEL: test_same_vzip1q_s16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1303,7 +1300,7 @@ entry:
}
define <4 x i32> @test_same_vzip1q_s32(<4 x i32> %a) {
-; CHECK: test_same_vzip1q_s32:
+; CHECK-LABEL: test_same_vzip1q_s32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -1311,7 +1308,7 @@ entry:
}
define <8 x i8> @test_same_vzip1_u8(<8 x i8> %a) {
-; CHECK: test_same_vzip1_u8:
+; CHECK-LABEL: test_same_vzip1_u8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1319,7 +1316,7 @@ entry:
}
define <16 x i8> @test_same_vzip1q_u8(<16 x i8> %a) {
-; CHECK: test_same_vzip1q_u8:
+; CHECK-LABEL: test_same_vzip1q_u8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -1327,7 +1324,7 @@ entry:
}
define <4 x i16> @test_same_vzip1_u16(<4 x i16> %a) {
-; CHECK: test_same_vzip1_u16:
+; CHECK-LABEL: test_same_vzip1_u16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -1335,7 +1332,7 @@ entry:
}
define <8 x i16> @test_same_vzip1q_u16(<8 x i16> %a) {
-; CHECK: test_same_vzip1q_u16:
+; CHECK-LABEL: test_same_vzip1q_u16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1343,7 +1340,7 @@ entry:
}
define <4 x i32> @test_same_vzip1q_u32(<4 x i32> %a) {
-; CHECK: test_same_vzip1q_u32:
+; CHECK-LABEL: test_same_vzip1q_u32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -1351,7 +1348,7 @@ entry:
}
define <4 x float> @test_same_vzip1q_f32(<4 x float> %a) {
-; CHECK: test_same_vzip1q_f32:
+; CHECK-LABEL: test_same_vzip1q_f32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -1359,7 +1356,7 @@ entry:
}
define <8 x i8> @test_same_vzip1_p8(<8 x i8> %a) {
-; CHECK: test_same_vzip1_p8:
+; CHECK-LABEL: test_same_vzip1_p8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1367,7 +1364,7 @@ entry:
}
define <16 x i8> @test_same_vzip1q_p8(<16 x i8> %a) {
-; CHECK: test_same_vzip1q_p8:
+; CHECK-LABEL: test_same_vzip1q_p8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -1375,7 +1372,7 @@ entry:
}
define <4 x i16> @test_same_vzip1_p16(<4 x i16> %a) {
-; CHECK: test_same_vzip1_p16:
+; CHECK-LABEL: test_same_vzip1_p16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -1383,7 +1380,7 @@ entry:
}
define <8 x i16> @test_same_vzip1q_p16(<8 x i16> %a) {
-; CHECK: test_same_vzip1q_p16:
+; CHECK-LABEL: test_same_vzip1q_p16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1391,7 +1388,7 @@ entry:
}
define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
-; CHECK: test_same_vzip2_s8:
+; CHECK-LABEL: test_same_vzip2_s8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -1399,7 +1396,7 @@ entry:
}
define <16 x i8> @test_same_vzip2q_s8(<16 x i8> %a) {
-; CHECK: test_same_vzip2q_s8:
+; CHECK-LABEL: test_same_vzip2q_s8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -1407,7 +1404,7 @@ entry:
}
define <4 x i16> @test_same_vzip2_s16(<4 x i16> %a) {
-; CHECK: test_same_vzip2_s16:
+; CHECK-LABEL: test_same_vzip2_s16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -1415,7 +1412,7 @@ entry:
}
define <8 x i16> @test_same_vzip2q_s16(<8 x i16> %a) {
-; CHECK: test_same_vzip2q_s16:
+; CHECK-LABEL: test_same_vzip2q_s16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -1423,7 +1420,7 @@ entry:
}
define <4 x i32> @test_same_vzip2q_s32(<4 x i32> %a) {
-; CHECK: test_same_vzip2q_s32:
+; CHECK-LABEL: test_same_vzip2q_s32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -1431,7 +1428,7 @@ entry:
}
define <8 x i8> @test_same_vzip2_u8(<8 x i8> %a) {
-; CHECK: test_same_vzip2_u8:
+; CHECK-LABEL: test_same_vzip2_u8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -1439,7 +1436,7 @@ entry:
}
define <16 x i8> @test_same_vzip2q_u8(<16 x i8> %a) {
-; CHECK: test_same_vzip2q_u8:
+; CHECK-LABEL: test_same_vzip2q_u8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -1447,7 +1444,7 @@ entry:
}
define <4 x i16> @test_same_vzip2_u16(<4 x i16> %a) {
-; CHECK: test_same_vzip2_u16:
+; CHECK-LABEL: test_same_vzip2_u16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -1455,7 +1452,7 @@ entry:
}
define <8 x i16> @test_same_vzip2q_u16(<8 x i16> %a) {
-; CHECK: test_same_vzip2q_u16:
+; CHECK-LABEL: test_same_vzip2q_u16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -1463,7 +1460,7 @@ entry:
}
define <4 x i32> @test_same_vzip2q_u32(<4 x i32> %a) {
-; CHECK: test_same_vzip2q_u32:
+; CHECK-LABEL: test_same_vzip2q_u32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -1471,7 +1468,7 @@ entry:
}
define <4 x float> @test_same_vzip2q_f32(<4 x float> %a) {
-; CHECK: test_same_vzip2q_f32:
+; CHECK-LABEL: test_same_vzip2q_f32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -1479,7 +1476,7 @@ entry:
}
define <8 x i8> @test_same_vzip2_p8(<8 x i8> %a) {
-; CHECK: test_same_vzip2_p8:
+; CHECK-LABEL: test_same_vzip2_p8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -1487,7 +1484,7 @@ entry:
}
define <16 x i8> @test_same_vzip2q_p8(<16 x i8> %a) {
-; CHECK: test_same_vzip2q_p8:
+; CHECK-LABEL: test_same_vzip2q_p8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -1495,7 +1492,7 @@ entry:
}
define <4 x i16> @test_same_vzip2_p16(<4 x i16> %a) {
-; CHECK: test_same_vzip2_p16:
+; CHECK-LABEL: test_same_vzip2_p16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -1503,7 +1500,7 @@ entry:
}
define <8 x i16> @test_same_vzip2q_p16(<8 x i16> %a) {
-; CHECK: test_same_vzip2q_p16:
+; CHECK-LABEL: test_same_vzip2q_p16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -1511,7 +1508,7 @@ entry:
}
define <8 x i8> @test_same_vtrn1_s8(<8 x i8> %a) {
-; CHECK: test_same_vtrn1_s8:
+; CHECK-LABEL: test_same_vtrn1_s8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -1519,7 +1516,7 @@ entry:
}
define <16 x i8> @test_same_vtrn1q_s8(<16 x i8> %a) {
-; CHECK: test_same_vtrn1q_s8:
+; CHECK-LABEL: test_same_vtrn1q_s8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -1527,7 +1524,7 @@ entry:
}
define <4 x i16> @test_same_vtrn1_s16(<4 x i16> %a) {
-; CHECK: test_same_vtrn1_s16:
+; CHECK-LABEL: test_same_vtrn1_s16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -1535,7 +1532,7 @@ entry:
}
define <8 x i16> @test_same_vtrn1q_s16(<8 x i16> %a) {
-; CHECK: test_same_vtrn1q_s16:
+; CHECK-LABEL: test_same_vtrn1q_s16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -1543,7 +1540,7 @@ entry:
}
define <4 x i32> @test_same_vtrn1q_s32(<4 x i32> %a) {
-; CHECK: test_same_vtrn1q_s32:
+; CHECK-LABEL: test_same_vtrn1q_s32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -1551,7 +1548,7 @@ entry:
}
define <8 x i8> @test_same_vtrn1_u8(<8 x i8> %a) {
-; CHECK: test_same_vtrn1_u8:
+; CHECK-LABEL: test_same_vtrn1_u8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -1559,7 +1556,7 @@ entry:
}
define <16 x i8> @test_same_vtrn1q_u8(<16 x i8> %a) {
-; CHECK: test_same_vtrn1q_u8:
+; CHECK-LABEL: test_same_vtrn1q_u8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -1567,7 +1564,7 @@ entry:
}
define <4 x i16> @test_same_vtrn1_u16(<4 x i16> %a) {
-; CHECK: test_same_vtrn1_u16:
+; CHECK-LABEL: test_same_vtrn1_u16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -1575,7 +1572,7 @@ entry:
}
define <8 x i16> @test_same_vtrn1q_u16(<8 x i16> %a) {
-; CHECK: test_same_vtrn1q_u16:
+; CHECK-LABEL: test_same_vtrn1q_u16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -1583,7 +1580,7 @@ entry:
}
define <4 x i32> @test_same_vtrn1q_u32(<4 x i32> %a) {
-; CHECK: test_same_vtrn1q_u32:
+; CHECK-LABEL: test_same_vtrn1q_u32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -1591,7 +1588,7 @@ entry:
}
define <4 x float> @test_same_vtrn1q_f32(<4 x float> %a) {
-; CHECK: test_same_vtrn1q_f32:
+; CHECK-LABEL: test_same_vtrn1q_f32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -1599,7 +1596,7 @@ entry:
}
define <8 x i8> @test_same_vtrn1_p8(<8 x i8> %a) {
-; CHECK: test_same_vtrn1_p8:
+; CHECK-LABEL: test_same_vtrn1_p8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -1607,7 +1604,7 @@ entry:
}
define <16 x i8> @test_same_vtrn1q_p8(<16 x i8> %a) {
-; CHECK: test_same_vtrn1q_p8:
+; CHECK-LABEL: test_same_vtrn1q_p8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -1615,7 +1612,7 @@ entry:
}
define <4 x i16> @test_same_vtrn1_p16(<4 x i16> %a) {
-; CHECK: test_same_vtrn1_p16:
+; CHECK-LABEL: test_same_vtrn1_p16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -1623,7 +1620,7 @@ entry:
}
define <8 x i16> @test_same_vtrn1q_p16(<8 x i16> %a) {
-; CHECK: test_same_vtrn1q_p16:
+; CHECK-LABEL: test_same_vtrn1q_p16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -1631,7 +1628,7 @@ entry:
}
define <8 x i8> @test_same_vtrn2_s8(<8 x i8> %a) {
-; CHECK: test_same_vtrn2_s8:
+; CHECK-LABEL: test_same_vtrn2_s8:
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1639,7 +1636,7 @@ entry:
}
define <16 x i8> @test_same_vtrn2q_s8(<16 x i8> %a) {
-; CHECK: test_same_vtrn2q_s8:
+; CHECK-LABEL: test_same_vtrn2q_s8:
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -1647,7 +1644,7 @@ entry:
}
define <4 x i16> @test_same_vtrn2_s16(<4 x i16> %a) {
-; CHECK: test_same_vtrn2_s16:
+; CHECK-LABEL: test_same_vtrn2_s16:
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1655,7 +1652,7 @@ entry:
}
define <8 x i16> @test_same_vtrn2q_s16(<8 x i16> %a) {
-; CHECK: test_same_vtrn2q_s16:
+; CHECK-LABEL: test_same_vtrn2q_s16:
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1663,7 +1660,7 @@ entry:
}
define <4 x i32> @test_same_vtrn2q_s32(<4 x i32> %a) {
-; CHECK: test_same_vtrn2q_s32:
+; CHECK-LABEL: test_same_vtrn2q_s32:
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1671,7 +1668,7 @@ entry:
}
define <8 x i8> @test_same_vtrn2_u8(<8 x i8> %a) {
-; CHECK: test_same_vtrn2_u8:
+; CHECK-LABEL: test_same_vtrn2_u8:
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1679,7 +1676,7 @@ entry:
}
define <16 x i8> @test_same_vtrn2q_u8(<16 x i8> %a) {
-; CHECK: test_same_vtrn2q_u8:
+; CHECK-LABEL: test_same_vtrn2q_u8:
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -1687,7 +1684,7 @@ entry:
}
define <4 x i16> @test_same_vtrn2_u16(<4 x i16> %a) {
-; CHECK: test_same_vtrn2_u16:
+; CHECK-LABEL: test_same_vtrn2_u16:
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1695,7 +1692,7 @@ entry:
}
define <8 x i16> @test_same_vtrn2q_u16(<8 x i16> %a) {
-; CHECK: test_same_vtrn2q_u16:
+; CHECK-LABEL: test_same_vtrn2q_u16:
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1703,7 +1700,7 @@ entry:
}
define <4 x i32> @test_same_vtrn2q_u32(<4 x i32> %a) {
-; CHECK: test_same_vtrn2q_u32:
+; CHECK-LABEL: test_same_vtrn2q_u32:
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1711,7 +1708,7 @@ entry:
}
define <4 x float> @test_same_vtrn2q_f32(<4 x float> %a) {
-; CHECK: test_same_vtrn2q_f32:
+; CHECK-LABEL: test_same_vtrn2q_f32:
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1719,7 +1716,7 @@ entry:
}
define <8 x i8> @test_same_vtrn2_p8(<8 x i8> %a) {
-; CHECK: test_same_vtrn2_p8:
+; CHECK-LABEL: test_same_vtrn2_p8:
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1727,7 +1724,7 @@ entry:
}
define <16 x i8> @test_same_vtrn2q_p8(<16 x i8> %a) {
-; CHECK: test_same_vtrn2q_p8:
+; CHECK-LABEL: test_same_vtrn2q_p8:
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -1735,7 +1732,7 @@ entry:
}
define <4 x i16> @test_same_vtrn2_p16(<4 x i16> %a) {
-; CHECK: test_same_vtrn2_p16:
+; CHECK-LABEL: test_same_vtrn2_p16:
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -1743,7 +1740,7 @@ entry:
}
define <8 x i16> @test_same_vtrn2q_p16(<8 x i16> %a) {
-; CHECK: test_same_vtrn2q_p16:
+; CHECK-LABEL: test_same_vtrn2q_p16:
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -1752,7 +1749,7 @@ entry:
define <8 x i8> @test_undef_vuzp1_s8(<8 x i8> %a) {
-; CHECK: test_undef_vuzp1_s8:
+; CHECK-LABEL: test_undef_vuzp1_s8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1760,7 +1757,7 @@ entry:
}
define <16 x i8> @test_undef_vuzp1q_s8(<16 x i8> %a) {
-; CHECK: test_undef_vuzp1q_s8:
+; CHECK-LABEL: test_undef_vuzp1q_s8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1768,7 +1765,7 @@ entry:
}
define <4 x i16> @test_undef_vuzp1_s16(<4 x i16> %a) {
-; CHECK: test_undef_vuzp1_s16:
+; CHECK-LABEL: test_undef_vuzp1_s16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1776,7 +1773,7 @@ entry:
}
define <8 x i16> @test_undef_vuzp1q_s16(<8 x i16> %a) {
-; CHECK: test_undef_vuzp1q_s16:
+; CHECK-LABEL: test_undef_vuzp1q_s16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1784,7 +1781,7 @@ entry:
}
define <4 x i32> @test_undef_vuzp1q_s32(<4 x i32> %a) {
-; CHECK: test_undef_vuzp1q_s32:
+; CHECK-LABEL: test_undef_vuzp1q_s32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1792,7 +1789,7 @@ entry:
}
define <8 x i8> @test_undef_vuzp1_u8(<8 x i8> %a) {
-; CHECK: test_undef_vuzp1_u8:
+; CHECK-LABEL: test_undef_vuzp1_u8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1800,7 +1797,7 @@ entry:
}
define <16 x i8> @test_undef_vuzp1q_u8(<16 x i8> %a) {
-; CHECK: test_undef_vuzp1q_u8:
+; CHECK-LABEL: test_undef_vuzp1q_u8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1808,7 +1805,7 @@ entry:
}
define <4 x i16> @test_undef_vuzp1_u16(<4 x i16> %a) {
-; CHECK: test_undef_vuzp1_u16:
+; CHECK-LABEL: test_undef_vuzp1_u16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1816,7 +1813,7 @@ entry:
}
define <8 x i16> @test_undef_vuzp1q_u16(<8 x i16> %a) {
-; CHECK: test_undef_vuzp1q_u16:
+; CHECK-LABEL: test_undef_vuzp1q_u16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1824,7 +1821,7 @@ entry:
}
define <4 x i32> @test_undef_vuzp1q_u32(<4 x i32> %a) {
-; CHECK: test_undef_vuzp1q_u32:
+; CHECK-LABEL: test_undef_vuzp1q_u32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1832,7 +1829,7 @@ entry:
}
define <4 x float> @test_undef_vuzp1q_f32(<4 x float> %a) {
-; CHECK: test_undef_vuzp1q_f32:
+; CHECK-LABEL: test_undef_vuzp1q_f32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1840,7 +1837,7 @@ entry:
}
define <8 x i8> @test_undef_vuzp1_p8(<8 x i8> %a) {
-; CHECK: test_undef_vuzp1_p8:
+; CHECK-LABEL: test_undef_vuzp1_p8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1848,7 +1845,7 @@ entry:
}
define <16 x i8> @test_undef_vuzp1q_p8(<16 x i8> %a) {
-; CHECK: test_undef_vuzp1q_p8:
+; CHECK-LABEL: test_undef_vuzp1q_p8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1856,7 +1853,7 @@ entry:
}
define <4 x i16> @test_undef_vuzp1_p16(<4 x i16> %a) {
-; CHECK: test_undef_vuzp1_p16:
+; CHECK-LABEL: test_undef_vuzp1_p16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1864,7 +1861,7 @@ entry:
}
define <8 x i16> @test_undef_vuzp1q_p16(<8 x i16> %a) {
-; CHECK: test_undef_vuzp1q_p16:
+; CHECK-LABEL: test_undef_vuzp1q_p16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1872,7 +1869,7 @@ entry:
}
define <8 x i8> @test_undef_vuzp2_s8(<8 x i8> %a) {
-; CHECK: test_undef_vuzp2_s8:
+; CHECK-LABEL: test_undef_vuzp2_s8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1880,7 +1877,7 @@ entry:
}
define <16 x i8> @test_undef_vuzp2q_s8(<16 x i8> %a) {
-; CHECK: test_undef_vuzp2q_s8:
+; CHECK-LABEL: test_undef_vuzp2q_s8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -1888,7 +1885,7 @@ entry:
}
define <4 x i16> @test_undef_vuzp2_s16(<4 x i16> %a) {
-; CHECK: test_undef_vuzp2_s16:
+; CHECK-LABEL: test_undef_vuzp2_s16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1896,7 +1893,7 @@ entry:
}
define <8 x i16> @test_undef_vuzp2q_s16(<8 x i16> %a) {
-; CHECK: test_undef_vuzp2q_s16:
+; CHECK-LABEL: test_undef_vuzp2q_s16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1904,7 +1901,7 @@ entry:
}
define <4 x i32> @test_undef_vuzp2q_s32(<4 x i32> %a) {
-; CHECK: test_undef_vuzp2q_s32:
+; CHECK-LABEL: test_undef_vuzp2q_s32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1912,7 +1909,7 @@ entry:
}
define <8 x i8> @test_undef_vuzp2_u8(<8 x i8> %a) {
-; CHECK: test_undef_vuzp2_u8:
+; CHECK-LABEL: test_undef_vuzp2_u8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1920,7 +1917,7 @@ entry:
}
define <16 x i8> @test_undef_vuzp2q_u8(<16 x i8> %a) {
-; CHECK: test_undef_vuzp2q_u8:
+; CHECK-LABEL: test_undef_vuzp2q_u8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -1928,7 +1925,7 @@ entry:
}
define <4 x i16> @test_undef_vuzp2_u16(<4 x i16> %a) {
-; CHECK: test_undef_vuzp2_u16:
+; CHECK-LABEL: test_undef_vuzp2_u16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1936,7 +1933,7 @@ entry:
}
define <8 x i16> @test_undef_vuzp2q_u16(<8 x i16> %a) {
-; CHECK: test_undef_vuzp2q_u16:
+; CHECK-LABEL: test_undef_vuzp2q_u16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1944,7 +1941,7 @@ entry:
}
define <4 x i32> @test_undef_vuzp2q_u32(<4 x i32> %a) {
-; CHECK: test_undef_vuzp2q_u32:
+; CHECK-LABEL: test_undef_vuzp2q_u32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1952,7 +1949,7 @@ entry:
}
define <4 x float> @test_undef_vuzp2q_f32(<4 x float> %a) {
-; CHECK: test_undef_vuzp2q_f32:
+; CHECK-LABEL: test_undef_vuzp2q_f32:
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1960,7 +1957,7 @@ entry:
}
define <8 x i8> @test_undef_vuzp2_p8(<8 x i8> %a) {
-; CHECK: test_undef_vuzp2_p8:
+; CHECK-LABEL: test_undef_vuzp2_p8:
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1968,7 +1965,7 @@ entry:
}
define <16 x i8> @test_undef_vuzp2q_p8(<16 x i8> %a) {
-; CHECK: test_undef_vuzp2q_p8:
+; CHECK-LABEL: test_undef_vuzp2q_p8:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -1976,7 +1973,7 @@ entry:
}
define <4 x i16> @test_undef_vuzp2_p16(<4 x i16> %a) {
-; CHECK: test_undef_vuzp2_p16:
+; CHECK-LABEL: test_undef_vuzp2_p16:
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1984,7 +1981,7 @@ entry:
}
define <8 x i16> @test_undef_vuzp2q_p16(<8 x i16> %a) {
-; CHECK: test_undef_vuzp2q_p16:
+; CHECK-LABEL: test_undef_vuzp2q_p16:
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -1992,7 +1989,7 @@ entry:
}
define <8 x i8> @test_undef_vzip1_s8(<8 x i8> %a) {
-; CHECK: test_undef_vzip1_s8:
+; CHECK-LABEL: test_undef_vzip1_s8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -2000,7 +1997,7 @@ entry:
}
define <16 x i8> @test_undef_vzip1q_s8(<16 x i8> %a) {
-; CHECK: test_undef_vzip1q_s8:
+; CHECK-LABEL: test_undef_vzip1q_s8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -2008,7 +2005,7 @@ entry:
}
define <4 x i16> @test_undef_vzip1_s16(<4 x i16> %a) {
-; CHECK: test_undef_vzip1_s16:
+; CHECK-LABEL: test_undef_vzip1_s16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -2016,7 +2013,7 @@ entry:
}
define <8 x i16> @test_undef_vzip1q_s16(<8 x i16> %a) {
-; CHECK: test_undef_vzip1q_s16:
+; CHECK-LABEL: test_undef_vzip1q_s16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -2024,7 +2021,7 @@ entry:
}
define <4 x i32> @test_undef_vzip1q_s32(<4 x i32> %a) {
-; CHECK: test_undef_vzip1q_s32:
+; CHECK-LABEL: test_undef_vzip1q_s32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -2032,7 +2029,7 @@ entry:
}
define <8 x i8> @test_undef_vzip1_u8(<8 x i8> %a) {
-; CHECK: test_undef_vzip1_u8:
+; CHECK-LABEL: test_undef_vzip1_u8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -2040,7 +2037,7 @@ entry:
}
define <16 x i8> @test_undef_vzip1q_u8(<16 x i8> %a) {
-; CHECK: test_undef_vzip1q_u8:
+; CHECK-LABEL: test_undef_vzip1q_u8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -2048,7 +2045,7 @@ entry:
}
define <4 x i16> @test_undef_vzip1_u16(<4 x i16> %a) {
-; CHECK: test_undef_vzip1_u16:
+; CHECK-LABEL: test_undef_vzip1_u16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -2056,7 +2053,7 @@ entry:
}
define <8 x i16> @test_undef_vzip1q_u16(<8 x i16> %a) {
-; CHECK: test_undef_vzip1q_u16:
+; CHECK-LABEL: test_undef_vzip1q_u16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -2064,7 +2061,7 @@ entry:
}
define <4 x i32> @test_undef_vzip1q_u32(<4 x i32> %a) {
-; CHECK: test_undef_vzip1q_u32:
+; CHECK-LABEL: test_undef_vzip1q_u32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -2072,7 +2069,7 @@ entry:
}
define <4 x float> @test_undef_vzip1q_f32(<4 x float> %a) {
-; CHECK: test_undef_vzip1q_f32:
+; CHECK-LABEL: test_undef_vzip1q_f32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -2080,7 +2077,7 @@ entry:
}
define <8 x i8> @test_undef_vzip1_p8(<8 x i8> %a) {
-; CHECK: test_undef_vzip1_p8:
+; CHECK-LABEL: test_undef_vzip1_p8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -2088,7 +2085,7 @@ entry:
}
define <16 x i8> @test_undef_vzip1q_p8(<16 x i8> %a) {
-; CHECK: test_undef_vzip1q_p8:
+; CHECK-LABEL: test_undef_vzip1q_p8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -2096,7 +2093,7 @@ entry:
}
define <4 x i16> @test_undef_vzip1_p16(<4 x i16> %a) {
-; CHECK: test_undef_vzip1_p16:
+; CHECK-LABEL: test_undef_vzip1_p16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -2104,7 +2101,7 @@ entry:
}
define <8 x i16> @test_undef_vzip1q_p16(<8 x i16> %a) {
-; CHECK: test_undef_vzip1q_p16:
+; CHECK-LABEL: test_undef_vzip1q_p16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -2112,7 +2109,7 @@ entry:
}
define <8 x i8> @test_undef_vzip2_s8(<8 x i8> %a) {
-; CHECK: test_undef_vzip2_s8:
+; CHECK-LABEL: test_undef_vzip2_s8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -2120,7 +2117,7 @@ entry:
}
define <16 x i8> @test_undef_vzip2q_s8(<16 x i8> %a) {
-; CHECK: test_undef_vzip2q_s8:
+; CHECK-LABEL: test_undef_vzip2q_s8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -2128,7 +2125,7 @@ entry:
}
define <4 x i16> @test_undef_vzip2_s16(<4 x i16> %a) {
-; CHECK: test_undef_vzip2_s16:
+; CHECK-LABEL: test_undef_vzip2_s16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -2136,7 +2133,7 @@ entry:
}
define <8 x i16> @test_undef_vzip2q_s16(<8 x i16> %a) {
-; CHECK: test_undef_vzip2q_s16:
+; CHECK-LABEL: test_undef_vzip2q_s16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -2144,7 +2141,7 @@ entry:
}
define <4 x i32> @test_undef_vzip2q_s32(<4 x i32> %a) {
-; CHECK: test_undef_vzip2q_s32:
+; CHECK-LABEL: test_undef_vzip2q_s32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -2152,7 +2149,7 @@ entry:
}
define <8 x i8> @test_undef_vzip2_u8(<8 x i8> %a) {
-; CHECK: test_undef_vzip2_u8:
+; CHECK-LABEL: test_undef_vzip2_u8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -2160,7 +2157,7 @@ entry:
}
define <16 x i8> @test_undef_vzip2q_u8(<16 x i8> %a) {
-; CHECK: test_undef_vzip2q_u8:
+; CHECK-LABEL: test_undef_vzip2q_u8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -2168,7 +2165,7 @@ entry:
}
define <4 x i16> @test_undef_vzip2_u16(<4 x i16> %a) {
-; CHECK: test_undef_vzip2_u16:
+; CHECK-LABEL: test_undef_vzip2_u16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -2176,7 +2173,7 @@ entry:
}
define <8 x i16> @test_undef_vzip2q_u16(<8 x i16> %a) {
-; CHECK: test_undef_vzip2q_u16:
+; CHECK-LABEL: test_undef_vzip2q_u16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -2184,7 +2181,7 @@ entry:
}
define <4 x i32> @test_undef_vzip2q_u32(<4 x i32> %a) {
-; CHECK: test_undef_vzip2q_u32:
+; CHECK-LABEL: test_undef_vzip2q_u32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -2192,7 +2189,7 @@ entry:
}
define <4 x float> @test_undef_vzip2q_f32(<4 x float> %a) {
-; CHECK: test_undef_vzip2q_f32:
+; CHECK-LABEL: test_undef_vzip2q_f32:
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -2200,7 +2197,7 @@ entry:
}
define <8 x i8> @test_undef_vzip2_p8(<8 x i8> %a) {
-; CHECK: test_undef_vzip2_p8:
+; CHECK-LABEL: test_undef_vzip2_p8:
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -2208,7 +2205,7 @@ entry:
}
define <16 x i8> @test_undef_vzip2q_p8(<16 x i8> %a) {
-; CHECK: test_undef_vzip2q_p8:
+; CHECK-LABEL: test_undef_vzip2q_p8:
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -2216,7 +2213,7 @@ entry:
}
define <4 x i16> @test_undef_vzip2_p16(<4 x i16> %a) {
-; CHECK: test_undef_vzip2_p16:
+; CHECK-LABEL: test_undef_vzip2_p16:
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -2224,7 +2221,7 @@ entry:
}
define <8 x i16> @test_undef_vzip2q_p16(<8 x i16> %a) {
-; CHECK: test_undef_vzip2q_p16:
+; CHECK-LABEL: test_undef_vzip2q_p16:
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -2232,7 +2229,7 @@ entry:
}
define <8 x i8> @test_undef_vtrn1_s8(<8 x i8> %a) {
-; CHECK: test_undef_vtrn1_s8:
+; CHECK-LABEL: test_undef_vtrn1_s8:
; CHECK: ret
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -2240,7 +2237,7 @@ entry:
}
define <16 x i8> @test_undef_vtrn1q_s8(<16 x i8> %a) {
-; CHECK: test_undef_vtrn1q_s8:
+; CHECK-LABEL: test_undef_vtrn1q_s8:
; CHECK: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -2248,7 +2245,7 @@ entry:
}
define <4 x i16> @test_undef_vtrn1_s16(<4 x i16> %a) {
-; CHECK: test_undef_vtrn1_s16:
+; CHECK-LABEL: test_undef_vtrn1_s16:
; CHECK: ret
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -2256,7 +2253,7 @@ entry:
}
define <8 x i16> @test_undef_vtrn1q_s16(<8 x i16> %a) {
-; CHECK: test_undef_vtrn1q_s16:
+; CHECK-LABEL: test_undef_vtrn1q_s16:
; CHECK: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -2264,7 +2261,7 @@ entry:
}
define <4 x i32> @test_undef_vtrn1q_s32(<4 x i32> %a) {
-; CHECK: test_undef_vtrn1q_s32:
+; CHECK-LABEL: test_undef_vtrn1q_s32:
; CHECK: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -2272,7 +2269,7 @@ entry:
}
define <8 x i8> @test_undef_vtrn1_u8(<8 x i8> %a) {
-; CHECK: test_undef_vtrn1_u8:
+; CHECK-LABEL: test_undef_vtrn1_u8:
; CHECK: ret
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -2280,7 +2277,7 @@ entry:
}
define <16 x i8> @test_undef_vtrn1q_u8(<16 x i8> %a) {
-; CHECK: test_undef_vtrn1q_u8:
+; CHECK-LABEL: test_undef_vtrn1q_u8:
; CHECK: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -2288,7 +2285,7 @@ entry:
}
define <4 x i16> @test_undef_vtrn1_u16(<4 x i16> %a) {
-; CHECK: test_undef_vtrn1_u16:
+; CHECK-LABEL: test_undef_vtrn1_u16:
; CHECK: ret
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -2296,7 +2293,7 @@ entry:
}
define <8 x i16> @test_undef_vtrn1q_u16(<8 x i16> %a) {
-; CHECK: test_undef_vtrn1q_u16:
+; CHECK-LABEL: test_undef_vtrn1q_u16:
; CHECK: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -2304,7 +2301,7 @@ entry:
}
define <4 x i32> @test_undef_vtrn1q_u32(<4 x i32> %a) {
-; CHECK: test_undef_vtrn1q_u32:
+; CHECK-LABEL: test_undef_vtrn1q_u32:
; CHECK: ret
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -2312,7 +2309,7 @@ entry:
}
define <4 x float> @test_undef_vtrn1q_f32(<4 x float> %a) {
-; CHECK: test_undef_vtrn1q_f32:
+; CHECK-LABEL: test_undef_vtrn1q_f32:
; CHECK: ret
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -2320,7 +2317,7 @@ entry:
}
define <8 x i8> @test_undef_vtrn1_p8(<8 x i8> %a) {
-; CHECK: test_undef_vtrn1_p8:
+; CHECK-LABEL: test_undef_vtrn1_p8:
; CHECK: ret
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -2328,7 +2325,7 @@ entry:
}
define <16 x i8> @test_undef_vtrn1q_p8(<16 x i8> %a) {
-; CHECK: test_undef_vtrn1q_p8:
+; CHECK-LABEL: test_undef_vtrn1q_p8:
; CHECK: ret
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -2336,7 +2333,7 @@ entry:
}
define <4 x i16> @test_undef_vtrn1_p16(<4 x i16> %a) {
-; CHECK: test_undef_vtrn1_p16:
+; CHECK-LABEL: test_undef_vtrn1_p16:
; CHECK: ret
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -2344,7 +2341,7 @@ entry:
}
define <8 x i16> @test_undef_vtrn1q_p16(<8 x i16> %a) {
-; CHECK: test_undef_vtrn1q_p16:
+; CHECK-LABEL: test_undef_vtrn1q_p16:
; CHECK: ret
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -2352,7 +2349,7 @@ entry:
}
define <8 x i8> @test_undef_vtrn2_s8(<8 x i8> %a) {
-; CHECK: test_undef_vtrn2_s8:
+; CHECK-LABEL: test_undef_vtrn2_s8:
; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -2360,7 +2357,7 @@ entry:
}
define <16 x i8> @test_undef_vtrn2q_s8(<16 x i8> %a) {
-; CHECK: test_undef_vtrn2q_s8:
+; CHECK-LABEL: test_undef_vtrn2q_s8:
; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -2368,7 +2365,7 @@ entry:
}
define <4 x i16> @test_undef_vtrn2_s16(<4 x i16> %a) {
-; CHECK: test_undef_vtrn2_s16:
+; CHECK-LABEL: test_undef_vtrn2_s16:
; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -2376,7 +2373,7 @@ entry:
}
define <8 x i16> @test_undef_vtrn2q_s16(<8 x i16> %a) {
-; CHECK: test_undef_vtrn2q_s16:
+; CHECK-LABEL: test_undef_vtrn2q_s16:
; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -2384,7 +2381,7 @@ entry:
}
define <4 x i32> @test_undef_vtrn2q_s32(<4 x i32> %a) {
-; CHECK: test_undef_vtrn2q_s32:
+; CHECK-LABEL: test_undef_vtrn2q_s32:
; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -2392,7 +2389,7 @@ entry:
}
define <8 x i8> @test_undef_vtrn2_u8(<8 x i8> %a) {
-; CHECK: test_undef_vtrn2_u8:
+; CHECK-LABEL: test_undef_vtrn2_u8:
; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -2400,7 +2397,7 @@ entry:
}
define <16 x i8> @test_undef_vtrn2q_u8(<16 x i8> %a) {
-; CHECK: test_undef_vtrn2q_u8:
+; CHECK-LABEL: test_undef_vtrn2q_u8:
; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -2408,7 +2405,7 @@ entry:
}
define <4 x i16> @test_undef_vtrn2_u16(<4 x i16> %a) {
-; CHECK: test_undef_vtrn2_u16:
+; CHECK-LABEL: test_undef_vtrn2_u16:
; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -2416,7 +2413,7 @@ entry:
}
define <8 x i16> @test_undef_vtrn2q_u16(<8 x i16> %a) {
-; CHECK: test_undef_vtrn2q_u16:
+; CHECK-LABEL: test_undef_vtrn2q_u16:
; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -2424,7 +2421,7 @@ entry:
}
define <4 x i32> @test_undef_vtrn2q_u32(<4 x i32> %a) {
-; CHECK: test_undef_vtrn2q_u32:
+; CHECK-LABEL: test_undef_vtrn2q_u32:
; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -2432,7 +2429,7 @@ entry:
}
define <4 x float> @test_undef_vtrn2q_f32(<4 x float> %a) {
-; CHECK: test_undef_vtrn2q_f32:
+; CHECK-LABEL: test_undef_vtrn2q_f32:
; CHECK: rev64 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
%shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -2440,7 +2437,7 @@ entry:
}
define <8 x i8> @test_undef_vtrn2_p8(<8 x i8> %a) {
-; CHECK: test_undef_vtrn2_p8:
+; CHECK-LABEL: test_undef_vtrn2_p8:
; CHECK: rev16 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
%shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -2448,7 +2445,7 @@ entry:
}
define <16 x i8> @test_undef_vtrn2q_p8(<16 x i8> %a) {
-; CHECK: test_undef_vtrn2q_p8:
+; CHECK-LABEL: test_undef_vtrn2q_p8:
; CHECK: rev16 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
%shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -2456,7 +2453,7 @@ entry:
}
define <4 x i16> @test_undef_vtrn2_p16(<4 x i16> %a) {
-; CHECK: test_undef_vtrn2_p16:
+; CHECK-LABEL: test_undef_vtrn2_p16:
; CHECK: rev32 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
%shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
@@ -2464,7 +2461,7 @@ entry:
}
define <8 x i16> @test_undef_vtrn2q_p16(<8 x i16> %a) {
-; CHECK: test_undef_vtrn2q_p16:
+; CHECK-LABEL: test_undef_vtrn2q_p16:
; CHECK: rev32 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -2472,7 +2469,7 @@ entry:
}
define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp_s8:
+; CHECK-LABEL: test_vuzp_s8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2484,7 +2481,7 @@ entry:
}
define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp_s16:
+; CHECK-LABEL: test_vuzp_s16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2496,9 +2493,9 @@ entry:
}
define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vuzp_s32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vuzp_s32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
%vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2508,7 +2505,7 @@ entry:
}
define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp_u8:
+; CHECK-LABEL: test_vuzp_u8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2520,7 +2517,7 @@ entry:
}
define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp_u16:
+; CHECK-LABEL: test_vuzp_u16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2532,9 +2529,9 @@ entry:
}
define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vuzp_u32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vuzp_u32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
%vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2544,9 +2541,9 @@ entry:
}
define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vuzp_f32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vuzp_f32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
%vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -2556,7 +2553,7 @@ entry:
}
define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vuzp_p8:
+; CHECK-LABEL: test_vuzp_p8:
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2568,7 +2565,7 @@ entry:
}
define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vuzp_p16:
+; CHECK-LABEL: test_vuzp_p16:
; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2580,7 +2577,7 @@ entry:
}
define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzpq_s8:
+; CHECK-LABEL: test_vuzpq_s8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -2592,7 +2589,7 @@ entry:
}
define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzpq_s16:
+; CHECK-LABEL: test_vuzpq_s16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -2604,7 +2601,7 @@ entry:
}
define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vuzpq_s32:
+; CHECK-LABEL: test_vuzpq_s32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -2616,7 +2613,7 @@ entry:
}
define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzpq_u8:
+; CHECK-LABEL: test_vuzpq_u8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -2628,7 +2625,7 @@ entry:
}
define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzpq_u16:
+; CHECK-LABEL: test_vuzpq_u16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -2640,7 +2637,7 @@ entry:
}
define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vuzpq_u32:
+; CHECK-LABEL: test_vuzpq_u32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -2652,7 +2649,7 @@ entry:
}
define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vuzpq_f32:
+; CHECK-LABEL: test_vuzpq_f32:
; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -2664,7 +2661,7 @@ entry:
}
define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vuzpq_p8:
+; CHECK-LABEL: test_vuzpq_p8:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -2676,7 +2673,7 @@ entry:
}
define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vuzpq_p16:
+; CHECK-LABEL: test_vuzpq_p16:
; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -2688,7 +2685,7 @@ entry:
}
define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip_s8:
+; CHECK-LABEL: test_vzip_s8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2700,7 +2697,7 @@ entry:
}
define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip_s16:
+; CHECK-LABEL: test_vzip_s16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2712,9 +2709,9 @@ entry:
}
define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vzip_s32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vzip_s32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
%vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2724,7 +2721,7 @@ entry:
}
define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip_u8:
+; CHECK-LABEL: test_vzip_u8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2736,7 +2733,7 @@ entry:
}
define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip_u16:
+; CHECK-LABEL: test_vzip_u16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2748,9 +2745,9 @@ entry:
}
define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vzip_u32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vzip_u32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
%vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2760,9 +2757,9 @@ entry:
}
define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vzip_f32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vzip_f32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vzip.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
%vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -2772,7 +2769,7 @@ entry:
}
define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vzip_p8:
+; CHECK-LABEL: test_vzip_p8:
; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2784,7 +2781,7 @@ entry:
}
define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vzip_p16:
+; CHECK-LABEL: test_vzip_p16:
; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2796,7 +2793,7 @@ entry:
}
define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzipq_s8:
+; CHECK-LABEL: test_vzipq_s8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -2808,7 +2805,7 @@ entry:
}
define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzipq_s16:
+; CHECK-LABEL: test_vzipq_s16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -2820,7 +2817,7 @@ entry:
}
define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vzipq_s32:
+; CHECK-LABEL: test_vzipq_s32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -2832,7 +2829,7 @@ entry:
}
define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzipq_u8:
+; CHECK-LABEL: test_vzipq_u8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -2844,7 +2841,7 @@ entry:
}
define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzipq_u16:
+; CHECK-LABEL: test_vzipq_u16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -2856,7 +2853,7 @@ entry:
}
define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vzipq_u32:
+; CHECK-LABEL: test_vzipq_u32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -2868,7 +2865,7 @@ entry:
}
define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vzipq_f32:
+; CHECK-LABEL: test_vzipq_f32:
; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -2880,7 +2877,7 @@ entry:
}
define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vzipq_p8:
+; CHECK-LABEL: test_vzipq_p8:
; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -2892,7 +2889,7 @@ entry:
}
define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vzipq_p16:
+; CHECK-LABEL: test_vzipq_p16:
; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -2904,7 +2901,7 @@ entry:
}
define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn_s8:
+; CHECK-LABEL: test_vtrn_s8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2916,7 +2913,7 @@ entry:
}
define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn_s16:
+; CHECK-LABEL: test_vtrn_s16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2928,9 +2925,9 @@ entry:
}
define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vtrn_s32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vtrn_s32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
%vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2940,7 +2937,7 @@ entry:
}
define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn_u8:
+; CHECK-LABEL: test_vtrn_u8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -2952,7 +2949,7 @@ entry:
}
define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn_u16:
+; CHECK-LABEL: test_vtrn_u16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -2964,9 +2961,9 @@ entry:
}
define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vtrn_u32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vtrn_u32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
%vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
@@ -2976,9 +2973,9 @@ entry:
}
define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vtrn_f32:
-; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
-; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+; CHECK-LABEL: test_vtrn_f32:
+; CHECK: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
entry:
%vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
%vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
@@ -2988,7 +2985,7 @@ entry:
}
define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtrn_p8:
+; CHECK-LABEL: test_vtrn_p8:
; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
@@ -3000,7 +2997,7 @@ entry:
}
define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vtrn_p16:
+; CHECK-LABEL: test_vtrn_p16:
; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
@@ -3012,7 +3009,7 @@ entry:
}
define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrnq_s8:
+; CHECK-LABEL: test_vtrnq_s8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -3024,7 +3021,7 @@ entry:
}
define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrnq_s16:
+; CHECK-LABEL: test_vtrnq_s16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -3036,7 +3033,7 @@ entry:
}
define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vtrnq_s32:
+; CHECK-LABEL: test_vtrnq_s32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -3048,7 +3045,7 @@ entry:
}
define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrnq_u8:
+; CHECK-LABEL: test_vtrnq_u8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -3060,7 +3057,7 @@ entry:
}
define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrnq_u16:
+; CHECK-LABEL: test_vtrnq_u16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -3072,7 +3069,7 @@ entry:
}
define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vtrnq_u32:
+; CHECK-LABEL: test_vtrnq_u32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -3084,7 +3081,7 @@ entry:
}
define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vtrnq_f32:
+; CHECK-LABEL: test_vtrnq_f32:
; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
@@ -3096,7 +3093,7 @@ entry:
}
define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vtrnq_p8:
+; CHECK-LABEL: test_vtrnq_p8:
; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
@@ -3108,7 +3105,7 @@ entry:
}
define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vtrnq_p16:
+; CHECK-LABEL: test_vtrnq_p16:
; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
@@ -3120,7 +3117,7 @@ entry:
}
define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
-; CHECK: test_uzp:
+; CHECK-LABEL: test_uzp:
%vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -3128,7 +3125,4 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
%.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
ret %struct.uint8x8x2_t %.fca.0.1.insert
-; CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-; CHECK-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}
diff --git a/test/CodeGen/AArch64/neon-rounding-halving-add.ll b/test/CodeGen/AArch64/neon-rounding-halving-add.ll
deleted file mode 100644
index 009da3b..0000000
--- a/test/CodeGen/AArch64/neon-rounding-halving-add.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_urhadd_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: urhadd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_srhadd_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: srhadd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_urhadd_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: urhadd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_srhadd_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: srhadd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_urhadd_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: urhadd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_srhadd_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: srhadd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_urhadd_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: urhadd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_srhadd_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: srhadd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_urhadd_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: urhadd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_srhadd_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: srhadd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_urhadd_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: urhadd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_srhadd_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: srhadd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-
diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll
deleted file mode 100644
index 5b4ec28..0000000
--- a/test/CodeGen/AArch64/neon-rounding-shift.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_urshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: urshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_srshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: srshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_urshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: urshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_srshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: srshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_urshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: urshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_srshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: srshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_urshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: urshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_srshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: srshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_urshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: urshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_srshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: srshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_urshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: urshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_srshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: srshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_urshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: urshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_srshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: srshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
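The deleted rounding-shift tests follow the same scheme. URSHL/SRSHL shift each lane left by the signed per-lane amount in the second operand, and a negative amount gives a rounding right shift. A sketch under the same RUN line as above, again assuming the aarch64-prefixed name llvm.aarch64.neon.urshl:

declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @urshl_sketch(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: urshl_sketch:
; CHECK: urshl v0.8b, v0.8b, v1.8b
  ; Per-lane shift; negative lanes in %rhs shift right with rounding.
  %r = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %r
}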
diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll
deleted file mode 100644
index fc60d90..0000000
--- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll
+++ /dev/null
@@ -1,241 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqadd_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqadd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqadd_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqadd v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqadd_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqadd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqadd_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqadd v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqadd_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqadd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqadd_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqadd v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqadd_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqadd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqadd_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqadd v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqadd_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqadd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqadd_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqadd v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqadd_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqadd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqadd_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqadd v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-
-
-declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqadd_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqadd v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqadd_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqadd v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqsub_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqsub v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqsub_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqsub v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqsub_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqsub v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqsub_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqsub v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqsub_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqsub v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqsub_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqsub v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqsub_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqsub v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqsub_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqsub v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqsub_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqsub v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqsub_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqsub v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqsub_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqsub v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqsub_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqsub v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqsub_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqsub v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqsub_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqsub v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
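The saturating add/sub coverage deleted above is also reachable without any target intrinsic: LLVM's generic saturating-arithmetic intrinsics (llvm.sadd.sat and friends, present in later releases and assumed here) select straight to SQADD/UQADD/SQSUB/UQSUB on AArch64. A sketch:

declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @sqadd_sketch(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: sqadd_sketch:
; CHECK: sqadd v0.8b, v0.8b, v1.8b
  ; Signed saturating add: lanes clamp at INT8_MIN/INT8_MAX instead of wrapping.
  %r = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %r
}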
diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
deleted file mode 100644
index d89262c..0000000
--- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqrshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqrshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqrshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqrshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqrshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqrshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqrshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqrshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqrshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqrshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqrshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqrshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqrshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqrshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqrshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqrshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqrshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqrshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqrshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqrshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqrshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqrshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqrshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqrshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqrshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqrshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqrshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqrshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll
deleted file mode 100644
index 11009fb..0000000
--- a/test/CodeGen/AArch64/neon-saturating-shift.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_uqshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: uqshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sqshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sqshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_uqshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: uqshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sqshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sqshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_uqshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: uqshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sqshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sqshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_uqshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: uqshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sqshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sqshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_uqshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: uqshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sqshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sqshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_uqshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: uqshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sqshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sqshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_uqshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: uqshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sqshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sqshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
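The two shift files deleted above differ only in which modifiers they stack on the base shift: q saturates the result to the lane range, r rounds the bits shifted out. A sketch of the pair side by side, assuming the aarch64-prefixed names llvm.aarch64.neon.sqshl and sqrshl:

declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @sqshl_sketch(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: sqshl v0.8b, v0.8b, v1.8b
  ; Saturating shift: out-of-range results clamp rather than wrap.
  %r = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %r
}

define <8 x i8> @sqrshl_sketch(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK: sqrshl v0.8b, v0.8b, v1.8b
  ; Same, with rounding applied before saturation on right shifts.
  %r = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %r
}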
diff --git a/test/CodeGen/AArch64/neon-scalar-abs.ll b/test/CodeGen/AArch64/neon-scalar-abs.ll
deleted file mode 100644
index 03a89e04..0000000
--- a/test/CodeGen/AArch64/neon-scalar-abs.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define i64 @test_vabsd_s64(i64 %a) {
-; CHECK: test_vabsd_s64
-; CHECK: abs {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vabs1.i = tail call <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64> %vabs.i)
- %0 = extractelement <1 x i64> %vabs1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64>)
-
-define i8 @test_vqabsb_s8(i8 %a) {
-; CHECK: test_vqabsb_s8
-; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}}
-entry:
- %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0
- %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i)
- %0 = extractelement <1 x i8> %vqabs1.i, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>)
-
-define i16 @test_vqabsh_s16(i16 %a) {
-; CHECK: test_vqabsh_s16
-; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i)
- %0 = extractelement <1 x i16> %vqabs1.i, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>)
-
-define i32 @test_vqabss_s32(i32 %a) {
-; CHECK: test_vqabss_s32
-; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i)
- %0 = extractelement <1 x i32> %vqabs1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>)
-
-define i64 @test_vqabsd_s64(i64 %a) {
-; CHECK: test_vqabsd_s64
-; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i)
- %0 = extractelement <1 x i64> %vqabs1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)
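Note how every deleted scalar-abs test wraps the scalar in a <1 x iN> vector just to call the intrinsic. The merged backend's scalar intrinsics take the scalar type directly; a sketch assuming the name llvm.aarch64.neon.sqabs.i32:

declare i32 @llvm.aarch64.neon.sqabs.i32(i32)

define i32 @sqabs_sketch(i32 %a) {
; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}}
  ; No insertelement/extractelement shuffle: the intrinsic is scalar-typed.
  %r = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
  ret i32 %r
}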
diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll
deleted file mode 100644
index 4f322e0..0000000
--- a/test/CodeGen/AArch64/neon-scalar-add-sub.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- %tmp3 = add <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- %tmp3 = sub <1 x i64> %A, %B;
- ret <1 x i64> %tmp3
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_add_v1i64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uadd_v1i64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sub_v1i64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_usub_v1i64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-
-
diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
index 247514c..32f5962 100644
--- a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -4,7 +4,7 @@ declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)
define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
- ; CHECK: test_fmla_ss4S
+ ; CHECK-LABEL: test_fmla_ss4S
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
@@ -12,7 +12,7 @@ define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
}
define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
- ; CHECK: test_fmla_ss4S_swap
+ ; CHECK-LABEL: test_fmla_ss4S_swap
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
@@ -20,7 +20,7 @@ define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) {
}
define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
- ; CHECK: test_fmla_ss2S
+ ; CHECK-LABEL: test_fmla_ss2S
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
%tmp1 = extractelement <2 x float> %v, i32 1
%tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
@@ -28,15 +28,15 @@ define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) {
}
define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) {
- ; CHECK: test_fmla_ddD
- ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+ ; CHECK-LABEL: test_fmla_ddD
+ ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
%tmp1 = extractelement <1 x double> %v, i32 0
%tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
ret double %tmp2
}
define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
- ; CHECK: test_fmla_dd2D
+ ; CHECK-LABEL: test_fmla_dd2D
; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
@@ -44,7 +44,7 @@ define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) {
}
define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
- ; CHECK: test_fmla_dd2D_swap
+ ; CHECK-LABEL: test_fmla_dd2D_swap
; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
@@ -52,7 +52,7 @@ define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) {
}
define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
- ; CHECK: test_fmls_ss4S
+ ; CHECK-LABEL: test_fmls_ss4S
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = fsub float -0.0, %tmp1
@@ -61,7 +61,7 @@ define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) {
}
define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
- ; CHECK: test_fmls_ss4S_swap
+ ; CHECK-LABEL: test_fmls_ss4S_swap
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
%tmp1 = extractelement <4 x float> %v, i32 3
%tmp2 = fsub float -0.0, %tmp1
@@ -71,7 +71,7 @@ define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) {
define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
- ; CHECK: test_fmls_ss2S
+ ; CHECK-LABEL: test_fmls_ss2S
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
%tmp1 = extractelement <2 x float> %v, i32 1
%tmp2 = fsub float -0.0, %tmp1
@@ -80,8 +80,8 @@ define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) {
}
define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
- ; CHECK: test_fmls_ddD
- ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+ ; CHECK-LABEL: test_fmls_ddD
+ ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
%tmp1 = extractelement <1 x double> %v, i32 0
%tmp2 = fsub double -0.0, %tmp1
%tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
@@ -89,7 +89,7 @@ define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) {
}
define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
- ; CHECK: test_fmls_dd2D
+ ; CHECK-LABEL: test_fmls_dd2D
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = fsub double -0.0, %tmp1
@@ -98,7 +98,7 @@ define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) {
}
define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
- ; CHECK: test_fmls_dd2D_swap
+ ; CHECK-LABEL: test_fmls_dd2D_swap
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
%tmp1 = extractelement <2 x double> %v, i32 1
%tmp2 = fsub double -0.0, %tmp1
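The hunks above tighten bare CHECK function-name lines into CHECK-LABEL. CHECK-LABEL splits the llc output at each labeled match, so a CHECK between two labels can only match within that function, and a stray fmla emitted for a neighboring function cannot satisfy it. The ddD cases also gain a regex alternation, presumably because a lane-0 <1 x double> operand may legally select either the by-element form or a plain fmadd/fmsub. Illustrative check lines only:

; CHECK-LABEL: test_fmla_ss4S    ; anchors matching at this function's label
; CHECK: fmla                    ; must now match before the next CHECK-LABEL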
diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
deleted file mode 100644
index c9128e7..0000000
--- a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
+++ /dev/null
@@ -1,124 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
- ; CHECK: test_fmul_lane_ss2S
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
- %tmp1 = extractelement <2 x float> %v, i32 1
- %tmp2 = fmul float %a, %tmp1;
- ret float %tmp2;
-}
-
-define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) {
- ; CHECK: test_fmul_lane_ss2S_swap
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
- %tmp1 = extractelement <2 x float> %v, i32 1
- %tmp2 = fmul float %tmp1, %a;
- ret float %tmp2;
-}
-
-
-define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) {
- ; CHECK: test_fmul_lane_ss4S
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
- %tmp1 = extractelement <4 x float> %v, i32 3
- %tmp2 = fmul float %a, %tmp1;
- ret float %tmp2;
-}
-
-define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) {
- ; CHECK: test_fmul_lane_ss4S_swap
- ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
- %tmp1 = extractelement <4 x float> %v, i32 3
- %tmp2 = fmul float %tmp1, %a;
- ret float %tmp2;
-}
-
-
-define double @test_fmul_lane_ddD(double %a, <1 x double> %v) {
- ; CHECK: test_fmul_lane_ddD
- ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
- %tmp1 = extractelement <1 x double> %v, i32 0
- %tmp2 = fmul double %a, %tmp1;
- ret double %tmp2;
-}
-
-
-
-define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) {
- ; CHECK: test_fmul_lane_dd2D
- ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
- %tmp1 = extractelement <2 x double> %v, i32 1
- %tmp2 = fmul double %a, %tmp1;
- ret double %tmp2;
-}
-
-
-define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) {
- ; CHECK: test_fmul_lane_dd2D_swap
- ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
- %tmp1 = extractelement <2 x double> %v, i32 1
- %tmp2 = fmul double %tmp1, %a;
- ret double %tmp2;
-}
-
-declare float @llvm.aarch64.neon.vmulx.f32(float, float)
-
-define float @test_fmulx_lane_f32(float %a, <2 x float> %v) {
- ; CHECK: test_fmulx_lane_f32
- ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
- %tmp1 = extractelement <2 x float> %v, i32 1
- %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
- ret float %tmp2;
-}
-
-define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) {
- ; CHECK: test_fmulx_laneq_f32
- ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
- %tmp1 = extractelement <4 x float> %v, i32 3
- %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1)
- ret float %tmp2;
-}
-
-define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) {
- ; CHECK: test_fmulx_laneq_f32_swap
- ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
- %tmp1 = extractelement <4 x float> %v, i32 3
- %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a)
- ret float %tmp2;
-}
-
-declare double @llvm.aarch64.neon.vmulx.f64(double, double)
-
-define double @test_fmulx_lane_f64(double %a, <1 x double> %v) {
- ; CHECK: test_fmulx_lane_f64
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
- %tmp1 = extractelement <1 x double> %v, i32 0
- %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
- ret double %tmp2;
-}
-
-define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) {
- ; CHECK: test_fmulx_laneq_f64_0
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
- %tmp1 = extractelement <2 x double> %v, i32 0
- %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
- ret double %tmp2;
-}
-
-
-define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) {
- ; CHECK: test_fmulx_laneq_f64_1
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
- %tmp1 = extractelement <2 x double> %v, i32 1
- %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1)
- ret double %tmp2;
-}
-
-define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
- ; CHECK: test_fmulx_laneq_f64_1_swap
- ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
- %tmp1 = extractelement <2 x double> %v, i32 1
- %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a)
- ret double %tmp2;
-}
-
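Most of the deleted by-element multiply tests need no intrinsic at all: the fmul-by-lane instruction is pattern-matched from a plain fmul fed by an extractelement, and only fmulx (which has no IR operator) goes through an intrinsic. The intrinsic-free half of the pattern survives unchanged in IR:

define float @fmul_lane_sketch(float %a, <4 x float> %v) {
; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  ; extractelement + fmul folds into the by-element form.
  %lane = extractelement <4 x float> %v, i32 3
  %r = fmul float %a, %lane
  ret float %r
}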
diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll
deleted file mode 100644
index e1f3964..0000000
--- a/test/CodeGen/AArch64/neon-scalar-compare.ll
+++ /dev/null
@@ -1,343 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-;; Scalar Integer Compare
-
-define i64 @test_vceqd(i64 %a, i64 %b) {
-; CHECK: test_vceqd
-; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vceq.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vceq1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceq.i, <1 x i64> %vceq1.i)
- %0 = extractelement <1 x i64> %vceq2.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vceqzd(i64 %a) {
-; CHECK: test_vceqzd
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
- %vceqz.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vceqz1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceqz.i, <1 x i64> zeroinitializer)
- %0 = extractelement <1 x i64> %vceqz1.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcged(i64 %a, i64 %b) {
-; CHECK: test_vcged
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcge.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcge1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i)
- %0 = extractelement <1 x i64> %vcge2.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcgezd(i64 %a) {
-; CHECK: test_vcgezd
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
- %vcgez.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcgez1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcgez.i, <1 x i64> zeroinitializer)
- %0 = extractelement <1 x i64> %vcgez1.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcgtd(i64 %a, i64 %b) {
-; CHECK: test_vcgtd
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcgt.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcgt1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i)
- %0 = extractelement <1 x i64> %vcgt2.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcgtzd(i64 %a) {
-; CHECK: test_vcgtzd
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
- %vcgtz.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcgtz1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgtz.i, <1 x i64> zeroinitializer)
- %0 = extractelement <1 x i64> %vcgtz1.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcled(i64 %a, i64 %b) {
-; CHECK: test_vcled
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcgt.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vcgt1.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i)
- %0 = extractelement <1 x i64> %vcgt2.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vclezd(i64 %a) {
-; CHECK: test_vclezd
-; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
- %vclez.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vclez1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64> %vclez.i, <1 x i64> zeroinitializer)
- %0 = extractelement <1 x i64> %vclez1.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcltd(i64 %a, i64 %b) {
-; CHECK: test_vcltd
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcge.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vcge1.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i)
- %0 = extractelement <1 x i64> %vcge2.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vcltzd(i64 %a) {
-; CHECK: test_vcltzd
-; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0x0
-entry:
- %vcltz.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vcltz1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64> %vcltz.i, <1 x i64> zeroinitializer)
- %0 = extractelement <1 x i64> %vcltz1.i, i32 0
- ret i64 %0
-}
-
-define i64 @test_vtstd(i64 %a, i64 %b) {
-; CHECK: test_vtstd
-; CHECK: cmtst {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vtst.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vtst1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vtst2.i = call <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64> %vtst.i, <1 x i64> %vtst1.i)
- %0 = extractelement <1 x i64> %vtst2.i, i32 0
- ret i64 %0
-}
-
-
-define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcage_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2
- ret <1 x i64> %vcage2.i
-}
-
-define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcagt_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2
- ret <1 x i64> %vcagt2.i
-}
-
-define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcale_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2
- ret <1 x i64> %vcage2.i
-}
-
-define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcalt_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2
- ret <1 x i64> %vcagt2.i
-}
-
-define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vceq_s64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp eq <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vceq_u64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp eq <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vceq_f64
-; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = fcmp oeq <1 x double> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcge_s64
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp sge <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcge_u64
-; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp uge <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcge_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = fcmp oge <1 x double> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcle_s64
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp sle <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcle_u64
-; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp ule <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcle_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = fcmp ole <1 x double> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcgt_s64
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp sgt <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vcgt_u64
-; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp ugt <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vcgt_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = fcmp ogt <1 x double> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vclt_s64
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp slt <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
-; CHECK: test_vclt_u64
-; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = icmp ult <1 x i64> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
-; CHECK: test_vclt_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
- %cmp.i = fcmp olt <1 x double> %a, %b
- %sext.i = sext <1 x i1> %cmp.i to <1 x i64>
- ret <1 x i64> %sext.i
-}
-
-define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 {
-; CHECK: test_vceqz_s64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
- %1 = icmp eq <1 x i64> %a, zeroinitializer
- %vceqz.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vceqz.i
-}
-
-define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 {
-; CHECK: test_vceqz_u64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
- %1 = icmp eq <1 x i64> %a, zeroinitializer
- %vceqz.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vceqz.i
-}
-
-define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 {
-; CHECK: test_vceqz_p64
-; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0
- %1 = icmp eq <1 x i64> %a, zeroinitializer
- %vceqz.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vceqz.i
-}
-
-define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 {
-; CHECK: test_vceqzq_p64
-; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0
- %1 = icmp eq <2 x i64> %a, zeroinitializer
- %vceqz.i = sext <2 x i1> %1 to <2 x i64>
- ret <2 x i64> %vceqz.i
-}
-
-define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 {
-; CHECK: test_vcgez_s64
-; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0
- %1 = icmp sge <1 x i64> %a, zeroinitializer
- %vcgez.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vcgez.i
-}
-
-define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 {
-; CHECK: test_vclez_s64
-; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0
- %1 = icmp sle <1 x i64> %a, zeroinitializer
- %vclez.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vclez.i
-}
-
-define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 {
-; CHECK: test_vcgtz_s64
-; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0
- %1 = icmp sgt <1 x i64> %a, zeroinitializer
- %vcgtz.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vcgtz.i
-}
-
-define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 {
-; CHECK: test_vcltz_s64
-; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0
- %1 = icmp slt <1 x i64> %a, zeroinitializer
- %vcltz.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vcltz.i
-}
-
-declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>)
-declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vchi.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>)
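The second half of the deleted compare file already expressed every ordinary compare as plain icmp/fcmp plus sext, which is what made the vceq/vcge/vcgt pseudo-intrinsics droppable; only the absolute compares need an intrinsic, since FACGE/FACGT have no IR equivalent. A sketch assuming the scalar-typed name llvm.aarch64.neon.facge.i64.f64:

declare i64 @llvm.aarch64.neon.facge.i64.f64(double, double)

define i64 @facge_sketch(double %a, double %b) {
; CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ; Absolute compare: |a| >= |b|, yielding an all-ones/all-zeros i64 mask.
  %r = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
  ret i64 %r
}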
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll
index fadd734..a01df32 100644
--- a/test/CodeGen/AArch64/neon-scalar-copy.ll
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -1,103 +1,101 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK
+
define float @test_dup_sv2S(<2 x float> %v) {
- ;CHECK: test_dup_sv2S
- ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ ; CHECK-LABEL: test_dup_sv2S
+ ; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
%tmp1 = extractelement <2 x float> %v, i32 1
ret float %tmp1
}
define float @test_dup_sv2S_0(<2 x float> %v) {
- ;CHECK-LABEL: test_dup_sv2S_0
- ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0]
- ;CHECK: ret
+ ; CHECK-LABEL: test_dup_sv2S_0
+ ; CHECK-NOT: dup {{[vsd][0-9]+}}
+ ; CHECK-NOT: ins {{[vsd][0-9]+}}
+ ; CHECK: ret
%tmp1 = extractelement <2 x float> %v, i32 0
ret float %tmp1
}
define float @test_dup_sv4S(<4 x float> %v) {
- ;CHECK-LABEL: test_dup_sv4S
- ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0]
- ;CHECK: ret
+ ; CHECK-LABEL: test_dup_sv4S
+ ; CHECK-NOT: dup {{[vsd][0-9]+}}
+ ; CHECK-NOT: ins {{[vsd][0-9]+}}
+ ; CHECK: ret
%tmp1 = extractelement <4 x float> %v, i32 0
ret float %tmp1
}
define double @test_dup_dvD(<1 x double> %v) {
- ;CHECK: test_dup_dvD
- ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
- ;CHECK: ret
+ ; CHECK-LABEL: test_dup_dvD
+ ; CHECK-NOT: dup {{[vsd][0-9]+}}
+ ; CHECK-NOT: ins {{[vsd][0-9]+}}
+ ; CHECK: ret
%tmp1 = extractelement <1 x double> %v, i32 0
ret double %tmp1
}
define double @test_dup_dv2D(<2 x double> %v) {
- ;CHECK: test_dup_dv2D
- ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ ; CHECK-LABEL: test_dup_dv2D
+ ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
%tmp1 = extractelement <2 x double> %v, i32 1
ret double %tmp1
}
define double @test_dup_dv2D_0(<2 x double> %v) {
- ;CHECK: test_dup_dv2D_0
- ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
- ;CHECK: ret
+ ; CHECK-LABEL: test_dup_dv2D_0
+ ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+ ; CHECK: ret
%tmp1 = extractelement <2 x double> %v, i32 1
ret double %tmp1
}
define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) {
- ;CHECK: test_vector_dup_bv16B
- ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]
+ ; CHECK-LABEL: test_vector_dup_bv16B
%shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14>
ret <1 x i8> %shuffle.i
}
define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) {
- ;CHECK: test_vector_dup_bv8B
- ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7]
+ ; CHECK-LABEL: test_vector_dup_bv8B
%shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7>
ret <1 x i8> %shuffle.i
}
define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) {
- ;CHECK: test_vector_dup_hv8H
- ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
+ ; CHECK-LABEL: test_vector_dup_hv8H
%shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7>
ret <1 x i16> %shuffle.i
}
define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) {
- ;CHECK: test_vector_dup_hv4H
- ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
+ ; CHECK-LABEL: test_vector_dup_hv4H
%shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3>
ret <1 x i16> %shuffle.i
}
define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) {
- ;CHECK: test_vector_dup_sv4S
- ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ ; CHECK-LABEL: test_vector_dup_sv4S
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3>
ret <1 x i32> %shuffle
}
define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) {
- ;CHECK: test_vector_dup_sv2S
- ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ ; CHECK-LABEL: test_vector_dup_sv2S
%shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1>
ret <1 x i32> %shuffle
}
define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
- ;CHECK: test_vector_dup_dv2D
- ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ ; CHECK-LABEL: test_vector_dup_dv2D
+ ; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
%shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1>
ret <1 x i64> %shuffle.i
}
define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) {
- ;CHECK: test_vector_copy_dup_dv2D
- ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ ; CHECK-LABEL: test_vector_copy_dup_dv2D
+ ; CHECK: {{dup|mov}} {{d[0-9]+}}, {{v[0-9]+}}.d[1]
%vget_lane = extractelement <2 x i64> %c, i32 1
%vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
ret <1 x i64> %vset_lane
diff --git a/test/CodeGen/AArch64/neon-scalar-cvt.ll b/test/CodeGen/AArch64/neon-scalar-cvt.ll
deleted file mode 100644
index 3a19bed..0000000
--- a/test/CodeGen/AArch64/neon-scalar-cvt.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-define float @test_vcvts_f32_s32(i32 %a) {
-; CHECK: test_vcvts_f32_s32
-; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %0 = call float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32> %vcvtf.i)
- ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32>)
-
-define double @test_vcvtd_f64_s64(i64 %a) {
-; CHECK: test_vcvtd_f64_s64
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %0 = call double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64> %vcvtf.i)
- ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64>)
-
-define float @test_vcvts_f32_u32(i32 %a) {
-; CHECK: test_vcvts_f32_u32
-; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %0 = call float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32> %vcvtf.i)
- ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32>)
-
-define double @test_vcvtd_f64_u64(i64 %a) {
-; CHECK: test_vcvtd_f64_u64
-; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %0 = call double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64> %vcvtf.i)
- ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64>)
-
-define float @test_vcvts_n_f32_s32(i32 %a) {
-; CHECK: test_vcvts_n_f32_s32
-; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1
-entry:
- %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0
- %0 = call float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1)
- ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32>, i32)
-
-define double @test_vcvtd_n_f64_s64(i64 %a) {
-; CHECK: test_vcvtd_n_f64_s64
-; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1
-entry:
- %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0
- %0 = call double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1)
- ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64>, i32)
-
-define float @test_vcvts_n_f32_u32(i32 %a) {
-; CHECK: test_vcvts_n_f32_u32
-; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1
-entry:
- %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0
- %0 = call float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1)
- ret float %0
-}
-
-declare float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32>, i32)
-
-define double @test_vcvtd_n_f64_u64(i64 %a) {
-; CHECK: test_vcvtd_n_f64_u64
-; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1
-entry:
- %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0
- %0 = call double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1)
- ret double %0
-}
-
-declare double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64>, i32)
-
-define i32 @test_vcvts_n_s32_f32(float %a) {
-; CHECK: test_vcvts_n_s32_f32
-; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1
-entry:
- %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float %a, i32 1)
- %0 = extractelement <1 x i32> %fcvtzs1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float, i32)
-
-define i64 @test_vcvtd_n_s64_f64(double %a) {
-; CHECK: test_vcvtd_n_s64_f64
-; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1
-entry:
- %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double %a, i32 1)
- %0 = extractelement <1 x i64> %fcvtzs1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double, i32)
-
-define i32 @test_vcvts_n_u32_f32(float %a) {
-; CHECK: test_vcvts_n_u32_f32
-; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32
-entry:
- %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float %a, i32 32)
- %0 = extractelement <1 x i32> %fcvtzu1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float, i32)
-
-define i64 @test_vcvtd_n_u64_f64(double %a) {
-; CHECK: test_vcvtd_n_u64_f64
-; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64
-entry:
- %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double %a, i32 64)
- %0 = extractelement <1 x i64> %fcvtzu1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double, i32)
diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll
deleted file mode 100644
index 51dea06..0000000
--- a/test/CodeGen/AArch64/neon-scalar-ext.ll
+++ /dev/null
@@ -1,113 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i32_v1i64:
-; CHECK: ushll v0.2d, v0.2s, #0
- %1 = extractelement <2 x i32> %v, i32 0
- %2 = insertelement <1 x i32> undef, i32 %1, i32 0
- %3 = zext <1 x i32> %2 to <1 x i64>
- ret <1 x i64> %3
-}
-
-define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i16_v1i32:
-; CHECK: ushll v0.4s, v0.4h, #0
- %1 = extractelement <4 x i16> %v, i32 0
- %2 = insertelement <1 x i16> undef, i16 %1, i32 0
- %3 = zext <1 x i16> %2 to <1 x i32>
- ret <1 x i32> %3
-}
-
-define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i8_v1i16:
-; CHECK: ushll v0.8h, v0.8b, #0
- %1 = extractelement <8 x i8> %v, i32 0
- %2 = insertelement <1 x i8> undef, i8 %1, i32 0
- %3 = zext <1 x i8> %2 to <1 x i16>
- ret <1 x i16> %3
-}
-
-define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i8_v1i32:
-; CHECK: dup b0, v0.b[0]
- %1 = extractelement <8 x i8> %v, i32 0
- %2 = insertelement <1 x i8> undef, i8 %1, i32 0
- %3 = zext <1 x i8> %2 to <1 x i32>
- ret <1 x i32> %3
-}
-
-define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i16_v1i64:
-; CHECK: dup h0, v0.h[0]
- %1 = extractelement <4 x i16> %v, i32 0
- %2 = insertelement <1 x i16> undef, i16 %1, i32 0
- %3 = zext <1 x i16> %2 to <1 x i64>
- ret <1 x i64> %3
-}
-
-define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_zext_v1i8_v1i64:
-; CHECK: dup b0, v0.b[0]
- %1 = extractelement <8 x i8> %v, i32 0
- %2 = insertelement <1 x i8> undef, i8 %1, i32 0
- %3 = zext <1 x i8> %2 to <1 x i64>
- ret <1 x i64> %3
-}
-
-define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i32_v1i64:
-; CHECK: sshll v0.2d, v0.2s, #0
- %1 = extractelement <2 x i32> %v, i32 0
- %2 = insertelement <1 x i32> undef, i32 %1, i32 0
- %3 = sext <1 x i32> %2 to <1 x i64>
- ret <1 x i64> %3
-}
-
-define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i16_v1i32:
-; CHECK: sshll v0.4s, v0.4h, #0
- %1 = extractelement <4 x i16> %v, i32 0
- %2 = insertelement <1 x i16> undef, i16 %1, i32 0
- %3 = sext <1 x i16> %2 to <1 x i32>
- ret <1 x i32> %3
-}
-
-define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i8_v1i16:
-; CHECK: sshll v0.8h, v0.8b, #0
- %1 = extractelement <8 x i8> %v, i32 0
- %2 = insertelement <1 x i8> undef, i8 %1, i32 0
- %3 = sext <1 x i8> %2 to <1 x i16>
- ret <1 x i16> %3
-}
-
-define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i8_v1i32:
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK: sshll v0.4s, v0.4h, #0
- %1 = extractelement <8 x i8> %v, i32 0
- %2 = insertelement <1 x i8> undef, i8 %1, i32 0
- %3 = sext <1 x i8> %2 to <1 x i32>
- ret <1 x i32> %3
-}
-
-define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i16_v1i64:
-; CHECK: sshll v0.4s, v0.4h, #0
-; CHECK: sshll v0.2d, v0.2s, #0
- %1 = extractelement <4 x i16> %v, i32 0
- %2 = insertelement <1 x i16> undef, i16 %1, i32 0
- %3 = sext <1 x i16> %2 to <1 x i64>
- ret <1 x i64> %3
-}
-
-define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone {
-; CHECK-LABEL: test_sext_v1i8_v1i64:
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK: sshll v0.4s, v0.4h, #0
-; CHECK: sshll v0.2d, v0.2s, #0
- %1 = extractelement <8 x i8> %v, i32 0
- %2 = insertelement <1 x i8> undef, i8 %1, i32 0
- %3 = sext <1 x i8> %2 to <1 x i64>
- ret <1 x i64> %3
-}
diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
deleted file mode 100644
index faf521b..0000000
--- a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-define i8 @test_vqmovunh_s16(i16 %a) {
-; CHECK: test_vqmovunh_s16
-; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i)
- %0 = extractelement <1 x i8> %vqmovun1.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vqmovuns_s32(i32 %a) {
-; CHECK: test_vqmovuns_s32
-; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i)
- %0 = extractelement <1 x i16> %vqmovun1.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vqmovund_s64(i64 %a) {
-; CHECK: test_vqmovund_s64
-; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}}
-entry:
- %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i)
- %0 = extractelement <1 x i32> %vqmovun1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>)
-
-define i8 @test_vqmovnh_s16(i16 %a) {
-; CHECK: test_vqmovnh_s16
-; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i)
- %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
- ret i8 %0
-}
-
-define i16 @test_vqmovns_s32(i32 %a) {
-; CHECK: test_vqmovns_s32
-; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i)
- %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vqmovnd_s64(i64 %a) {
-; CHECK: test_vqmovnd_s64
-; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}}
-entry:
- %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i)
- %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>)
-
-define i8 @test_vqmovnh_u16(i16 %a) {
-; CHECK: test_vqmovnh_u16
-; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i)
- %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
- ret i8 %0
-}
-
-
-define i16 @test_vqmovns_u32(i32 %a) {
-; CHECK: test_vqmovns_u32
-; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i)
- %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
- ret i16 %0
-}
-
-define i32 @test_vqmovnd_u64(i64 %a) {
-; CHECK: test_vqmovnd_u64
-; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
-entry:
- %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i)
- %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-fabd.ll b/test/CodeGen/AArch64/neon-scalar-fabd.ll
deleted file mode 100644
index 6343310..0000000
--- a/test/CodeGen/AArch64/neon-scalar-fabd.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define float @test_vabds_f32(float %a, float %b) {
-; CHECK-LABEL: test_vabds_f32
-; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %0 = call float @llvm.aarch64.neon.vabd.f32(float %a, float %b)
- ret float %0
-}
-
-define double @test_vabdd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vabdd_f64
-; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = call double @llvm.aarch64.neon.vabd.f64(double %a, double %b)
- ret double %0
-}
-
-declare double @llvm.aarch64.neon.vabd.f64(double, double)
-declare float @llvm.aarch64.neon.vabd.f32(float, float)
diff --git a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll
deleted file mode 100644
index 6cf30a7..0000000
--- a/test/CodeGen/AArch64/neon-scalar-fcvt.ll
+++ /dev/null
@@ -1,233 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-;; Scalar Floating-point Convert
-
-define float @test_vcvtxn(double %a) {
-; CHECK: test_vcvtxn
-; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a)
- ret float %vcvtf
-}
-
-declare float @llvm.aarch64.neon.fcvtxn(double)
-
-define i32 @test_vcvtass(float %a) {
-; CHECK: test_vcvtass
-; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtas1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float)
-
-define i64 @test_vcvtasd(double %a) {
-; CHECK: test_vcvtasd
-; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtas1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double)
-
-define i32 @test_vcvtaus(float %a) {
-; CHECK: test_vcvtaus
-; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtau1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float)
-
-define i64 @test_vcvtaud(double %a) {
-; CHECK: test_vcvtaud
-; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtau1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double)
-
-define i32 @test_vcvtmss(float %a) {
-; CHECK: test_vcvtmss
-; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtms1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float)
-
-define i64 @test_vcvtmd_s64_f64(double %a) {
-; CHECK: test_vcvtmd_s64_f64
-; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtms1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double)
-
-define i32 @test_vcvtmus(float %a) {
-; CHECK: test_vcvtmus
-; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float)
-
-define i64 @test_vcvtmud(double %a) {
-; CHECK: test_vcvtmud
-; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double)
-
-define i32 @test_vcvtnss(float %a) {
-; CHECK: test_vcvtnss
-; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtns1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float)
-
-define i64 @test_vcvtnd_s64_f64(double %a) {
-; CHECK: test_vcvtnd_s64_f64
-; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtns1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double)
-
-define i32 @test_vcvtnus(float %a) {
-; CHECK: test_vcvtnus
-; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float)
-
-define i64 @test_vcvtnud(double %a) {
-; CHECK: test_vcvtnud
-; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double)
-
-define i32 @test_vcvtpss(float %a) {
-; CHECK: test_vcvtpss
-; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtps1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float)
-
-define i64 @test_vcvtpd_s64_f64(double %a) {
-; CHECK: test_vcvtpd_s64_f64
-; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtps1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double)
-
-define i32 @test_vcvtpus(float %a) {
-; CHECK: test_vcvtpus
-; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float)
-
-define i64 @test_vcvtpud(double %a) {
-; CHECK: test_vcvtpud
-; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double)
-
-define i32 @test_vcvtss(float %a) {
-; CHECK: test_vcvtss
-; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float)
-
-define i64 @test_vcvtd_s64_f64(double %a) {
-; CHECK: test_vcvtd_s64_f64
-; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtzs1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double)
-
-define i32 @test_vcvtus(float %a) {
-; CHECK: test_vcvtus
-; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}}
-entry:
- %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a)
- %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float)
-
-define i64 @test_vcvtud(double %a) {
-; CHECK: test_vcvtud
-; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}}
-entry:
- %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a)
- %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double)
diff --git a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
deleted file mode 100644
index e0dce13..0000000
--- a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-;; Scalar Floating-point Compare
-
-define i32 @test_vceqs_f32(float %a, float %b) {
-; CHECK-LABEL: test_vceqs_f32
-; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fceq2.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fceq2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vceqd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vceqd_f64
-; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fceq2.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fceq2.i, i32 0
- ret i64 %0
-}
-
-define <1 x i64> @test_vceqz_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vceqz_f64
-; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0
-entry:
- %0 = fcmp oeq <1 x double> %a, zeroinitializer
- %vceqz.i = sext <1 x i1> %0 to <1 x i64>
- ret <1 x i64> %vceqz.i
-}
-
-define i32 @test_vceqzs_f32(float %a) {
-; CHECK-LABEL: test_vceqzs_f32
-; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
- %fceq1.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float 0.0)
- %0 = extractelement <1 x i32> %fceq1.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vceqzd_f64(double %a) {
-; CHECK-LABEL: test_vceqzd_f64
-; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
- %fceq1.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double %a, float 0.0)
- %0 = extractelement <1 x i64> %fceq1.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcges_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcges_f32
-; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcge2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcged_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcged_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcge2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcgezs_f32(float %a) {
-; CHECK-LABEL: test_vcgezs_f32
-; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
- %fcge1.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float 0.0)
- %0 = extractelement <1 x i32> %fcge1.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcgezd_f64(double %a) {
-; CHECK-LABEL: test_vcgezd_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
- %fcge1.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double %a, float 0.0)
- %0 = extractelement <1 x i64> %fcge1.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcgts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcgts_f32
-; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcgt2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcgtd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcgtd_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcgt2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcgtzs_f32(float %a) {
-; CHECK-LABEL: test_vcgtzs_f32
-; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
- %fcgt1.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float 0.0)
- %0 = extractelement <1 x i32> %fcgt1.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcgtzd_f64(double %a) {
-; CHECK-LABEL: test_vcgtzd_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
- %fcgt1.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double %a, float 0.0)
- %0 = extractelement <1 x i64> %fcgt1.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcles_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcles_f32
-; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcge2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcled_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcled_f64
-; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcge2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vclezs_f32(float %a) {
-; CHECK-LABEL: test_vclezs_f32
-; CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
- %fcle1.i = call <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float %a, float 0.0)
- %0 = extractelement <1 x i32> %fcle1.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vclezd_f64(double %a) {
-; CHECK-LABEL: test_vclezd_f64
-; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
- %fcle1.i = call <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double %a, float 0.0)
- %0 = extractelement <1 x i64> %fcle1.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vclts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vclts_f32
-; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcgt2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcltd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcltd_f64
-; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcgt2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcltzs_f32(float %a) {
-; CHECK-LABEL: test_vcltzs_f32
-; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0
-entry:
- %fclt1.i = call <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float %a, float 0.0)
- %0 = extractelement <1 x i32> %fclt1.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcltzd_f64(double %a) {
-; CHECK-LABEL: test_vcltzd_f64
-; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0
-entry:
- %fclt1.i = call <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double %a, float 0.0)
- %0 = extractelement <1 x i64> %fclt1.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcages_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcages_f32
-; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcage2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcaged_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcaged_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcage2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcagts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcagts_f32
-; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcagt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcagt2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcagtd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcagtd_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcagt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcagt2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcales_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcales_f32
-; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcage2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcaled_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcaled_f64
-; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcage2.i, i32 0
- ret i64 %0
-}
-
-define i32 @test_vcalts_f32(float %a, float %b) {
-; CHECK-LABEL: test_vcalts_f32
-; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}}
-entry:
- %fcalt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b)
- %0 = extractelement <1 x i32> %fcalt2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vcaltd_f64(double %a, double %b) {
-; CHECK-LABEL: test_vcaltd_f64
-; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}}
-entry:
- %fcalt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b)
- %0 = extractelement <1 x i64> %fcalt2.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double, float)
-declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double, float)
-declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double, float)
-declare <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double, float)
-declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double, float)
-declare <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double, double)
-declare <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float, float)
-declare <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double, double)
diff --git a/test/CodeGen/AArch64/neon-scalar-mul.ll b/test/CodeGen/AArch64/neon-scalar-mul.ll
deleted file mode 100644
index 991037f..0000000
--- a/test/CodeGen/AArch64/neon-scalar-mul.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
-; CHECK: test_vqdmulhh_s16
-; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- %1 = insertelement <1 x i16> undef, i16 %a, i32 0
- %2 = insertelement <1 x i16> undef, i16 %b, i32 0
- %3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
- %4 = extractelement <1 x i16> %3, i32 0
- ret i16 %4
-}
-
-define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) {
-; CHECK: test_vqdmulhs_s32
-; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- %1 = insertelement <1 x i32> undef, i32 %a, i32 0
- %2 = insertelement <1 x i32> undef, i32 %b, i32 0
- %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
- %4 = extractelement <1 x i32> %3, i32 0
- ret i32 %4
-}
-
-declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>)
-
-define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) {
-; CHECK: test_vqrdmulhh_s16
-; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- %1 = insertelement <1 x i16> undef, i16 %a, i32 0
- %2 = insertelement <1 x i16> undef, i16 %b, i32 0
- %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
- %4 = extractelement <1 x i16> %3, i32 0
- ret i16 %4
-}
-
-define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) {
-; CHECK: test_vqrdmulhs_s32
-; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- %1 = insertelement <1 x i32> undef, i32 %a, i32 0
- %2 = insertelement <1 x i32> undef, i32 %b, i32 0
- %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
- %4 = extractelement <1 x i32> %3, i32 0
- ret i32 %4
-}
-
-declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>)
-
-define float @test_vmulxs_f32(float %a, float %b) {
-; CHECK: test_vmulxs_f32
-; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b)
- ret float %1
-}
-
-define double @test_vmulxd_f64(double %a, double %b) {
-; CHECK: test_vmulxd_f64
-; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b)
- ret double %1
-}
-
-declare float @llvm.aarch64.neon.vmulx.f32(float, float)
-declare double @llvm.aarch64.neon.vmulx.f64(double, double)
-
-define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) {
-; CHECK: test_vqdmlalh_s16
-; CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqdmlal.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqdmlal1.i = insertelement <1 x i16> undef, i16 %b, i32 0
- %vqdmlal2.i = insertelement <1 x i16> undef, i16 %c, i32 0
- %vqdmlal3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32> %vqdmlal.i, <1 x i16> %vqdmlal1.i, <1 x i16> %vqdmlal2.i)
- %0 = extractelement <1 x i32> %vqdmlal3.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) {
-; CHECK: test_vqdmlals_s32
-; CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqdmlal.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqdmlal1.i = insertelement <1 x i32> undef, i32 %b, i32 0
- %vqdmlal2.i = insertelement <1 x i32> undef, i32 %c, i32 0
- %vqdmlal3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64> %vqdmlal.i, <1 x i32> %vqdmlal1.i, <1 x i32> %vqdmlal2.i)
- %0 = extractelement <1 x i64> %vqdmlal3.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32>, <1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64>, <1 x i32>, <1 x i32>)
-
-define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) {
-; CHECK: test_vqdmlslh_s16
-; CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqdmlsl.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqdmlsl1.i = insertelement <1 x i16> undef, i16 %b, i32 0
- %vqdmlsl2.i = insertelement <1 x i16> undef, i16 %c, i32 0
- %vqdmlsl3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32> %vqdmlsl.i, <1 x i16> %vqdmlsl1.i, <1 x i16> %vqdmlsl2.i)
- %0 = extractelement <1 x i32> %vqdmlsl3.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) {
-; CHECK: test_vqdmlsls_s32
-; CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqdmlsl.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqdmlsl1.i = insertelement <1 x i32> undef, i32 %b, i32 0
- %vqdmlsl2.i = insertelement <1 x i32> undef, i32 %c, i32 0
- %vqdmlsl3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64> %vqdmlsl.i, <1 x i32> %vqdmlsl1.i, <1 x i32> %vqdmlsl2.i)
- %0 = extractelement <1 x i64> %vqdmlsl3.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32>, <1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64>, <1 x i32>, <1 x i32>)
-
-define i32 @test_vqdmullh_s16(i16 %a, i16 %b) {
-; CHECK: test_vqdmullh_s16
-; CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0
- %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
- %0 = extractelement <1 x i32> %vqdmull2.i, i32 0
- ret i32 %0
-}
-
-define i64 @test_vqdmulls_s32(i32 %a, i32 %b) {
-; CHECK: test_vqdmulls_s32
-; CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0
- %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
- %0 = extractelement <1 x i64> %vqdmull2.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)
diff --git a/test/CodeGen/AArch64/neon-scalar-neg.ll b/test/CodeGen/AArch64/neon-scalar-neg.ll
deleted file mode 100644
index 4dc9d51..0000000
--- a/test/CodeGen/AArch64/neon-scalar-neg.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define i64 @test_vnegd_s64(i64 %a) {
-; CHECK: test_vnegd_s64
-; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vneg.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vneg1.i = tail call <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64> %vneg.i)
- %0 = extractelement <1 x i64> %vneg1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64>)
-
-define i8 @test_vqnegb_s8(i8 %a) {
-; CHECK: test_vqnegb_s8
-; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}}
-entry:
- %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0
- %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i)
- %0 = extractelement <1 x i8> %vqneg1.i, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>)
-
-define i16 @test_vqnegh_s16(i16 %a) {
-; CHECK: test_vqnegh_s16
-; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i)
- %0 = extractelement <1 x i16> %vqneg1.i, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>)
-
-define i32 @test_vqnegs_s32(i32 %a) {
-; CHECK: test_vqnegs_s32
-; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i)
- %0 = extractelement <1 x i32> %vqneg1.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>)
-
-define i64 @test_vqnegd_s64(i64 %a) {
-; CHECK: test_vqnegd_s64
-; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i)
- %0 = extractelement <1 x i64> %vqneg1.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) \ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll
deleted file mode 100644
index 100839b..0000000
--- a/test/CodeGen/AArch64/neon-scalar-recip.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-define float @test_vrecpss_f32(float %a, float %b) {
-; CHECK: test_vrecpss_f32
-; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b)
- ret float %1
-}
-
-define double @test_vrecpsd_f64(double %a, double %b) {
-; CHECK: test_vrecpsd_f64
-; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b)
- ret double %1
-}
-
-declare float @llvm.aarch64.neon.vrecps.f32(float, float)
-declare double @llvm.aarch64.neon.vrecps.f64(double, double)
-
-define float @test_vrsqrtss_f32(float %a, float %b) {
-; CHECK: test_vrsqrtss_f32
-; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b)
- ret float %1
-}
-
-define double @test_vrsqrtsd_f64(double %a, double %b) {
-; CHECK: test_vrsqrtsd_f64
-; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b)
- ret double %1
-}
-
-declare float @llvm.aarch64.neon.vrsqrts.f32(float, float)
-declare double @llvm.aarch64.neon.vrsqrts.f64(double, double)
-
-define float @test_vrecpes_f32(float %a) {
-; CHECK: test_vrecpes_f32
-; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a)
- ret float %0
-}
-
-define double @test_vrecped_f64(double %a) {
-; CHECK: test_vrecped_f64
-; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a)
- ret double %0
-}
-
-declare float @llvm.aarch64.neon.vrecpe.f32(float)
-declare double @llvm.aarch64.neon.vrecpe.f64(double)
-
-define float @test_vrecpxs_f32(float %a) {
-; CHECK: test_vrecpxs_f32
-; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %0 = call float @llvm.aarch64.neon.vrecpx.f32(float %a)
- ret float %0
-}
-
-define double @test_vrecpxd_f64(double %a) {
-; CHECK: test_vrecpxd_f64
-; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a)
- ret double %0
-}
-
-declare float @llvm.aarch64.neon.vrecpx.f32(float)
-declare double @llvm.aarch64.neon.vrecpx.f64(double)
-
-define float @test_vrsqrtes_f32(float %a) {
-; CHECK: test_vrsqrtes_f32
-; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a)
- ret float %0
-}
-
-define double @test_vrsqrted_f64(double %a) {
-; CHECK: test_vrsqrted_f64
-; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a)
- ret double %0
-}
-
-declare float @llvm.aarch64.neon.vrsqrte.f32(float)
-declare double @llvm.aarch64.neon.vrsqrte.f64(double)
diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
deleted file mode 100644
index 33ce5cf..0000000
--- a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
+++ /dev/null
@@ -1,215 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
-
-define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
-; CHECK: test_addp_v1i64:
-; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
- ret <1 x i64> %val
-}
-
-declare float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float>)
-
-define float @test_faddp_f32(<2 x float> %a) {
-; CHECK: test_faddp_f32:
-; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %val = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a)
- ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double>)
-
-define double @test_faddp_f64(<2 x double> %a) {
-; CHECK: test_faddp_f64:
-; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %val = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a)
- ret double %val
-}
-
-
-declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>)
-
-define float @test_fmaxp_f32(<2 x float> %a) {
-; CHECK: test_fmaxp_f32:
-; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %val = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
- ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double>)
-
-define double @test_fmaxp_f64(<2 x double> %a) {
-; CHECK: test_fmaxp_f64:
-; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %val = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a)
- ret double %val
-}
-
-declare float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float>)
-
-define float @test_fminp_f32(<2 x float> %a) {
-; CHECK: test_fminp_f32:
-; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %val = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a)
- ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double>)
-
-define double @test_fminp_f64(<2 x double> %a) {
-; CHECK: test_fminp_f64:
-; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %val = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a)
- ret double %val
-}
-
-declare float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float>)
-
-define float @test_fmaxnmp_f32(<2 x float> %a) {
-; CHECK: test_fmaxnmp_f32:
-; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %val = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a)
- ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double>)
-
-define double @test_fmaxnmp_f64(<2 x double> %a) {
-; CHECK: test_fmaxnmp_f64:
-; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %val = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a)
- ret double %val
-}
-
-declare float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float>)
-
-define float @test_fminnmp_f32(<2 x float> %a) {
-; CHECK: test_fminnmp_f32:
-; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %val = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a)
- ret float %val
-}
-
-declare double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double>)
-
-define double @test_fminnmp_f64(<2 x double> %a) {
-; CHECK: test_fminnmp_f64:
-; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %val = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a)
- ret double %val
-}
-
-define float @test_vaddv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vaddv_f32
-; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %1 = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a)
- ret float %1
-}
-
-define float @test_vaddvq_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vaddvq_f32
-; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %1 = call float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float> %a)
- ret float %1
-}
-
-define double @test_vaddvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vaddvq_f64
-; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a)
- ret double %1
-}
-
-define float @test_vmaxv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vmaxv_f32
-; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %1 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a)
- ret float %1
-}
-
-define double @test_vmaxvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vmaxvq_f64
-; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a)
- ret double %1
-}
-
-define float @test_vminv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vminv_f32
-; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %1 = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a)
- ret float %1
-}
-
-define double @test_vminvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vminvq_f64
-; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a)
- ret double %1
-}
-
-define double @test_vmaxnmvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vmaxnmvq_f64
-; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a)
- ret double %1
-}
-
-define float @test_vmaxnmv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vmaxnmv_f32
-; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %1 = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a)
- ret float %1
-}
-
-define double @test_vminnmvq_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vminnmvq_f64
-; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a)
- ret double %1
-}
-
-define float @test_vminnmv_f32(<2 x float> %a) {
-; CHECK-LABEL: test_vminnmv_f32
-; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
- %1 = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a)
- ret float %1
-}
-
-define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vpaddq_s64
-; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i64> %1
-}
-
-define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vpaddq_u64
-; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i64> %1
-}
-
-define i64 @test_vaddvq_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vaddvq_s64
-; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
- %2 = extractelement <1 x i64> %1, i32 0
- ret i64 %2
-}
-
-define i64 @test_vaddvq_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vaddvq_u64
-; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
- %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
- %2 = extractelement <1 x i64> %1, i32 0
- ret i64 %2
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>)
-
-declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
-
-declare float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float>)
diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
deleted file mode 100644
index 7c9ffa0..0000000
--- a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-
-declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-
-
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
deleted file mode 100644
index 5c010ef..0000000
--- a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
+++ /dev/null
@@ -1,242 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqadd_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqadd_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqsub_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqsub_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqadd_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqadd_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqsub_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqsub_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqadd_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqadd_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqsub_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-
-define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqsub_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqadd_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqadd_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqsub_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqsub_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define i8 @test_vuqaddb_s8(i8 %a, i8 %b) {
-; CHECK: test_vuqaddb_s8
-; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}}
-entry:
- %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
- %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
- %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i)
- %0 = extractelement <1 x i8> %vuqadd2.i, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>)
-
-define i16 @test_vuqaddh_s16(i16 %a, i16 %b) {
-; CHECK: test_vuqaddh_s16
-; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
- %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i)
- %0 = extractelement <1 x i16> %vuqadd2.i, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
-
-define i32 @test_vuqadds_s32(i32 %a, i32 %b) {
-; CHECK: test_vuqadds_s32
-; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
- %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i)
- %0 = extractelement <1 x i32> %vuqadd2.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>)
-
-define i64 @test_vuqaddd_s64(i64 %a, i64 %b) {
-; CHECK: test_vuqaddd_s64
-; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i)
- %0 = extractelement <1 x i64> %vuqadd2.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>)
-
-define i8 @test_vsqaddb_u8(i8 %a, i8 %b) {
-; CHECK: test_vsqaddb_u8
-; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}}
-entry:
- %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0
- %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0
- %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i)
- %0 = extractelement <1 x i8> %vsqadd2.i, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>)
-
-define i16 @test_vsqaddh_u16(i16 %a, i16 %b) {
-; CHECK: test_vsqaddh_u16
-; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}}
-entry:
- %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0
- %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i)
- %0 = extractelement <1 x i16> %vsqadd2.i, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>)
-
-define i32 @test_vsqadds_u32(i32 %a, i32 %b) {
-; CHECK: test_vsqadds_u32
-; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
-entry:
- %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0
- %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i)
- %0 = extractelement <1 x i32> %vsqadd2.i, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>)
-
-define i64 @test_vsqaddd_u64(i64 %a, i64 %b) {
-; CHECK: test_vsqaddd_u64
-; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0
- %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i)
- %0 = extractelement <1 x i64> %vsqadd2.i, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>)
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
deleted file mode 100644
index dbf9669..0000000
--- a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqrshl_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-; CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
-
- ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqrshl_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-; CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqrshl_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-; CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
-
- ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqrshl_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-; CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqrshl_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-; CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-
- ret <1 x i32> %tmp1
-}
-
-define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqrshl_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-; CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-
-
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
deleted file mode 100644
index 0a1f4c9..0000000
--- a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)
-
-define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_uqshl_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
-; CHECK: test_sqshl_v1i8_aarch64:
- %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
-; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
- ret <1 x i8> %tmp1
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)
-
-define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_uqshl_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
-; CHECK: test_sqshl_v1i16_aarch64:
- %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
-; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
- ret <1 x i16> %tmp1
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)
-
-define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_uqshl_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
-; CHECK: test_sqshl_v1i32_aarch64:
- %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
-; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- ret <1 x i32> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-
diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
deleted file mode 100644
index 6224361..0000000
--- a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
+++ /dev/null
@@ -1,531 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define i64 @test_vshrd_n_s64(i64 %a) {
-; CHECK: test_vshrd_n_s64
-; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63)
- %0 = extractelement <1 x i64> %vsshr1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32)
-
-define i64 @test_vshrd_n_u64(i64 %a) {
-; CHECK: test_vshrd_n_u64
-; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vushr = insertelement <1 x i64> undef, i64 %a, i32 0
- %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63)
- %0 = extractelement <1 x i64> %vushr1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32)
-
-define i64 @test_vrshrd_n_s64(i64 %a) {
-; CHECK: test_vrshrd_n_s64
-; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63)
- %0 = extractelement <1 x i64> %vsrshr1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32)
-
-define i64 @test_vrshrd_n_u64(i64 %a) {
-; CHECK: test_vrshrd_n_u64
-; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0
- %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63)
- %0 = extractelement <1 x i64> %vurshr1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32)
-
-define i64 @test_vsrad_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vsrad_n_s64
-; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vssra = insertelement <1 x i64> undef, i64 %a, i32 0
- %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63)
- %0 = extractelement <1 x i64> %vssra2, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vsrad_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vsrad_n_u64
-; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vusra = insertelement <1 x i64> undef, i64 %a, i32 0
- %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63)
- %0 = extractelement <1 x i64> %vusra2, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vrsrad_n_s64
-; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63)
- %0 = extractelement <1 x i64> %vsrsra2, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vrsrad_n_u64
-; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vursra = insertelement <1 x i64> undef, i64 %a, i32 0
- %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63)
- %0 = extractelement <1 x i64> %vursra2, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vshld_n_s64(i64 %a) {
-; CHECK: test_vshld_n_s64
-; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vshl = insertelement <1 x i64> undef, i64 %a, i32 0
- %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63)
- %0 = extractelement <1 x i64> %vshl1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32)
-
-define i64 @test_vshld_n_u64(i64 %a) {
-; CHECK: test_vshld_n_u64
-; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vshl = insertelement <1 x i64> undef, i64 %a, i32 0
- %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63)
- %0 = extractelement <1 x i64> %vshl1, i32 0
- ret i64 %0
-}
-
-define i8 @test_vqshlb_n_s8(i8 %a) {
-; CHECK: test_vqshlb_n_s8
-; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
-entry:
- %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0
- %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7)
- %0 = extractelement <1 x i8> %vsqshl1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32)
-
-define i16 @test_vqshlh_n_s16(i16 %a) {
-; CHECK: test_vqshlh_n_s16
-; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
-entry:
- %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15)
- %0 = extractelement <1 x i16> %vsqshl1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32)
-
-define i32 @test_vqshls_n_s32(i32 %a) {
-; CHECK: test_vqshls_n_s32
-; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
-entry:
- %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31)
- %0 = extractelement <1 x i32> %vsqshl1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32)
-
-define i64 @test_vqshld_n_s64(i64 %a) {
-; CHECK: test_vqshld_n_s64
-; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63)
- %0 = extractelement <1 x i64> %vsqshl1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32)
-
-define i8 @test_vqshlb_n_u8(i8 %a) {
-; CHECK: test_vqshlb_n_u8
-; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7
-entry:
- %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0
- %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7)
- %0 = extractelement <1 x i8> %vuqshl1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32)
-
-define i16 @test_vqshlh_n_u16(i16 %a) {
-; CHECK: test_vqshlh_n_u16
-; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15
-entry:
- %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0
- %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15)
- %0 = extractelement <1 x i16> %vuqshl1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32)
-
-define i32 @test_vqshls_n_u32(i32 %a) {
-; CHECK: test_vqshls_n_u32
-; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31
-entry:
- %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0
- %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31)
- %0 = extractelement <1 x i32> %vuqshl1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32)
-
-define i64 @test_vqshld_n_u64(i64 %a) {
-; CHECK: test_vqshld_n_u64
-; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0
- %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63)
- %0 = extractelement <1 x i64> %vuqshl1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32)
-
-define i8 @test_vqshlub_n_s8(i8 %a) {
-; CHECK: test_vqshlub_n_s8
-; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7
-entry:
- %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0
- %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7)
- %0 = extractelement <1 x i8> %vsqshlu1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32)
-
-define i16 @test_vqshluh_n_s16(i16 %a) {
-; CHECK: test_vqshluh_n_s16
-; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15
-entry:
- %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15)
- %0 = extractelement <1 x i16> %vsqshlu1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16>, i32)
-
-define i32 @test_vqshlus_n_s32(i32 %a) {
-; CHECK: test_vqshlus_n_s32
-; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31
-entry:
- %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31)
- %0 = extractelement <1 x i32> %vsqshlu1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32)
-
-define i64 @test_vqshlud_n_s64(i64 %a) {
-; CHECK: test_vqshlud_n_s64
-; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63)
- %0 = extractelement <1 x i64> %vsqshlu1, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32)
-
-define i64 @test_vsrid_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vsrid_n_s64
-; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
- %0 = extractelement <1 x i64> %vsri2, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vsrid_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vsrid_n_u64
-; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
- %0 = extractelement <1 x i64> %vsri2, i32 0
- ret i64 %0
-}
-
-define i64 @test_vslid_n_s64(i64 %a, i64 %b) {
-; CHECK: test_vslid_n_s64
-; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
- %0 = extractelement <1 x i64> %vsli2, i32 0
- ret i64 %0
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32)
-
-define i64 @test_vslid_n_u64(i64 %a, i64 %b) {
-; CHECK: test_vslid_n_u64
-; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
-entry:
- %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
- %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
- %0 = extractelement <1 x i64> %vsli2, i32 0
- ret i64 %0
-}
-
-define i8 @test_vqshrnh_n_s16(i16 %a) {
-; CHECK: test_vqshrnh_n_s16
-; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8)
- %0 = extractelement <1 x i8> %vsqshrn1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqshrns_n_s32(i32 %a) {
-; CHECK: test_vqshrns_n_s32
-; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16)
- %0 = extractelement <1 x i16> %vsqshrn1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqshrnd_n_s64(i64 %a) {
-; CHECK: test_vqshrnd_n_s64
-; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32)
- %0 = extractelement <1 x i32> %vsqshrn1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqshrnh_n_u16(i16 %a) {
-; CHECK: test_vqshrnh_n_u16
-; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
- %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8)
- %0 = extractelement <1 x i8> %vuqshrn1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqshrns_n_u32(i32 %a) {
-; CHECK: test_vqshrns_n_u32
-; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
- %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16)
- %0 = extractelement <1 x i16> %vuqshrn1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqshrnd_n_u64(i64 %a) {
-; CHECK: test_vqshrnd_n_u64
-; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
- %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32)
- %0 = extractelement <1 x i32> %vuqshrn1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrnh_n_s16(i16 %a) {
-; CHECK: test_vqrshrnh_n_s16
-; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8)
- %0 = extractelement <1 x i8> %vsqrshrn1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshrns_n_s32(i32 %a) {
-; CHECK: test_vqrshrns_n_s32
-; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16)
- %0 = extractelement <1 x i16> %vsqrshrn1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrnd_n_s64(i64 %a) {
-; CHECK: test_vqrshrnd_n_s64
-; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32)
- %0 = extractelement <1 x i32> %vsqrshrn1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrnh_n_u16(i16 %a) {
-; CHECK: test_vqrshrnh_n_u16
-; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
- %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8)
- %0 = extractelement <1 x i8> %vuqrshrn1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshrns_n_u32(i32 %a) {
-; CHECK: test_vqrshrns_n_u32
-; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
- %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16)
- %0 = extractelement <1 x i16> %vuqrshrn1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrnd_n_u64(i64 %a) {
-; CHECK: test_vqrshrnd_n_u64
-; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
- %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32)
- %0 = extractelement <1 x i32> %vuqrshrn1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqshrunh_n_s16(i16 %a) {
-; CHECK: test_vqshrunh_n_s16
-; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8)
- %0 = extractelement <1 x i8> %vsqshrun1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqshruns_n_s32(i32 %a) {
-; CHECK: test_vqshruns_n_s32
-; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16)
- %0 = extractelement <1 x i16> %vsqshrun1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqshrund_n_s64(i64 %a) {
-; CHECK: test_vqshrund_n_s64
-; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32)
- %0 = extractelement <1 x i32> %vsqshrun1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)
-
-define i8 @test_vqrshrunh_n_s16(i16 %a) {
-; CHECK: test_vqrshrunh_n_s16
-; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
-entry:
- %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0
- %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8)
- %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
- ret i8 %0
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)
-
-define i16 @test_vqrshruns_n_s32(i32 %a) {
-; CHECK: test_vqrshruns_n_s32
-; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
-entry:
- %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0
- %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16)
- %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
- ret i16 %0
-}
-
-declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)
-
-define i32 @test_vqrshrund_n_s64(i64 %a) {
-; CHECK: test_vqrshrund_n_s64
-; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
-entry:
- %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0
- %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32)
- %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
- ret i32 %0
-}
-
-declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32)
diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll
deleted file mode 100644
index b712ea4..0000000
--- a/test/CodeGen/AArch64/neon-scalar-shift.ll
+++ /dev/null
@@ -1,236 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64:
- %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_ushl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sshl_v1i64_aarch64:
- %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vtst_s64
-; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = and <1 x i64> %a, %b
- %1 = icmp ne <1 x i64> %0, zeroinitializer
- %vtst.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vtst.i
-}
-
-define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vtst_u64
-; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-entry:
- %0 = and <1 x i64> %a, %b
- %1 = icmp ne <1 x i64> %0, zeroinitializer
- %vtst.i = sext <1 x i1> %1 to <1 x i64>
- ret <1 x i64> %vtst.i
-}
-
-define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vsli_n_p64
-; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0
-entry:
- %vsli_n2 = tail call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %a, <1 x i64> %b, i32 0)
- ret <1 x i64> %vsli_n2
-}
-
-declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32)
-
-define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsliq_n_p64
-; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-entry:
- %vsli_n2 = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 0)
- ret <2 x i64> %vsli_n2
-}
-
-declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)
-
-define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vrsqrte_u32
-; CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-entry:
- %vrsqrte1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
- ret <2 x i32> %vrsqrte1.i
-}
-
-define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vrsqrteq_u32
-; CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-entry:
- %vrsqrte1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
- ret <4 x i32> %vrsqrte1.i
-}
-
-define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) {
-; CHECK-LABEL: test_vqshl_n_s8
-; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-entry:
- %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
- ret <8 x i8> %vqshl_n
-}
-
-declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vqshlq_n_s8
-; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
-entry:
- %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
- ret <16 x i8> %vqshl_n
-}
-
-declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) {
-; CHECK-LABEL: test_vqshl_n_s16
-; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
-entry:
- %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer)
- ret <4 x i16> %vqshl_n1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vqshlq_n_s16
-; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
-entry:
- %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer)
- ret <8 x i16> %vqshl_n1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) {
-; CHECK-LABEL: test_vqshl_n_s32
-; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
-entry:
- %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer)
- ret <2 x i32> %vqshl_n1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vqshlq_n_s32
-; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
-entry:
- %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer)
- ret <4 x i32> %vqshl_n1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vqshlq_n_s64
-; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-entry:
- %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer)
- ret <2 x i64> %vqshl_n1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) {
-; CHECK-LABEL: test_vqshl_n_u8
-; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-entry:
- %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
- ret <8 x i8> %vqshl_n
-}
-
-declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
-
-define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vqshlq_n_u8
-; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
-entry:
- %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
- ret <16 x i8> %vqshl_n
-}
-
-declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
-
-define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) {
-; CHECK-LABEL: test_vqshl_n_u16
-; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
-entry:
- %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer)
- ret <4 x i16> %vqshl_n1
-}
-
-declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
-
-define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vqshlq_n_u16
-; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
-entry:
- %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer)
- ret <8 x i16> %vqshl_n1
-}
-
-declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
-
-define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) {
-; CHECK-LABEL: test_vqshl_n_u32
-; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
-entry:
- %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer)
- ret <2 x i32> %vqshl_n1
-}
-
-declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
-
-define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vqshlq_n_u32
-; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
-entry:
- %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer)
- ret <4 x i32> %vqshl_n1
-}
-
-declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
-
-define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vqshlq_n_u64
-; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-entry:
- %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer)
- ret <2 x i64> %vqshl_n1
-}
-
-declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
-
-declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>)
-
-declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>)
diff --git a/test/CodeGen/AArch64/neon-select_cc.ll b/test/CodeGen/AArch64/neon-select_cc.ll
deleted file mode 100644
index f6b5d3c..0000000
--- a/test/CodeGen/AArch64/neon-select_cc.ll
+++ /dev/null
@@ -1,202 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_i8:
-; CHECK: and w0, w0, #0xff
-; CHECK-NEXT: cmp w0, w1, uxtb
-; CHECK-NEXT: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
- %cmp31 = icmp eq i8 %a, %b
- %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
- ret <8 x i8> %e
-}
-
-define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_f32:
-; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
- %cmp31 = fcmp oeq float %a, %b
- %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
- ret <8 x i8> %e
-}
-
-define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v8i8_f64:
-; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
- %cmp31 = fcmp oeq double %a, %b
- %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
- ret <8 x i8> %e
-}
-
-define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_i8:
-; CHECK: and w0, w0, #0xff
-; CHECK-NEXT: cmp w0, w1, uxtb
-; CHECK-NEXT: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
- %cmp31 = icmp eq i8 %a, %b
- %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
- ret <16 x i8> %e
-}
-
-define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_f32:
-; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
- %cmp31 = fcmp oeq float %a, %b
- %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
- ret <16 x i8> %e
-}
-
-define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d ) {
-; CHECK-LABEL: test_select_cc_v16i8_f64:
-; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
- %cmp31 = fcmp oeq double %a, %b
- %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
- ret <16 x i8> %e
-}
-
-define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d ) {
-; CHECK-LABEL: test_select_cc_v4i16:
-; CHECK: and w0, w0, #0xffff
-; CHECK-NEXT: cmp w0, w1, uxth
-; CHECK-NEXT: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
- %cmp31 = icmp eq i16 %a, %b
- %e = select i1 %cmp31, <4 x i16> %c, <4 x i16> %d
- ret <4 x i16> %e
-}
-
-define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d ) {
-; CHECK-LABEL: test_select_cc_v8i16:
-; CHECK: and w0, w0, #0xffff
-; CHECK-NEXT: cmp w0, w1, uxth
-; CHECK-NEXT: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
- %cmp31 = icmp eq i16 %a, %b
- %e = select i1 %cmp31, <8 x i16> %c, <8 x i16> %d
- ret <8 x i16> %e
-}
-
-define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d ) {
-; CHECK-LABEL: test_select_cc_v2i32:
-; CHECK: cmp w0, w1, uxtw
-; CHECK-NEXT: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
- %cmp31 = icmp eq i32 %a, %b
- %e = select i1 %cmp31, <2 x i32> %c, <2 x i32> %d
- ret <2 x i32> %e
-}
-
-define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d ) {
-; CHECK-LABEL: test_select_cc_v4i32:
-; CHECK: cmp w0, w1, uxtw
-; CHECK-NEXT: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
- %cmp31 = icmp eq i32 %a, %b
- %e = select i1 %cmp31, <4 x i32> %c, <4 x i32> %d
- ret <4 x i32> %e
-}
-
-define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d ) {
-; CHECK-LABEL: test_select_cc_v1i64:
-; CHECK: cmp x0, x1
-; CHECK-NEXT: csinv x0, xzr, xzr, ne
-; CHECK-NEXT: fmov d{{[0-9]+}}, x0
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
- %cmp31 = icmp eq i64 %a, %b
- %e = select i1 %cmp31, <1 x i64> %c, <1 x i64> %d
- ret <1 x i64> %e
-}
-
-define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d ) {
-; CHECK-LABEL: test_select_cc_v2i64:
-; CHECK: cmp x0, x1
-; CHECK-NEXT: csinv x0, xzr, xzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
- %cmp31 = icmp eq i64 %a, %b
- %e = select i1 %cmp31, <2 x i64> %c, <2 x i64> %d
- ret <2 x i64> %e
-}
-
-define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
- %cmp31 = fcmp oeq float %a, %b
- %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
- ret <1 x float> %e
-}
-
-define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v2f32:
-; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
- %cmp31 = fcmp oeq float %a, %b
- %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
- ret <2 x float> %e
-}
-
-define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v4f32:
-; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
- %cmp31 = fcmp oeq float %a, %b
- %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
- ret <4 x float> %e
-}
-
-define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d ) {
-; CHECK-LABEL: test_select_cc_v4f32_icmp:
-; CHECK: cmp w0, w1, uxtw
-; CHECK: csinv w0, wzr, wzr, ne
-; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b
- %cmp31 = icmp eq i32 %a, %b
- %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
- ret <4 x float> %e
-}
-
-define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v1f64:
-; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b
- %cmp31 = fcmp oeq double %a, %b
- %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
- ret <1 x double> %e
-}
-
-define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v1f64_icmp:
-; CHECK: cmp x0, x1
-; CHECK-NEXT: csinv x0, xzr, xzr, ne
-; CHECK-NEXT: fmov d{{[0-9]+}}, x0
-; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b
- %cmp31 = icmp eq i64 %a, %b
- %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
- ret <1 x double> %e
-}
-
-define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
-; CHECK-LABEL: test_select_cc_v2f64:
-; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d
-; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
-; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b
- %cmp31 = fcmp oeq double %a, %b
- %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
- ret <2 x double> %e
-}
diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll
deleted file mode 100644
index 33b04ce..0000000
--- a/test/CodeGen/AArch64/neon-shift.ll
+++ /dev/null
@@ -1,171 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
-declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>)
-
-define <8 x i8> @test_ushl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_ushl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: ushl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-define <8 x i8> @test_sshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK: test_sshl_v8i8:
- %tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
-; CHECK: sshl v0.8b, v0.8b, v1.8b
- ret <8 x i8> %tmp1
-}
-
-declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>)
-declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>)
-
-define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_ushl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: ushl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
-; CHECK: test_sshl_v16i8:
- %tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
-; CHECK: sshl v0.16b, v0.16b, v1.16b
- ret <16 x i8> %tmp1
-}
-
-declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>)
-declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>)
-
-define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_ushl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: ushl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: test_sshl_v4i16:
- %tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
-; CHECK: sshl v0.4h, v0.4h, v1.4h
- ret <4 x i16> %tmp1
-}
-
-declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>)
-
-define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_ushl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: ushl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK: test_sshl_v8i16:
- %tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
-; CHECK: sshl v0.8h, v0.8h, v1.8h
- ret <8 x i16> %tmp1
-}
-
-declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>)
-
-define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_ushl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: ushl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
-; CHECK: test_sshl_v2i32:
- %tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
-; CHECK: sshl v0.2s, v0.2s, v1.2s
- ret <2 x i32> %tmp1
-}
-
-declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_ushl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: ushl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK: test_sshl_v4i32:
- %tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
-; CHECK: sshl v0.4s, v0.4s, v1.4s
- ret <4 x i32> %tmp1
-}
-
-declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
-declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_ushl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: ushl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; CHECK: test_sshl_v2i64:
- %tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
-; CHECK: sshl v0.2d, v0.2d, v1.2d
- ret <2 x i64> %tmp1
-}
-
-
-define <8 x i8> @test_shl_v8i8(<8 x i8> %a) {
-; CHECK: test_shl_v8i8:
-; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %tmp = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- ret <8 x i8> %tmp
-}
-
-define <4 x i16> @test_shl_v4i16(<4 x i16> %a) {
-; CHECK: test_shl_v4i16:
-; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %tmp = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
- ret <4 x i16> %tmp
-}
-
-define <2 x i32> @test_shl_v2i32(<2 x i32> %a) {
-; CHECK: test_shl_v2i32:
-; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %tmp = shl <2 x i32> %a, <i32 3, i32 3>
- ret <2 x i32> %tmp
-}
-
-define <16 x i8> @test_shl_v16i8(<16 x i8> %a) {
-; CHECK: test_shl_v16i8:
-; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %tmp = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- ret <16 x i8> %tmp
-}
-
-define <8 x i16> @test_shl_v8i16(<8 x i16> %a) {
-; CHECK: test_shl_v8i16:
-; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %tmp = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- ret <8 x i16> %tmp
-}
-
-define <4 x i32> @test_shl_v4i32(<4 x i32> %a) {
-; CHECK: test_shl_v4i32:
-; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %tmp = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
- ret <4 x i32> %tmp
-}
-
-define <2 x i64> @test_shl_v2i64(<2 x i64> %a) {
-; CHECK: test_shl_v2i64:
-; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63
- %tmp = shl <2 x i64> %a, <i64 63, i64 63>
- ret <2 x i64> %tmp
-}
-
diff --git a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
deleted file mode 100644
index 0b520d7..0000000
--- a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
+++ /dev/null
@@ -1,333 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: shl.v8i8:
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %c = shl <8 x i8> %a, %b
- ret <8 x i8> %c
-}
-
-define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: shl.v4i16:
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %c = shl <4 x i16> %a, %b
- ret <4 x i16> %c
-}
-
-define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: shl.v2i32:
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %c = shl <2 x i32> %a, %b
- ret <2 x i32> %c
-}
-
-define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: shl.v1i64:
-; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %c = shl <1 x i64> %a, %b
- ret <1 x i64> %c
-}
-
-define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: shl.v16i8:
-; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %c = shl <16 x i8> %a, %b
- ret <16 x i8> %c
-}
-
-define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: shl.v8i16:
-; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %c = shl <8 x i16> %a, %b
- ret <8 x i16> %c
-}
-
-define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: shl.v4i32:
-; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %c = shl <4 x i32> %a, %b
- ret <4 x i32> %c
-}
-
-define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: shl.v2i64:
-; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %c = shl <2 x i64> %a, %b
- ret <2 x i64> %c
-}
-
-define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: lshr.v8i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %c = lshr <8 x i8> %a, %b
- ret <8 x i8> %c
-}
-
-define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: lshr.v4i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %c = lshr <4 x i16> %a, %b
- ret <4 x i16> %c
-}
-
-define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: lshr.v2i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %c = lshr <2 x i32> %a, %b
- ret <2 x i32> %c
-}
-
-define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: lshr.v1i64:
-; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %c = lshr <1 x i64> %a, %b
- ret <1 x i64> %c
-}
-
-define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: lshr.v16i8:
-; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %c = lshr <16 x i8> %a, %b
- ret <16 x i8> %c
-}
-
-define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: lshr.v8i16:
-; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %c = lshr <8 x i16> %a, %b
- ret <8 x i16> %c
-}
-
-define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: lshr.v4i32:
-; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %c = lshr <4 x i32> %a, %b
- ret <4 x i32> %c
-}
-
-define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: lshr.v2i64:
-; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %c = lshr <2 x i64> %a, %b
- ret <2 x i64> %c
-}
-
-define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: ashr.v8i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %c = ashr <8 x i8> %a, %b
- ret <8 x i8> %c
-}
-
-define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: ashr.v4i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %c = ashr <4 x i16> %a, %b
- ret <4 x i16> %c
-}
-
-define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK-LABEL: ashr.v2i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %c = ashr <2 x i32> %a, %b
- ret <2 x i32> %c
-}
-
-define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK-LABEL: ashr.v1i64:
-; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %c = ashr <1 x i64> %a, %b
- ret <1 x i64> %c
-}
-
-define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: ashr.v16i8:
-; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
-; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
- %c = ashr <16 x i8> %a, %b
- ret <16 x i8> %c
-}
-
-define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: ashr.v8i16:
-; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
-; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
- %c = ashr <8 x i16> %a, %b
- ret <8 x i16> %c
-}
-
-define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: ashr.v4i32:
-; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
- %c = ashr <4 x i32> %a, %b
- ret <4 x i32> %c
-}
-
-define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: ashr.v2i64:
-; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
- %c = ashr <2 x i64> %a, %b
- ret <2 x i64> %c
-}
-
-define <1 x i64> @shl.v1i64.0(<1 x i64> %a) {
-; CHECK-LABEL: shl.v1i64.0:
-; CHECK-NOT: shl d{{[0-9]+}}, d{{[0-9]+}}, #0
- %c = shl <1 x i64> %a, zeroinitializer
- ret <1 x i64> %c
-}
-
-define <2 x i32> @shl.v2i32.0(<2 x i32> %a) {
-; CHECK-LABEL: shl.v2i32.0:
-; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
- %c = shl <2 x i32> %a, zeroinitializer
- ret <2 x i32> %c
-}
-
-; The following test cases test shl/ashr/lshr with v1i8/v1i16/v1i32 types
-
-define <1 x i8> @shl.v1i8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: shl.v1i8:
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %c = shl <1 x i8> %a, %b
- ret <1 x i8> %c
-}
-
-define <1 x i16> @shl.v1i16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: shl.v1i16:
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %c = shl <1 x i16> %a, %b
- ret <1 x i16> %c
-}
-
-define <1 x i32> @shl.v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: shl.v1i32:
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %c = shl <1 x i32> %a, %b
- ret <1 x i32> %c
-}
-
-define <1 x i8> @ashr.v1i8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: ashr.v1i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %c = ashr <1 x i8> %a, %b
- ret <1 x i8> %c
-}
-
-define <1 x i16> @ashr.v1i16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: ashr.v1i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %c = ashr <1 x i16> %a, %b
- ret <1 x i16> %c
-}
-
-define <1 x i32> @ashr.v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: ashr.v1i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %c = ashr <1 x i32> %a, %b
- ret <1 x i32> %c
-}
-
-define <1 x i8> @lshr.v1i8(<1 x i8> %a, <1 x i8> %b) {
-; CHECK-LABEL: lshr.v1i8:
-; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
-; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %c = lshr <1 x i8> %a, %b
- ret <1 x i8> %c
-}
-
-define <1 x i16> @lshr.v1i16(<1 x i16> %a, <1 x i16> %b) {
-; CHECK-LABEL: lshr.v1i16:
-; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
-; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
- %c = lshr <1 x i16> %a, %b
- ret <1 x i16> %c
-}
-
-define <1 x i32> @lshr.v1i32(<1 x i32> %a, <1 x i32> %b) {
-; CHECK-LABEL: lshr.v1i32:
-; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
- %c = lshr <1 x i32> %a, %b
- ret <1 x i32> %c
-}
-
-define <1 x i8> @shl.v1i8.imm(<1 x i8> %a) {
-; CHECK-LABEL: shl.v1i8.imm:
-; CHECK: shl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
- %c = shl <1 x i8> %a, <i8 3>
- ret <1 x i8> %c
-}
-
-define <1 x i16> @shl.v1i16.imm(<1 x i16> %a) {
-; CHECK-LABEL: shl.v1i16.imm:
-; CHECK: shl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #5
- %c = shl <1 x i16> %a, <i16 5>
- ret <1 x i16> %c
-}
-
-define <1 x i32> @shl.v1i32.imm(<1 x i32> %a) {
-; CHECK-LABEL: shl.v1i32.imm:
-; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
- %c = shl <1 x i32> %a, zeroinitializer
- ret <1 x i32> %c
-}
-
-define <1 x i8> @ashr.v1i8.imm(<1 x i8> %a) {
-; CHECK-LABEL: ashr.v1i8.imm:
-; CHECK: sshr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
- %c = ashr <1 x i8> %a, <i8 3>
- ret <1 x i8> %c
-}
-
-define <1 x i16> @ashr.v1i16.imm(<1 x i16> %a) {
-; CHECK-LABEL: ashr.v1i16.imm:
-; CHECK: sshr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10
- %c = ashr <1 x i16> %a, <i16 10>
- ret <1 x i16> %c
-}
-
-define <1 x i32> @ashr.v1i32.imm(<1 x i32> %a) {
-; CHECK-LABEL: ashr.v1i32.imm:
-; CHECK: sshr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31
- %c = ashr <1 x i32> %a, <i32 31>
- ret <1 x i32> %c
-}
-
-define <1 x i8> @lshr.v1i8.imm(<1 x i8> %a) {
-; CHECK-LABEL: lshr.v1i8.imm:
-; CHECK: ushr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3
- %c = lshr <1 x i8> %a, <i8 3>
- ret <1 x i8> %c
-}
-
-define <1 x i16> @lshr.v1i16.imm(<1 x i16> %a) {
-; CHECK-LABEL: lshr.v1i16.imm:
-; CHECK: ushr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10
- %c = lshr <1 x i16> %a, <i16 10>
- ret <1 x i16> %c
-}
-
-define <1 x i32> @lshr.v1i32.imm(<1 x i32> %a) {
-; CHECK-LABEL: lshr.v1i32.imm:
-; CHECK: ushr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31
- %c = lshr <1 x i32> %a, <i32 31>
- ret <1 x i32> %c
-}
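
A pattern worth noting in the shift tests deleted above: AArch64 NEON provides register-controlled shifts only in the left direction (sshl/ushl, taking a signed, possibly negative shift amount), so the CHECK lines expect llc to lower a variable ashr or lshr as a negation of the shift amount followed by sshl or ushl, while constant shifts select the immediate forms sshr/ushr/shl directly. A minimal illustrative sketch of the variable case (not part of this commit; the function name is invented):

define <2 x i32> @sketch_lshr_v2i32(<2 x i32> %a, <2 x i32> %b) {
; expected lowering, per the CHECK patterns above:
;   neg  v1.2s, v1.2s
;   ushl v0.2s, v0.2s, v1.2s
  %c = lshr <2 x i32> %a, %b
  ret <2 x i32> %c
}
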
diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
deleted file mode 100644
index d5557c0..0000000
--- a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
+++ /dev/null
@@ -1,2314 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v16i8:
-; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
- %tmp = load <16 x i8>* %ptr
- store <16 x i8> %tmp, <16 x i8>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v8i16:
-; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
- %tmp = load <8 x i16>* %ptr
- store <8 x i16> %tmp, <8 x i16>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v4i32:
-; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %tmp = load <4 x i32>* %ptr
- store <4 x i32> %tmp, <4 x i32>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v2i64:
-; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %tmp = load <2 x i64>* %ptr
- store <2 x i64> %tmp, <2 x i64>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v8i8:
-; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
- %tmp = load <8 x i8>* %ptr
- store <8 x i8> %tmp, <8 x i8>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v4i16:
-; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
- %tmp = load <4 x i16>* %ptr
- store <4 x i16> %tmp, <4 x i16>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v2i32:
-; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %tmp = load <2 x i32>* %ptr
- store <2 x i32> %tmp, <2 x i32>* %ptr2
- ret void
-}
-
-define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
-; CHECK-LABEL: test_ldst1_v1i64:
-; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %tmp = load <1 x i64>* %ptr
- store <1 x i64> %tmp, <1 x i64>* %ptr2
- ret void
-}
-
-%struct.int8x16x2_t = type { [2 x <16 x i8>] }
-%struct.int16x8x2_t = type { [2 x <8 x i16>] }
-%struct.int32x4x2_t = type { [2 x <4 x i32>] }
-%struct.int64x2x2_t = type { [2 x <2 x i64>] }
-%struct.float32x4x2_t = type { [2 x <4 x float>] }
-%struct.float64x2x2_t = type { [2 x <2 x double>] }
-%struct.int8x8x2_t = type { [2 x <8 x i8>] }
-%struct.int16x4x2_t = type { [2 x <4 x i16>] }
-%struct.int32x2x2_t = type { [2 x <2 x i32>] }
-%struct.int64x1x2_t = type { [2 x <1 x i64>] }
-%struct.float32x2x2_t = type { [2 x <2 x float>] }
-%struct.float64x1x2_t = type { [2 x <1 x double>] }
-%struct.int8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int16x8x3_t = type { [3 x <8 x i16>] }
-%struct.int32x4x3_t = type { [3 x <4 x i32>] }
-%struct.int64x2x3_t = type { [3 x <2 x i64>] }
-%struct.float32x4x3_t = type { [3 x <4 x float>] }
-%struct.float64x2x3_t = type { [3 x <2 x double>] }
-%struct.int8x8x3_t = type { [3 x <8 x i8>] }
-%struct.int16x4x3_t = type { [3 x <4 x i16>] }
-%struct.int32x2x3_t = type { [3 x <2 x i32>] }
-%struct.int64x1x3_t = type { [3 x <1 x i64>] }
-%struct.float32x2x3_t = type { [3 x <2 x float>] }
-%struct.float64x1x3_t = type { [3 x <1 x double>] }
-%struct.int8x16x4_t = type { [4 x <16 x i8>] }
-%struct.int16x8x4_t = type { [4 x <8 x i16>] }
-%struct.int32x4x4_t = type { [4 x <4 x i32>] }
-%struct.int64x2x4_t = type { [4 x <2 x i64>] }
-%struct.float32x4x4_t = type { [4 x <4 x float>] }
-%struct.float64x2x4_t = type { [4 x <2 x double>] }
-%struct.int8x8x4_t = type { [4 x <8 x i8>] }
-%struct.int16x4x4_t = type { [4 x <4 x i16>] }
-%struct.int32x2x4_t = type { [4 x <2 x i32>] }
-%struct.int64x1x4_t = type { [4 x <1 x i64>] }
-%struct.float32x2x4_t = type { [4 x <2 x float>] }
-%struct.float64x1x4_t = type { [4 x <1 x double>] }
-
-
-define <16 x i8> @test_vld1q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld1q_s8
-; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
- %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1)
- ret <16 x i8> %vld1
-}
-
-define <8 x i16> @test_vld1q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld1q_s16
-; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2)
- ret <8 x i16> %vld1
-}
-
-define <4 x i32> @test_vld1q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld1q_s32
-; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4)
- ret <4 x i32> %vld1
-}
-
-define <2 x i64> @test_vld1q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld1q_s64
-; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8)
- ret <2 x i64> %vld1
-}
-
-define <4 x float> @test_vld1q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld1q_f32
-; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4)
- ret <4 x float> %vld1
-}
-
-define <2 x double> @test_vld1q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld1q_f64
-; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8)
- ret <2 x double> %vld1
-}
-
-define <8 x i8> @test_vld1_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld1_s8
-; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
- %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
- ret <8 x i8> %vld1
-}
-
-define <4 x i16> @test_vld1_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld1_s16
-; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
- ret <4 x i16> %vld1
-}
-
-define <2 x i32> @test_vld1_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld1_s32
-; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4)
- ret <2 x i32> %vld1
-}
-
-define <1 x i64> @test_vld1_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld1_s64
-; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8)
- ret <1 x i64> %vld1
-}
-
-define <2 x float> @test_vld1_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld1_f32
-; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4)
- ret <2 x float> %vld1
-}
-
-define <1 x double> @test_vld1_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld1_f64
-; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8)
- ret <1 x double> %vld1
-}
-
-define <8 x i8> @test_vld1_p8(i8* readonly %a) {
-; CHECK-LABEL: test_vld1_p8
-; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
- %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
- ret <8 x i8> %vld1
-}
-
-define <4 x i16> @test_vld1_p16(i16* readonly %a) {
-; CHECK-LABEL: test_vld1_p16
-; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
- ret <4 x i16> %vld1
-}
-
-define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld2q_s8
-; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
- %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1)
- %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1
- ret %struct.int8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld2q_s16
-; CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2)
- %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1
- ret %struct.int16x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld2q_s32
-; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4)
- %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1
- ret %struct.int32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld2q_s64
-; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8)
- %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1
- ret %struct.int64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld2q_f32
-; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
- %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1
- ret %struct.float32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld2q_f64
-; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8)
- %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1
- ret %struct.float64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld2_s8
-; CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
- %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1)
- %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1
- ret %struct.int8x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld2_s16
-; CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2)
- %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1
- ret %struct.int16x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld2_s32
-; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4)
- %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1
- ret %struct.int32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld2_s64
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8)
- %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1
- ret %struct.int64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld2_f32
-; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4)
- %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1
- ret %struct.float32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld2_f64
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8)
- %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1
- %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1
- ret %struct.float64x1x2_t %.fca.0.1.insert
-}
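
One detail the two preceding tests pin down: for single-element vectors (<1 x i64>, <1 x double>) there are no lanes to de-interleave, so an interleaving vld2 is expected to degenerate into a plain two-register load, ld1 {v.1d, v.1d}, rather than an ld2. The extractvalue/insertvalue chains merely repack the intrinsic's anonymous struct result into the named first-class-aggregate return type. An illustrative sketch (not from the commit; the function name is invented):

declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)

define { <1 x i64>, <1 x i64> } @sketch_vld2_v1i64(i8* %p) {
; a single-element vector has nothing to de-interleave, so llc can emit:
;   ld1 {v0.1d, v1.1d}, [x0]
  %v = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %p, i32 8)
  ret { <1 x i64>, <1 x i64> } %v
}
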
-
-define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld3q_s8
-; CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
- %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1)
- %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2
- ret %struct.int8x16x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld3q_s16
-; CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2)
- %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2
- ret %struct.int16x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld3q_s32
-; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4)
- %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2
- ret %struct.int32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld3q_s64
-; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8)
- %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2
- ret %struct.int64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld3q_f32
-; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4)
- %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2
- ret %struct.float32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld3q_f64
-; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8)
- %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2
- ret %struct.float64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld3_s8
-; CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
- %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1)
- %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2
- ret %struct.int8x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld3_s16
-; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2)
- %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2
- ret %struct.int16x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld3_s32
-; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4)
- %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2
- ret %struct.int32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld3_s64
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8)
- %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2
- ret %struct.int64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld3_f32
-; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4)
- %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2
- ret %struct.float32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld3_f64
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8)
- %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2
- %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2
- ret %struct.float64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld4q_s8
-; CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
- %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1)
- %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3
- ret %struct.int8x16x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld4q_s16
-; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2)
- %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3
- ret %struct.int16x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld4q_s32
-; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4)
- %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3
- ret %struct.int32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld4q_s64
-; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8)
- %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3
- ret %struct.int64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld4q_f32
-; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
- %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3
- ret %struct.float32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld4q_f64
-; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8)
- %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3
- ret %struct.float64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) {
-; CHECK-LABEL: test_vld4_s8
-; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
- %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1)
- %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3
- ret %struct.int8x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) {
-; CHECK-LABEL: test_vld4_s16
-; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2)
- %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3
- ret %struct.int16x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) {
-; CHECK-LABEL: test_vld4_s32
-; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4)
- %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3
- ret %struct.int32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) {
-; CHECK-LABEL: test_vld4_s64
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8)
- %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3
- ret %struct.int64x1x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) {
-; CHECK-LABEL: test_vld4_f32
-; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4)
- %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3
- ret %struct.float32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) {
-; CHECK-LABEL: test_vld4_f64
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8)
- %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3
- %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3
- ret %struct.float64x1x4_t %.fca.0.3.insert
-}
-
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32)
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32)
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32)
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32)
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32)
-declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32)
-declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32)
-declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
-declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32)
-declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32)
-declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
-
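
The store-side tests that follow mirror the loads: each llvm.arm.neon.vstN intrinsic takes an i8* base address, N data vectors, and a trailing alignment argument, and the [N x <...>] %b.coerce parameters are the front end's coercion of the NEON struct types, unpacked with extractvalue before the call. An illustrative sketch of that shape (not from the commit; the function name is invented):

declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)

define void @sketch_vst2_v2i32(i8* %p, [2 x <2 x i32>] %b) {
; unpack the coerced aggregate, then store interleaved: st2 {v0.2s, v1.2s}, [x0]
  %e0 = extractvalue [2 x <2 x i32>] %b, 0
  %e1 = extractvalue [2 x <2 x i32>] %b, 1
  tail call void @llvm.arm.neon.vst2.v2i32(i8* %p, <2 x i32> %e0, <2 x i32> %e1, i32 4)
  ret void
}
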
-define void @test_vst1q_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vst1q_s8
-; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1)
- ret void
-}
-
-define void @test_vst1q_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vst1q_s16
-; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2)
- ret void
-}
-
-define void @test_vst1q_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vst1q_s32
-; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4)
- ret void
-}
-
-define void @test_vst1q_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vst1q_s64
-; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8)
- ret void
-}
-
-define void @test_vst1q_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vst1q_f32
-; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4)
- ret void
-}
-
-define void @test_vst1q_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vst1q_f64
-; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8)
- ret void
-}
-
-define void @test_vst1_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vst1_s8
-; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1)
- ret void
-}
-
-define void @test_vst1_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vst1_s16
-; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2)
- ret void
-}
-
-define void @test_vst1_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vst1_s32
-; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4)
- ret void
-}
-
-define void @test_vst1_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vst1_s64
-; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8)
- ret void
-}
-
-define void @test_vst1_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vst1_f32
-; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4)
- ret void
-}
-
-define void @test_vst1_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vst1_f64
-; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8)
- ret void
-}
-
-define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s8
-; CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
- tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1)
- ret void
-}
-
-define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s16
-; CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2)
- ret void
-}
-
-define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s32
-; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4)
- ret void
-}
-
-define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_s64
-; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8)
- ret void
-}
-
-define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_f32
-; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4)
- ret void
-}
-
-define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_f64
-; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8)
- ret void
-}
-
-define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s8
-; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
- tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1)
- ret void
-}
-
-define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s16
-; CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2)
- ret void
-}
-
-define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s32
-; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4)
- ret void
-}
-
-define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2_s64
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8)
- ret void
-}
-
-define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2_f32
-; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4)
- ret void
-}
-
-define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2_f64
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8)
- ret void
-}
-
-define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s8
-; CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
- tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1)
- ret void
-}
-
-define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s16
-; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2)
- ret void
-}
-
-define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s32
-; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4)
- ret void
-}
-
-define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_s64
-; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8)
- ret void
-}
-
-define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_f32
-; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4)
- ret void
-}
-
-define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_f64
-; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8)
- ret void
-}
-
-define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s8
-; CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1)
- ret void
-}
-
-define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s16
-; CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2)
- ret void
-}
-
-define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s32
-; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4)
- ret void
-}
-
-define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3_s64
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8)
- ret void
-}
-
-define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3_f32
-; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4)
- ret void
-}
-
-define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3_f64
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8)
- ret void
-}
-
-define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s8
-; CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
- tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1)
- ret void
-}
-
-define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s16
-; CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2)
- ret void
-}
-
-define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s32
-; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4)
- ret void
-}
-
-define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_s64
-; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8)
- ret void
-}
-
-define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_f32
-; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4)
- ret void
-}
-
-define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_f64
-; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8)
- ret void
-}
-
-define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s8
-; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
- tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1)
- ret void
-}
-
-define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s16
-; CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
- %1 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2)
- ret void
-}
-
-define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s32
-; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
- %1 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4)
- ret void
-}
-
-define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4_s64
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
- %1 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8)
- ret void
-}
-
-define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4_f32
-; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
- %1 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4)
- ret void
-}
-
-define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4_f64
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
- %1 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8)
- ret void
-}
-
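-; Declarations for the llvm.arm.neon.vst1-vst4 intrinsics used by the store tests above.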
-declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32)
-declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32)
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
-declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32)
-declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32)
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32)
-declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32)
-declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
-declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
-
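-; vld1x2 tests: each llvm.aarch64.neon.vld1x2 call should lower to a single two-register ld1.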
-define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) {
-; CHECK-LABEL: test_vld1q_s8_x2
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %1 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
- %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
- %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
- %4 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %2, 0, 0
- %5 = insertvalue %struct.int8x16x2_t %4, <16 x i8> %3, 0, 1
- ret %struct.int8x16x2_t %5
-}
-
-define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) {
-; CHECK-LABEL: test_vld1q_s16_x2
-; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2)
- %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0
- %4 = extractvalue { <8 x i16>, <8 x i16> } %2, 1
- %5 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %3, 0, 0
- %6 = insertvalue %struct.int16x8x2_t %5, <8 x i16> %4, 0, 1
- ret %struct.int16x8x2_t %6
-}
-
-define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) {
-; CHECK-LABEL: test_vld1q_s32_x2
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %1, i32 4)
- %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0
- %4 = extractvalue { <4 x i32>, <4 x i32> } %2, 1
- %5 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %3, 0, 0
- %6 = insertvalue %struct.int32x4x2_t %5, <4 x i32> %4, 0, 1
- ret %struct.int32x4x2_t %6
-}
-
-define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) {
-; CHECK-LABEL: test_vld1q_s64_x2
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %1, i32 8)
- %3 = extractvalue { <2 x i64>, <2 x i64> } %2, 0
- %4 = extractvalue { <2 x i64>, <2 x i64> } %2, 1
- %5 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %3, 0, 0
- %6 = insertvalue %struct.int64x2x2_t %5, <2 x i64> %4, 0, 1
- ret %struct.int64x2x2_t %6
-}
-
-define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) {
-; CHECK-LABEL: test_vld1q_f32_x2
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %1, i32 4)
- %3 = extractvalue { <4 x float>, <4 x float> } %2, 0
- %4 = extractvalue { <4 x float>, <4 x float> } %2, 1
- %5 = insertvalue %struct.float32x4x2_t undef, <4 x float> %3, 0, 0
- %6 = insertvalue %struct.float32x4x2_t %5, <4 x float> %4, 0, 1
- ret %struct.float32x4x2_t %6
-}
-
-define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) {
-; CHECK-LABEL: test_vld1q_f64_x2
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %1, i32 8)
- %3 = extractvalue { <2 x double>, <2 x double> } %2, 0
- %4 = extractvalue { <2 x double>, <2 x double> } %2, 1
- %5 = insertvalue %struct.float64x2x2_t undef, <2 x double> %3, 0, 0
- %6 = insertvalue %struct.float64x2x2_t %5, <2 x double> %4, 0, 1
- ret %struct.float64x2x2_t %6
-}
-
-define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) {
-; CHECK-LABEL: test_vld1_s8_x2
-; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %1 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1)
- %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
- %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
- %4 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %2, 0, 0
- %5 = insertvalue %struct.int8x8x2_t %4, <8 x i8> %3, 0, 1
- ret %struct.int8x8x2_t %5
-}
-
-define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) {
-; CHECK-LABEL: test_vld1_s16_x2
-; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %1, i32 2)
- %3 = extractvalue { <4 x i16>, <4 x i16> } %2, 0
- %4 = extractvalue { <4 x i16>, <4 x i16> } %2, 1
- %5 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %3, 0, 0
- %6 = insertvalue %struct.int16x4x2_t %5, <4 x i16> %4, 0, 1
- ret %struct.int16x4x2_t %6
-}
-
-define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) {
-; CHECK-LABEL: test_vld1_s32_x2
-; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %1, i32 4)
- %3 = extractvalue { <2 x i32>, <2 x i32> } %2, 0
- %4 = extractvalue { <2 x i32>, <2 x i32> } %2, 1
- %5 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %3, 0, 0
- %6 = insertvalue %struct.int32x2x2_t %5, <2 x i32> %4, 0, 1
- ret %struct.int32x2x2_t %6
-}
-
-define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) {
-; CHECK-LABEL: test_vld1_s64_x2
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %1, i32 8)
- %3 = extractvalue { <1 x i64>, <1 x i64> } %2, 0
- %4 = extractvalue { <1 x i64>, <1 x i64> } %2, 1
- %5 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %3, 0, 0
- %6 = insertvalue %struct.int64x1x2_t %5, <1 x i64> %4, 0, 1
- ret %struct.int64x1x2_t %6
-}
-
-define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) {
-; CHECK-LABEL: test_vld1_f32_x2
-; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %1, i32 4)
- %3 = extractvalue { <2 x float>, <2 x float> } %2, 0
- %4 = extractvalue { <2 x float>, <2 x float> } %2, 1
- %5 = insertvalue %struct.float32x2x2_t undef, <2 x float> %3, 0, 0
- %6 = insertvalue %struct.float32x2x2_t %5, <2 x float> %4, 0, 1
- ret %struct.float32x2x2_t %6
-}
-
-define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) {
-; CHECK-LABEL: test_vld1_f64_x2
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %1, i32 8)
- %3 = extractvalue { <1 x double>, <1 x double> } %2, 0
- %4 = extractvalue { <1 x double>, <1 x double> } %2, 1
- %5 = insertvalue %struct.float64x1x2_t undef, <1 x double> %3, 0, 0
- %6 = insertvalue %struct.float64x1x2_t %5, <1 x double> %4, 0, 1
- ret %struct.float64x1x2_t %6
-}
-
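-; vld1x3 tests: each llvm.aarch64.neon.vld1x3 call should lower to a single three-register ld1.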
-define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) {
-; CHECK-LABEL: test_vld1q_s8_x3
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1)
- %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
- %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
- %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
- %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0
- %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1
- %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2
- ret %struct.int8x16x3_t %7
-}
-
-define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) {
-; CHECK-LABEL: test_vld1q_s16_x3
-; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
- %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
- %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
- %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
- %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0
- %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1
- %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2
- ret %struct.int16x8x3_t %8
-}
-
-define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) {
-; CHECK-LABEL: test_vld1q_s32_x3
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4)
- %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
- %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
- %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
- %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0
- %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1
- %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2
- ret %struct.int32x4x3_t %8
-}
-
-define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) {
-; CHECK-LABEL: test_vld1q_s64_x3
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
- %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
- %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
- %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
- %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0
- %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1
- %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2
- ret %struct.int64x2x3_t %8
-}
-
-define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) {
-; CHECK-LABEL: test_vld1q_f32_x3
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4)
- %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0
- %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1
- %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2
- %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0
- %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1
- %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2
- ret %struct.float32x4x3_t %8
-}
-
-define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) {
-; CHECK-LABEL: test_vld1q_f64_x3
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8)
- %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0
- %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1
- %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2
- %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0
- %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1
- %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2
- ret %struct.float64x2x3_t %8
-}
-
-define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) {
-; CHECK-LABEL: test_vld1_s8_x3
-; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1)
- %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
- %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
- %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
- %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0
- %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1
- %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2
- ret %struct.int8x8x3_t %7
-}
-
-define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) {
-; CHECK-LABEL: test_vld1_s16_x3
-; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2)
- %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
- %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
- %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
- %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0
- %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1
- %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2
- ret %struct.int16x4x3_t %8
-}
-
-define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) {
-; CHECK-LABEL: test_vld1_s32_x3
-; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4)
- %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
- %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
- %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
- %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0
- %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1
- %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2
- ret %struct.int32x2x3_t %8
-}
-
-define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) {
-; CHECK-LABEL: test_vld1_s64_x3
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8)
- %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
- %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
- %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
- %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0
- %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1
- %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2
- ret %struct.int64x1x3_t %8
-}
-
-define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) {
-; CHECK-LABEL: test_vld1_f32_x3
-; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4)
- %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0
- %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1
- %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2
- %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0
- %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1
- %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2
- ret %struct.float32x2x3_t %8
-}
-
-define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) {
-; CHECK-LABEL: test_vld1_f64_x3
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8)
- %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0
- %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1
- %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2
- %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0
- %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1
- %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2
- ret %struct.float64x1x3_t %8
-}
-
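-; vld1x4 tests: each llvm.aarch64.neon.vld1x4 call should lower to a single four-register ld1.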
-define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) {
-; CHECK-LABEL: test_vld1q_s8_x4
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1)
- %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
- %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
- %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
- %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
- %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0
- %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1
- %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2
- %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3
- ret %struct.int8x16x4_t %9
-}
-
-define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) {
-; CHECK-LABEL: test_vld1q_s16_x4
-; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2)
- %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
- %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
- %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
- %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3
- %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0
- %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1
- %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2
- %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3
- ret %struct.int16x8x4_t %10
-}
-
-define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) {
-; CHECK-LABEL: test_vld1q_s32_x4
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4)
- %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
- %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
- %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
- %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3
- %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0
- %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1
- %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2
- %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3
- ret %struct.int32x4x4_t %10
-}
-
-define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) {
-; CHECK-LABEL: test_vld1q_s64_x4
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8)
- %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
- %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
- %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
- %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3
- %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0
- %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1
- %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2
- %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3
- ret %struct.int64x2x4_t %10
-}
-
-define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) {
-; CHECK-LABEL: test_vld1q_f32_x4
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
- %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
- %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
- %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2
- %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
- %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0
- %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1
- %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2
- %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3
- ret %struct.float32x4x4_t %10
-}
-
-define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) {
-; CHECK-LABEL: test_vld1q_f64_x4
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8)
- %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
- %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
- %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
- %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
- %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0
- %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1
- %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2
- %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3
- ret %struct.float64x2x4_t %10
-}
-
-define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) {
-; CHECK-LABEL: test_vld1_s8_x4
-; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
- %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
- %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
- %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
- %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3
- %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0
- %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1
- %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2
- %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3
- ret %struct.int8x8x4_t %9
-}
-
-define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) {
-; CHECK-LABEL: test_vld1_s16_x4
-; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2)
- %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
- %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
- %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
- %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3
- %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0
- %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1
- %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2
- %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3
- ret %struct.int16x4x4_t %10
-}
-
-define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) {
-; CHECK-LABEL: test_vld1_s32_x4
-; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4)
- %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
- %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
- %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
- %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
- %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0
- %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1
- %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2
- %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3
- ret %struct.int32x2x4_t %10
-}
-
-define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) {
-; CHECK-LABEL: test_vld1_s64_x4
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8)
- %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
- %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
- %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
- %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3
- %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0
- %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1
- %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2
- %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3
- ret %struct.int64x1x4_t %10
-}
-
-define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) {
-; CHECK-LABEL: test_vld1_f32_x4
-; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = bitcast float* %a to i8*
- %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4)
- %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0
- %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1
- %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2
- %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3
- %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0
- %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1
- %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2
- %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3
- ret %struct.float32x2x4_t %10
-}
-
-define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) {
-; CHECK-LABEL: test_vld1_f64_x4
-; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = bitcast double* %a to i8*
- %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8)
- %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0
- %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1
- %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2
- %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3
- %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0
- %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1
- %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2
- %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3
- ret %struct.float64x1x4_t %10
-}
-
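-; vst1x2 tests: each llvm.aarch64.neon.vst1x2 call should lower to a single two-register st1.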
-define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b) {
-; CHECK-LABEL: test_vst1q_s8_x2
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <16 x i8>] %b, 0
- %2 = extractvalue [2 x <16 x i8>] %b, 1
- tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1)
- ret void
-}
-
-define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b) {
-; CHECK-LABEL: test_vst1q_s16_x2
-; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <8 x i16>] %b, 0
- %2 = extractvalue [2 x <8 x i16>] %b, 1
- %3 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2)
- ret void
-}
-
-define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b) {
-; CHECK-LABEL: test_vst1q_s32_x2
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <4 x i32>] %b, 0
- %2 = extractvalue [2 x <4 x i32>] %b, 1
- %3 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4)
- ret void
-}
-
-define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b) {
-; CHECK-LABEL: test_vst1q_s64_x2
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <2 x i64>] %b, 0
- %2 = extractvalue [2 x <2 x i64>] %b, 1
- %3 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8)
- ret void
-}
-
-define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b) {
-; CHECK-LABEL: test_vst1q_f32_x2
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <4 x float>] %b, 0
- %2 = extractvalue [2 x <4 x float>] %b, 1
- %3 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4)
- ret void
-}
-
-define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b) {
-; CHECK-LABEL: test_vst1q_f64_x2
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <2 x double>] %b, 0
- %2 = extractvalue [2 x <2 x double>] %b, 1
- %3 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8)
- ret void
-}
-
-define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b) {
-; CHECK-LABEL: test_vst1_s8_x2
-; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <8 x i8>] %b, 0
- %2 = extractvalue [2 x <8 x i8>] %b, 1
- tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1)
- ret void
-}
-
-define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b) {
-; CHECK-LABEL: test_vst1_s16_x2
-; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <4 x i16>] %b, 0
- %2 = extractvalue [2 x <4 x i16>] %b, 1
- %3 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2)
- ret void
-}
-
-define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b) {
-; CHECK-LABEL: test_vst1_s32_x2
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <2 x i32>] %b, 0
- %2 = extractvalue [2 x <2 x i32>] %b, 1
- %3 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4)
- ret void
-}
-
-define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b) {
-; CHECK-LABEL: test_vst1_s64_x2
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <1 x i64>] %b, 0
- %2 = extractvalue [2 x <1 x i64>] %b, 1
- %3 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8)
- ret void
-}
-
-define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b) {
-; CHECK-LABEL: test_vst1_f32_x2
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <2 x float>] %b, 0
- %2 = extractvalue [2 x <2 x float>] %b, 1
- %3 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4)
- ret void
-}
-
-define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b) {
-; CHECK-LABEL: test_vst1_f64_x2
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [2 x <1 x double>] %b, 0
- %2 = extractvalue [2 x <1 x double>] %b, 1
- %3 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8)
- ret void
-}
-
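-; vst1x3 tests: each llvm.aarch64.neon.vst1x3 call should lower to a single three-register st1.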
-define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b) {
-; CHECK-LABEL: test_vst1q_s8_x3
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <16 x i8>] %b, 0
- %2 = extractvalue [3 x <16 x i8>] %b, 1
- %3 = extractvalue [3 x <16 x i8>] %b, 2
- tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1)
- ret void
-}
-
-define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b) {
-; CHECK-LABEL: test_vst1q_s16_x3
-; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <8 x i16>] %b, 0
- %2 = extractvalue [3 x <8 x i16>] %b, 1
- %3 = extractvalue [3 x <8 x i16>] %b, 2
- %4 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2)
- ret void
-}
-
-define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b) {
-; CHECK-LABEL: test_vst1q_s32_x3
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <4 x i32>] %b, 0
- %2 = extractvalue [3 x <4 x i32>] %b, 1
- %3 = extractvalue [3 x <4 x i32>] %b, 2
- %4 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4)
- ret void
-}
-
-define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b) {
-; CHECK-LABEL: test_vst1q_s64_x3
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <2 x i64>] %b, 0
- %2 = extractvalue [3 x <2 x i64>] %b, 1
- %3 = extractvalue [3 x <2 x i64>] %b, 2
- %4 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8)
- ret void
-}
-
-define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b) {
-; CHECK-LABEL: test_vst1q_f32_x3
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <4 x float>] %b, 0
- %2 = extractvalue [3 x <4 x float>] %b, 1
- %3 = extractvalue [3 x <4 x float>] %b, 2
- %4 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4)
- ret void
-}
-
-define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b) {
-; CHECK-LABEL: test_vst1q_f64_x3
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <2 x double>] %b, 0
- %2 = extractvalue [3 x <2 x double>] %b, 1
- %3 = extractvalue [3 x <2 x double>] %b, 2
- %4 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8)
- ret void
-}
-
-define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b) {
-; CHECK-LABEL: test_vst1_s8_x3
-; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <8 x i8>] %b, 0
- %2 = extractvalue [3 x <8 x i8>] %b, 1
- %3 = extractvalue [3 x <8 x i8>] %b, 2
- tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
- ret void
-}
-
-define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b) {
-; CHECK-LABEL: test_vst1_s16_x3
-; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <4 x i16>] %b, 0
- %2 = extractvalue [3 x <4 x i16>] %b, 1
- %3 = extractvalue [3 x <4 x i16>] %b, 2
- %4 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
- ret void
-}
-
-define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b) {
-; CHECK-LABEL: test_vst1_s32_x3
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <2 x i32>] %b, 0
- %2 = extractvalue [3 x <2 x i32>] %b, 1
- %3 = extractvalue [3 x <2 x i32>] %b, 2
- %4 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
- ret void
-}
-
-define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b) {
-; CHECK-LABEL: test_vst1_s64_x3
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <1 x i64>] %b, 0
- %2 = extractvalue [3 x <1 x i64>] %b, 1
- %3 = extractvalue [3 x <1 x i64>] %b, 2
- %4 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
- ret void
-}
-
-define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b) {
-; CHECK-LABEL: test_vst1_f32_x3
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <2 x float>] %b, 0
- %2 = extractvalue [3 x <2 x float>] %b, 1
- %3 = extractvalue [3 x <2 x float>] %b, 2
- %4 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
- ret void
-}
-
-define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b) {
-; CHECK-LABEL: test_vst1_f64_x3
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [3 x <1 x double>] %b, 0
- %2 = extractvalue [3 x <1 x double>] %b, 1
- %3 = extractvalue [3 x <1 x double>] %b, 2
- %4 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8)
- ret void
-}
-
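-; vst1x4 tests: each llvm.aarch64.neon.vst1x4 call should lower to a single four-register st1.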
-define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b) {
-; CHECK-LABEL: test_vst1q_s8_x4
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <16 x i8>] %b, 0
- %2 = extractvalue [4 x <16 x i8>] %b, 1
- %3 = extractvalue [4 x <16 x i8>] %b, 2
- %4 = extractvalue [4 x <16 x i8>] %b, 3
- tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1)
- ret void
-}
-
-define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b) {
-; CHECK-LABEL: test_vst1q_s16_x4
-; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <8 x i16>] %b, 0
- %2 = extractvalue [4 x <8 x i16>] %b, 1
- %3 = extractvalue [4 x <8 x i16>] %b, 2
- %4 = extractvalue [4 x <8 x i16>] %b, 3
- %5 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2)
- ret void
-}
-
-define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b) {
-; CHECK-LABEL: test_vst1q_s32_x4
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <4 x i32>] %b, 0
- %2 = extractvalue [4 x <4 x i32>] %b, 1
- %3 = extractvalue [4 x <4 x i32>] %b, 2
- %4 = extractvalue [4 x <4 x i32>] %b, 3
- %5 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4)
- ret void
-}
-
-define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b) {
-; CHECK-LABEL: test_vst1q_s64_x4
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <2 x i64>] %b, 0
- %2 = extractvalue [4 x <2 x i64>] %b, 1
- %3 = extractvalue [4 x <2 x i64>] %b, 2
- %4 = extractvalue [4 x <2 x i64>] %b, 3
- %5 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8)
- ret void
-}
-
-define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b) {
-; CHECK-LABEL: test_vst1q_f32_x4
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <4 x float>] %b, 0
- %2 = extractvalue [4 x <4 x float>] %b, 1
- %3 = extractvalue [4 x <4 x float>] %b, 2
- %4 = extractvalue [4 x <4 x float>] %b, 3
- %5 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
- ret void
-}
-
-define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b) {
-; CHECK-LABEL: test_vst1q_f64_x4
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <2 x double>] %b, 0
- %2 = extractvalue [4 x <2 x double>] %b, 1
- %3 = extractvalue [4 x <2 x double>] %b, 2
- %4 = extractvalue [4 x <2 x double>] %b, 3
- %5 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
- ret void
-}
-
-define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b) {
-; CHECK-LABEL: test_vst1_s8_x4
-; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <8 x i8>] %b, 0
- %2 = extractvalue [4 x <8 x i8>] %b, 1
- %3 = extractvalue [4 x <8 x i8>] %b, 2
- %4 = extractvalue [4 x <8 x i8>] %b, 3
- tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1)
- ret void
-}
-
-define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b) {
-; CHECK-LABEL: test_vst1_s16_x4
-; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <4 x i16>] %b, 0
- %2 = extractvalue [4 x <4 x i16>] %b, 1
- %3 = extractvalue [4 x <4 x i16>] %b, 2
- %4 = extractvalue [4 x <4 x i16>] %b, 3
- %5 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2)
- ret void
-}
-
-define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b) {
-; CHECK-LABEL: test_vst1_s32_x4
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <2 x i32>] %b, 0
- %2 = extractvalue [4 x <2 x i32>] %b, 1
- %3 = extractvalue [4 x <2 x i32>] %b, 2
- %4 = extractvalue [4 x <2 x i32>] %b, 3
- %5 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4)
- ret void
-}
-
-define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b) {
-; CHECK-LABEL: test_vst1_s64_x4
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <1 x i64>] %b, 0
- %2 = extractvalue [4 x <1 x i64>] %b, 1
- %3 = extractvalue [4 x <1 x i64>] %b, 2
- %4 = extractvalue [4 x <1 x i64>] %b, 3
- %5 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8)
- ret void
-}
-
-define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b) {
-; CHECK-LABEL: test_vst1_f32_x4
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <2 x float>] %b, 0
- %2 = extractvalue [4 x <2 x float>] %b, 1
- %3 = extractvalue [4 x <2 x float>] %b, 2
- %4 = extractvalue [4 x <2 x float>] %b, 3
- %5 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4)
- ret void
-}
-
-define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b) {
-; CHECK-LABEL: test_vst1_f64_x4
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
- %1 = extractvalue [4 x <1 x double>] %b, 0
- %2 = extractvalue [4 x <1 x double>] %b, 1
- %3 = extractvalue [4 x <1 x double>] %b, 2
- %4 = extractvalue [4 x <1 x double>] %b, 3
- %5 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8)
- ret void
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
-declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll
deleted file mode 100644
index 927c933..0000000
--- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll
+++ /dev/null
@@ -1,2299 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
-%struct.poly8x16x2_t = type { [2 x <16 x i8>] }
-%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int8x16x2_t = type { [2 x <16 x i8>] }
-%struct.int16x8x2_t = type { [2 x <8 x i16>] }
-%struct.int32x4x2_t = type { [2 x <4 x i32>] }
-%struct.int64x2x2_t = type { [2 x <2 x i64>] }
-%struct.float32x4x2_t = type { [2 x <4 x float>] }
-%struct.float64x2x2_t = type { [2 x <2 x double>] }
-%struct.int8x8x2_t = type { [2 x <8 x i8>] }
-%struct.int16x4x2_t = type { [2 x <4 x i16>] }
-%struct.int32x2x2_t = type { [2 x <2 x i32>] }
-%struct.int64x1x2_t = type { [2 x <1 x i64>] }
-%struct.float32x2x2_t = type { [2 x <2 x float>] }
-%struct.float64x1x2_t = type { [2 x <1 x double>] }
-%struct.int8x16x3_t = type { [3 x <16 x i8>] }
-%struct.int16x8x3_t = type { [3 x <8 x i16>] }
-%struct.int32x4x3_t = type { [3 x <4 x i32>] }
-%struct.int64x2x3_t = type { [3 x <2 x i64>] }
-%struct.float32x4x3_t = type { [3 x <4 x float>] }
-%struct.float64x2x3_t = type { [3 x <2 x double>] }
-%struct.int8x8x3_t = type { [3 x <8 x i8>] }
-%struct.int16x4x3_t = type { [3 x <4 x i16>] }
-%struct.int32x2x3_t = type { [3 x <2 x i32>] }
-%struct.int64x1x3_t = type { [3 x <1 x i64>] }
-%struct.float32x2x3_t = type { [3 x <2 x float>] }
-%struct.float64x1x3_t = type { [3 x <1 x double>] }
-%struct.int8x16x4_t = type { [4 x <16 x i8>] }
-%struct.int16x8x4_t = type { [4 x <8 x i16>] }
-%struct.int32x4x4_t = type { [4 x <4 x i32>] }
-%struct.int64x2x4_t = type { [4 x <2 x i64>] }
-%struct.float32x4x4_t = type { [4 x <4 x float>] }
-%struct.float64x2x4_t = type { [4 x <2 x double>] }
-%struct.int8x8x4_t = type { [4 x <8 x i8>] }
-%struct.int16x4x4_t = type { [4 x <4 x i16>] }
-%struct.int32x2x4_t = type { [4 x <2 x i32>] }
-%struct.int64x1x4_t = type { [4 x <1 x i64>] }
-%struct.float32x2x4_t = type { [4 x <2 x float>] }
-%struct.float64x1x4_t = type { [4 x <1 x double>] }
-
-define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) {
-; CHECK-LABEL: test_ld_from_poll_v16i8
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 2, i8 13, i8 14, i8 15, i8 16>
- ret <16 x i8> %b
-}
-
-define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) {
-; CHECK-LABEL: test_ld_from_poll_v8i16
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
- ret <8 x i16> %b
-}
-
-define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4i32
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
- ret <4 x i32> %b
-}
-
-define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i64
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <2 x i64> %a, <i64 1, i64 2>
- ret <2 x i64> %b
-}
-
-define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4f32
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
- ret <4 x float> %b
-}
-
-define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2f64
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = fadd <2 x double> %a, <double 1.0, double 2.0>
- ret <2 x double> %b
-}
-
-define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) {
-; CHECK-LABEL: test_ld_from_poll_v8i8
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
- ret <8 x i8> %b
-}
-
-define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) {
-; CHECK-LABEL: test_ld_from_poll_v4i16
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
- ret <4 x i16> %b
-}
-
-define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i32
-; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
-; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}]
-entry:
- %b = add <2 x i32> %a, <i32 1, i32 2>
- ret <2 x i32> %b
-}
-
-define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld1q_dup_s8
-; CHECK: ld1r {{{v[0-9]+}}.16b}, [x0]
-entry:
- %0 = load i8* %a, align 1
- %1 = insertelement <16 x i8> undef, i8 %0, i32 0
- %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %lane
-}
-
-define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld1q_dup_s16
-; CHECK: ld1r {{{v[0-9]+}}.8h}, [x0]
-entry:
- %0 = load i16* %a, align 2
- %1 = insertelement <8 x i16> undef, i16 %0, i32 0
- %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %lane
-}
-
-define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld1q_dup_s32
-; CHECK: ld1r {{{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = load i32* %a, align 4
- %1 = insertelement <4 x i32> undef, i32 %0, i32 0
- %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %lane
-}
-
-define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld1q_dup_s64
-; CHECK: ld1r {{{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = load i64* %a, align 8
- %1 = insertelement <2 x i64> undef, i64 %0, i32 0
- %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
- ret <2 x i64> %lane
-}
-
-define <4 x float> @test_vld1q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld1q_dup_f32
-; CHECK: ld1r {{{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = load float* %a, align 4
- %1 = insertelement <4 x float> undef, float %0, i32 0
- %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
- ret <4 x float> %lane
-}
-
-define <2 x double> @test_vld1q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld1q_dup_f64
-; CHECK: ld1r {{{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = load double* %a, align 8
- %1 = insertelement <2 x double> undef, double %0, i32 0
- %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
- ret <2 x double> %lane
-}
-
-define <8 x i8> @test_vld1_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld1_dup_s8
-; CHECK: ld1r {{{v[0-9]+}}.8b}, [x0]
-entry:
- %0 = load i8* %a, align 1
- %1 = insertelement <8 x i8> undef, i8 %0, i32 0
- %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %lane
-}
-
-define <4 x i16> @test_vld1_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld1_dup_s16
-; CHECK: ld1r {{{v[0-9]+}}.4h}, [x0]
-entry:
- %0 = load i16* %a, align 2
- %1 = insertelement <4 x i16> undef, i16 %0, i32 0
- %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %lane
-}
-
-define <2 x i32> @test_vld1_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld1_dup_s32
-; CHECK: ld1r {{{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = load i32* %a, align 4
- %1 = insertelement <2 x i32> undef, i32 %0, i32 0
- %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
- ret <2 x i32> %lane
-}
-
-define <1 x i64> @test_vld1_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld1_dup_s64
-; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = load i64* %a, align 8
- %1 = insertelement <1 x i64> undef, i64 %0, i32 0
- ret <1 x i64> %1
-}
-
-define <2 x float> @test_vld1_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld1_dup_f32
-; CHECK: ld1r {{{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = load float* %a, align 4
- %1 = insertelement <2 x float> undef, float %0, i32 0
- %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
- ret <2 x float> %lane
-}
-
-define <1 x double> @test_vld1_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld1_dup_f64
-; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = load double* %a, align 8
- %1 = insertelement <1 x double> undef, double %0, i32 0
- ret <1 x double> %1
-}
-
-define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
-; Because the store also uses %1, the LD1R (load-and-splat) pattern can't be
-; selected; an LDR into a GPR followed by an FMOV into the vector register is
-; emitted instead.
-; CHECK-LABEL: testDUP.v1i64
-; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
-; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}]
- %1 = load i64* %a, align 8
- store i64 %1, i64* %b, align 8
- %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
- ret <1 x i64> %vecinit.i
-}
-
-define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
-; Because the store also uses %1, the LD1R pattern can't be selected; here the
-; scalar load goes straight into a d register, so a plain LDR (no FMOV needed)
-; is emitted instead.
-; CHECK-LABEL: testDUP.v1f64
-; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
-; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
- %1 = load double* %a, align 8
- store double %1, double* %b, align 8
- %vecinit.i = insertelement <1 x double> undef, double %1, i32 0
- ret <1 x double> %vecinit.i
-}
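For contrast with the two testDUP functions above, here is a minimal sketch (hypothetical, not part of the deleted file; it reuses this file's pre-3.5 typed-pointer syntax and assumes the same RUN line) of the IR shape that does select LD1R: a scalar load whose only use is the splat, with no intervening store.

define <2 x i64> @splat_only_use(i64* %a) {
; With no other use of %1, instruction selection can fold the load into the
; splat and emit a single "ld1r {v0.2d}, [x0]" instead of LDR+FMOV, exactly
; as checked by test_vld1q_dup_s64 earlier in this file.
  %1 = load i64* %a, align 8
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %splat = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}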
-
-define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld2q_dup_s8
-; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
-entry:
- %vld_dup = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
- %0 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 0
- %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
- %1 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 1
- %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
- ret %struct.int8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld2q_dup_s16
-; CHECK: ld2r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0]
-entry:
- %0 = bitcast i16* %a to i8*
- %vld_dup = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
- %1 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 0
- %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
- %2 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 1
- %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
- ret %struct.int16x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld2q_dup_s32
-; CHECK: ld2r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = bitcast i32* %a to i8*
- %vld_dup = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
- %1 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 0
- %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 1
- %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
- ret %struct.int32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld2q_dup_s64
-; CHECK: ld2r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = bitcast i64* %a to i8*
- %vld_dup = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
- %1 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 0
- %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 1
- %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
- ret %struct.int64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld2q_dup_f32
-; CHECK: ld2r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = bitcast float* %a to i8*
- %vld_dup = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
- %1 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 0
- %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 1
- %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
- ret %struct.float32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld2q_dup_f64
-; CHECK: ld2r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = bitcast double* %a to i8*
- %vld_dup = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
- %1 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 0
- %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 1
- %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
- ret %struct.float64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld2_dup_s8
-; CHECK: ld2r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0]
-entry:
- %vld_dup = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
- %0 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0
- %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
- %1 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 1
- %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
- ret %struct.int8x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld2_dup_s16
-; CHECK: ld2r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0]
-entry:
- %0 = bitcast i16* %a to i8*
- %vld_dup = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
- %1 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 0
- %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 1
- %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
- ret %struct.int16x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld2_dup_s32
-; CHECK: ld2r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = bitcast i32* %a to i8*
- %vld_dup = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
- %1 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 0
- %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 1
- %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
- ret %struct.int32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld2_dup_s64
-; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = bitcast i64* %a to i8*
- %vld_dup = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %0, i32 8)
- %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 0
- %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 1
- %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
- ret %struct.int64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld2_dup_f32
-; CHECK: ld2r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = bitcast float* %a to i8*
- %vld_dup = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
- %1 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 0
- %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 1
- %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
- ret %struct.float32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld2_dup_f64
-; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = bitcast double* %a to i8*
- %vld_dup = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %0, i32 8)
- %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 0
- %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 1
- %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
- ret %struct.float64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld3q_dup_s8
-; CHECK: ld3r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
-entry:
- %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
- %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0
- %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
- %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1
- %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
- %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2
- %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2
- ret %struct.int8x16x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld3q_dup_s16
-; CHECK: ld3r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0]
-entry:
- %0 = bitcast i16* %a to i8*
- %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
- %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0
- %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
- %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1
- %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
- %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2
- %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2
- ret %struct.int16x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld3q_dup_s32
-; CHECK: ld3r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = bitcast i32* %a to i8*
- %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
- %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0
- %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1
- %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
- %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2
- %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2
- ret %struct.int32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld3q_dup_s64
-; CHECK: ld3r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = bitcast i64* %a to i8*
- %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
- %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0
- %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1
- %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2
- %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2
- ret %struct.int64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld3q_dup_f32
-; CHECK: ld3r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = bitcast float* %a to i8*
- %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
- %1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0
- %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1
- %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
- %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2
- %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2
- ret %struct.float32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld3q_dup_f64
-; CHECK: ld3r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = bitcast double* %a to i8*
- %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
- %1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0
- %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1
- %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2
- %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2
- ret %struct.float64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld3_dup_s8
-; CHECK: ld3r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0]
-entry:
- %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
- %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0
- %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
- %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1
- %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2
- %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2
- ret %struct.int8x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld3_dup_s16
-; CHECK: ld3r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0]
-entry:
- %0 = bitcast i16* %a to i8*
- %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
- %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0
- %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1
- %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
- %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2
- %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2
- ret %struct.int16x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld3_dup_s32
-; CHECK: ld3r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = bitcast i32* %a to i8*
- %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
- %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0
- %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1
- %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2
- %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2
- ret %struct.int32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld3_dup_s64
-; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = bitcast i64* %a to i8*
- %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %0, i32 8)
- %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0
- %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1
- %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2
- %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2
- ret %struct.int64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld3_dup_f32
-; CHECK: ld3r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = bitcast float* %a to i8*
- %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
- %1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0
- %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1
- %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2
- %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2
- ret %struct.float32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld3_dup_f64
-; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = bitcast double* %a to i8*
- %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %0, i32 8)
- %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0
- %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1
- %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2
- %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2
- ret %struct.float64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld4q_dup_s8
-; CHECK: ld4r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0]
-entry:
- %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
- %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0
- %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
- %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1
- %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
- %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2
- %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
- %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 3
- %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %lane3, 0, 3
- ret %struct.int8x16x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld4q_dup_s16
-; CHECK: ld4r {{{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h}, [x0]
-entry:
- %0 = bitcast i16* %a to i8*
- %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2)
- %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0
- %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
- %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1
- %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
- %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2
- %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
- %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 3
- %lane3 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %lane3, 0, 3
- ret %struct.int16x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld4q_dup_s32
-; CHECK: ld4r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = bitcast i32* %a to i8*
- %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
- %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0
- %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1
- %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
- %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2
- %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
- %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 3
- %lane3 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %lane3, 0, 3
- ret %struct.int32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld4q_dup_s64
-; CHECK: ld4r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = bitcast i64* %a to i8*
- %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8)
- %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0
- %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1
- %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2
- %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
- %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 3
- %lane3 = shufflevector <2 x i64> %4, <2 x i64> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %lane3, 0, 3
- ret %struct.int64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld4q_dup_f32
-; CHECK: ld4r {{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s}, [x0]
-entry:
- %0 = bitcast float* %a to i8*
- %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4)
- %1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0
- %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1
- %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
- %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2
- %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer
- %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 3
- %lane3 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %lane3, 0, 3
- ret %struct.float32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld4q_dup_f64
-; CHECK: ld4r {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [x0]
-entry:
- %0 = bitcast double* %a to i8*
- %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
- %1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0
- %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1
- %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2
- %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
- %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 3
- %lane3 = shufflevector <2 x double> %4, <2 x double> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %lane3, 0, 3
- ret %struct.float64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) {
-; CHECK-LABEL: test_vld4_dup_s8
-; CHECK: ld4r {{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b}, [x0]
-entry:
- %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
- %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0
- %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer
- %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1
- %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2
- %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
- %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 3
- %lane3 = shufflevector <8 x i8> %3, <8 x i8> undef, <8 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %lane3, 0, 3
- ret %struct.int8x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) {
-; CHECK-LABEL: test_vld4_dup_s16
-; CHECK: ld4r {{{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h}, [x0]
-entry:
- %0 = bitcast i16* %a to i8*
- %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
- %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0
- %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
- %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1
- %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer
- %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2
- %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
- %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 3
- %lane3 = shufflevector <4 x i16> %4, <4 x i16> undef, <4 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %lane3, 0, 3
- ret %struct.int16x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) {
-; CHECK-LABEL: test_vld4_dup_s32
-; CHECK: ld4r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = bitcast i32* %a to i8*
- %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
- %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0
- %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1
- %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2
- %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
- %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 3
- %lane3 = shufflevector <2 x i32> %4, <2 x i32> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %lane3, 0, 3
- ret %struct.int32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) {
-; CHECK-LABEL: test_vld4_dup_s64
-; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = bitcast i64* %a to i8*
- %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %0, i32 8)
- %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0
- %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1
- %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2
- %vld_dup.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 3
- %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld_dup.fca.3.extract, 0, 3
- ret %struct.int64x1x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) {
-; CHECK-LABEL: test_vld4_dup_f32
-; CHECK: ld4r {{{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s}, [x0]
-entry:
- %0 = bitcast float* %a to i8*
- %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
- %1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0
- %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
- %2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1
- %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer
- %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2
- %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer
- %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 3
- %lane3 = shufflevector <2 x float> %4, <2 x float> undef, <2 x i32> zeroinitializer
- %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %lane, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %lane3, 0, 3
- ret %struct.float32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) {
-; CHECK-LABEL: test_vld4_dup_f64
-; CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = bitcast double* %a to i8*
- %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %0, i32 8)
- %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0
- %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1
- %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2
- %vld_dup.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 3
- %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld_dup.fca.3.extract, 0, 3
- ret %struct.float64x1x4_t %.fca.0.3.insert
-}
-
-define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vld1q_lane_s8
-; CHECK: ld1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i8* %a, align 1
- %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
- ret <16 x i8> %vld1_lane
-}
-
-define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vld1q_lane_s16
-; CHECK: ld1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i16* %a, align 2
- %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
- ret <8 x i16> %vld1_lane
-}
-
-define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vld1q_lane_s32
-; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i32* %a, align 4
- %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
- ret <4 x i32> %vld1_lane
-}
-
-define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vld1q_lane_s64
-; CHECK: ld1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i64* %a, align 8
- %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
- ret <2 x i64> %vld1_lane
-}
-
-define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vld1q_lane_f32
-; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = load float* %a, align 4
- %vld1_lane = insertelement <4 x float> %b, float %0, i32 3
- ret <4 x float> %vld1_lane
-}
-
-define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vld1q_lane_f64
-; CHECK: ld1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %0 = load double* %a, align 8
- %vld1_lane = insertelement <2 x double> %b, double %0, i32 1
- ret <2 x double> %vld1_lane
-}
-
-define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vld1_lane_s8
-; CHECK: ld1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i8* %a, align 1
- %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
- ret <8 x i8> %vld1_lane
-}
-
-define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vld1_lane_s16
-; CHECK: ld1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i16* %a, align 2
- %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
- ret <4 x i16> %vld1_lane
-}
-
-define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vld1_lane_s32
-; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = load i32* %a, align 4
- %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
- ret <2 x i32> %vld1_lane
-}
-
-define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vld1_lane_s64
-; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = load i64* %a, align 8
- %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
- ret <1 x i64> %vld1_lane
-}
-
-define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vld1_lane_f32
-; CHECK: ld1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = load float* %a, align 4
- %vld1_lane = insertelement <2 x float> %b, float %0, i32 1
- ret <2 x float> %vld1_lane
-}
-
-define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vld1_lane_f64
-; CHECK: ld1r {{{v[0-9]+}}.1d}, [x0]
-entry:
- %0 = load double* %a, align 8
- %vld1_lane = insertelement <1 x double> undef, double %0, i32 0
- ret <1 x double> %vld1_lane
-}
-
-define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s16
-; CHECK: ld2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
- %0 = bitcast i16* %a to i8*
- %vld2_lane = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2)
- %vld2_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int16x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s32
-; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
- %0 = bitcast i32* %a to i8*
- %vld2_lane = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4)
- %vld2_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s64
-; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
- %0 = bitcast i64* %a to i8*
- %vld2_lane = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8)
- %vld2_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_f32
-; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
- %0 = bitcast float* %a to i8*
- %vld2_lane = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4)
- %vld2_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.float32x4x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld2q_lane_f64
-; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
- %0 = bitcast double* %a to i8*
- %vld2_lane = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8)
- %vld2_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.float64x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s8
-; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
- %vld2_lane = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1)
- %vld2_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int8x8x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s16
-; CHECK: ld2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
- %0 = bitcast i16* %a to i8*
- %vld2_lane = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2)
- %vld2_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int16x4x2_t %.fca.0.1.insert
-}
-
-define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s32
-; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
- %0 = bitcast i32* %a to i8*
- %vld2_lane = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4)
- %vld2_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_s64
-; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
- %0 = bitcast i64* %a to i8*
- %vld2_lane = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8)
- %vld2_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_f32
-; CHECK: ld2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
- %0 = bitcast float* %a to i8*
- %vld2_lane = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4)
- %vld2_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.float32x2x2_t %.fca.0.1.insert
-}
-
-define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld2_lane_f64
-; CHECK: ld2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
- %0 = bitcast double* %a to i8*
- %vld2_lane = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8)
- %vld2_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.float64x1x2_t %.fca.0.1.insert
-}
-
-define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s16
-; CHECK: ld3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
- %0 = bitcast i16* %a to i8*
- %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2)
- %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int16x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s32
-; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
- %0 = bitcast i32* %a to i8*
- %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4)
- %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s64
-; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
- %0 = bitcast i64* %a to i8*
- %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8)
- %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_f32
-; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
- %0 = bitcast float* %a to i8*
- %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4)
- %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.float32x4x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld3q_lane_f64
-; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
- %0 = bitcast double* %a to i8*
- %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8)
- %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.float64x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s8
-; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
- %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1)
- %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int8x8x3_t %.fca.0.2.insert
-}
-
-define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s16
-; CHECK: ld3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
- %0 = bitcast i16* %a to i8*
- %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2)
- %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int16x4x3_t %.fca.0.2.insert
-}
-
-define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s32
-; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
- %0 = bitcast i32* %a to i8*
- %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4)
- %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_s64
-; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
- %0 = bitcast i64* %a to i8*
- %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8)
- %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_f32
-; CHECK: ld3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
- %0 = bitcast float* %a to i8*
- %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4)
- %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.float32x2x3_t %.fca.0.2.insert
-}
-
-define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld3_lane_f64
-; CHECK: ld3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
- %0 = bitcast double* %a to i8*
- %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8)
- %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.float64x1x3_t %.fca.0.2.insert
-}
-
-define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s8
-; CHECK: ld4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vld4_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 1)
- %vld4_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int8x16x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s16
-; CHECK: ld4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
- %0 = bitcast i16* %a to i8*
- %vld4_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2)
- %vld4_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int16x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s32
-; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
- %0 = bitcast i32* %a to i8*
- %vld4_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4)
- %vld4_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_s64
-; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
- %0 = bitcast i64* %a to i8*
- %vld4_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8)
- %vld4_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_f32
-; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
- %0 = bitcast float* %a to i8*
- %vld4_lane = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4)
- %vld4_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.float32x4x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld4q_lane_f64
-; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
- %0 = bitcast double* %a to i8*
- %vld4_lane = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8)
- %vld4_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.float64x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s8
-; CHECK: ld4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
- %vld4_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1)
- %vld4_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int8x8x4_t %.fca.0.3.insert
-}
-
-define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s16
-; CHECK: ld4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
- %0 = bitcast i16* %a to i8*
- %vld4_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2)
- %vld4_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int16x4x4_t %.fca.0.3.insert
-}
-
-define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s32
-; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
- %0 = bitcast i32* %a to i8*
- %vld4_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4)
- %vld4_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_s64
-; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
- %0 = bitcast i64* %a to i8*
- %vld4_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8)
- %vld4_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.int64x1x4_t %.fca.0.3.insert
-}
-
-define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_f32
-; CHECK: ld4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
- %0 = bitcast float* %a to i8*
- %vld4_lane = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4)
- %vld4_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.float32x2x4_t %.fca.0.3.insert
-}
-
-define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vld4_lane_f64
-; CHECK: ld4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
- %0 = bitcast double* %a to i8*
- %vld4_lane = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8)
- %vld4_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4_lane, 0
- %vld4_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4_lane, 1
- %vld4_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4_lane, 2
- %vld4_lane.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4_lane, 3
- %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4_lane.fca.2.extract, 0, 2
- %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4_lane.fca.3.extract, 0, 3
- ret %struct.float64x1x4_t %.fca.0.3.insert
-}
-
-define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vst1q_lane_s8
-; CHECK: st1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <16 x i8> %b, i32 15
- store i8 %0, i8* %a, align 1
- ret void
-}
-
-define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vst1q_lane_s16
-; CHECK: st1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <8 x i16> %b, i32 7
- store i16 %0, i16* %a, align 2
- ret void
-}
-
-define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vst1q_lane_s32
-; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <4 x i32> %b, i32 3
- store i32 %0, i32* %a, align 4
- ret void
-}
-
-define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vst1q_lane_s64
-; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <2 x i64> %b, i32 1
- store i64 %0, i64* %a, align 8
- ret void
-}
-
-define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
-; CHECK-LABEL: test_vst1q_lane_f32
-; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <4 x float> %b, i32 3
- store float %0, float* %a, align 4
- ret void
-}
-
-define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
-; CHECK-LABEL: test_vst1q_lane_f64
-; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <2 x double> %b, i32 1
- store double %0, double* %a, align 8
- ret void
-}
-
-define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vst1_lane_s8
-; CHECK: st1 {{{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <8 x i8> %b, i32 7
- store i8 %0, i8* %a, align 1
- ret void
-}
-
-define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vst1_lane_s16
-; CHECK: st1 {{{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <4 x i16> %b, i32 3
- store i16 %0, i16* %a, align 2
- ret void
-}
-
-define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vst1_lane_s32
-; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <2 x i32> %b, i32 1
- store i32 %0, i32* %a, align 4
- ret void
-}
-
-define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vst1_lane_s64
-; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <1 x i64> %b, i32 0
- store i64 %0, i64* %a, align 8
- ret void
-}
-
-define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
-; CHECK-LABEL: test_vst1_lane_f32
-; CHECK: st1 {{{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <2 x float> %b, i32 1
- store float %0, float* %a, align 4
- ret void
-}
-
-define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
-; CHECK-LABEL: test_vst1_lane_f64
-; CHECK: st1 {{{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %0 = extractelement <1 x double> %b, i32 0
- store double %0, double* %a, align 8
- ret void
-}
-
-define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s8
-; CHECK: st2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
- tail call void @llvm.arm.neon.vst2lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 15, i32 1)
- ret void
-}
-
-define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s16
-; CHECK: st2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2)
- ret void
-}
-
-define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s32
-; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
- %0 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4)
- ret void
-}
-
-define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_s64
-; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
- %0 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8)
- ret void
-}
-
-define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_f32
-; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
- %0 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4)
- ret void
-}
-
-define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2q_lane_f64
-; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
- %0 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8)
- ret void
-}
-
-define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s8
-; CHECK: st2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
- tail call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1)
- ret void
-}
-
-define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s16
-; CHECK: st2 {{{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2)
- ret void
-}
-
-define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s32
-; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
- %0 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4)
- ret void
-}
-
-define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_s64
-; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
- %0 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8)
- ret void
-}
-
-define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_f32
-; CHECK: st2 {{{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
- %0 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4)
- ret void
-}
-
-define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst2_lane_f64
-; CHECK: st2 {{{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
- %0 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8)
- ret void
-}
-
-define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s8
-; CHECK: st3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
- tail call void @llvm.arm.neon.vst3lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 15, i32 1)
- ret void
-}
-
-define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s16
-; CHECK: st3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2)
- ret void
-}
-
-define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s32
-; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
- %0 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4)
- ret void
-}
-
-define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_s64
-; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
- %0 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8)
- ret void
-}
-
-define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_f32
-; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
- %0 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4)
- ret void
-}
-
-define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3q_lane_f64
-; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
- %0 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8)
- ret void
-}
-
-define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s8
-; CHECK: st3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
- tail call void @llvm.arm.neon.vst3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1)
- ret void
-}
-
-define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s16
-; CHECK: st3 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2)
- ret void
-}
-
-define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s32
-; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
- %0 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4)
- ret void
-}
-
-define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_s64
-; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
- %0 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8)
- ret void
-}
-
-define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_f32
-; CHECK: st3 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
- %0 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4)
- ret void
-}
-
-define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst3_lane_f64
-; CHECK: st3 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
- %0 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8)
- ret void
-}
-
-define void @test_vst4q_lane_s8(i16* %a, [4 x <16 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s8
-; CHECK: st4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v16i8(i8* %0, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 2)
- ret void
-}
-
-define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s16
-; CHECK: st4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2)
- ret void
-}
-
-define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s32
-; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
- %0 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4)
- ret void
-}
-
-define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_s64
-; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
- %0 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8)
- ret void
-}
-
-define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_f32
-; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
- %0 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4)
- ret void
-}
-
-define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4q_lane_f64
-; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
- %0 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8)
- ret void
-}
-
-define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s8
-; CHECK: st4 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
- tail call void @llvm.arm.neon.vst4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1)
- ret void
-}
-
-define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s16
-; CHECK: st4 {{{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
- %0 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2)
- ret void
-}
-
-define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s32
-; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
- %0 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4)
- ret void
-}
-
-define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_s64
-; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
- %0 = bitcast i64* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8)
- ret void
-}
-
-define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_f32
-; CHECK: st4 {{{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
- %0 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4)
- ret void
-}
-
-define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) {
-; CHECK-LABEL: test_vst4_lane_f64
-; CHECK: st4 {{{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d}[{{[0-9]+}}], [x0]
-entry:
- %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
- %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
- %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
- %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
- %0 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8)
- ret void
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32)
-declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32)
-declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
-declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32)
-declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32)
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
-declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
-declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
-declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32)
-
-define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld2q_lane_s8
-; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0]
-entry:
- %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0
- %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1
- %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1)
- %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.int8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld2q_lane_u8
-; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0]
-entry:
- %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0
- %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1
- %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1)
- %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.uint8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld2q_lane_p8
-; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0]
-entry:
- %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0
- %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1
- %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1)
- %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0
- %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1
- %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1
- ret %struct.poly8x16x2_t %.fca.0.1.insert
-}
-
-define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld3q_lane_s8
-; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0]
-entry:
- %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0
- %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1
- %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2
- %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1)
- %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.int8x16x3_t %.fca.0.2.insert
-}
-
-define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) {
-; CHECK-LABEL: test_vld3q_lane_u8
-; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0]
-entry:
- %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0
- %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1
- %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2
- %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1)
- %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0
- %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1
- %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2
- %.fca.0.0.insert = insertvalue %struct.uint8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0
- %.fca.0.1.insert = insertvalue %struct.uint8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1
- %.fca.0.2.insert = insertvalue %struct.uint8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2
- ret %struct.uint8x16x3_t %.fca.0.2.insert
-}
-
diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll
deleted file mode 100644
index afc0901..0000000
--- a/test/CodeGen/AArch64/neon-simd-ldst.ll
+++ /dev/null
@@ -1,164 +0,0 @@
-; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define void @test_ldstq_4v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldstq_4v
-; CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
-; CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
-entry:
- %tobool62 = icmp eq i32 %count, 0
- br i1 %tobool62, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
- %dec = add i32 %count.addr.063, -1
- %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1)
- %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
- tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1)
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- ret void
-}
-
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-
-define void @test_ldstq_3v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldstq_3v
-; CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0]
-; CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0]
-entry:
- %tobool47 = icmp eq i32 %count, 0
- br i1 %tobool47, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
- %dec = add i32 %count.addr.048, -1
- %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1)
- %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
- tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1)
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- ret void
-}
-
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
-
-define void @test_ldstq_2v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldstq_2v
-; CHECK: ld2 {v0.16b, v1.16b}, [x0]
-; CHECK: st2 {v0.16b, v1.16b}, [x0]
-entry:
- %tobool22 = icmp eq i32 %count, 0
- br i1 %tobool22, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
- %dec = add i32 %count.addr.023, -1
- %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1)
- %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
- tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1)
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- ret void
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-
-define void @test_ldst_4v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldst_4v
-; CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
-; CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
-entry:
- %tobool42 = icmp eq i32 %count, 0
- br i1 %tobool42, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
- %dec = add i32 %count.addr.043, -1
- %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1)
- %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
- %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
- %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
- %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
- tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1)
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- ret void
-}
-
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-
-define void @test_ldst_3v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldst_3v
-; CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0]
-; CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0]
-entry:
- %tobool32 = icmp eq i32 %count, 0
- br i1 %tobool32, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
- %dec = add i32 %count.addr.033, -1
- %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1)
- %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
- %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
- %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1)
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- ret void
-}
-
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-
-define void @test_ldst_2v(i8* noalias %io, i32 %count) {
-; CHECK-LABEL: test_ldst_2v
-; CHECK: ld2 {v0.8b, v1.8b}, [x0]
-; CHECK: st2 {v0.8b, v1.8b}, [x0]
-entry:
- %tobool22 = icmp eq i32 %count, 0
- br i1 %tobool22, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
- %dec = add i32 %count.addr.023, -1
- %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1)
- %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
- %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
- tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1)
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- ret void
-}
-
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-
diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
deleted file mode 100644
index 156fe1d..0000000
--- a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
+++ /dev/null
@@ -1,354 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-; Check for a post-increment updating load with a fixed (immediate) increment.
-define <4 x i16> @test_vld1_fx_update(i16** %ptr) nounwind {
-; CHECK: test_vld1_fx_update
-; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}], #8
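-; The #8 immediate matches the 8-byte pointer advance below (4 x i16 elements).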
- %A = load i16** %ptr
- %tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 2)
- %tmp2 = getelementptr i16* %A, i32 4
- store i16* %tmp2, i16** %ptr
- ret <4 x i16> %tmp1
-}
-
-; Check for a post-increment updating load with a register increment.
-define <2 x i32> @test_vld1_reg_update(i32** %ptr, i32 %inc) nounwind {
-; CHECK: test_vld1_reg_update
-; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
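-; A runtime increment cannot use the immediate form, so the register post-index variant is expected.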
- %A = load i32** %ptr
- %tmp0 = bitcast i32* %A to i8*
- %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 4)
- %tmp2 = getelementptr i32* %A, i32 %inc
- store i32* %tmp2, i32** %ptr
- ret <2 x i32> %tmp1
-}
-
-define <2 x float> @test_vld2_fx_update(float** %ptr) nounwind {
-; CHECK: test_vld2_fx_update
-; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], #16
- %A = load float** %ptr
- %tmp0 = bitcast float* %A to i8*
- %tmp1 = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 4)
- %tmp2 = extractvalue { <2 x float>, <2 x float> } %tmp1, 0
- %tmp3 = getelementptr float* %A, i32 4
- store float* %tmp3, float** %ptr
- ret <2 x float> %tmp2
-}
-
-define <16 x i8> @test_vld2_reg_update(i8** %ptr, i32 %inc) nounwind {
-; CHECK: test_vld2_reg_update
-; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i8** %ptr
- %tmp0 = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1)
- %tmp1 = extractvalue { <16 x i8>, <16 x i8> } %tmp0, 0
- %tmp2 = getelementptr i8* %A, i32 %inc
- store i8* %tmp2, i8** %ptr
- ret <16 x i8> %tmp1
-}
-
-define <4 x i32> @test_vld3_fx_update(i32** %ptr) nounwind {
-; CHECK: test_vld3_fx_update
-; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], #48
- %A = load i32** %ptr
- %tmp0 = bitcast i32* %A to i8*
- %tmp1 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 4)
- %tmp2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %tmp1, 0
- %tmp3 = getelementptr i32* %A, i32 12
- store i32* %tmp3, i32** %ptr
- ret <4 x i32> %tmp2
-}
-
-define <4 x i16> @test_vld3_reg_update(i16** %ptr, i32 %inc) nounwind {
-; CHECK: test_vld3_reg_update
-; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i16** %ptr
- %tmp0 = bitcast i16* %A to i8*
- %tmp1 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 2)
- %tmp2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %tmp1, 0
- %tmp3 = getelementptr i16* %A, i32 %inc
- store i16* %tmp3, i16** %ptr
- ret <4 x i16> %tmp2
-}
-
-define <8 x i16> @test_vld4_fx_update(i16** %ptr) nounwind {
-; CHECK: test_vld4_fx_update
-; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], #64
- %A = load i16** %ptr
- %tmp0 = bitcast i16* %A to i8*
- %tmp1 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
- %tmp2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %tmp1, 0
- %tmp3 = getelementptr i16* %A, i32 32
- store i16* %tmp3, i16** %ptr
- ret <8 x i16> %tmp2
-}
-
-define <8 x i8> @test_vld4_reg_update(i8** %ptr, i32 %inc) nounwind {
-; CHECK: test_vld4_reg_update
-; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i8** %ptr
- %tmp0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1)
- %tmp1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %tmp0, 0
- %tmp2 = getelementptr i8* %A, i32 %inc
- store i8* %tmp2, i8** %ptr
- ret <8 x i8> %tmp1
-}
-
-define void @test_vst1_fx_update(float** %ptr, <2 x float> %B) nounwind {
-; CHECK: test_vst1_fx_update
-; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #8
- %A = load float** %ptr
- %tmp0 = bitcast float* %A to i8*
- call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %B, i32 4)
- %tmp2 = getelementptr float* %A, i32 2
- store float* %tmp2, float** %ptr
- ret void
-}
-
-define void @test_vst1_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind {
-; CHECK: test_vst1_reg_update
-; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i16** %ptr
- %tmp0 = bitcast i16* %A to i8*
- call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %B, i32 2)
- %tmp1 = getelementptr i16* %A, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret void
-}
-
-define void @test_vst2_fx_update(i64** %ptr, <1 x i64> %B) nounwind {
-; CHECK: test_vst2_fx_update
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}], #16
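-; st2 has no .1d arrangement, so a two-register st1 with a 16-byte post-increment is expected instead.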
- %A = load i64** %ptr
- %tmp0 = bitcast i64* %A to i8*
- call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %B, <1 x i64> %B, i32 8)
- %tmp1 = getelementptr i64* %A, i32 2
- store i64* %tmp1, i64** %ptr
- ret void
-}
-
-define void @test_vst2_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind {
-; CHECK: test_vst2_reg_update
-; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i8** %ptr
- call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, i32 4)
- %tmp0 = getelementptr i8* %A, i32 %inc
- store i8* %tmp0, i8** %ptr
- ret void
-}
-
-define void @test_vst3_fx_update(i32** %ptr, <2 x i32> %B) nounwind {
-; CHECK: test_vst3_fx_update
-; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}], #24
- %A = load i32** %ptr
- %tmp0 = bitcast i32* %A to i8*
- call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %B, <2 x i32> %B, <2 x i32> %B, i32 4)
- %tmp1 = getelementptr i32* %A, i32 6
- store i32* %tmp1, i32** %ptr
- ret void
-}
-
-define void @test_vst3_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind {
-; CHECK: test_vst3_reg_update
-; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i16** %ptr
- %tmp0 = bitcast i16* %A to i8*
- call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %B, <8 x i16> %B, <8 x i16> %B, i32 2)
- %tmp1 = getelementptr i16* %A, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret void
-}
-
-define void @test_vst4_fx_update(float** %ptr, <4 x float> %B) nounwind {
-; CHECK: test_vst4_fx_update
-; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}], #64
- %A = load float** %ptr
- %tmp0 = bitcast float* %A to i8*
- call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %B, <4 x float> %B, <4 x float> %B, <4 x float> %B, i32 4)
- %tmp1 = getelementptr float* %A, i32 16
- store float* %tmp1, float** %ptr
- ret void
-}
-
-define void @test_vst4_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind {
-; CHECK: test_vst4_reg_update
-; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}], x{{[0-9]+}}
- %A = load i8** %ptr
- call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, i32 1)
- %tmp0 = getelementptr i8* %A, i32 %inc
- store i8* %tmp0, i8** %ptr
- ret void
-}
-
-
-declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
-declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
-declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
-declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
-
-declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
-declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-
-define <16 x i8> @test_vld1x2_fx_update(i8* %a, i8** %ptr) {
-; CHECK: test_vld1x2_fx_update
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], #32
- %1 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
- %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
- %tmp1 = getelementptr i8* %a, i32 32
- store i8* %tmp1, i8** %ptr
- ret <16 x i8> %2
-}
-
-define <8 x i16> @test_vld1x2_reg_update(i16* %a, i16** %ptr, i32 %inc) {
-; CHECK: test_vld1x2_reg_update
-; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2)
- %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0
- %tmp1 = getelementptr i16* %a, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret <8 x i16> %3
-}
-
-define <2 x i64> @test_vld1x3_fx_update(i64* %a, i64** %ptr) {
-; CHECK: test_vld1x3_fx_update
-; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}], #48
- %1 = bitcast i64* %a to i8*
- %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
- %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
- %tmp1 = getelementptr i64* %a, i32 6
- store i64* %tmp1, i64** %ptr
- ret <2 x i64> %3
-}
-
-define <8 x i16> @test_vld1x3_reg_update(i16* %a, i16** %ptr, i32 %inc) {
-; CHECK: test_vld1x3_reg_update
-; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
- %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
- %tmp1 = getelementptr i16* %a, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret <8 x i16> %3
-}
-
-define <4 x float> @test_vld1x4_fx_update(float* %a, float** %ptr) {
-; CHECK: test_vld1x4_fx_update
-; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], #64
- %1 = bitcast float* %a to i8*
- %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
- %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
- %tmp1 = getelementptr float* %a, i32 16
- store float* %tmp1, float** %ptr
- ret <4 x float> %3
-}
-
-define <8 x i8> @test_vld1x4_reg_update(i8* readonly %a, i8** %ptr, i32 %inc) #0 {
-; CHECK: test_vld1x4_reg_update
-; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
- %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
- %tmp1 = getelementptr i8* %a, i32 %inc
- store i8* %tmp1, i8** %ptr
- ret <8 x i8> %2
-}
-
-define void @test_vst1x2_fx_update(i8* %a, [2 x <16 x i8>] %b.coerce, i8** %ptr) #2 {
-; CHECK: test_vst1x2_fx_update
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], #32
- %1 = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %2 = extractvalue [2 x <16 x i8>] %b.coerce, 1
- tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1)
- %tmp1 = getelementptr i8* %a, i32 32
- store i8* %tmp1, i8** %ptr
- ret void
-}
-
-define void @test_vst1x2_reg_update(i16* %a, [2 x <8 x i16>] %b.coerce, i16** %ptr, i32 %inc) #2 {
-; CHECK: test_vst1x2_reg_update
-; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [2 x <8 x i16>] %b.coerce, 0
- %2 = extractvalue [2 x <8 x i16>] %b.coerce, 1
- %3 = bitcast i16* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2)
- %tmp1 = getelementptr i16* %a, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret void
-}
-
-define void @test_vst1x3_fx_update(i32* %a, [3 x <2 x i32>] %b.coerce, i32** %ptr) #2 {
-; CHECK: test_vst1x3_fx_update
-; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], #24
- %1 = extractvalue [3 x <2 x i32>] %b.coerce, 0
- %2 = extractvalue [3 x <2 x i32>] %b.coerce, 1
- %3 = extractvalue [3 x <2 x i32>] %b.coerce, 2
- %4 = bitcast i32* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
- %tmp1 = getelementptr i32* %a, i32 6
- store i32* %tmp1, i32** %ptr
- ret void
-}
-
-define void @test_vst1x3_reg_update(i64* %a, [3 x <1 x i64>] %b.coerce, i64** %ptr, i32 %inc) #2 {
-; CHECK: test_vst1x3_reg_update
-; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [3 x <1 x i64>] %b.coerce, 0
- %2 = extractvalue [3 x <1 x i64>] %b.coerce, 1
- %3 = extractvalue [3 x <1 x i64>] %b.coerce, 2
- %4 = bitcast i64* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
- %tmp1 = getelementptr i64* %a, i32 %inc
- store i64* %tmp1, i64** %ptr
- ret void
-}
-
-define void @test_vst1x4_fx_update(float* %a, [4 x <4 x float>] %b.coerce, float** %ptr) #2 {
-; CHECK: test_vst1x4_fx_update
-; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], #64
- %1 = extractvalue [4 x <4 x float>] %b.coerce, 0
- %2 = extractvalue [4 x <4 x float>] %b.coerce, 1
- %3 = extractvalue [4 x <4 x float>] %b.coerce, 2
- %4 = extractvalue [4 x <4 x float>] %b.coerce, 3
- %5 = bitcast float* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
- %tmp1 = getelementptr float* %a, i32 16
- store float* %tmp1, float** %ptr
- ret void
-}
-
-define void @test_vst1x4_reg_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr, i32 %inc) #2 {
-; CHECK: test_vst1x4_reg_update
-; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [4 x <2 x double>] %b.coerce, 0
- %2 = extractvalue [4 x <2 x double>] %b.coerce, 1
- %3 = extractvalue [4 x <2 x double>] %b.coerce, 2
- %4 = extractvalue [4 x <2 x double>] %b.coerce, 3
- %5 = bitcast double* %a to i8*
- tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
- %tmp1 = getelementptr double* %a, i32 %inc
- store double* %tmp1, double** %ptr
- ret void
-}
-
-declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
-declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
-declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
-declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
-declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
-declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
-declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) #3
-declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) #3
diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
deleted file mode 100644
index 80a9347..0000000
--- a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
+++ /dev/null
@@ -1,319 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) {
-; CHECK-LABEL: test_vld2q_dup_fx_update
-; CHECK: ld2r {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}], #2
- %1 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1)
- %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
- %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
- %4 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
- %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer
- %6 = insertvalue { [2 x <16 x i8>] } undef, <16 x i8> %3, 0, 0
- %7 = insertvalue { [2 x <16 x i8>] } %6, <16 x i8> %5, 0, 1
- %tmp1 = getelementptr i8* %a, i32 2
- store i8* %tmp1, i8** %ptr
- ret { [2 x <16 x i8>] } %7
-}
-
-define { [2 x <4 x i32>] } @test_vld2q_dup_reg_update(i32* %a, i32** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vld2q_dup_reg_update
-; CHECK: ld2r {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %1, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4)
- %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
- %5 = extractvalue { <4 x i32>, <4 x i32> } %2, 1
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> zeroinitializer
- %7 = insertvalue { [2 x <4 x i32>] } undef, <4 x i32> %4, 0, 0
- %8 = insertvalue { [2 x <4 x i32>] } %7, <4 x i32> %6, 0, 1
- %tmp1 = getelementptr i32* %a, i32 %inc
- store i32* %tmp1, i32** %ptr
- ret { [2 x <4 x i32>] } %8
-}
-
-define { [3 x <4 x i16>] } @test_vld3_dup_fx_update(i16* %a, i16** %ptr) {
-; CHECK-LABEL: test_vld3_dup_fx_update
-; CHECK: ld3r {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}], #6
- %1 = bitcast i16* %a to i8*
- %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %1, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
- %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
- %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer
- %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
- %6 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> zeroinitializer
- %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
- %8 = shufflevector <4 x i16> %7, <4 x i16> undef, <4 x i32> zeroinitializer
- %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %4, 0, 0
- %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %6, 0, 1
- %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2
- %tmp1 = getelementptr i16* %a, i32 3
- store i16* %tmp1, i16** %ptr
- ret { [3 x <4 x i16>] } %11
-}
-
-define { [3 x <8 x i8>] } @test_vld3_dup_reg_update(i8* %a, i8** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vld3_dup_reg_update
-; CHECK: ld3r {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
- %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
- %3 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
- %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
- %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <8 x i32> zeroinitializer
- %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
- %7 = shufflevector <8 x i8> %6, <8 x i8> undef, <8 x i32> zeroinitializer
- %8 = insertvalue { [3 x <8 x i8>] } undef, <8 x i8> %3, 0, 0
- %9 = insertvalue { [3 x <8 x i8>] } %8, <8 x i8> %5, 0, 1
- %10 = insertvalue { [3 x <8 x i8>] } %9, <8 x i8> %7, 0, 2
- %tmp1 = getelementptr i8* %a, i32 %inc
- store i8* %tmp1, i8** %ptr
- ret { [3 x <8 x i8>] } %10
-}
-
-define { [4 x <2 x i32>] } @test_vld4_dup_fx_update(i32* %a, i32** %ptr) #0 {
-; CHECK-LABEL: test_vld4_dup_fx_update
-; CHECK: ld4r {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}], #16
- %1 = bitcast i32* %a to i8*
- %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %1, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
- %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
- %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer
- %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
- %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <2 x i32> zeroinitializer
- %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
- %8 = shufflevector <2 x i32> %7, <2 x i32> undef, <2 x i32> zeroinitializer
- %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
- %10 = shufflevector <2 x i32> %9, <2 x i32> undef, <2 x i32> zeroinitializer
- %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %4, 0, 0
- %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %6, 0, 1
- %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %8, 0, 2
- %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3
- %tmp1 = getelementptr i32* %a, i32 4
- store i32* %tmp1, i32** %ptr
- ret { [4 x <2 x i32>] } %14
-}
-
-define { [4 x <2 x double>] } @test_vld4_dup_reg_update(double* %a, double** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vld4_dup_reg_update
-; CHECK: ld4r {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = bitcast double* %a to i8*
- %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %1, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8)
- %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
- %4 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer
- %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
- %6 = shufflevector <2 x double> %5, <2 x double> undef, <2 x i32> zeroinitializer
- %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
- %8 = shufflevector <2 x double> %7, <2 x double> undef, <2 x i32> zeroinitializer
- %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
- %10 = shufflevector <2 x double> %9, <2 x double> undef, <2 x i32> zeroinitializer
- %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %4, 0, 0
- %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %6, 0, 1
- %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %8, 0, 2
- %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3
- %tmp1 = getelementptr double* %a, i32 %inc
- store double* %tmp1, double** %ptr
- ret { [4 x <2 x double>] } %14
-}
-
-define { [2 x <8 x i8>] } @test_vld2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) {
-; CHECK-LABEL: test_vld2_lane_fx_update
-; CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [x{{[0-9]+|sp}}], #2
- %1 = extractvalue [2 x <8 x i8>] %b, 0
- %2 = extractvalue [2 x <8 x i8>] %b, 1
- %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
- %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0
- %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1
- %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0
- %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1
- %tmp1 = getelementptr i8* %a, i32 2
- store i8* %tmp1, i8** %ptr
- ret { [2 x <8 x i8>] } %7
-}
-
-define { [2 x <8 x i8>] } @test_vld2_lane_reg_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vld2_lane_reg_update
-; CHECK: ld2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[6], [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [2 x <8 x i8>] %b, 0
- %2 = extractvalue [2 x <8 x i8>] %b, 1
- %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 6, i32 1)
- %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0
- %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1
- %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0
- %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1
- %tmp1 = getelementptr i8* %a, i32 %inc
- store i8* %tmp1, i8** %ptr
- ret { [2 x <8 x i8>] } %7
-}
-
-define { [3 x <2 x float>] } @test_vld3_lane_fx_update(float* %a, [3 x <2 x float>] %b, float** %ptr) {
-; CHECK-LABEL: test_vld3_lane_fx_update
-; CHECK: ld3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], #12
- %1 = extractvalue [3 x <2 x float>] %b, 0
- %2 = extractvalue [3 x <2 x float>] %b, 1
- %3 = extractvalue [3 x <2 x float>] %b, 2
- %4 = bitcast float* %a to i8*
- %5 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4)
- %6 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 0
- %7 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 1
- %8 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 2
- %9 = insertvalue { [3 x <2 x float>] } undef, <2 x float> %6, 0, 0
- %10 = insertvalue { [3 x <2 x float>] } %9, <2 x float> %7, 0, 1
- %11 = insertvalue { [3 x <2 x float>] } %10, <2 x float> %8, 0, 2
- %tmp1 = getelementptr float* %a, i32 3
- store float* %tmp1, float** %ptr
- ret { [3 x <2 x float>] } %11
-}
-
-define { [3 x <4 x i16>] } @test_vld3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vld3_lane_reg_update
-; CHECK: ld3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [3 x <4 x i16>] %b, 0
- %2 = extractvalue [3 x <4 x i16>] %b, 1
- %3 = extractvalue [3 x <4 x i16>] %b, 2
- %4 = bitcast i16* %a to i8*
- %5 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
- %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 0
- %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 1
- %8 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 2
- %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %6, 0, 0
- %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %7, 0, 1
- %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2
- %tmp1 = getelementptr i16* %a, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret { [3 x <4 x i16>] } %11
-}
-
-define { [4 x <2 x i32>] } @test_vld4_lane_fx_update(i32* readonly %a, [4 x <2 x i32>] %b, i32** %ptr) {
-; CHECK-LABEL: test_vld4_lane_fx_update
-; CHECK: ld4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], #16
- %1 = extractvalue [4 x <2 x i32>] %b, 0
- %2 = extractvalue [4 x <2 x i32>] %b, 1
- %3 = extractvalue [4 x <2 x i32>] %b, 2
- %4 = extractvalue [4 x <2 x i32>] %b, 3
- %5 = bitcast i32* %a to i8*
- %6 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 1, i32 4)
- %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 0
- %8 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 1
- %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 2
- %10 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 3
- %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %7, 0, 0
- %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %8, 0, 1
- %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %9, 0, 2
- %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3
- %tmp1 = getelementptr i32* %a, i32 4
- store i32* %tmp1, i32** %ptr
- ret { [4 x <2 x i32>] } %14
-}
-
-define { [4 x <2 x double>] } @test_vld4_lane_reg_update(double* readonly %a, [4 x <2 x double>] %b, double** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vld4_lane_reg_update
-; CHECK: ld4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [4 x <2 x double>] %b, 0
- %2 = extractvalue [4 x <2 x double>] %b, 1
- %3 = extractvalue [4 x <2 x double>] %b, 2
- %4 = extractvalue [4 x <2 x double>] %b, 3
- %5 = bitcast double* %a to i8*
- %6 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8)
- %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 0
- %8 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 1
- %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 2
- %10 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 3
- %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %7, 0, 0
- %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %8, 0, 1
- %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %9, 0, 2
- %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3
- %tmp1 = getelementptr double* %a, i32 %inc
- store double* %tmp1, double** %ptr
- ret { [4 x <2 x double>] } %14
-}
-
-define void @test_vst2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) {
-; CHECK-LABEL: test_vst2_lane_fx_update
-; CHECK: st2 {v{{[0-9]+}}.b, v{{[0-9]+}}.b}[7], [x{{[0-9]+|sp}}], #2
- %1 = extractvalue [2 x <8 x i8>] %b, 0
- %2 = extractvalue [2 x <8 x i8>] %b, 1
- call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1)
- %tmp1 = getelementptr i8* %a, i32 2
- store i8* %tmp1, i8** %ptr
- ret void
-}
-
-define void @test_vst2_lane_reg_update(i32* %a, [2 x <2 x i32>] %b.coerce, i32** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vst2_lane_reg_update
-; CHECK: st2 {v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [2 x <2 x i32>] %b.coerce, 0
- %2 = extractvalue [2 x <2 x i32>] %b.coerce, 1
- %3 = bitcast i32* %a to i8*
- tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4)
- %tmp1 = getelementptr i32* %a, i32 %inc
- store i32* %tmp1, i32** %ptr
- ret void
-}
-
-define void @test_vst3_lane_fx_update(float* %a, [3 x <4 x float>] %b, float** %ptr) {
-; CHECK-LABEL: test_vst3_lane_fx_update
-; CHECK: st3 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[3], [x{{[0-9]+|sp}}], #12
- %1 = extractvalue [3 x <4 x float>] %b, 0
- %2 = extractvalue [3 x <4 x float>] %b, 1
- %3 = extractvalue [3 x <4 x float>] %b, 2
- %4 = bitcast float* %a to i8*
- call void @llvm.arm.neon.vst3lane.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 3, i32 4)
- %tmp1 = getelementptr float* %a, i32 3
- store float* %tmp1, float** %ptr
- ret void
-}
-
-define void @test_vst3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vst3_lane_reg_update
-; CHECK: st3 {v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h}[3], [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [3 x <4 x i16>] %b, 0
- %2 = extractvalue [3 x <4 x i16>] %b, 1
- %3 = extractvalue [3 x <4 x i16>] %b, 2
- %4 = bitcast i16* %a to i8*
- tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2)
- %tmp1 = getelementptr i16* %a, i32 %inc
- store i16* %tmp1, i16** %ptr
- ret void
-}
-
-define void @test_vst4_lane_fx_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr) {
-; CHECK-LABEL: test_vst4_lane_fx_update
-; CHECK: st4 {v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d}[1], [x{{[0-9]+|sp}}], #32
- %1 = extractvalue [4 x <2 x double>] %b.coerce, 0
- %2 = extractvalue [4 x <2 x double>] %b.coerce, 1
- %3 = extractvalue [4 x <2 x double>] %b.coerce, 2
- %4 = extractvalue [4 x <2 x double>] %b.coerce, 3
- %5 = bitcast double* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8)
- %tmp1 = getelementptr double* %a, i32 4
- store double* %tmp1, double** %ptr
- ret void
-}
-
-define void @test_vst4_lane_reg_update(float* %a, [4 x <2 x float>] %b.coerce, float** %ptr, i32 %inc) {
-; CHECK-LABEL: test_vst4_lane_reg_update
-; CHECK: st4 {v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s}[1], [x{{[0-9]+|sp}}], x{{[0-9]+}}
- %1 = extractvalue [4 x <2 x float>] %b.coerce, 0
- %2 = extractvalue [4 x <2 x float>] %b.coerce, 1
- %3 = extractvalue [4 x <2 x float>] %b.coerce, 2
- %4 = extractvalue [4 x <2 x float>] %b.coerce, 3
- %5 = bitcast float* %a to i8*
- tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 1, i32 4)
- %tmp1 = getelementptr float* %a, i32 %inc
- store float* %tmp1, float** %ptr
- ret void
-}
-
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
-declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32)
-declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
-declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32)
-declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32)
diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll
deleted file mode 100644
index fd76265..0000000
--- a/test/CodeGen/AArch64/neon-simd-shift.ll
+++ /dev/null
@@ -1,1556 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
-; CHECK: test_vshr_n_s8
-; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- ret <8 x i8> %vshr_n
-}
-
-define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
-; CHECK: test_vshr_n_s16
-; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
- ret <4 x i16> %vshr_n
-}
-
-define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
-; CHECK: test_vshr_n_s32
-; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
- ret <2 x i32> %vshr_n
-}
-
-define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
-; CHECK: test_vshrq_n_s8
-; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- ret <16 x i8> %vshr_n
-}
-
-define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
-; CHECK: test_vshrq_n_s16
-; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- ret <8 x i16> %vshr_n
-}
-
-define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
-; CHECK: test_vshrq_n_s32
-; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
- ret <4 x i32> %vshr_n
-}
-
-define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
-; CHECK: test_vshrq_n_s64
-; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
- ret <2 x i64> %vshr_n
-}
-
-define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
-; CHECK: test_vshr_n_u8
-; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- ret <8 x i8> %vshr_n
-}
-
-define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
-; CHECK: test_vshr_n_u16
-; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
- ret <4 x i16> %vshr_n
-}
-
-define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
-; CHECK: test_vshr_n_u32
-; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
- ret <2 x i32> %vshr_n
-}
-
-define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
-; CHECK: test_vshrq_n_u8
-; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- ret <16 x i8> %vshr_n
-}
-
-define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
-; CHECK: test_vshrq_n_u16
-; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- ret <8 x i16> %vshr_n
-}
-
-define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
-; CHECK: test_vshrq_n_u32
-; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
- ret <4 x i32> %vshr_n
-}
-
-define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
-; CHECK: test_vshrq_n_u64
-; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
- ret <2 x i64> %vshr_n
-}
-
-define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsra_n_s8
-; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- %1 = add <8 x i8> %vsra_n, %a
- ret <8 x i8> %1
-}
-
-define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsra_n_s16
-; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
- %1 = add <4 x i16> %vsra_n, %a
- ret <4 x i16> %1
-}
-
-define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsra_n_s32
-; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
- %1 = add <2 x i32> %vsra_n, %a
- ret <2 x i32> %1
-}
-
-define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsraq_n_s8
-; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- %1 = add <16 x i8> %vsra_n, %a
- ret <16 x i8> %1
-}
-
-define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsraq_n_s16
-; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %1 = add <8 x i16> %vsra_n, %a
- ret <8 x i16> %1
-}
-
-define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsraq_n_s32
-; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
- %1 = add <4 x i32> %vsra_n, %a
- ret <4 x i32> %1
-}
-
-define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsraq_n_s64
-; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
- %1 = add <2 x i64> %vsra_n, %a
- ret <2 x i64> %1
-}
-
-define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsra_n_u8
-; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- %1 = add <8 x i8> %vsra_n, %a
- ret <8 x i8> %1
-}
-
-define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsra_n_u16
-; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
- %1 = add <4 x i16> %vsra_n, %a
- ret <4 x i16> %1
-}
-
-define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsra_n_u32
-; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
- %1 = add <2 x i32> %vsra_n, %a
- ret <2 x i32> %1
-}
-
-define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsraq_n_u8
-; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- %1 = add <16 x i8> %vsra_n, %a
- ret <16 x i8> %1
-}
-
-define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsraq_n_u16
-; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %1 = add <8 x i16> %vsra_n, %a
- ret <8 x i16> %1
-}
-
-define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsraq_n_u32
-; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
- %1 = add <4 x i32> %vsra_n, %a
- ret <4 x i32> %1
-}
-
-define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsraq_n_u64
-; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
- %1 = add <2 x i64> %vsra_n, %a
- ret <2 x i64> %1
-}
-
-define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) {
-; CHECK: test_vrshr_n_s8
-; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3)
- ret <8 x i8> %vrshr_n
-}
-
-
-define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) {
-; CHECK: test_vrshr_n_s16
-; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3)
- ret <4 x i16> %vrshr_n
-}
-
-
-define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) {
-; CHECK: test_vrshr_n_s32
-; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3)
- ret <2 x i32> %vrshr_n
-}
-
-
-define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
-; CHECK: test_vrshrq_n_s8
-; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3)
- ret <16 x i8> %vrshr_n
-}
-
-
-define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
-; CHECK: test_vrshrq_n_s16
-; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3)
- ret <8 x i16> %vrshr_n
-}
-
-
-define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
-; CHECK: test_vrshrq_n_s32
-; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3)
- ret <4 x i32> %vrshr_n
-}
-
-
-define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) {
-; CHECK: test_vrshrq_n_s64
-; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3)
- ret <2 x i64> %vrshr_n
-}
-
-
-define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) {
-; CHECK: test_vrshr_n_u8
-; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3)
- ret <8 x i8> %vrshr_n
-}
-
-
-define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) {
-; CHECK: test_vrshr_n_u16
-; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3)
- ret <4 x i16> %vrshr_n
-}
-
-
-define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) {
-; CHECK: test_vrshr_n_u32
-; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3)
- ret <2 x i32> %vrshr_n
-}
-
-
-define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
-; CHECK: test_vrshrq_n_u8
-; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3)
- ret <16 x i8> %vrshr_n
-}
-
-
-define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
-; CHECK: test_vrshrq_n_u16
-; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3)
- ret <8 x i16> %vrshr_n
-}
-
-
-define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
-; CHECK: test_vrshrq_n_u32
-; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3)
- ret <4 x i32> %vrshr_n
-}
-
-
-define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) {
-; CHECK: test_vrshrq_n_u64
-; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3)
- ret <2 x i64> %vrshr_n
-}
-
-
-define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vrsra_n_s8
-; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3)
- %vrsra_n = add <8 x i8> %1, %a
- ret <8 x i8> %vrsra_n
-}
-
-define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vrsra_n_s16
-; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3)
- %vrsra_n = add <4 x i16> %1, %a
- ret <4 x i16> %vrsra_n
-}
-
-define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vrsra_n_s32
-; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3)
- %vrsra_n = add <2 x i32> %1, %a
- ret <2 x i32> %vrsra_n
-}
-
-define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vrsraq_n_s8
-; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3)
- %vrsra_n = add <16 x i8> %1, %a
- ret <16 x i8> %vrsra_n
-}
-
-define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsraq_n_s16
-; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3)
- %vrsra_n = add <8 x i16> %1, %a
- ret <8 x i16> %vrsra_n
-}
-
-define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsraq_n_s32
-; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3)
- %vrsra_n = add <4 x i32> %1, %a
- ret <4 x i32> %vrsra_n
-}
-
-define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsraq_n_s64
-; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3)
- %vrsra_n = add <2 x i64> %1, %a
- ret <2 x i64> %vrsra_n
-}
-
-define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vrsra_n_u8
-; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3)
- %vrsra_n = add <8 x i8> %1, %a
- ret <8 x i8> %vrsra_n
-}
-
-define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vrsra_n_u16
-; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3)
- %vrsra_n = add <4 x i16> %1, %a
- ret <4 x i16> %vrsra_n
-}
-
-define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vrsra_n_u32
-; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3)
- %vrsra_n = add <2 x i32> %1, %a
- ret <2 x i32> %vrsra_n
-}
-
-define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vrsraq_n_u8
-; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3)
- %vrsra_n = add <16 x i8> %1, %a
- ret <16 x i8> %vrsra_n
-}
-
-define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vrsraq_n_u16
-; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3)
- %vrsra_n = add <8 x i16> %1, %a
- ret <8 x i16> %vrsra_n
-}
-
-define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vrsraq_n_u32
-; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3)
- %vrsra_n = add <4 x i32> %1, %a
- ret <4 x i32> %vrsra_n
-}
-
-define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vrsraq_n_u64
-; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3)
- %vrsra_n = add <2 x i64> %1, %a
- ret <2 x i64> %vrsra_n
-}
-
-define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsri_n_s8
-; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
- ret <8 x i8> %vsri_n
-}
-
-
-define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsri_n_s16
-; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
- ret <4 x i16> %vsri
-}
-
-
-define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsri_n_s32
-; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
- ret <2 x i32> %vsri
-}
-
-
-define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsriq_n_s8
-; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
- ret <16 x i8> %vsri_n
-}
-
-
-define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsriq_n_s16
-; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
- ret <8 x i16> %vsri
-}
-
-
-define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsriq_n_s32
-; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
- ret <4 x i32> %vsri
-}
-
-
-define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsriq_n_s64
-; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
- ret <2 x i64> %vsri
-}
-
-define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsri_n_p8
-; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
- ret <8 x i8> %vsri_n
-}
-
-define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsri_n_p16
-; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
- %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
- ret <4 x i16> %vsri
-}
-
-define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsriq_n_p8
-; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
- ret <16 x i8> %vsri_n
-}
-
-define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsriq_n_p16
-; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
- %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
- ret <8 x i16> %vsri
-}
-
-define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsli_n_s8
-; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
- ret <8 x i8> %vsli_n
-}
-
-define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsli_n_s16
-; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
- ret <4 x i16> %vsli
-}
-
-define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vsli_n_s32
-; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
- ret <2 x i32> %vsli
-}
-
-define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsliq_n_s8
-; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
- ret <16 x i8> %vsli_n
-}
-
-define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsliq_n_s16
-; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
- ret <8 x i16> %vsli
-}
-
-define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vsliq_n_s32
-; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
- ret <4 x i32> %vsli
-}
-
-define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vsliq_n_s64
-; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
- ret <2 x i64> %vsli
-}
-
-define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vsli_n_p8
-; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
- ret <8 x i8> %vsli_n
-}
-
-define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vsli_n_p16
-; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
- %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
- ret <4 x i16> %vsli
-}
-
-define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vsliq_n_p8
-; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
- ret <16 x i8> %vsli_n
-}
-
-define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vsliq_n_p16
-; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
- %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
- ret <8 x i16> %vsli
-}
-
-define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) {
-; CHECK: test_vqshl_n_s8
-; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
- ret <8 x i8> %vqshl
-}
-
-
-define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) {
-; CHECK: test_vqshl_n_s16
-; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
- ret <4 x i16> %vqshl
-}
-
-
-define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) {
-; CHECK: test_vqshl_n_s32
-; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
- ret <2 x i32> %vqshl
-}
-
-
-define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
-; CHECK: test_vqshlq_n_s8
-; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
- ret <16 x i8> %vqshl_n
-}
-
-
-define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
-; CHECK: test_vqshlq_n_s16
-; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
- ret <8 x i16> %vqshl
-}
-
-
-define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
-; CHECK: test_vqshlq_n_s32
-; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
- ret <4 x i32> %vqshl
-}
-
-
-define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) {
-; CHECK: test_vqshlq_n_s64
-; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
- ret <2 x i64> %vqshl
-}
-
-
-define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) {
-; CHECK: test_vqshl_n_u8
-; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
- ret <8 x i8> %vqshl_n
-}
-
-
-define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) {
-; CHECK: test_vqshl_n_u16
-; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
- ret <4 x i16> %vqshl
-}
-
-
-define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) {
-; CHECK: test_vqshl_n_u32
-; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
- ret <2 x i32> %vqshl
-}
-
-
-define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
-; CHECK: test_vqshlq_n_u8
-; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
- ret <16 x i8> %vqshl_n
-}
-
-
-define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
-; CHECK: test_vqshlq_n_u16
-; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
- ret <8 x i16> %vqshl
-}
-
-
-define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
-; CHECK: test_vqshlq_n_u32
-; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
- ret <4 x i32> %vqshl
-}
-
-
-define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) {
-; CHECK: test_vqshlq_n_u64
-; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
- ret <2 x i64> %vqshl
-}
-
-define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) {
-; CHECK: test_vqshlu_n_s8
-; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
- %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3)
- ret <8 x i8> %vqshlu
-}
-
-
-define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) {
-; CHECK: test_vqshlu_n_s16
-; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
- %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3)
- ret <4 x i16> %vqshlu
-}
-
-
-define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) {
-; CHECK: test_vqshlu_n_s32
-; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
- %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3)
- ret <2 x i32> %vqshlu
-}
-
-
-define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
-; CHECK: test_vqshluq_n_s8
-; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
- %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3)
- ret <16 x i8> %vqshlu
-}
-
-
-define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
-; CHECK: test_vqshluq_n_s16
-; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
- %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3)
- ret <8 x i16> %vqshlu
-}
-
-
-define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
-; CHECK: test_vqshluq_n_s32
-; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
- %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3)
- ret <4 x i32> %vqshlu
-}
-
-
-define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) {
-; CHECK: test_vqshluq_n_s64
-; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
- %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3)
- ret <2 x i64> %vqshlu
-}
-
-
-define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
-; CHECK: test_vshrn_n_s16
-; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
- ret <8 x i8> %vshrn_n
-}
-
-define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
-; CHECK: test_vshrn_n_s32
-; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
- %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
- ret <4 x i16> %vshrn_n
-}
-
-define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
-; CHECK: test_vshrn_n_s64
-; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %1 = ashr <2 x i64> %a, <i64 19, i64 19>
- %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
- ret <2 x i32> %vshrn_n
-}
-
-define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
-; CHECK: test_vshrn_n_u16
-; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
- ret <8 x i8> %vshrn_n
-}
-
-define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
-; CHECK: test_vshrn_n_u32
-; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
- %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
- ret <4 x i16> %vshrn_n
-}
-
-define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
-; CHECK: test_vshrn_n_u64
-; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %1 = lshr <2 x i64> %a, <i64 19, i64 19>
- %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
- ret <2 x i32> %vshrn_n
-}
-
-define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vshrn_high_n_s16
-; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
- %2 = bitcast <8 x i8> %a to <1 x i64>
- %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
- %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %4
-}
-
-define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vshrn_high_n_s32
-; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
- %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
- %2 = bitcast <4 x i16> %a to <1 x i64>
- %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
- %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %4
-}
-
-define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vshrn_high_n_s64
-; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %2 = ashr <2 x i64> %b, <i64 19, i64 19>
- %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
- %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
- %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %4
-}
-
-define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vshrn_high_n_u16
-; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
- %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
- %2 = bitcast <8 x i8> %a to <1 x i64>
- %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
- %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %4
-}
-
-define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vshrn_high_n_u32
-; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
- %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
- %2 = bitcast <4 x i16> %a to <1 x i64>
- %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
- %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %4
-}
-
-define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vshrn_high_n_u64
-; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %2 = lshr <2 x i64> %b, <i64 19, i64 19>
- %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
- %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
- %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %4
-}
-
-define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) {
-; CHECK: test_vqshrun_n_s16
-; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vqshrun
-}
-
-
-define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) {
-; CHECK: test_vqshrun_n_s32
-; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vqshrun
-}
-
-define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) {
-; CHECK: test_vqshrun_n_s64
-; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vqshrun
-}
-
-define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrun_high_n_s16
-; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrun_high_n_s32
-; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrun_high_n_s64
-; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) {
-; CHECK: test_vrshrn_n_s16
-; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vrshrn
-}
-
-
-define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) {
-; CHECK: test_vrshrn_n_s32
-; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vrshrn
-}
-
-
-define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) {
-; CHECK: test_vrshrn_n_s64
-; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vrshrn
-}
-
-define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vrshrn_high_n_s16
-; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vrshrn_high_n_s32
-; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vrshrn_high_n_s64
-; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) {
-; CHECK: test_vqrshrun_n_s16
-; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vqrshrun
-}
-
-define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) {
-; CHECK: test_vqrshrun_n_s32
-; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vqrshrun
-}
-
-define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) {
-; CHECK: test_vqrshrun_n_s64
-; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vqrshrun
-}
-
-define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrun_high_n_s16
-; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrun_high_n_s32
-; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrun_high_n_s64
-; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) {
-; CHECK: test_vqshrn_n_s16
-; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vqshrn
-}
-
-
-define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) {
-; CHECK: test_vqshrn_n_s32
-; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vqshrn
-}
-
-
-define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) {
-; CHECK: test_vqshrn_n_s64
-; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vqshrn
-}
-
-
-define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) {
-; CHECK: test_vqshrn_n_u16
-; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vqshrn
-}
-
-
-define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) {
-; CHECK: test_vqshrn_n_u32
-; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vqshrn
-}
-
-
-define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) {
-; CHECK: test_vqshrn_n_u64
-; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vqshrn
-}
-
-
-define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrn_high_n_s16
-; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrn_high_n_s32
-; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrn_high_n_s64
-; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqshrn_high_n_u16
-; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqshrn_high_n_u32
-; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqshrn_high_n_u64
-; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) {
-; CHECK: test_vqrshrn_n_s16
-; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vqrshrn
-}
-
-
-define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) {
-; CHECK: test_vqrshrn_n_s32
-; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vqrshrn
-}
-
-
-define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) {
-; CHECK: test_vqrshrn_n_s64
-; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vqrshrn
-}
-
-
-define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) {
-; CHECK: test_vqrshrn_n_u16
-; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
- %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3)
- ret <8 x i8> %vqrshrn
-}
-
-
-define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) {
-; CHECK: test_vqrshrn_n_u32
-; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
- %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9)
- ret <4 x i16> %vqrshrn
-}
-
-
-define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) {
-; CHECK: test_vqrshrn_n_u64
-; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
- %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19)
- ret <2 x i32> %vqrshrn
-}
-
-
-define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrn_high_n_s16
-; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrn_high_n_s32
-; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrn_high_n_s64
-; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
-; CHECK: test_vqrshrn_high_n_u16
-; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
- %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3)
- %1 = bitcast <8 x i8> %a to <1 x i64>
- %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
-; CHECK: test_vqrshrn_high_n_u32
-; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
- %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9)
- %1 = bitcast <4 x i16> %a to <1 x i64>
- %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
-; CHECK: test_vqrshrn_high_n_u64
-; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
- %1 = bitcast <2 x i32> %a to <1 x i64>
- %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19)
- %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
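-; In the fixed-point conversion tests below, the i32 immediate (#31, #50, #64)
-; is the fractional-bit count, and the same value is expected verbatim as the
-; #imm operand of scvtf/ucvtf/fcvtzs/fcvtzu in the CHECK lines.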
-define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) {
-; CHECK: test_vcvt_n_f32_s32
-; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
- %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
- ret <2 x float> %vcvt
-}
-
-define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
-; CHECK: test_vcvtq_n_f32_s32
-; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
- %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
- ret <4 x float> %vcvt
-}
-
-define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) {
-; CHECK: test_vcvtq_n_f64_s64
-; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
- %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
- ret <2 x double> %vcvt
-}
-
-define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) {
-; CHECK: test_vcvt_n_f32_u32
-; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
- %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
- ret <2 x float> %vcvt
-}
-
-define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
-; CHECK: test_vcvtq_n_f32_u32
-; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
- %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
- ret <4 x float> %vcvt
-}
-
-define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) {
-; CHECK: test_vcvtq_n_f64_u64
-; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
- %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
- ret <2 x double> %vcvt
-}
-
-define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) {
-; CHECK: test_vcvt_n_s32_f32
-; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
- %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31)
- ret <2 x i32> %vcvt
-}
-
-define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
-; CHECK: test_vcvtq_n_s32_f32
-; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
- %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31)
- ret <4 x i32> %vcvt
-}
-
-define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) {
-; CHECK: test_vcvtq_n_s64_f64
-; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
- %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50)
- ret <2 x i64> %vcvt
-}
-
-define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) {
-; CHECK: test_vcvt_n_u32_f32
-; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
- %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31)
- ret <2 x i32> %vcvt
-}
-
-define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
-; CHECK: test_vcvtq_n_u32_f32
-; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
- %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31)
- ret <4 x i32> %vcvt
-}
-
-define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) {
-; CHECK: test_vcvtq_n_u64_f64
-; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
- %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50)
- ret <2 x i64> %vcvt
-}
-
-declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32)
-
-declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32)
-
-declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32)
-
-declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32)
-
-declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32)
-
-declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32)
-
-declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32)
-
-declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32)
-
-declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32)
-
-declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32)
-
-declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32)
-
-declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32)
-
-declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32)
-
-declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32)
-
-declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32)
-
-declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32)
-
-declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32)
-
-declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32)
-
-declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32)
-
-declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32)
-
-declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
-
-declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
-
-declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
-
-declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
-
-declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
-
-declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
-
-declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
-
-declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
-
-declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
-
-declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
-
-declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
-
-declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
-
-declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32)
-
-declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32)
-
-declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32)
-
-declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32)
-
-declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
-
-declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
-
-declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
-
-declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
-
-declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
-
-declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
-
-declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
-
-declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
-
-declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
-
-declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
-
-declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
-
-declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
-
-define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_s64_f64
-; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
- ret <1 x i64> %1
-}
-
-define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
-; CHECK-LABEL: test_vcvt_n_u64_f64
-; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
- ret <1 x i64> %1
-}
-
-define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_s64
-; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
- ret <1 x double> %1
-}
-
-define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
-; CHECK-LABEL: test_vcvt_n_f64_u64
-; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
- %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
- ret <1 x double> %1
-}
-
-declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
-declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
-declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
-declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll
deleted file mode 100644
index 7a51c0f..0000000
--- a/test/CodeGen/AArch64/neon-simd-tbl.ll
+++ /dev/null
@@ -1,828 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8>, <8 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
-
-declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8>, <16 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
-
-declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
-
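-; The d-register vtbl tests below widen their 64-bit table halves into a
-; single <16 x i8> (padding with zeroinitializer where needed) before calling
-; the q-register tbl intrinsics, so every CHECK line expects a .16b table list.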
-define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtbl1_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
- ret <8 x i8> %vtbl11.i
-}
-
-define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vqtbl1_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
- ret <8 x i8> %vtbl1.i
-}
-
-define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl2_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
- %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
- ret <8 x i8> %vtbl17.i
-}
-
-define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl2_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
- %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl2.i
-}
-
-define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl3_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
- %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
- ret <8 x i8> %vtbl212.i
-}
-
-define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl3_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
- %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl3.i
-}
-
-define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl4_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
- %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
- ret <8 x i8> %vtbl216.i
-}
-
-define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl4_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
- %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl4.i
-}
-
-define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vqtbl1q_s8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b)
- ret <16 x i8> %vtbl1.i
-}
-
-define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl2q_s8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
- %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl2.i
-}
-
-define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl3q_s8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
- %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl3.i
-}
-
-define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl4q_s8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
- %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl4.i
-}
-
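-; The vtbx1 and vtbx3 tests below emulate the table-extend semantics in IR:
-; a plain tbl lookup followed by icmp uge / sext / vbsl to keep %a for
-; out-of-range indices, so their CHECK lines expect tbl rather than tbx.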
-define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vtbx1_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
- %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
- %1 = sext <8 x i1> %0 to <8 x i8>
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx2_s8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
- %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
- ret <8 x i8> %vtbx17.i
-}
-
-define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx3_s8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
- %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
- %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
- %1 = sext <8 x i1> %0 to <8 x i8>
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx4_s8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
- %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
- ret <8 x i8> %vtbx216.i
-}
-
-define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vqtbx1_s8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
- ret <8 x i8> %vtbx1.i
-}
-
-define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx2_s8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
- %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx2.i
-}
-
-define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx3_s8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
- %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx3.i
-}
-
-define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx4_s8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx4.i
-}
-
-define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vqtbx1q_s8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
- ret <16 x i8> %vtbx1.i
-}
-
-define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx2q_s8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
- %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx2.i
-}
-
-define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx3q_s8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
- %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx3.i
-}
-
-define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx4q_s8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx4.i
-}
-
-define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtbl1_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
- ret <8 x i8> %vtbl11.i
-}
-
-define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vqtbl1_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
- ret <8 x i8> %vtbl1.i
-}
-
-define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl2_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
- %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
- ret <8 x i8> %vtbl17.i
-}
-
-define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl2_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
- %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl2.i
-}
-
-define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl3_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
- %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
- ret <8 x i8> %vtbl212.i
-}
-
-define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl3_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
- %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl3.i
-}
-
-define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl4_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
- %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
- ret <8 x i8> %vtbl216.i
-}
-
-define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl4_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
- %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl4.i
-}
-
-define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vqtbl1q_u8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b)
- ret <16 x i8> %vtbl1.i
-}
-
-define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl2q_u8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
- %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl2.i
-}
-
-define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl3q_u8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
- %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl3.i
-}
-
-define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl4q_u8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
- %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl4.i
-}
-
-define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vtbx1_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
- %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
- %1 = sext <8 x i1> %0 to <8 x i8>
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx2_u8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
- %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
- ret <8 x i8> %vtbx17.i
-}
-
-define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx3_u8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
- %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
- %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
- %1 = sext <8 x i1> %0 to <8 x i8>
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx4_u8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
- %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
- ret <8 x i8> %vtbx216.i
-}
-
-define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vqtbx1_u8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
- ret <8 x i8> %vtbx1.i
-}
-
-define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx2_u8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
- %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx2.i
-}
-
-define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx3_u8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
- %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx3.i
-}
-
-define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx4_u8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx4.i
-}
-
-define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vqtbx1q_u8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
- ret <16 x i8> %vtbx1.i
-}
-
-define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx2q_u8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
- %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx2.i
-}
-
-define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx3q_u8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
- %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx3.i
-}
-
-define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx4q_u8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx4.i
-}
-
-define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vtbl1_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
- ret <8 x i8> %vtbl11.i
-}
-
-define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vqtbl1_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
- ret <8 x i8> %vtbl1.i
-}
-
-define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl2_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
- %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
- ret <8 x i8> %vtbl17.i
-}
-
-define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl2_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
- %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl2.i
-}
-
-define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl3_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
- %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
- ret <8 x i8> %vtbl212.i
-}
-
-define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl3_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
- %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl3.i
-}
-
-define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vtbl4_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
- %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
- ret <8 x i8> %vtbl216.i
-}
-
-define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
-; CHECK: test_vqtbl4_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
- %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
- ret <8 x i8> %vtbl4.i
-}
-
-define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vqtbl1q_p8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b)
- ret <16 x i8> %vtbl1.i
-}
-
-define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl2q_p8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
- %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl2.i
-}
-
-define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl3q_p8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
- %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl3.i
-}
-
-define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
-; CHECK: test_vqtbl4q_p8:
-; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
- %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
- %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
- %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
- %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
- ret <16 x i8> %vtbl4.i
-}
-
-define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vtbx1_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
- %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
- %1 = sext <8 x i1> %0 to <8 x i8>
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx2_p8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
- %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
- ret <8 x i8> %vtbx17.i
-}
-
-define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx3_p8:
-; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
- %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
- %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
- %1 = sext <8 x i1> %0 to <8 x i8>
- %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
- ret <8 x i8> %vbsl.i
-}
-
-define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vtbx4_p8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
- %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
- ret <8 x i8> %vtbx216.i
-}
-
-define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
-; CHECK: test_vqtbx1_p8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
- ret <8 x i8> %vtbx1.i
-}
-
-define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx2_p8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
- %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx2.i
-}
-
-define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx3_p8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
- %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx3.i
-}
-
-define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
-; CHECK: test_vqtbx4_p8:
-; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
- ret <8 x i8> %vtbx4.i
-}
-
-define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-; CHECK: test_vqtbx1q_p8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
- ret <16 x i8> %vtbx1.i
-}
-
-define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx2q_p8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
- %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx2.i
-}
-
-define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx3q_p8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
- %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx3.i
-}
-
-define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
-; CHECK: test_vqtbx4q_p8:
-; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
-entry:
- %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
- %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
- %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
- %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
- %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
- ret <16 x i8> %vtbx4.i
-}
-
diff --git a/test/CodeGen/AArch64/neon-simd-vget.ll b/test/CodeGen/AArch64/neon-simd-vget.ll
deleted file mode 100644
index 6474499..0000000
--- a/test/CodeGen/AArch64/neon-simd-vget.ll
+++ /dev/null
@@ -1,225 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_high_s8:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_s16:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_high_s32:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_high_s64:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
- ret <1 x i64> %shuffle.i
-}
-
-define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_high_u8:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_u16:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_high_u32:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_high_u64:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
- ret <1 x i64> %shuffle.i
-}
-
-define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_high_p64:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
- ret <1 x i64> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_f16:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x float> @test_vget_high_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vget_high_f32:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
- ret <2 x float> %shuffle.i
-}
-
-define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_high_p8:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_high_p16:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- ret <4 x i16> %shuffle.i
-}
-
-define <1 x double> @test_vget_high_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vget_high_f64:
-; CHECK: dup d0, {{v[0-9]+}}.d[1]
-entry:
- %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
- ret <1 x double> %shuffle.i
-}
-
-define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_low_s8:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_s16:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_low_s32:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_low_s64:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
- ret <1 x i64> %shuffle.i
-}
-
-define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_low_u8:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_u16:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
-; CHECK-LABEL: test_vget_low_u32:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- ret <2 x i32> %shuffle.i
-}
-
-define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_low_u64:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
- ret <1 x i64> %shuffle.i
-}
-
-define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
-; CHECK-LABEL: test_vget_low_p64:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
- ret <1 x i64> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_f16:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i16> %shuffle.i
-}
-
-define <2 x float> @test_vget_low_f32(<4 x float> %a) {
-; CHECK-LABEL: test_vget_low_f32:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
- ret <2 x float> %shuffle.i
-}
-
-define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
-; CHECK-LABEL: test_vget_low_p8:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i8> %shuffle.i
-}
-
-define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
-; CHECK-LABEL: test_vget_low_p16:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i16> %shuffle.i
-}
-
-define <1 x double> @test_vget_low_f64(<2 x double> %a) {
-; CHECK-LABEL: test_vget_low_f64:
-; CHECK: ret
-entry:
- %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer
- ret <1 x double> %shuffle.i
-}
diff --git a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll b/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
deleted file mode 100644
index bb3300e..0000000
--- a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-
-; This file tests the spill of FPR8/FPR16. The volatile loads/stores force the
-; allocator to keep the value live until it's needed.
-
-%bigtype_v1i8 = type [20 x <1 x i8>]
-
-define void @spill_fpr8(%bigtype_v1i8* %addr) {
-; CHECK-LABEL: spill_fpr8:
-; CHECK: 1-byte Folded Spill
-; CHECK: 1-byte Folded Reload
- %val1 = load volatile %bigtype_v1i8* %addr
- %val2 = load volatile %bigtype_v1i8* %addr
- store volatile %bigtype_v1i8 %val1, %bigtype_v1i8* %addr
- store volatile %bigtype_v1i8 %val2, %bigtype_v1i8* %addr
- ret void
-}
-
-%bigtype_v1i16 = type [20 x <1 x i16>]
-
-define void @spill_fpr16(%bigtype_v1i16* %addr) {
-; CHECK-LABEL: spill_fpr16:
-; CHECK: 2-byte Folded Spill
-; CHECK: 2-byte Folded Reload
- %val1 = load volatile %bigtype_v1i16* %addr
- %val2 = load volatile %bigtype_v1i16* %addr
- store volatile %bigtype_v1i16 %val1, %bigtype_v1i16* %addr
- store volatile %bigtype_v1i16 %val2, %bigtype_v1i16* %addr
- ret void
-} \ No newline at end of file
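; A note on the idiom in the file deleted above: two back-to-back volatile
; loads of an aggregate, followed by volatile stores of both values, keep
; both values live at once; together they exceed the 32 FP registers and so
; force a spill/reload pair. A minimal sketch of the same pattern for FPR32
; (the type and function name here are illustrative, not part of the
; original tests; the old typed-pointer IR syntax matches this diff):
;
; %bigtype_v1f32 = type [40 x <1 x float>]
;
; define void @spill_fpr32(%bigtype_v1f32* %addr) {
;   %val1 = load volatile %bigtype_v1f32* %addr
;   %val2 = load volatile %bigtype_v1f32* %addr
;   store volatile %bigtype_v1f32 %val1, %bigtype_v1f32* %addr
;   store volatile %bigtype_v1f32 %val2, %bigtype_v1f32* %addr
;   ret void
; }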
diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
index e5b7694..1df3719 100644
--- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
+++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
@@ -5,7 +5,7 @@
define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) {
; CHECK-LABEL: truncStore.v2i64:
; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
+; CHECK: {{st1 { v[0-9]+.2s }|str d[0-9]+}}, [x{{[0-9]+|sp}}]
%b = trunc <2 x i64> %a to <2 x i32>
store <2 x i32> %b, <2 x i32>* %result
ret void
@@ -14,7 +14,7 @@ define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) {
define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) {
; CHECK-LABEL: truncStore.v4i32:
; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
+; CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [x{{[0-9]+|sp}}]
%b = trunc <4 x i32> %a to <4 x i16>
store <4 x i16> %b, <4 x i16>* %result
ret void
@@ -23,7 +23,7 @@ define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) {
define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) {
; CHECK-LABEL: truncStore.v8i16:
; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
-; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
+; CHECK: {{st1 { v[0-9]+.8b }|str d[0-9]+}}, [x{{[0-9]+|sp}}]
%b = trunc <8 x i16> %a to <8 x i8>
store <8 x i8> %b, <8 x i8>* %result
ret void
@@ -54,4 +54,4 @@ define i32 @loadExt.i32(<4 x i8>* %ref) {
%vecext = extractelement <4 x i8> %a, i32 0
%conv = zext i8 %vecext to i32
ret i32 %conv
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/AArch64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/neon-v1i1-setcc.ll
deleted file mode 100644
index 6c7d009..0000000
--- a/test/CodeGen/AArch64/neon-v1i1-setcc.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
-
-; This file tests DAG nodes like "v1i1 SETCC v1i64, v1i64". As the v1i1 type
-; is illegal in the AArch64 backend, the legalizer tries to scalarize this node.
-; As the v1i64 operands of SETCC are legal types, they will not be scalarized.
-; Currently the type legalizer will have an assertion failure as it assumes all
-; operands of SETCC have been legalized.
-; FIXME: If the type scalarization algorithm is improved and can legalize
-; "v1i1 SETCC" correctly, these test cases will no longer be needed.
-
-define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
-; CHECK-LABEL: test_sext_extr_cmp_0:
-; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = icmp sge <1 x i64> %v1, %v2
- %2 = extractelement <1 x i1> %1, i32 0
- %vget_lane = sext i1 %2 to i64
- ret i64 %vget_lane
-}
-
-define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
-; CHECK-LABEL: test_sext_extr_cmp_1:
-; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %1 = fcmp oeq <1 x double> %v1, %v2
- %2 = extractelement <1 x i1> %1, i32 0
- %vget_lane = sext i1 %2 to i64
- ret i64 %vget_lane
-}
-
-define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_select_v1i1_0:
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %1 = icmp eq <1 x i64> %v1, %v2
- %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
- ret <1 x i64> %res
-}
-
-define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
-; CHECK-LABEL: test_select_v1i1_1:
-; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %1 = fcmp oeq <1 x double> %v1, %v2
- %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
- ret <1 x i64> %res
-}
-
-define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
-; CHECK-LABEL: test_select_v1i1_2:
-; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
- %1 = icmp eq <1 x i64> %v1, %v2
- %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
- ret <1 x double> %res
-}
-
-define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
-; CHECK-LABEL: test_br_extr_cmp:
-; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
- %1 = icmp eq <1 x i64> %v1, %v2
- %2 = extractelement <1 x i1> %1, i32 0
- br i1 %2, label %if.end, label %if.then
-
-if.then:
- ret i32 0;
-
-if.end:
- ret i32 1;
-}
diff --git a/test/CodeGen/AArch64/neon-vector-list-spill.ll b/test/CodeGen/AArch64/neon-vector-list-spill.ll
deleted file mode 100644
index 3ab69c4..0000000
--- a/test/CodeGen/AArch64/neon-vector-list-spill.ll
+++ /dev/null
@@ -1,175 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast
-
-; FIXME: We should not generate ld/st for such register spills/fills, because
-; the test case is very simple and the register pressure is not high. If the
-; spill/fill algorithm is improved, this test case may no longer trigger, and
-; it can then be deleted.
-define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.DPairReg:
-; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-entry:
- %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4)
- %cmp = icmp eq i32 %arg2, 0
- br i1 %cmp, label %if.then, label %if.end
-
-if.then:
- tail call void @foo()
- br label %if.end
-
-if.end:
- %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0
- %res = extractelement <2 x i32> %vld.extract, i32 1
- ret i32 %res
-}
-
-define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.DTripleReg:
-; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-entry:
- %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4)
- %cmp = icmp eq i32 %arg2, 0
- br i1 %cmp, label %if.then, label %if.end
-
-if.then:
- tail call void @foo()
- br label %if.end
-
-if.end:
- %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
- %res = extractelement <4 x i16> %vld.extract, i32 1
- ret i16 %res
-}
-
-define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.DQuadReg:
-; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-entry:
- %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4)
- %cmp = icmp eq i32 %arg2, 0
- br i1 %cmp, label %if.then, label %if.end
-
-if.then:
- tail call void @foo()
- br label %if.end
-
-if.end:
- %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0
- %res = extractelement <4 x i16> %vld.extract, i32 0
- ret i16 %res
-}
-
-define i32 @spill.QPairReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.QPairReg:
-; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-entry:
- %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4)
- %cmp = icmp eq i32 %arg2, 0
- br i1 %cmp, label %if.then, label %if.end
-
-if.then:
- tail call void @foo()
- br label %if.end
-
-if.end:
- %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
- %res = extractelement <4 x i32> %vld.extract, i32 1
- ret i32 %res
-}
-
-define float @spill.QTripleReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.QTripleReg:
-; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-entry:
- %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4)
- %cmp = icmp eq i32 %arg2, 0
- br i1 %cmp, label %if.then, label %if.end
-
-if.then:
- tail call void @foo()
- br label %if.end
-
-if.end:
- %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
- %res = extractelement <4 x float> %vld3.extract, i32 1
- ret float %res
-}
-
-define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) {
-; CHECK-LABEL: spill.QQuadReg:
-; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
-entry:
- %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4)
- %cmp = icmp eq i32 %arg2, 0
- br i1 %cmp, label %if.then, label %if.end
-
-if.then:
- tail call void @foo()
- br label %if.end
-
-if.end:
- %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0
- %res = extractelement <16 x i8> %vld.extract, i32 1
- ret i8 %res
-}
-
-declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
-declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
-declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
-declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
-
-declare void @foo()
-
-; FIXME: We should not generate ld/st for such register spills/fills, because
-; the test case is very simple and the register pressure is not high. If the
-; spill/fill algorithm is improved, this test case may no longer trigger, and
-; it can then be deleted.
-; Check the spill for register class QPair_with_qsub_0_in_FPR128Lo.
-define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
- tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
- tail call void @foo()
- %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
- %1 = bitcast <2 x i64> %sv to <8 x i16>
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- %3 = mul <8 x i16> %2, %2
- ret <8 x i16> %3
-}
-
-; Check the spill for register class QTriple_with_qsub_0_in_FPR128Lo.
-define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
- tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
- tail call void @foo()
- %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
- %1 = bitcast <2 x i64> %sv to <8 x i16>
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- %3 = mul <8 x i16> %2, %2
- ret <8 x i16> %3
-}
-
-; Check the spill for register class QQuad_with_qsub_0_in_FPR128Lo.
-define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
- tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
- tail call void @foo()
- %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
- %1 = bitcast <2 x i64> %sv to <8 x i16>
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- %3 = mul <8 x i16> %2, %2
- ret <8 x i16> %3
-}
-
-declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
-declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) \ No newline at end of file
diff --git a/test/CodeGen/AArch64/nzcv-save.ll b/test/CodeGen/AArch64/nzcv-save.ll
new file mode 100644
index 0000000..32baff3
--- /dev/null
+++ b/test/CodeGen/AArch64/nzcv-save.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; CHECK: mrs [[NZCV_SAVE:x[0-9]+]], NZCV
+; CHECK: msr NZCV, [[NZCV_SAVE]]
+
+; The DAG ends up with two uses of the flags from an ADCS node, which means they
+; must be saved for later.
+define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
+entry:
+ %c = load i256* %cc
+ %d = load i256* %dd
+ %add = add nsw i256 %c, %d
+ store i256 %add, i256* %a, align 8
+ %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
+ %add6 = add nsw i256 %or, %d
+ store i256 %add6, i256* %b, align 8
+ ret void
+}
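; How the two uses of the flags arise (schematic; register numbers are
; illustrative, not taken from actual output): the OR'd constant is 2^200,
; so the low three 64-bit limbs of %or equal those of %c, and legalization
; can CSE the low part of the two i256 additions into one ADDS/ADCS chain.
; The carry-out of that shared chain then feeds the high limbs of *both*
; sums, giving the flags a second consumer:
;
;   adds x8, x9, x10   // shared low limb, defines NZCV
;   adcs ...           // first consumer of the carry
;   mrs  x11, NZCV     // carry still live: save it
;   msr  NZCV, x11     // ...and restore it for the second sum
;   adcs ...           // second consumer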
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
index 3404d3f..e8c7625 100644
--- a/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
-; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
; Make sure exception-handling PIC code can be linked correctly. An alternative
; to the sequence described below would have .gcc_except_table itself writable
@@ -11,8 +11,8 @@
; ... referring indirectly to stubs for its typeinfo ...
; CHECK: // @TType Encoding = indirect pcrel sdata8
; ... one of which is "int"'s typeinfo
-; CHECK: .Ltmp7:
-; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp7
+; CHECK: [[TYPEINFO_LBL:.Ltmp[0-9]+]]: // TypeInfo 1
+; CHECK-NEXT: .xword .L_ZTIi.DW.stub-[[TYPEINFO_LBL]]
; .. and which is properly defined (in a writable section for the dynamic loader) later.
; CHECK: .section .data.rel,"aw"
diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll
index 18a948b..de29b1b 100644
--- a/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s
; This test case is reduced from the prune_match function in 197.parser.
; We make sure that we do not use callee-saved registers (x19 to x25).
@@ -6,14 +6,14 @@
; CHECK-LABEL: prune_match:
; CHECK: entry
-; CHECK: str x30, [sp
+; CHECK: {{str x30|stp x29, x30}}, [sp
; CHECK-NOT: stp x25,
; CHECK-NOT: stp x23, x24
; CHECK-NOT: stp x21, x22
; CHECK-NOT: stp x19, x20
; CHECK: if.end
; CHECK: return
-; CHECK: ldr x30, [sp
+; CHECK: {{ldr x30|ldp x29, x30}}, [sp
; CHECK-NOT: ldp x19, x20
; CHECK-NOT: ldp x21, x22
; CHECK-NOT: ldp x23, x24
diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll
index 9655f90..58e0542 100644
--- a/test/CodeGen/AArch64/regress-bitcast-formals.ll
+++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios7.0 -verify-machineinstrs < %s | FileCheck %s
; CallingConv.td requires a bitcast for vector arguments. Make sure we're
; actually capable of that (the test was omitted from LowerFormalArguments).
diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll
index b35185c..25b5e0c 100644
--- a/test/CodeGen/AArch64/regress-f128csel-flags.ll
+++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
; We used to not mark NZCV as being used in the continuation basic-block
; when lowering a 128-bit "select" to branches. This meant a subsequent use
diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll
index cb8432a..5e6ab0a 100644
--- a/test/CodeGen/AArch64/regress-fp128-livein.ll
+++ b/test/CodeGen/AArch64/regress-fp128-livein.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s
; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB,
; causing a crash during live range calc.
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
index 053249c..e32ac84 100644
--- a/test/CodeGen/AArch64/regress-tail-livereg.ll
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
@var = global void()* zeroinitializer
declare void @bar()
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index ff77fb4..477d996 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
; When generating DAG selection tables, TableGen used to only flag an
; instruction as needing a chain on its own account if it had a built-in pattern
@@ -17,17 +17,18 @@ define i64 @test_chains() {
%locvar = alloca i8
call void @bar(i8* %locvar)
-; CHECK: bl bar
+; CHECK: bl {{_?bar}}
%inc.1 = load i8* %locvar
%inc.2 = zext i8 %inc.1 to i64
%inc.3 = add i64 %inc.2, 1
%inc.4 = trunc i64 %inc.3 to i8
store i8 %inc.4, i8* %locvar
-; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]]
-; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1
-; CHECK: strb {{w[0-9]+}}, [sp, [[LOCADDR]]]
-; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+
+; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
+; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
+; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
%ret.1 = load i8* %locvar
%ret.2 = zext i8 %ret.1 to i64
diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
index 0ef9818..c3167e4 100644
--- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
+++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -5,22 +5,7 @@ declare void @bar()
define void @test_w29_reserved() {
; CHECK-LABEL: test_w29_reserved:
-; CHECK: .cfi_startproc
-; CHECK: .cfi_def_cfa sp, 96
; CHECK: add x29, sp, #{{[0-9]+}}
-; CHECK: .cfi_def_cfa x29, 16
-; CHECK: .cfi_offset x30, -8
-; CHECK: .cfi_offset x29, -16
-; CHECK: .cfi_offset x28, -24
-; CHECK: .cfi_offset x27, -32
-; CHECK: .cfi_offset x26, -40
-; CHECK: .cfi_offset x25, -48
-; CHECK: .cfi_offset x24, -56
-; CHECK: .cfi_offset x23, -64
-; CHECK: .cfi_offset x22, -72
-; CHECK: .cfi_offset x21, -80
-; CHECK: .cfi_offset x20, -88
-; CHECK: .cfi_offset x19, -96
%val1 = load volatile i32* @var
%val2 = load volatile i32* @var
diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
deleted file mode 100644
index 764d2bc..0000000
--- a/test/CodeGen/AArch64/regress-wzr-allocatable.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0
-
-; When WZR wasn't marked as reserved, this function tried to allocate
-; it at O0 and then generated an internal fault (mostly incidentally)
-; when it discovered that it was already in use for a multiplication.
-
-; I'm not really convinced this is a good test since it could easily
-; stop testing what it does now with no one any the wiser. However, I
-; can't think of a better way to force the allocator to use WZR
-; specifically.
-
-define void @test() nounwind {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- br i1 undef, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- br label %for.cond
-
-for.end: ; preds = %for.cond
- br label %for.cond6
-
-for.cond6: ; preds = %for.body9, %for.end
- br i1 undef, label %for.body9, label %while.cond30
-
-for.body9: ; preds = %for.cond6
- store i16 0, i16* undef, align 2
- %0 = load i32* undef, align 4
- %1 = load i32* undef, align 4
- %mul15 = mul i32 %0, %1
- %add16 = add i32 %mul15, 32768
- %div = udiv i32 %add16, 65535
- %add17 = add i32 %div, 1
- store i32 %add17, i32* undef, align 4
- br label %for.cond6
-
-while.cond30: ; preds = %for.cond6
- ret void
-}
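; For context on why a multiply involves WZR at all: on AArch64,
; "mul w0, w1, w2" is an alias of "madd w0, w1, w2, wzr", so every scalar
; multiply reads WZR as its zero addend. Once the allocator had wrongly
; handed WZR out as a general scratch register, the multiply's use of it
; is what tripped the internal fault described in the comments above.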
diff --git a/test/CodeGen/AArch64/returnaddr.ll b/test/CodeGen/AArch64/returnaddr.ll
index c85f9ec..b136f04 100644
--- a/test/CodeGen/AArch64/returnaddr.ll
+++ b/test/CodeGen/AArch64/returnaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i8* @rt0(i32 %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll
index bd79685..ec86159 100644
--- a/test/CodeGen/AArch64/setcc-takes-i32.ll
+++ b/test/CodeGen/AArch64/setcc-takes-i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
; The most important point here is that the promotion of the i1 works
; correctly. Previously LLVM thought that i64 was the appropriate SetCC output,
diff --git a/test/CodeGen/AArch64/sext_inreg.ll b/test/CodeGen/AArch64/sext_inreg.ll
deleted file mode 100644
index 2f76081..0000000
--- a/test/CodeGen/AArch64/sext_inreg.ll
+++ /dev/null
@@ -1,198 +0,0 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-
-; For formal arguments, we have the following vector type promotions:
-; v2i8 is promoted to v2i32(f64)
-; v2i16 is promoted to v2i32(f64)
-; v4i8 is promoted to v4i16(f64)
-; v8i1 is promoted to v8i16(f128)
-
-define <2 x i8> @test_sext_inreg_v2i8i16(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i8i16
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
- %1 = sext <2 x i8> %v1 to <2 x i16>
- %2 = sext <2 x i8> %v2 to <2 x i16>
- %3 = shufflevector <2 x i16> %1, <2 x i16> %2, <2 x i32> <i32 0, i32 2>
- %4 = trunc <2 x i16> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <2 x i8> @test_sext_inreg_v2i8i16_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i8i16_2
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
- %a1 = shl <2 x i32> %v1, <i32 24, i32 24>
- %a2 = ashr <2 x i32> %a1, <i32 24, i32 24>
- %b1 = shl <2 x i32> %v2, <i32 24, i32 24>
- %b2 = ashr <2 x i32> %b1, <i32 24, i32 24>
- %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2>
- %d = trunc <2 x i32> %c to <2 x i8>
- ret <2 x i8> %d
-}
-
-define <2 x i8> @test_sext_inreg_v2i8i32(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i8i32
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
- %1 = sext <2 x i8> %v1 to <2 x i32>
- %2 = sext <2 x i8> %v2 to <2 x i32>
- %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2>
- %4 = trunc <2 x i32> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <2 x i8> @test_sext_inreg_v2i8i64(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i8i64
-; CHECK: ushll v1.2d, v1.2s, #0
-; CHECK: ushll v0.2d, v0.2s, #0
-; CHECK: shl v0.2d, v0.2d, #56
-; CHECK: sshr v0.2d, v0.2d, #56
-; CHECK: shl v1.2d, v1.2d, #56
-; CHECK: sshr v1.2d, v1.2d, #56
- %1 = sext <2 x i8> %v1 to <2 x i64>
- %2 = sext <2 x i8> %v2 to <2 x i64>
- %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
- %4 = trunc <2 x i64> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <4 x i8> @test_sext_inreg_v4i8i16(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v4i8i16
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
- %1 = sext <4 x i8> %v1 to <4 x i16>
- %2 = sext <4 x i8> %v2 to <4 x i16>
- %3 = shufflevector <4 x i16> %1, <4 x i16> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %4 = trunc <4 x i16> %3 to <4 x i8>
- ret <4 x i8> %4
-}
-
-define <4 x i8> @test_sext_inreg_v4i8i16_2(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v4i8i16_2
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
- %a1 = shl <4 x i16> %v1, <i16 8, i16 8, i16 8, i16 8>
- %a2 = ashr <4 x i16> %a1, <i16 8, i16 8, i16 8, i16 8>
- %b1 = shl <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
- %b2 = ashr <4 x i16> %b1, <i16 8, i16 8, i16 8, i16 8>
- %c = shufflevector <4 x i16> %a2, <4 x i16> %b2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %d = trunc <4 x i16> %c to <4 x i8>
- ret <4 x i8> %d
-}
-
-define <4 x i8> @test_sext_inreg_v4i8i32(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v4i8i32
-; CHECK: ushll v1.4s, v1.4h, #0
-; CHECK: ushll v0.4s, v0.4h, #0
-; CHECK: shl v0.4s, v0.4s, #24
-; CHECK: sshr v0.4s, v0.4s, #24
-; CHECK: shl v1.4s, v1.4s, #24
-; CHECK: sshr v1.4s, v1.4s, #24
- %1 = sext <4 x i8> %v1 to <4 x i32>
- %2 = sext <4 x i8> %v2 to <4 x i32>
- %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %4 = trunc <4 x i32> %3 to <4 x i8>
- ret <4 x i8> %4
-}
-
-define <8 x i8> @test_sext_inreg_v8i8i16(<8 x i8> %v1, <8 x i8> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v8i8i16
-; CHECK: sshll v0.8h, v0.8b, #0
-; CHECK: sshll v1.8h, v1.8b, #0
- %1 = sext <8 x i8> %v1 to <8 x i16>
- %2 = sext <8 x i8> %v2 to <8 x i16>
- %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
- %4 = trunc <8 x i16> %3 to <8 x i8>
- ret <8 x i8> %4
-}
-
-define <8 x i1> @test_sext_inreg_v8i1i16(<8 x i1> %v1, <8 x i1> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v8i1i16
-; CHECK: ushll v1.8h, v1.8b, #0
-; CHECK: ushll v0.8h, v0.8b, #0
-; CHECK: shl v0.8h, v0.8h, #15
-; CHECK: sshr v0.8h, v0.8h, #15
-; CHECK: shl v1.8h, v1.8h, #15
-; CHECK: sshr v1.8h, v1.8h, #15
- %1 = sext <8 x i1> %v1 to <8 x i16>
- %2 = sext <8 x i1> %v2 to <8 x i16>
- %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
- %4 = trunc <8 x i16> %3 to <8 x i1>
- ret <8 x i1> %4
-}
-
-define <2 x i16> @test_sext_inreg_v2i16i32(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i16i32
-; CHECK: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s
- %1 = sext <2 x i16> %v1 to <2 x i32>
- %2 = sext <2 x i16> %v2 to <2 x i32>
- %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2>
- %4 = trunc <2 x i32> %3 to <2 x i16>
- ret <2 x i16> %4
-}
-
-define <2 x i16> @test_sext_inreg_v2i16i32_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i16i32_2
-; CHECK: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s
- %a1 = shl <2 x i32> %v1, <i32 16, i32 16>
- %a2 = ashr <2 x i32> %a1, <i32 16, i32 16>
- %b1 = shl <2 x i32> %v2, <i32 16, i32 16>
- %b2 = ashr <2 x i32> %b1, <i32 16, i32 16>
- %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2>
- %d = trunc <2 x i32> %c to <2 x i16>
- ret <2 x i16> %d
-}
-
-define <2 x i16> @test_sext_inreg_v2i16i64(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i16i64
-; CHECK: ushll v1.2d, v1.2s, #0
-; CHECK: ushll v0.2d, v0.2s, #0
-; CHECK: shl v0.2d, v0.2d, #48
-; CHECK: sshr v0.2d, v0.2d, #48
-; CHECK: shl v1.2d, v1.2d, #48
-; CHECK: sshr v1.2d, v1.2d, #48
- %1 = sext <2 x i16> %v1 to <2 x i64>
- %2 = sext <2 x i16> %v2 to <2 x i64>
- %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
- %4 = trunc <2 x i64> %3 to <2 x i16>
- ret <2 x i16> %4
-}
-
-define <4 x i16> @test_sext_inreg_v4i16i32(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v4i16i32
-; CHECK: sshll v0.4s, v0.4h, #0
-; CHECK: sshll v1.4s, v1.4h, #0
- %1 = sext <4 x i16> %v1 to <4 x i32>
- %2 = sext <4 x i16> %v2 to <4 x i32>
- %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %4 = trunc <4 x i32> %3 to <4 x i16>
- ret <4 x i16> %4
-}
-
-define <2 x i32> @test_sext_inreg_v2i32i64(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
-; CHECK-LABEL: test_sext_inreg_v2i32i64
-; CHECK: sshll v0.2d, v0.2s, #0
-; CHECK: sshll v1.2d, v1.2s, #0
- %1 = sext <2 x i32> %v1 to <2 x i64>
- %2 = sext <2 x i32> %v2 to <2 x i64>
- %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
- %4 = trunc <2 x i64> %3 to <2 x i32>
- ret <2 x i32> %4
-}
-
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
index 20f1062..34e3bb4 100644
--- a/test/CodeGen/AArch64/sibling-call.ll
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -aarch64-load-store-opt=0 | FileCheck %s
declare void @callee_stack0()
declare void @callee_stack8([8 x i32], i64)
@@ -73,10 +73,10 @@ define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
ret void
-; CHECK: ldr x0,
-; CHECK: ldr x1,
-; CHECK: str x1,
-; CHECK: str x0,
+; CHECK: ldr [[VAL0:x[0-9]+]],
+; CHECK: ldr [[VAL1:x[0-9]+]],
+; CHECK: str [[VAL1]],
+; CHECK: str [[VAL0]],
; CHECK-NOT: add sp, sp,
; CHECK: b callee_stack16
@@ -91,7 +91,7 @@ define void @indirect_tail() {
%fptr = load void(i32)** @func
tail call void %fptr(i32 42)
ret void
-; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func]
-; CHECK: movz w0, #42
+; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:func]
+; CHECK: movz w0, #{{42|0x2a}}
; CHECK: br [[FPTR]]
}
diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll
index 4cd4449..c3a172d 100644
--- a/test/CodeGen/AArch64/sincos-expansion.ll
+++ b/test/CodeGen/AArch64/sincos-expansion.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
define float @test_sincos_f32(float %f) {
%sin = call float @sinf(float %f) readnone
diff --git a/test/CodeGen/AArch64/sincospow-vector-expansion.ll b/test/CodeGen/AArch64/sincospow-vector-expansion.ll
index 259a55e..22f33a8 100644
--- a/test/CodeGen/AArch64/sincospow-vector-expansion.ll
+++ b/test/CodeGen/AArch64/sincospow-vector-expansion.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc -o - %s -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+neon | FileCheck %s
define <2 x float> @test_cos_v2f64(<2 x double> %v1) {
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
index 81885f1..8aab842 100644
--- a/test/CodeGen/AArch64/tail-call.ll
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -7,8 +7,10 @@ declare fastcc void @callee_stack16([8 x i32], i64, i64)
define fastcc void @caller_to0_from0() nounwind {
; CHECK-LABEL: caller_to0_from0:
; CHECK-NEXT: // BB
+
tail call fastcc void @callee_stack0()
ret void
+
; CHECK-NEXT: b callee_stack0
}
@@ -17,6 +19,7 @@ define fastcc void @caller_to0_from8([8 x i32], i64) {
tail call fastcc void @callee_stack0()
ret void
+
; CHECK: add sp, sp, #16
; CHECK-NEXT: b callee_stack0
}
@@ -29,8 +32,8 @@ define fastcc void @caller_to8_from0() {
; pointer (we didn't have arg space to reuse).
tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
ret void
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK-NEXT: add sp, sp, #16
+
+; CHECK: str {{x[0-9]+}}, [sp, #16]!
; CHECK-NEXT: b callee_stack8
}
@@ -41,8 +44,8 @@ define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
; Key point is that the "%a" should go where at SP on entry.
tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
ret void
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK-NEXT: add sp, sp, #16
+
+; CHECK: str {{x[0-9]+}}, [sp, #16]!
; CHECK-NEXT: b callee_stack8
}
@@ -54,10 +57,10 @@ define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
; above %a on the stack. If it tries to go below incoming-SP then the
; callee will not deallocate the space, even in fastcc.
tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2)
-; CHECK: str {{x[0-9]+}}, [sp, #24]
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK: add sp, sp, #16
-; CHECK: b callee_stack16
+
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack16
ret void
}
@@ -69,8 +72,8 @@ define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
; Key point is that the "%a" should go where at #16 above SP on entry.
tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
ret void
-; CHECK: str {{x[0-9]+}}, [sp, #32]
-; CHECK-NEXT: add sp, sp, #32
+
+; CHECK: str {{x[0-9]+}}, [sp, #32]!
; CHECK-NEXT: b callee_stack8
}
@@ -84,11 +87,8 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
ret void
-; CHECK: ldr x0,
-; CHECK: ldr x1,
-; CHECK: str x1,
-; CHECK: str x0,
-
-; CHECK: add sp, sp, #16
-; CHECK: b callee_stack16
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack16
}
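
The reworked tail-call CHECK lines track the AArch64 load/store optimizer (disabled via -aarch64-load-store-opt=0 in sibling-call.ll above): a store followed by the SP bump folds into one pre-indexed store, and stores to adjacent slots merge into a pair. A rough sketch, with illustrative registers:

    ; Unoptimized:
    str x0, [sp, #16]
    add sp, sp, #16
    ; Optimized: one pre-indexed store does both (note the trailing "!"):
    str x0, [sp, #16]!
    ; Similarly, stores to adjacent slots merge into a pair:
    str x0, [sp, #16]
    str x1, [sp, #24]
    ; becomes:
    stp x0, x1, [sp, #16]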
diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll
deleted file mode 100644
index b5d7d89..0000000
--- a/test/CodeGen/AArch64/tls-dynamic-together.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
-
-; If the .tlsdesccall and blr parts are emitted completely separately (even with
-; glue) then LLVM will separate them quite happily (with a spill at O0, hence
-; the option). This is definitely wrong, so we make sure they are emitted
-; together.
-
-@general_dynamic_var = external thread_local global i32
-
-define i32 @test_generaldynamic() {
-; CHECK-LABEL: test_generaldynamic:
-
- %val = load i32* @general_dynamic_var
- ret i32 %val
-
-; CHECK: .tlsdesccall general_dynamic_var
-; CHECK-NEXT: blr {{x[0-9]+}}
-}
diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll
deleted file mode 100644
index 68c481c..0000000
--- a/test/CodeGen/AArch64/tls-dynamics.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
-
-@general_dynamic_var = external thread_local global i32
-
-define i32 @test_generaldynamic() {
-; CHECK-LABEL: test_generaldynamic:
-
- %val = load i32* @general_dynamic_var
- ret i32 %val
-
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
-; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
-; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
-; CHECK: ldr w0, [x[[TP]], x0]
-
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
-
-}
-
-define i32* @test_generaldynamic_addr() {
-; CHECK-LABEL: test_generaldynamic_addr:
-
- ret i32* @general_dynamic_var
-
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
-; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
-; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
-; CHECK: add x0, [[TP]], x0
-
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
-
-}
-
-@local_dynamic_var = external thread_local(localdynamic) global i32
-
-define i32 @test_localdynamic() {
-; CHECK-LABEL: test_localdynamic:
-
- %val = load i32* @local_dynamic_var
- ret i32 %val
-
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
-; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
-
-; CHECK: ldr w0, [x0, [[DTP_OFFSET]]]
-
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
-
-}
-
-define i32* @test_localdynamic_addr() {
-; CHECK-LABEL: test_localdynamic_addr:
-
- ret i32* @local_dynamic_var
-
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
-; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
-
-; CHECK: add x0, x0, [[DTP_OFFSET]]
-
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
-
-}
-
-; The entire point of the local-dynamic access model is to have a single call to
-; the expensive resolver. Make sure we achieve that goal.
-
-@local_dynamic_var2 = external thread_local(localdynamic) global i32
-
-define i32 @test_localdynamic_deduplicate() {
-; CHECK-LABEL: test_localdynamic_deduplicate:
-
- %val = load i32* @local_dynamic_var
- %val2 = load i32* @local_dynamic_var2
-
- %sum = add i32 %val, %val2
- ret i32 %sum
-
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK-DAG: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
-; CHECK-NEXT: blr [[CALLEE]]
-
-; CHECK-NOT: _TLS_MODULE_BASE_
-
-; CHECK: ret
-}
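
For reference, the general-dynamic TLSDESC sequence these deleted CHECK lines describe is the following (register choices vary; the relocations are the four R_AARCH64_TLSDESC_* entries listed above):

    adrp x0, :tlsdesc:var                 ; page containing the TLS descriptor
    ldr  x1, [x0, #:tlsdesc_lo12:var]     ; resolver function pointer
    add  x0, x0, #:tlsdesc_lo12:var       ; x0 = address of the descriptor
    .tlsdesccall var                      ; marker keeping the call bundled
    blr  x1                               ; resolver returns the TP offset in x0
    mrs  x8, tpidr_el0                    ; thread pointer
    ldr  w0, [x8, x0]                     ; var lives at TP + offset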
diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll
deleted file mode 100644
index 39ceb9a..0000000
--- a/test/CodeGen/AArch64/tls-execs.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
-
-@initial_exec_var = external thread_local(initialexec) global i32
-
-define i32 @test_initial_exec() {
-; CHECK-LABEL: test_initial_exec:
- %val = load i32* @initial_exec_var
-
-; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
-; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
-; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
-; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]]
-
-; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
-
- ret i32 %val
-}
-
-define i32* @test_initial_exec_addr() {
-; CHECK-LABEL: test_initial_exec_addr:
- ret i32* @initial_exec_var
-
-; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
-; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
-; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
-; CHECK: add x0, [[TP]], [[TP_OFFSET]]
-
-; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
-; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
-
-}
-
-@local_exec_var = thread_local(initialexec) global i32 0
-
-define i32 @test_local_exec() {
-; CHECK-LABEL: test_local_exec:
- %val = load i32* @local_exec_var
-
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [A,A,0xa0'A',0x92'A']
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
-; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
-
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
-
- ret i32 %val
-}
-
-define i32* @test_local_exec_addr() {
-; CHECK-LABEL: test_local_exec_addr:
- ret i32* @local_exec_var
-
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
-; CHECK: add x0, [[TP]], [[TP_OFFSET]]
-
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
-}
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
index 154bc08..8a2fe26 100644
--- a/test/CodeGen/AArch64/tst-br.ll
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
; We've got the usual issues with LLVM reordering blocks here. The
; tests are correct for the current order, but who knows when that
@@ -15,7 +15,7 @@ define i32 @test_tbz() {
%tbit0 = and i32 %val, 32768
%tst0 = icmp ne i32 %tbit0, 0
br i1 %tst0, label %test1, label %end1
-; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.LBB0_[0-9]+]]
+; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.?LBB0_[0-9]+]]
test1:
%tbit1 = and i32 %val, 4096
@@ -27,22 +27,22 @@ test2:
%tbit2 = and i64 %val64, 32768
%tst2 = icmp ne i64 %tbit2, 0
br i1 %tst2, label %test3, label %end1
-; CHECK: tbz {{x[0-9]+}}, #15, [[LBL_end1]]
+; CHECK: tbz {{[wx][0-9]+}}, #15, [[LBL_end1]]
test3:
%tbit3 = and i64 %val64, 4096
%tst3 = icmp ne i64 %tbit3, 0
br i1 %tst3, label %end2, label %end1
-; CHECK: tbz {{x[0-9]+}}, #12, [[LBL_end1]]
+; CHECK: tbz {{[wx][0-9]+}}, #12, [[LBL_end1]]
end2:
-; CHECK: movz x0, #1
+; CHECK: {{movz x0, #1|orr w0, wzr, #0x1}}
; CHECK-NEXT: ret
ret i32 1
end1:
; CHECK: [[LBL_end1]]:
-; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: {{mov x0, xzr|mov w0, wzr}}
; CHECK-NEXT: ret
ret i32 0
}
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
deleted file mode 100644
index 1c7f1e0..0000000
--- a/test/CodeGen/AArch64/variadic.ll
+++ /dev/null
@@ -1,241 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck --check-prefix=CHECK-NOFP %s
-
-%va_list = type {i8*, i8*, i8*, i32, i32}
-
-@var = global %va_list zeroinitializer
-
-declare void @llvm.va_start(i8*)
-
-define void @test_simple(i32 %n, ...) {
-; CHECK-LABEL: test_simple:
-; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK: mov x[[FPRBASE:[0-9]+]], sp
-; CHECK: str q7, [x[[FPRBASE]], #112]
-; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
-; CHECK: str x7, [x[[GPRBASE]], #48]
-
-; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK-NOFP: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
-; CHECK-NOFP: str x7, [x[[GPRBASE]], #48]
-; CHECK-NOFP-NOT: str q7,
-; CHECK-NOFP: str x1, [sp, #[[GPRFROMSP]]]
-
-; Omit the middle ones
-
-; CHECK: str q0, [sp]
-; CHECK: str x1, [sp, #[[GPRFROMSP]]]
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-
-; CHECK-NOFP-NOT: str q0, [sp]
-; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-; CHECK: movn [[VR_OFFS:w[0-9]+]], #127
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
-; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
-; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
-; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
-; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
-; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
-; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #55
-; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56
-; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8]
-; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
-; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
- ret void
-}
-
-define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
-; CHECK-LABEL: test_fewargs:
-; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK: mov x[[FPRBASE:[0-9]+]], sp
-; CHECK: str q7, [x[[FPRBASE]], #96]
-; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
-; CHECK: str x7, [x[[GPRBASE]], #32]
-
-; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK-NOFP-NOT: str q7,
-; CHECK-NOFP: mov x[[GPRBASE:[0-9]+]], sp
-; CHECK-NOFP: str x7, [x[[GPRBASE]], #24]
-
-; Omit the middle ones
-
-; CHECK: str q1, [sp]
-; CHECK: str x3, [sp, #[[GPRFROMSP]]]
-
-; CHECK-NOFP-NOT: str q1, [sp]
-; CHECK-NOFP: str x4, [sp]
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK: movn [[VR_OFFS:w[0-9]+]], #111
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
-; CHECK: movn [[GR_OFFS:w[0-9]+]], #39
-; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
-; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
-; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
-; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
-; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #31
-; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #32
-; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8]
-; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
-; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
- ret void
-}
-
-define void @test_nospare([8 x i64], [8 x float], ...) {
-; CHECK-LABEL: test_nospare:
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-; CHECK-NOT: sub sp, sp
-; CHECK: mov [[STACK:x[0-9]+]], sp
-; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
-; CHECK-NOFP-NOT: sub sp, sp
-; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #64
-; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
- ret void
-}
-
-; If there are non-variadic arguments on the stack (here two i64s) then the
-; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
-; CHECK-LABEL: test_offsetstack:
-; CHECK: sub sp, sp, #80
-; CHECK: mov x[[FPRBASE:[0-9]+]], sp
-; CHECK: str q7, [x[[FPRBASE]], #64]
-
-; CHECK-NOT: str x{{[0-9]+}},
-
-; CHECK-NOFP-NOT: str q7,
-; CHECK-NOT: str x7,
-
-; Omit the middle ones
-
-; CHECK: str q3, [sp]
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK: movn [[VR_OFFS:w[0-9]+]], #79
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
-; CHECK: str wzr, [x[[VA_LIST]], #24]
-; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-; CHECK: add [[STACK:x[0-9]+]], sp, #96
-; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
-; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #40
-; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
-; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24]
- ret void
-}
-
-declare void @llvm.va_end(i8*)
-
-define void @test_va_end() nounwind {
-; CHECK-LABEL: test_va_end:
-; CHECK-NEXT: BB#0
-; CHECK-NOFP: BB#0
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_end(i8* %addr)
-
- ret void
-; CHECK-NEXT: ret
-; CHECK-NOFP-NEXT: ret
-}
-
-declare void @llvm.va_copy(i8* %dest, i8* %src)
-
-@second_list = global %va_list zeroinitializer
-
-define void @test_va_copy() {
-; CHECK-LABEL: test_va_copy:
- %srcaddr = bitcast %va_list* @var to i8*
- %dstaddr = bitcast %va_list* @second_list to i8*
- call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
-
-; Check beginning and end again:
-
-; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
-; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
-; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
-; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
-; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24]
-
-; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
-; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
-; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
-; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
-; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24]
-
- ret void
-; CHECK: ret
-; CHECK-NOFP: ret
-}
-
-%struct.s_3i = type { i32, i32, i32 }
-
-; This checks that, if the last named argument is not a multiple of 8 bytes
-; and is allocated on the stack, __va_list.__stack is initialised to the
-; first 8-byte aligned location above it.
-define void @test_va_odd_struct_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, [1 x i64], %struct.s_3i* byval nocapture readnone align 4 %h, ...) {
-; CHECK-LABEL: test_va_odd_struct_on_stack:
-
-; CHECK: sub sp, sp, #128
-; CHECK: mov x[[FPRBASE:[0-9]+]], sp
-; CHECK: str q7, [x[[FPRBASE]], #112]
-
-; CHECK-NOT: str x{{[0-9]+}},
-
-; CHECK-NOFP-NOT: str q7,
-; CHECK-NOT: str x7,
-
-; Omit the middle ones
-
-; CHECK: str q0, [sp]
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; CHECK: movn [[VR_OFFS:w[0-9]+]], #127
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
-; CHECK: str wzr, [x[[VA_LIST]], #24]
-; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-; This constant would be #140 if it was not 8-byte aligned
-; CHECK: add [[STACK:x[0-9]+]], sp, #144
-; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-
-; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
-; This constant would be #12 if it was not 8-byte aligned
-; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #16
-; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
-; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28]
-; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24]
- ret void
-}
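
The %va_list type in the deleted test mirrors the AAPCS64 va_list; the #8/#16/#24/#28 stores in the CHECK lines above populate its fields. An annotated sketch:

    %va_list = type { i8*,   ; __stack   (offset  0): next overflow argument on the stack
                      i8*,   ; __gr_top  (offset  8): one past the GP register save area
                      i8*,   ; __vr_top  (offset 16): one past the FP/SIMD save area
                      i32,   ; __gr_offs (offset 24): negative offset from __gr_top (movn #55 = -56)
                      i32 }  ; __vr_offs (offset 28): negative offset from __vr_top (movn #127 = -128)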
diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll
index 9b1e527..bc112ab 100644
--- a/test/CodeGen/AArch64/zero-reg.ll
+++ b/test/CodeGen/AArch64/zero-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
@var32 = global i32 0
@var64 = global i64 0
@@ -7,9 +7,9 @@ define void @test_zr() {
; CHECK-LABEL: test_zr:
store i32 0, i32* @var32
-; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32]
+; CHECK: str wzr, [{{x[0-9]+}}, {{#?}}:lo12:var32]
store i64 0, i64* @var64
-; CHECK: str xzr, [{{x[0-9]+}}, #:lo12:var64]
+; CHECK: str xzr, [{{x[0-9]+}}, {{#?}}:lo12:var64]
ret void
; CHECK: ret
@@ -23,8 +23,7 @@ define void @test_sp(i32 %val) {
; instruction (0b11111 in the Rn field would mean "sp").
%addr = getelementptr i32* null, i64 0
store i32 %val, i32* %addr
-; CHECK: mov x[[NULL:[0-9]+]], xzr
-; CHECK: str {{w[0-9]+}}, [x[[NULL]]]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+|sp}}]
ret void
; CHECK: ret
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index 95aa595..dabe620 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -14,4 +14,6 @@ bb3: ; preds = %bb1
}
; CHECK-NOT: 255
+; CHECK: .file{{.*}}SxtInRegBug.ll
+; CHECK-NOT: 255
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index bc4cc98..48de244 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -123,7 +123,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!43 = metadata !{i32 26, i32 0, metadata !39, null}
!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
!45 = metadata !{i32 27, i32 0, metadata !39, null}
-!46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
+!46 = metadata !{metadata !16, metadata !17, metadata !20}
!47 = metadata !{}
!48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
index defb946..efb8202 100644
--- a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
+++ b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+v7,+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mattr=+v7,+thumb2 %s -o - | FileCheck %s
define i8 @f1(i8* %call1, i8* %call3, i32 %h, i32 %w, i32 %Width) {
; CHECK: f1:
diff --git a/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll b/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll
new file mode 100644
index 0000000..1e40e4a
--- /dev/null
+++ b/test/CodeGen/ARM/2014-05-14-DwarfEHCrash.ll
@@ -0,0 +1,50 @@
+; Assertion `Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"' failed.
+; Broken in r208166, fixed in r208715.
+
+; RUN: llc -mtriple=arm-linux-androideabi -o - -filetype=asm -relocation-model=pic %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv4t--linux-androideabi"
+
+@_ZTIi = external constant i8*
+
+define void @_Z3fn2v() #0 {
+entry:
+ invoke void @_Z3fn1v()
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 1
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2
+ %matches = icmp eq i32 %1, %2
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %3 = extractvalue { i8*, i32 } %0, 0
+ %4 = tail call i8* @__cxa_begin_catch(i8* %3) #2
+ tail call void @__cxa_end_catch() #2
+ br label %try.cont
+
+try.cont: ; preds = %entry, %catch
+ ret void
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %0
+}
+
+declare void @_Z3fn1v() #0
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
new file mode 100644
index 0000000..a82f614
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple thumbv7--windows-itanium -code-model large -filetype obj -o - %s \
+; RUN: | llvm-objdump -no-show-raw-insn -d - | FileCheck %s
+
+; ModuleID = 'reduced.c'
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7--windows-itanium"
+
+define arm_aapcs_vfpcc i8 @isel(i32 %i) {
+entry:
+ %i.addr = alloca i32, align 4
+ %buffer = alloca [4096 x i8], align 1
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32* %i.addr, align 4
+ %rem = urem i32 %0, 4096
+ %arrayidx = getelementptr inbounds [4096 x i8]* %buffer, i32 0, i32 %rem
+ %1 = load volatile i8* %arrayidx, align 1
+ ret i8 %1
+}
+
+; CHECK-LABEL: isel
+; CHECK: push {r4, r5}
+; CHECK: movw r4, #{{\d*}}
+; CHECK: movw r12, #0
+; CHECK: movt r12, #0
+; CHECK: blx r12
+; CHECK: sub.w sp, sp, r4
+
diff --git a/test/CodeGen/ARM/Windows/chkstk.ll b/test/CodeGen/ARM/Windows/chkstk.ll
new file mode 100644
index 0000000..cb787e1
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/chkstk.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 %s -o - \
+; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s
+
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -code-model=large %s -o - \
+; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s
+
+define arm_aapcs_vfpcc void @check_watermark() {
+entry:
+ %buffer = alloca [4096 x i8], align 1
+ ret void
+}
+
+; CHECK-DEFAULT-CODE-MODEL: check_watermark:
+; CHECK-DEFAULT-CODE-MODEL: movw r4, #1024
+; CHECK-DEFAULT-CODE-MODEL: bl __chkstk
+; CHECK-DEFAULT-CODE-MODEL: sub.w sp, sp, r4
+
+; CHECK-LARGE-CODE-MODEL: check_watermark:
+; CHECK-LARGE-CODE-MODEL: movw r12, :lower16:__chkstk
+; CHECK-LARGE-CODE-MODEL: movt r12, :upper16:__chkstk
+; CHECK-LARGE-CODE-MODEL: movw r4, #1024
+; CHECK-LARGE-CODE-MODEL: blx r12
+; CHECK-LARGE-CODE-MODEL: sub.w sp, sp, r4
+
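
The r4 dance in both prefixes follows the Windows on ARM stack-probe convention (my reading of the ABI, not spelled out in this diff): __chkstk takes the allocation size in 4-byte words in r4 and hands it back scaled to bytes, so #1024 covers the 4096-byte buffer:

    movw  r4, #1024       ; 4096 bytes / 4 = 1024 words
    bl    __chkstk        ; probes each page; r4 returns as the size in bytes
    sub.w sp, sp, r4      ; commit the 4096-byte allocation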
diff --git a/test/CodeGen/ARM/Windows/frame-register.ll b/test/CodeGen/ARM/Windows/frame-register.ll
new file mode 100644
index 0000000..31167d7
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/frame-register.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple thumbv7-windows -disable-fp-elim -filetype asm -o - %s \
+; RUN: | FileCheck %s
+
+declare void @callee(i32)
+
+define i32 @calleer(i32 %i) {
+entry:
+ %i.addr = alloca i32, align 4
+ %j = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32* %i.addr, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %j, align 4
+ %1 = load i32* %j, align 4
+ call void @callee(i32 %1)
+ %2 = load i32* %j, align 4
+ %add1 = add nsw i32 %2, 1
+ ret i32 %add1
+}
+
+; CHECK: push.w {r11, lr}
+
diff --git a/test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll b/test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll
new file mode 100644
index 0000000..acf21a1
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc i64 @stoi64(float %f) {
+entry:
+ %conv = fptosi float %f to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: stoi64
+; CHECK: bl __stoi64
+
+define arm_aapcs_vfpcc i64 @stou64(float %f) {
+entry:
+ %conv = fptoui float %f to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: stou64
+; CHECK: bl __stou64
+
+define arm_aapcs_vfpcc float @i64tos(i64 %i64) {
+entry:
+ %conv = sitofp i64 %i64 to float
+ ret float %conv
+}
+
+; CHECK-LABEL: i64tos
+; CHECK: bl __i64tos
+
+define arm_aapcs_vfpcc float @u64tos(i64 %u64) {
+entry:
+ %conv = uitofp i64 %u64 to float
+ ret float %conv
+}
+
+; CHECK-LABEL: u64tos
+; CHECK: bl __u64tos
+
+define arm_aapcs_vfpcc i64 @dtoi64(double %d) {
+entry:
+ %conv = fptosi double %d to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: dtoi64
+; CHECK: bl __dtoi64
+
+define arm_aapcs_vfpcc i64 @dtou64(double %d) {
+entry:
+ %conv = fptoui double %d to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: dtou64
+; CHECK: bl __dtou64
+
+define arm_aapcs_vfpcc double @i64tod(i64 %i64) {
+entry:
+ %conv = sitofp i64 %i64 to double
+ ret double %conv
+}
+
+; CHECK-LABEL: i64tod
+; CHECK: bl __i64tod
+
+define arm_aapcs_vfpcc double @u64tod(i64 %i64) {
+entry:
+ %conv = uitofp i64 %i64 to double
+ ret double %conv
+}
+
+; CHECK-LABEL: u64tod
+; CHECK: bl __u64tod
+
diff --git a/test/CodeGen/ARM/Windows/memset.ll b/test/CodeGen/ARM/Windows/memset.ll
new file mode 100644
index 0000000..500e25e
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/memset.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple thumbv7--windows-itanium -filetype asm -o - %s | FileCheck %s
+
+@source = common global [512 x i8] zeroinitializer, align 4
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @function() {
+entry:
+ call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i32 0, i1 false)
+ unreachable
+}
+
+; CHECK: movw r0, :lower16:source
+; CHECK: movt r0, :upper16:source
+; CHECK: movs r1, #0
+; CHECK: mov.w r2, #512
+; CHECK: memset
+
diff --git a/test/CodeGen/ARM/Windows/mov32t-bundling.ll b/test/CodeGen/ARM/Windows/mov32t-bundling.ll
new file mode 100644
index 0000000..5f83837
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/mov32t-bundling.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
+
+@_begin = external global i8
+@_end = external global i8
+
+declare arm_aapcs_vfpcc void @force_emission()
+
+define arm_aapcs_vfpcc void @bundle() {
+entry:
+ br i1 icmp uge (i32 sub (i32 ptrtoint (i8* @_end to i32), i32 ptrtoint (i8* @_begin to i32)), i32 4), label %if.then, label %if.end
+
+if.then:
+ tail call arm_aapcs_vfpcc void @force_emission()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; CHECK-LABEL: bundle
+; CHECK-NOT: subs r0, r1, r0
+; CHECK: movw r0, :lower16:_begin
+; CHECK-NEXT: movt r0, :upper16:_begin
+; CHECK-NEXT: movw r1, :lower16:_end
+; CHECK-NEXT: movt r1, :upper16:_end
+; CHECK-NEXT: subs r0, r1, r0
+; CHECK-NEXT: cmp r0, #4
+
diff --git a/test/CodeGen/ARM/Windows/movw-movt-relocations.ll b/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
new file mode 100644
index 0000000..3ae6428
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=thumbv7-windows -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-WINDOWS
+
+; RUN: llc -mtriple=thumbv7-eabi -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-EABI
+
+@i = common global i32 0, align 4
+@j = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize readonly
+define i32 @relocation(i32 %j, i32 %k) {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %add = add nsw i32 %1, %0
+ ret i32 %add
+}
+
+; CHECK-WINDOWS: movw r[[i:[0-4]]], :lower16:i
+; CHECK-WINDOWS-NEXT: movt r[[i]], :upper16:i
+; CHECK-WINDOWS: movw r[[j:[0-4]]], :lower16:j
+; CHECK-WINDOWS-NEXT: movt r[[j]], :upper16:j
+
+; CHECK-EABI: movw r[[i:[0-4]]], :lower16:i
+; CHECK-EABI: movw r[[j:[0-4]]], :lower16:j
+; CHECK-EABI-NEXT: movt r[[i]], :upper16:i
+; CHECK-EABI-NEXT: movt r[[j]], :upper16:j
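
Both prefixes check the same 32-bit address materialization; only the scheduling differs (Windows keeps each movw/movt pair adjacent, hence -NEXT on both halves). The idiom itself, annotated with an illustrative register:

    movw r0, :lower16:i   ; r0 = low 16 bits of the address of i
    movt r0, :upper16:i   ; r0 |= high 16 bits << 16
    ldr  r0, [r0]         ; load i through the materialized address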
diff --git a/test/CodeGen/ARM/Windows/no-aeabi.ll b/test/CodeGen/ARM/Windows/no-aeabi.ll
index 4c6676f..3971b9c 100644
--- a/test/CodeGen/ARM/Windows/no-aeabi.ll
+++ b/test/CodeGen/ARM/Windows/no-aeabi.ll
@@ -1,5 +1,27 @@
; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+@source = common global [512 x i8] zeroinitializer, align 4
+@target = common global [512 x i8] zeroinitializer, align 4
+
+define void @move() nounwind {
+entry:
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false)
+ unreachable
+}
+
+; CHECK-NOT: __aeabi_memmove
+
+define void @copy() nounwind {
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false)
+ unreachable
+}
+
+; CHECK-NOT: __aeabi_memcpy
+
define i32 @divide(i32 %i, i32 %j) nounwind {
entry:
%quotient = sdiv i32 %i, %j
diff --git a/test/CodeGen/ARM/Windows/pic.ll b/test/CodeGen/ARM/Windows/pic.ll
new file mode 100644
index 0000000..28d371f
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/pic.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple thumbv7-windows-itanium -relocation-model pic -filetype asm -o - %s \
+; RUN: | FileCheck %s
+
+@external = external global i8
+
+define arm_aapcs_vfpcc i8 @return_external() {
+entry:
+ %0 = load i8* @external, align 1
+ ret i8 %0
+}
+
+; CHECK-LABEL: return_external
+; CHECK: movw r0, :lower16:external
+; CHECK: movt r0, :upper16:external
+; CHECK: ldrb r0, [r0]
+
diff --git a/test/CodeGen/ARM/Windows/read-only-data.ll b/test/CodeGen/ARM/Windows/read-only-data.ll
new file mode 100644
index 0000000..0ccb5ed
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/read-only-data.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+@.str = private unnamed_addr constant [7 x i8] c"string\00", align 1
+
+declare arm_aapcs_vfpcc void @callee(i8*)
+
+define arm_aapcs_vfpcc void @function() {
+entry:
+ call arm_aapcs_vfpcc void @callee(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
+ ret void
+}
+
+; CHECK: .section .rdata,"rd"
+; CHECK-NOT: .section ".rodata.str1.1"
+
diff --git a/test/CodeGen/ARM/aapcs-hfa-code.ll b/test/CodeGen/ARM/aapcs-hfa-code.ll
new file mode 100644
index 0000000..396e838
--- /dev/null
+++ b/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7em-none-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK-M4F
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define arm_aapcs_vfpcc void @test_1float({ float } %a) {
+ call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 })
+ ret void
+
+; CHECK-LABEL: test_1float:
+; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00
+; CHECK: bl test_1float
+
+; CHECK-M4F-LABEL: test_1float:
+; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00
+; CHECK-M4F: bl test_1float
+}
+
+define arm_aapcs_vfpcc void @test_2float({ float, float } %a) {
+ call arm_aapcs_vfpcc void @test_2float({ float, float } { float 1.0, float 2.0 })
+ ret void
+
+; CHECK-LABEL: test_2float:
+; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00
+; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00
+; CHECK: bl test_2float
+
+; CHECK-M4F-LABEL: test_2float:
+; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00
+; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00
+; CHECK-M4F: bl test_2float
+}
+
+define arm_aapcs_vfpcc void @test_3float({ float, float, float } %a) {
+ call arm_aapcs_vfpcc void @test_3float({ float, float, float } { float 1.0, float 2.0, float 3.0 })
+ ret void
+
+; CHECK-LABEL: test_3float:
+; CHECK-DAG: vmov.f32 s0, #1.{{0+}}e+00
+; CHECK-DAG: vmov.f32 s1, #2.{{0+}}e+00
+; CHECK-DAG: vmov.f32 s2, #3.{{0+}}e+00
+; CHECK: bl test_3float
+
+; CHECK-M4F-LABEL: test_3float:
+; CHECK-M4F-DAG: vmov.f32 s0, #1.{{0+}}e+00
+; CHECK-M4F-DAG: vmov.f32 s1, #2.{{0+}}e+00
+; CHECK-M4F-DAG: vmov.f32 s2, #3.{{0+}}e+00
+; CHECK-M4F: bl test_3float
+}
+
+define arm_aapcs_vfpcc void @test_1double({ double } %a) {
+; CHECK-LABEL: test_1double:
+; CHECK-DAG: vmov.f64 d0, #1.{{0+}}e+00
+; CHECK: bl test_1double
+
+; CHECK-M4F-LABEL: test_1double:
+; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
+; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
+; CHECK-M4F: movt [[ONEHI]], #16368
+; CHECK-M4F-DAG: vmov s0, [[ONELO]]
+; CHECK-M4F-DAG: vmov s1, [[ONEHI]]
+; CHECK-M4F: bl test_1double
+
+ call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 })
+ ret void
+}
+
+; Final double argument might be put in s15 & [sp] if we're careless. It
+; should go entirely on the stack.
+define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3 x float], double %a) {
+; CHECK-LABEL: test_1double_nosplit:
+; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
+; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
+; CHECK-DAG: movt [[ONEHI]], #16368
+; CHECK: strd [[ONELO]], [[ONEHI]], [sp]
+; CHECK: bl test_1double_nosplit
+
+; CHECK-M4F-LABEL: test_1double_nosplit:
+; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
+; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
+; CHECK-M4F: movt [[ONEHI]], #16368
+; CHECK-M4F-DAG: str [[ONELO]], [sp]
+; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4]
+; CHECK-M4F: bl test_1double_nosplit
+ call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0)
+ ret void
+}
+
+; Final double argument might go at [sp, #4] if we're careless. It should go
+; at [sp, #8] to preserve alignment.
+define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double], float, double) {
+ call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0)
+
+; CHECK-LABEL: test_1double_misaligned:
+; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
+; CHECK-DAG: mov r[[BASE:[0-9]+]], sp
+; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
+; CHECK-DAG: movt [[ONEHI]], #16368
+; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]!
+; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4]
+
+; CHECK-M4F-LABEL: test_1double_misaligned:
+; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
+; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
+; CHECK-M4F: movt [[ONEHI]], #16368
+; CHECK-M4F-DAG: str [[ONELO]], [sp, #8]
+; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12]
+; CHECK-M4F: bl test_1double_misaligned
+
+ ret void
+}
diff --git a/test/CodeGen/ARM/aapcs-hfa.ll b/test/CodeGen/ARM/aapcs-hfa.ll
new file mode 100644
index 0000000..6448e00
--- /dev/null
+++ b/test/CodeGen/ARM/aapcs-hfa.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -float-abi=hard -debug-only arm-isel 2>&1 | FileCheck %s
+; RUN: llc < %s -float-abi=soft -debug-only arm-isel 2>&1 | FileCheck %s --check-prefix=SOFT
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7-none--eabi"
+
+; SOFT-NOT: isHA
+
+; CHECK: isHA: 1 { float }
+define void @f0b({ float } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { float, float }
+define void @f1({ float, float } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { float, float, float }
+define void @f1b({ float, float, float } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { float, float, float, float }
+define void @f1c({ float, float, float, float } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { float, float, float, float, float }
+define void @f2({ float, float, float, float, float } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { double }
+define void @f3({ double } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { double, double, double, double }
+define void @f4({ double, double, double, double } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { double, double, double, double, double }
+define void @f5({ double, double, double, double, double } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { i32, i32 }
+define void @f5b({ i32, i32 } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { [1 x float] }
+define void @f6({ [1 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { [4 x float] }
+define void @f7({ [4 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { [5 x float] }
+define void @f8({ [5 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 [1 x float]
+define void @f6b([1 x float] %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 [4 x float]
+define void @f7b([4 x float] %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 [5 x float]
+define void @f8b([5 x float] %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { [2 x float], [2 x float] }
+define void @f9({ [2 x float], [2 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { [1 x float], [3 x float] }
+define void @f9b({ [1 x float], [3 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { [3 x float], [3 x float] }
+define void @f10({ [3 x float], [3 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { <2 x float> }
+define void @f11({ <2 x float> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { <3 x float> }
+define void @f12({ <3 x float> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { <4 x float> }
+define void @f13({ <4 x float> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { <2 x float>, <2 x float> }
+define void @f15({ <2 x float>, <2 x float> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { <2 x float>, float }
+define void @f15b({ <2 x float>, float } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { <2 x float>, [2 x float] }
+define void @f15c({ <2 x float>, [2 x float] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { <2 x float>, <4 x float> }
+define void @f16({ <2 x float>, <4 x float> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { <2 x double> }
+define void @f17({ <2 x double> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { <2 x i32> }
+define void @f18({ <2 x i32> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { <2 x i64>, <4 x i32> }
+define void @f19({ <2 x i64>, <4 x i32> } %a) {
+ ret void
+}
+
+; CHECK: isHA: 1 { [4 x <4 x float>] }
+define void @f20({ [4 x <4 x float>] } %a) {
+ ret void
+}
+
+; CHECK: isHA: 0 { [5 x <4 x float>] }
+define void @f21({ [5 x <4 x float>] } %a) {
+ ret void
+}
+
+; CHECK-NOT: isHA
+define void @f22({ float } %a, ...) {
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index f55ae10..4de305b 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -29,7 +29,7 @@ define i32 @foo_f() {
@bar_i = alias internal i32* @bar
-@A = alias bitcast (i32* @bar to i64*)
+@A = alias i64, i32* @bar
define i32 @test() {
entry:
diff --git a/test/CodeGen/ARM/argaddr.ll b/test/CodeGen/ARM/argaddr.ll
index 116a32f..40bc5e0 100644
--- a/test/CodeGen/ARM/argaddr.ll
+++ b/test/CodeGen/ARM/argaddr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm
+; RUN: llc -mtriple=arm-eabi %s -o /dev/null
define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index a881d5f..9913f30 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,12 +1,16 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-THUMB
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
+; RUN: llc < %s -mtriple=armebv7 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
define i64 @test1(i64* %ptr, i64 %val) {
; CHECK-LABEL: test1:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: adds [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: adc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-LE: adds [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-LE: adc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE: adds [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE: adc [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -15,8 +19,10 @@ define i64 @test1(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test1:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-LE: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-LE: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE: adds.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE: adc.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -30,8 +36,10 @@ define i64 @test2(i64* %ptr, i64 %val) {
; CHECK-LABEL: test2:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-LE: subs [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-LE: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE: subs [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE: sbc [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -40,8 +48,10 @@ define i64 @test2(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test2:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-LE: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-LE: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE: subs.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE: sbc.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -55,8 +65,10 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-LABEL: test3:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-LE-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-LE-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -65,8 +77,10 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test3:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-LE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-LE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -80,8 +94,10 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-LABEL: test4:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-LE-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-LE-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -90,8 +106,10 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test4:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-LE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-LE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -105,8 +123,10 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-LABEL: test5:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-LE-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-LE-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-BE-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -115,8 +135,10 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test5:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-LE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-LE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-BE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -151,8 +173,10 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-LABEL: test7:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
-; CHECK-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
+; CHECK-LE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
+; CHECK-LE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
+; CHECK-BE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2
+; CHECK-BE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1
; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: bne
; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
@@ -163,8 +187,10 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-THUMB-LABEL: test7:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
-; CHECK-THUMB-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
+; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
+; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
+; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB: bne
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
@@ -220,9 +246,11 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
-; CHECK: cmp [[REG1]], r1
+; CHECK-LE: cmp [[REG1]], r1
+; CHECK-BE: cmp [[REG2]], r2
; CHECK: movwls [[CARRY_LO]], #1
-; CHECK: cmp [[REG2]], r2
+; CHECK-LE: cmp [[REG2]], r2
+; CHECK-BE: cmp [[REG1]], r1
; CHECK: movwle [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
@@ -237,11 +265,13 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test10:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
-; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
-; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+|lr]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+|lr]], #0
+; CHECK-THUMB-LE: cmp [[REG1]], r2
+; CHECK-THUMB-BE: cmp [[REG2]], r3
; CHECK-THUMB: movls.w [[CARRY_LO]], #1
-; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB-LE: cmp [[REG2]], r3
+; CHECK-THUMB-BE: cmp [[REG1]], r2
; CHECK-THUMB: movle [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
@@ -265,9 +295,11 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
-; CHECK: cmp [[REG1]], r1
+; CHECK-LE: cmp [[REG1]], r1
+; CHECK-BE: cmp [[REG2]], r2
; CHECK: movwls [[CARRY_LO]], #1
-; CHECK: cmp [[REG2]], r2
+; CHECK-LE: cmp [[REG2]], r2
+; CHECK-BE: cmp [[REG1]], r1
; CHECK: movwls [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
@@ -279,15 +311,16 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK: bne
; CHECK: dmb {{ish$}}
-
; CHECK-THUMB-LABEL: test11:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
-; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB-LE: cmp [[REG1]], r2
+; CHECK-THUMB-BE: cmp [[REG2]], r3
; CHECK-THUMB: movls.w [[CARRY_LO]], #1
-; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB-LE: cmp [[REG2]], r3
+; CHECK-THUMB-BE: cmp [[REG1]], r2
; CHECK-THUMB: movls [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
@@ -311,9 +344,11 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
-; CHECK: cmp [[REG1]], r1
+; CHECK-LE: cmp [[REG1]], r1
+; CHECK-BE: cmp [[REG2]], r2
; CHECK: movwhi [[CARRY_LO]], #1
-; CHECK: cmp [[REG2]], r2
+; CHECK-LE: cmp [[REG2]], r2
+; CHECK-BE: cmp [[REG1]], r1
; CHECK: movwgt [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
@@ -330,9 +365,11 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
-; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB-LE: cmp [[REG1]], r2
+; CHECK-THUMB-BE: cmp [[REG2]], r3
; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
-; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB-LE: cmp [[REG2]], r3
+; CHECK-THUMB-BE: cmp [[REG1]], r2
; CHECK-THUMB: movgt [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
@@ -356,9 +393,11 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
-; CHECK: cmp [[REG1]], r1
+; CHECK-LE: cmp [[REG1]], r1
+; CHECK-BE: cmp [[REG2]], r2
; CHECK: movwhi [[CARRY_LO]], #1
-; CHECK: cmp [[REG2]], r2
+; CHECK-LE: cmp [[REG2]], r2
+; CHECK-BE: cmp [[REG1]], r1
; CHECK: movwhi [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
@@ -375,9 +414,11 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
-; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB-LE: cmp [[REG1]], r2
+; CHECK-THUMB-BE: cmp [[REG2]], r3
; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
-; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB-LE: cmp [[REG2]], r3
+; CHECK-THUMB-BE: cmp [[REG1]], r2
; CHECK-THUMB: movhi [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
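
The LE/BE prefix split above encodes which half of the doubleword ldrexd leaves in each register of the pair; a sketch of the rule as the new CHECK lines state it:

    ; ldrexd rEVEN, rODD, [addr]
    ;   little-endian: rEVEN = low word,  rODD = high word
    ;     -> adds (carry-setting) uses the rEVEN half, adc the rODD half
    ;   big-endian:    rEVEN = high word, rODD = low word
    ;     -> adds uses the rODD half, adc the rEVEN half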
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 7922e22..a39565e 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
-; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-THUMB
+; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM-LE
+; RUN: llc -mtriple=armebv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM-BE
+; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
+; RUN: llc -mtriple=thumbebv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
@var8 = global i8 0
@var16 = global i16 0
@@ -87,8 +89,10 @@ define void @test_atomic_load_add_i64(i64 %offset) nounwind {
; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below are a reasonable guess but could change: the value certainly
; comes into the function there.
-; CHECK-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
-; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-LE-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
+; CHECK-LE-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-BE-NEXT: adds{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-NEXT: adc{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -181,8 +185,10 @@ define void @test_atomic_load_sub_i64(i64 %offset) nounwind {
; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below are a reasonable guess but could change: the value certainly
; comes into the function there.
-; CHECK-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
-; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-LE-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
+; CHECK-LE-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-BE-NEXT: subs{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-NEXT: sbc{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -275,8 +281,10 @@ define void @test_atomic_load_and_i64(i64 %offset) nounwind {
; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below are a reasonable guess but could change: the value certainly
; comes into the function in those registers.
-; CHECK-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-LE-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
+; CHECK-LE-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -369,8 +377,10 @@ define void @test_atomic_load_or_i64(i64 %offset) nounwind {
; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below are a reasonable guess but could change: the value certainly
; comes into the function in those registers.
-; CHECK-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-LE-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
+; CHECK-LE-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
; CHECK: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -463,8 +473,10 @@ define void @test_atomic_load_xor_i64(i64 %offset) nounwind {
; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below are a reasonable guess but could change: the value certainly
; comes into the function in those registers.
-; CHECK-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-LE-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
+; CHECK-LE-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK-BE-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -657,10 +669,14 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind {
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
-; CHECK-ARM: cmp [[OLD1]], r0
-; CHECK-ARM: movwls [[LOCARRY]], #1
-; CHECK-ARM: cmp [[OLD2]], r1
-; CHECK-ARM: movwle [[HICARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD1]], r0
+; CHECK-ARM-LE: movwls [[LOCARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD2]], r1
+; CHECK-ARM-LE: movwle [[HICARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD2]], r1
+; CHECK-ARM-BE: movwls [[LOCARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD1]], r0
+; CHECK-ARM-BE: movwle [[HICARRY]], #1
; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
; CHECK-ARM: cmp [[HICARRY]], #0
; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
@@ -771,10 +787,14 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind {
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
-; CHECK-ARM: cmp [[OLD1]], r0
-; CHECK-ARM: movwhi [[LOCARRY]], #1
-; CHECK-ARM: cmp [[OLD2]], r1
-; CHECK-ARM: movwgt [[HICARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD1]], r0
+; CHECK-ARM-LE: movwhi [[LOCARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD2]], r1
+; CHECK-ARM-LE: movwgt [[HICARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD2]], r1
+; CHECK-ARM-BE: movwhi [[LOCARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD1]], r0
+; CHECK-ARM-BE: movwgt [[HICARRY]], #1
; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
; CHECK-ARM: cmp [[HICARRY]], #0
; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
@@ -885,10 +905,14 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
-; CHECK-ARM: cmp [[OLD1]], r0
-; CHECK-ARM: movwls [[LOCARRY]], #1
-; CHECK-ARM: cmp [[OLD2]], r1
-; CHECK-ARM: movwls [[HICARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD1]], r0
+; CHECK-ARM-LE: movwls [[LOCARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD2]], r1
+; CHECK-ARM-LE: movwls [[HICARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD2]], r1
+; CHECK-ARM-BE: movwls [[LOCARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD1]], r0
+; CHECK-ARM-BE: movwls [[HICARRY]], #1
; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
; CHECK-ARM: cmp [[HICARRY]], #0
; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
@@ -999,10 +1023,14 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
-; CHECK-ARM: cmp [[OLD1]], r0
-; CHECK-ARM: movwhi [[LOCARRY]], #1
-; CHECK-ARM: cmp [[OLD2]], r1
-; CHECK-ARM: movwhi [[HICARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD1]], r0
+; CHECK-ARM-LE: movwhi [[LOCARRY]], #1
+; CHECK-ARM-LE: cmp [[OLD2]], r1
+; CHECK-ARM-LE: movwhi [[HICARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD2]], r1
+; CHECK-ARM-BE: movwhi [[LOCARRY]], #1
+; CHECK-ARM-BE: cmp [[OLD1]], r0
+; CHECK-ARM-BE: movwhi [[HICARRY]], #1
; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
; CHECK-ARM: cmp [[HICARRY]], #0
; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
@@ -1112,9 +1140,12 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below are a reasonable guess but could change: the value certainly
; comes into the function in those registers.
-; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
-; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
-; CHECK: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
+; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
+; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
+; CHECK-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
+; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
+; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
+; CHECK-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r2, r3 are a reasonable guess.
@@ -1151,7 +1182,8 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
%val = load atomic i8* %addr monotonic, align 1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: ldrb r0, [r0, r2]
+; CHECK-LE: ldrb r0, [r0, r2]
+; CHECK-BE: ldrb r0, [r1, r3]
; CHECK-NOT: dmb
; CHECK-NOT: mcr
@@ -1218,7 +1250,8 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
%val = load atomic i32* %addr monotonic, align 4
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: ldr r0, [r0, r2]
+; CHECK-LE: ldr r0, [r0, r2]
+; CHECK-BE: ldr r0, [r1, r3]
; CHECK-NOT: dmb
; CHECK-NOT: mcr
@@ -1259,8 +1292,10 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
%addr = inttoptr i64 %addr_int to i8*
store atomic i8 %val, i8* %addr monotonic, align 1
-; CHECK: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
-; CHECK: strb [[VAL]], [r0, r2]
+; CHECK-LE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
+; CHECK-LE: strb [[VAL]], [r0, r2]
+; CHECK-BE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp, #3]
+; CHECK-BE: strb [[VAL]], [r1, r3]
ret void
}
@@ -1328,7 +1363,8 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
; CHECK: ldr [[VAL:r[0-9]+]], [sp]
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: str [[VAL]], [r0, r2]
+; CHECK-LE: str [[VAL]], [r0, r2]
+; CHECK-BE: str [[VAL]], [r1, r3]
; CHECK-NOT: dmb
; CHECK-NOT: mcr
diff --git a/test/CodeGen/ARM/available_externally.ll b/test/CodeGen/ARM/available_externally.ll
index 0f646d5..d925b5c 100644
--- a/test/CodeGen/ARM/available_externally.ll
+++ b/test/CodeGen/ARM/available_externally.ll
@@ -11,6 +11,8 @@ define i32 @t1() {
}
; CHECK: L_A$non_lazy_ptr:
-; CHECK-NEXT: .long _A
+; CHECK-NEXT: .indirect_symbol _A
+; CHECK-NEXT: .long 0
; CHECK: L_B$non_lazy_ptr:
-; CHECK-NEXT: .long _B
+; CHECK-NEXT: .indirect_symbol _B
+; CHECK-NEXT: .long 0
diff --git a/test/CodeGen/ARM/big-endian-eh-unwind.ll b/test/CodeGen/ARM/big-endian-eh-unwind.ll
new file mode 100644
index 0000000..630dfed
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-eh-unwind.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple armeb-eabi -mattr v7 -filetype obj -o - | llvm-objdump -s - | FileCheck %s
+
+; ARM EHABI for big-endian targets
+; This test case checks that frame unwinding instructions are laid out in big-endian format.
+;
+; This is the LLVM assembly generated from the following C++ code:
+;
+; extern void foo(int);
+; void test(int a, int b) {
+; try {
+; foo(a);
+; } catch (...) {
+; foo(b);
+; }
+; }
+
+define void @_Z4testii(i32 %a, i32 %b) #0 {
+entry:
+ invoke void @_Z3fooi(i32 %a)
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2
+ invoke void @_Z3fooi(i32 %b)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %lpad
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont: ; preds = %entry, %invoke.cont2
+ ret void
+
+lpad1: ; preds = %lpad
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1
+ resume { i8*, i32 } %3
+
+terminate.lpad: ; preds = %lpad1
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %5 = extractvalue { i8*, i32 } %4, 0
+ tail call void @__clang_call_terminate(i8* %5) #3
+ unreachable
+}
+
+declare void @_Z3fooi(i32) #0
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+; Function Attrs: noinline noreturn nounwind
+define linkonce_odr hidden void @__clang_call_terminate(i8*) #1 {
+ %2 = tail call i8* @__cxa_begin_catch(i8* %0) #2
+ tail call void @_ZSt9terminatev() #3
+ unreachable
+}
+
+declare void @_ZSt9terminatev()
+
+; CHECK-LABEL: Contents of section .ARM.extab:
+; CHECK-NEXT: 0000 00000000 00a8b0b0
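+;
+; A hedged decoding of the second word, assuming the standard EHABI
+; generic-model layout: 0x00 = no extra unwind words, 0xa8 = pop {r4, lr},
+; and each 0xb0 = finish (the trailing one pads the word). The test only
+; cares that these bytes are emitted in big-endian order.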
+
diff --git a/test/CodeGen/ARM/big-endian-neon-bitconv.ll b/test/CodeGen/ARM/big-endian-neon-bitconv.ll
new file mode 100644
index 0000000..427d2e7
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-neon-bitconv.ll
@@ -0,0 +1,392 @@
+; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi soft -o - | FileCheck %s
+; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi hard -o - | FileCheck %s -check-prefix CHECK-HARD
+
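+; These tests check that bitcasts between scalars (i64, f64, i128, fp128) and
+; NEON vector types insert the expected VREV element-reversal operations when
+; targeting big-endian ARM.
+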
+@v2i64 = global <2 x i64> zeroinitializer
+@v2i32 = global <2 x i32> zeroinitializer
+@v4i32 = global <4 x i32> zeroinitializer
+@v4i16 = global <4 x i16> zeroinitializer
+@v8i16 = global <8 x i16> zeroinitializer
+@v8i8 = global <8 x i8> zeroinitializer
+@v16i8 = global <16 x i8> zeroinitializer
+
+@v2f32 = global <2 x float> zeroinitializer
+@v2f64 = global <2 x double> zeroinitializer
+@v4f32 = global <4 x float> zeroinitializer
+
+
+; 64-bit conversions
+define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
+; CHECK-LABEL: conv_i64_to_v8i8:
+; CHECK: vrev64.8
+ %v = bitcast i64 %val to <8 x i8>
+ %w = load <8 x i8>* @v8i8
+ %a = add <8 x i8> %v, %w
+ store <8 x i8> %a, <8 x i8>* %store
+ ret void
+}
+
+define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
+; CHECK-LABEL: conv_v8i8_to_i64:
+; CHECK: vrev64.8
+ %v = load <8 x i8>* %load
+ %w = load <8 x i8>* @v8i8
+ %a = add <8 x i8> %v, %w
+ %f = bitcast <8 x i8> %a to i64
+ call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store )
+ ret void
+}
+
+define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
+; CHECK-LABEL: conv_i64_to_v4i16:
+; CHECK: vrev64.16
+ %v = bitcast i64 %val to <4 x i16>
+ %w = load <4 x i16>* @v4i16
+ %a = add <4 x i16> %v, %w
+ store <4 x i16> %a, <4 x i16>* %store
+ ret void
+}
+
+define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
+; CHECK-LABEL: conv_v4i16_to_i64:
+; CHECK: vrev64.16
+ %v = load <4 x i16>* %load
+ %w = load <4 x i16>* @v4i16
+ %a = add <4 x i16> %v, %w
+ %f = bitcast <4 x i16> %a to i64
+ call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store )
+ ret void
+}
+
+define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
+; CHECK-LABEL: conv_i64_to_v2i32:
+; CHECK: vrev64.32
+ %v = bitcast i64 %val to <2 x i32>
+ %w = load <2 x i32>* @v2i32
+ %a = add <2 x i32> %v, %w
+ store <2 x i32> %a, <2 x i32>* %store
+ ret void
+}
+
+define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
+; CHECK-LABEL: conv_v2i32_to_i64:
+; CHECK: vrev64.32
+ %v = load <2 x i32>* %load
+ %w = load <2 x i32>* @v2i32
+ %a = add <2 x i32> %v, %w
+ %f = bitcast <2 x i32> %a to i64
+ call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store )
+ ret void
+}
+
+define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
+; CHECK-LABEL: conv_i64_to_v2f32:
+; CHECK: vrev64.32
+ %v = bitcast i64 %val to <2 x float>
+ %w = load <2 x float>* @v2f32
+ %a = fadd <2 x float> %v, %w
+ store <2 x float> %a, <2 x float>* %store
+ ret void
+}
+
+define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
+; CHECK-LABEL: conv_v2f32_to_i64:
+; CHECK: vrev64.32
+ %v = load <2 x float>* %load
+ %w = load <2 x float>* @v2f32
+ %a = fadd <2 x float> %v, %w
+ %f = bitcast <2 x float> %a to i64
+ call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store )
+ ret void
+}
+
+define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
+; CHECK-LABEL: conv_f64_to_v8i8:
+; CHECK: vrev64.8
+ %v = bitcast double %val to <8 x i8>
+ %w = load <8 x i8>* @v8i8
+ %a = add <8 x i8> %v, %w
+ store <8 x i8> %a, <8 x i8>* %store
+ ret void
+}
+
+define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
+; CHECK-LABEL: conv_v8i8_to_f64:
+; CHECK: vrev64.8
+ %v = load <8 x i8>* %load
+ %w = load <8 x i8>* @v8i8
+ %a = add <8 x i8> %v, %w
+ %f = bitcast <8 x i8> %a to double
+ call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store )
+ ret void
+}
+
+define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
+; CHECK-LABEL: conv_f64_to_v4i16:
+; CHECK: vrev64.16
+ %v = bitcast double %val to <4 x i16>
+ %w = load <4 x i16>* @v4i16
+ %a = add <4 x i16> %v, %w
+ store <4 x i16> %a, <4 x i16>* %store
+ ret void
+}
+
+define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
+; CHECK-LABEL: conv_v4i16_to_f64:
+; CHECK: vrev64.16
+ %v = load <4 x i16>* %load
+ %w = load <4 x i16>* @v4i16
+ %a = add <4 x i16> %v, %w
+ %f = bitcast <4 x i16> %a to double
+ call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store )
+ ret void
+}
+
+define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
+; CHECK-LABEL: conv_f64_to_v2i32:
+; CHECK: vrev64.32
+ %v = bitcast double %val to <2 x i32>
+ %w = load <2 x i32>* @v2i32
+ %a = add <2 x i32> %v, %w
+ store <2 x i32> %a, <2 x i32>* %store
+ ret void
+}
+
+define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
+; CHECK-LABEL: conv_v2i32_to_f64:
+; CHECK: vrev64.32
+ %v = load <2 x i32>* %load
+ %w = load <2 x i32>* @v2i32
+ %a = add <2 x i32> %v, %w
+ %f = bitcast <2 x i32> %a to double
+ call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store )
+ ret void
+}
+
+define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
+; CHECK-LABEL: conv_f64_to_v2f32:
+; CHECK: vrev64.32
+ %v = bitcast double %val to <2 x float>
+ %w = load <2 x float>* @v2f32
+ %a = fadd <2 x float> %v, %w
+ store <2 x float> %a, <2 x float>* %store
+ ret void
+}
+
+define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
+; CHECK-LABEL: conv_v2f32_to_f64:
+; CHECK: vrev64.32
+ %v = load <2 x float>* %load
+ %w = load <2 x float>* @v2f32
+ %a = fadd <2 x float> %v, %w
+ %f = bitcast <2 x float> %a to double
+ call void @conv_f64_to_v2f32( double %f, <2 x float>* %store )
+ ret void
+}
+
+; 128-bit conversions
+
+
+define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
+; CHECK-LABEL: conv_i128_to_v16i8:
+; CHECK: vrev32.8
+ %v = bitcast i128 %val to <16 x i8>
+ %w = load <16 x i8>* @v16i8
+ %a = add <16 x i8> %v, %w
+ store <16 x i8> %a, <16 x i8>* %store
+ ret void
+}
+
+define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
+; CHECK-LABEL: conv_v16i8_to_i128:
+; CHECK: vrev32.8
+ %v = load <16 x i8>* %load
+ %w = load <16 x i8>* @v16i8
+ %a = add <16 x i8> %v, %w
+ %f = bitcast <16 x i8> %a to i128
+ call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store )
+ ret void
+}
+
+define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
+; CHECK-LABEL: conv_i128_to_v8i16:
+; CHECK: vrev32.16
+ %v = bitcast i128 %val to <8 x i16>
+ %w = load <8 x i16>* @v8i16
+ %a = add <8 x i16> %v, %w
+ store <8 x i16> %a, <8 x i16>* %store
+ ret void
+}
+
+define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
+; CHECK-LABEL: conv_v8i16_to_i128:
+; CHECK: vrev32.16
+ %v = load <8 x i16>* %load
+ %w = load <8 x i16>* @v8i16
+ %a = add <8 x i16> %v, %w
+ %f = bitcast <8 x i16> %a to i128
+ call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store )
+ ret void
+}
+
+define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
+; CHECK-LABEL: conv_i128_to_v4i32:
+; CHECK: vrev64.32
+ %v = bitcast i128 %val to <4 x i32>
+ %w = load <4 x i32>* @v4i32
+ %a = add <4 x i32> %v, %w
+ store <4 x i32> %a, <4 x i32>* %store
+ ret void
+}
+
+define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
+; CHECK-LABEL: conv_v4i32_to_i128:
+; CHECK: vrev64.32
+ %v = load <4 x i32>* %load
+ %w = load <4 x i32>* @v4i32
+ %a = add <4 x i32> %v, %w
+ %f = bitcast <4 x i32> %a to i128
+ call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store )
+ ret void
+}
+
+define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
+; CHECK-LABEL: conv_i128_to_v4f32:
+; CHECK: vrev64.32
+ %v = bitcast i128 %val to <4 x float>
+ %w = load <4 x float>* @v4f32
+ %a = fadd <4 x float> %v, %w
+ store <4 x float> %a, <4 x float>* %store
+ ret void
+}
+
+define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
+; CHECK-LABEL: conv_v4f32_to_i128:
+; CHECK: vrev64.32
+ %v = load <4 x float>* %load
+ %w = load <4 x float>* @v4f32
+ %a = fadd <4 x float> %v, %w
+ %f = bitcast <4 x float> %a to i128
+ call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store )
+ ret void
+}
+
+define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
+; CHECK-LABEL: conv_f128_to_v2f64:
+; CHECK: vrev64.32
+ %v = bitcast fp128 %val to <2 x double>
+ %w = load <2 x double>* @v2f64
+ %a = fadd <2 x double> %v, %w
+ store <2 x double> %a, <2 x double>* %store
+ ret void
+}
+
+define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
+; CHECK-LABEL: conv_v2f64_to_f128:
+; CHECK: vrev64.32
+ %v = load <2 x double>* %load
+ %w = load <2 x double>* @v2f64
+ %a = fadd <2 x double> %v, %w
+ %f = bitcast <2 x double> %a to fp128
+ call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store )
+ ret void
+}
+
+define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
+; CHECK-LABEL: conv_f128_to_v16i8:
+; CHECK: vrev32.8
+ %v = bitcast fp128 %val to <16 x i8>
+ %w = load <16 x i8>* @v16i8
+ %a = add <16 x i8> %v, %w
+ store <16 x i8> %a, <16 x i8>* %store
+ ret void
+}
+
+define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
+; CHECK-LABEL: conv_v16i8_to_f128:
+; CHECK: vrev32.8
+ %v = load <16 x i8>* %load
+ %w = load <16 x i8>* @v16i8
+ %a = add <16 x i8> %v, %w
+ %f = bitcast <16 x i8> %a to fp128
+ call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store )
+ ret void
+}
+
+define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
+; CHECK-LABEL: conv_f128_to_v8i16:
+; CHECK: vrev32.16
+ %v = bitcast fp128 %val to <8 x i16>
+ %w = load <8 x i16>* @v8i16
+ %a = add <8 x i16> %v, %w
+ store <8 x i16> %a, <8 x i16>* %store
+ ret void
+}
+
+define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
+; CHECK-LABEL: conv_v8i16_to_f128:
+; CHECK: vrev32.16
+ %v = load <8 x i16>* %load
+ %w = load <8 x i16>* @v8i16
+ %a = add <8 x i16> %v, %w
+ %f = bitcast <8 x i16> %a to fp128
+ call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store )
+ ret void
+}
+
+define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
+; CHECK-LABEL: conv_f128_to_v4f32:
+; CHECK: vrev64.32
+ %v = bitcast fp128 %val to <4 x float>
+ %w = load <4 x float>* @v4f32
+ %a = fadd <4 x float> %v, %w
+ store <4 x float> %a, <4 x float>* %store
+ ret void
+}
+
+define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
+; CHECK-LABEL: conv_v4f32_to_f128:
+; CHECK: vrev64.32
+ %v = load <4 x float>* %load
+ %w = load <4 x float>* @v4f32
+ %a = fadd <4 x float> %v, %w
+ %f = bitcast <4 x float> %a to fp128
+ call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store )
+ ret void
+}
+
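+; Argument passing for 128-bit vectors: with a soft-float ABI the value arrives
+; split across r0-r3 and is reassembled with vmov, while the hard-float ABI
+; passes it directly in d0/d1 and needs no vmov.
+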
+define void @arg_v4i32( <4 x i32> %var, <4 x i32>* %store ) {
+; CHECK-LABEL: arg_v4i32:
+; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
+; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
+; CHECK: vst1.64 {[[REG1]], [[REG2]]},
+; CHECK-HARD-LABEL: arg_v4i32:
+; CHECK-HARD-NOT: vmov
+; CHECK-HARD: vst1.64 {d0, d1}
+ store <4 x i32> %var, <4 x i32>* %store
+ ret void
+}
+
+define void @arg_v8i16( <8 x i16> %var, <8 x i16>* %store ) {
+; CHECK-LABEL: arg_v8i16:
+; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
+; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
+; CHECK: vst1.64 {[[REG1]], [[REG2]]},
+; CHECK-HARD-LABEL: arg_v8i16:
+; CHECK-HARD-NOT: vmov
+; CHECK-HARD: vst1.64 {d0, d1}
+ store <8 x i16> %var, <8 x i16>* %store
+ ret void
+}
+
+define void @arg_v16i8( <16 x i8> %var, <16 x i8>* %store ) {
+; CHECK-LABEL: arg_v16i8:
+; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
+; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
+; CHECK: vst1.64 {[[REG1]], [[REG2]]},
+; CHECK-HARD-LABEL: arg_v16i8:
+; CHECK-HARD-NOT: vmov
+; CHECK-HARD: vst1.64 {d0, d1}
+ store <16 x i8> %var, <16 x i8>* %store
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/big-endian-vector-callee.ll b/test/CodeGen/ARM/big-endian-vector-callee.ll
new file mode 100644
index 0000000..4db8bde
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-vector-callee.ll
@@ -0,0 +1,1172 @@
+; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT
+; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD
+
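+; These tests check bitcasts of argument and return values on big-endian ARM:
+; SOFT expects moves between core registers and d/q registers, HARD expects the
+; value to stay in d0/q0, and VREV operations perform any element reordering.
+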
+; CHECK-LABEL: test_i64_f64:
+define i64 @test_i64_f64(double %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v1i64:
+define i64 @test_i64_v1i64(<1 x i64> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 d{{[0-9]+}}, d0
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v2f32:
+define i64 @test_i64_v2f32(<2 x float> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v2i32:
+define i64 @test_i64_v2i32(<2 x i32> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v4i16:
+define i64 @test_i64_v4i16(<4 x i16> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 d{{[0-9]+}}, d0
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v8i8:
+define i64 @test_i64_v8i8(<8 x i8> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 d{{[0-9]+}}, d0
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to i64
+ %3 = add i64 %2, %2
+ ret i64 %3
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_f64_i64:
+define double @test_f64_i64(i64 %p) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_f64_v1i64:
+define double @test_f64_v1i64(<1 x i64> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 d{{[0-9]+}}, d0
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_f64_v2f32:
+define double @test_f64_v2f32(<2 x float> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_f64_v2i32:
+define double @test_f64_v2i32(<2 x i32> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_f64_v4i16:
+define double @test_f64_v4i16(<4 x i16> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 d{{[0-9]+}}, d0
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_f64_v8i8:
+define double @test_f64_v8i8(<8 x i8> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 d{{[0-9]+}}, d0
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to double
+ %3 = fadd double %2, %2
+ ret double %3
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v1i64_i64:
+define <1 x i64> @test_v1i64_i64(i64 %p) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+}
+
+; CHECK-LABEL: test_v1i64_f64:
+define <1 x i64> @test_v1i64_f64(double %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+}
+
+; CHECK-LABEL: test_v1i64_v2f32:
+define <1 x i64> @test_v1i64_v2f32(<2 x float> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+}
+
+; CHECK-LABEL: test_v1i64_v2i32:
+define <1 x i64> @test_v1i64_v2i32(<2 x i32> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+}
+
+; CHECK-LABEL: test_v1i64_v4i16:
+define <1 x i64> @test_v1i64_v4i16(<4 x i16> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 d{{[0-9]+}}, d0
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+}
+
+; CHECK-LABEL: test_v1i64_v8i8:
+define <1 x i64> @test_v1i64_v8i8(<8 x i8> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 d{{[0-9]+}}, d0
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <1 x i64>
+ %3 = add <1 x i64> %2, %2
+ ret <1 x i64> %3
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+}
+
+; CHECK-LABEL: test_v2f32_i64:
+define <2 x float> @test_v2f32_i64(i64 %p) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2f32_f64:
+define <2 x float> @test_v2f32_f64(double %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2f32_v1i64:
+define <2 x float> @test_v2f32_v1i64(<1 x i64> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 d{{[0-9]+}}, d0
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2f32_v2i32:
+define <2 x float> @test_v2f32_v2i32(<2 x i32> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2f32_v4i16:
+define <2 x float> @test_v2f32_v4i16(<4 x i16> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 d{{[0-9]+}}, d0
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2f32_v8i8:
+define <2 x float> @test_v2f32_v8i8(<8 x i8> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 d{{[0-9]+}}, d0
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <2 x float>
+ %3 = fadd <2 x float> %2, %2
+ ret <2 x float> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2i32_i64:
+define <2 x i32> @test_v2i32_i64(i64 %p) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2i32_f64:
+define <2 x i32> @test_v2i32_f64(double %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2i32_v1i64:
+define <2 x i32> @test_v2i32_v1i64(<1 x i64> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 d{{[0-9]+}}, d0
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2i32_v2f32:
+define <2 x i32> @test_v2i32_v2f32(<2 x float> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2i32_v4i16:
+define <2 x i32> @test_v2i32_v4i16(<4 x i16> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 d{{[0-9]+}}, d0
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v2i32_v8i8:
+define <2 x i32> @test_v2i32_v8i8(<8 x i8> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 d{{[0-9]+}}, d0
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <2 x i32>
+ %3 = add <2 x i32> %2, %2
+ ret <2 x i32> %3
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+}
+
+; CHECK-LABEL: test_v4i16_i64:
+define <4 x i16> @test_v4i16_i64(i64 %p) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+}
+
+; CHECK-LABEL: test_v4i16_f64:
+define <4 x i16> @test_v4i16_f64(double %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+}
+
+; CHECK-LABEL: test_v4i16_v1i64:
+define <4 x i16> @test_v4i16_v1i64(<1 x i64> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 d{{[0-9]+}}, d0
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+}
+
+; CHECK-LABEL: test_v4i16_v2f32:
+define <4 x i16> @test_v4i16_v2f32(<2 x float> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+}
+
+; CHECK-LABEL: test_v4i16_v2i32:
+define <4 x i16> @test_v4i16_v2i32(<2 x i32> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+}
+
+; CHECK-LABEL: test_v4i16_v8i8:
+define <4 x i16> @test_v4i16_v8i8(<8 x i8> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 d{{[0-9]+}}, d0
+ %1 = add <8 x i8> %p, %p
+ %2 = bitcast <8 x i8> %1 to <4 x i16>
+ %3 = add <4 x i16> %2, %2
+ ret <4 x i16> %3
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+}
+
+; CHECK-LABEL: test_v8i8_i64:
+define <8 x i8> @test_v8i8_i64(i64 %p) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = add i64 %p, %p
+ %2 = bitcast i64 %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+}
+
+; CHECK-LABEL: test_v8i8_f64:
+define <8 x i8> @test_v8i8_f64(double %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd double %p, %p
+ %2 = bitcast double %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+}
+
+; CHECK-LABEL: test_v8i8_v1i64:
+define <8 x i8> @test_v8i8_v1i64(<1 x i64> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 d{{[0-9]+}}, d0
+ %1 = add <1 x i64> %p, %p
+ %2 = bitcast <1 x i64> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+}
+
+; CHECK-LABEL: test_v8i8_v2f32:
+define <8 x i8> @test_v8i8_v2f32(<2 x float> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = fadd <2 x float> %p, %p
+ %2 = bitcast <2 x float> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+}
+
+; CHECK-LABEL: test_v8i8_v2i32:
+define <8 x i8> @test_v8i8_v2i32(<2 x i32> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 d{{[0-9]+}}, d0
+ %1 = add <2 x i32> %p, %p
+ %2 = bitcast <2 x i32> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+}
+
+; CHECK-LABEL: test_v8i8_v4i16:
+define <8 x i8> @test_v8i8_v4i16(<4 x i16> %p) {
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 d{{[0-9]+}}, d0
+ %1 = add <4 x i16> %p, %p
+ %2 = bitcast <4 x i16> %1 to <8 x i8>
+ %3 = add <8 x i8> %2, %2
+ ret <8 x i8> %3
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+}
+
+; CHECK-LABEL: test_f128_v2f64:
+define fp128 @test_f128_v2f64(<2 x double> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG1]]
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG2]]
+; HARD: vadd.f64 d{{[0-9]+}}, d1
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+; CHECK: vst1.32 {d{{[0-9]+}}[1]}, [{{[a-z0-9]+}}:32]
+; CHECK: vst1.32 {d{{[0-9]+}}[0]}, [{{[a-z0-9]+}}:32]
+}
+
+; CHECK-LABEL: test_f128_v2i64:
+define fp128 @test_f128_v2i64(<2 x i64> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vadd.i64 q{{[0-9]+}}, q0
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+; CHECK: vst1.32 {d{{[0-9]+}}[1]}, [{{[a-z0-9]+}}:32]
+; CHECK: vst1.32 {d{{[0-9]+}}[0]}, [{{[a-z0-9]+}}:32]
+}
+
+; CHECK-LABEL: test_f128_v4f32:
+define fp128 @test_f128_v4f32(<4 x float> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+; CHECK: vst1.32 {d{{[0-9]+}}[1]}, [{{[a-z0-9]+}}:32]
+; CHECK: vst1.32 {d{{[0-9]+}}[0]}, [{{[a-z0-9]+}}:32]
+}
+
+; CHECK-LABEL: test_f128_v4i32:
+define fp128 @test_f128_v4i32(<4 x i32> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+; CHECK: vst1.32 {d{{[0-9]+}}[1]}, [{{[a-z0-9]+}}:32]
+; CHECK: vst1.32 {d{{[0-9]+}}[0]}, [{{[a-z0-9]+}}:32]
+}
+
+; CHECK-LABEL: test_f128_v8i16:
+define fp128 @test_f128_v8i16(<8 x i16> %p) {
+; HARD: vrev64.16 q{{[0-9]+}}, q0
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+; CHECK: vst1.32 {d{{[0-9]+}}[1]}, [{{[a-z0-9]+}}:32]
+; CHECK: vst1.32 {d{{[0-9]+}}[0]}, [{{[a-z0-9]+}}:32]
+}
+
+; CHECK-LABEL: test_f128_v16i8:
+define fp128 @test_f128_v16i8(<16 x i8> %p) {
+; HARD: vrev64.8 q{{[0-9]+}}, q0
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to fp128
+ %3 = fadd fp128 %2, %2
+ ret fp128 %3
+; CHECK: vst1.32 {d{{[0-9]+}}[1]}, [{{[a-z0-9]+}}:32]
+; CHECK: vst1.32 {d{{[0-9]+}}[0]}, [{{[a-z0-9]+}}:32]
+}
+
+; CHECK-LABEL: test_v2f64_f128:
+define <2 x double> @test_v2f64_f128(fp128 %p) {
+; CHECK: vmov.32 [[REG1:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG1]][1], r1
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r2
+; CHECK: vmov.32 [[REG2]][1], r3
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG2]]
+; SOFT: vmov r3, r2, [[REG1]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v2f64_v2i64:
+define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vadd.i64 q{{[0-9]+}}, q0
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG2]]
+; SOFT: vmov r3, r2, [[REG1]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v2f64_v4f32:
+define <2 x double> @test_v2f64_v4f32(<4 x float> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG2]]
+; SOFT: vmov r3, r2, [[REG1]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v2f64_v4i32:
+define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG2]]
+; SOFT: vmov r3, r2, [[REG1]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v2f64_v8i16:
+define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) {
+; HARD: vrev64.16 q{{[0-9]+}}, q0
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG2]]
+; SOFT: vmov r3, r2, [[REG1]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v2f64_v16i8:
+define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) {
+; HARD: vrev64.8 q{{[0-9]+}}, q0
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <2 x double>
+ %3 = fadd <2 x double> %2, %2
+ ret <2 x double> %3
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG2]]
+; SOFT: vmov r3, r2, [[REG1]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+}
+
+; CHECK-LABEL: test_v2i64_f128:
+define <2 x i64> @test_v2i64_f128(fp128 %p) {
+; CHECK: vmov.32 [[REG1:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG1]][1], r1
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r2
+; CHECK: vmov.32 [[REG2]][1], r3
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+}
+
+; CHECK-LABEL: test_v2i64_v2f64:
+define <2 x i64> @test_v2i64_v2f64(<2 x double> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG1]]
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG2]]
+; HARD: vadd.f64 d{{[0-9]+}}, d1
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+}
+
+; CHECK-LABEL: test_v2i64_v4f32:
+define <2 x i64> @test_v2i64_v4f32(<4 x float> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+}
+
+; CHECK-LABEL: test_v2i64_v4i32:
+define <2 x i64> @test_v2i64_v4i32(<4 x i32> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+}
+
+; CHECK-LABEL: test_v2i64_v8i16:
+define <2 x i64> @test_v2i64_v8i16(<8 x i16> %p) {
+; HARD: vrev64.16 q{{[0-9]+}}, q0
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+}
+
+; CHECK-LABEL: test_v2i64_v16i8:
+define <2 x i64> @test_v2i64_v16i8(<16 x i8> %p) {
+; HARD: vrev64.8 q{{[0-9]+}}, q0
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <2 x i64>
+ %3 = add <2 x i64> %2, %2
+ ret <2 x i64> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+}
+
+; CHECK-LABEL: test_v4f32_f128:
+define <4 x float> @test_v4f32_f128(fp128 %p) {
+; CHECK: vmov.32 [[REG1:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG1]][1], r1
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r2
+; CHECK: vmov.32 [[REG2]][1], r3
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4f32_v2f64:
+define <4 x float> @test_v4f32_v2f64(<2 x double> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG1]]
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG2]]
+; HARD: vadd.f64 d{{[0-9]+}}, d1
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4f32_v2i64:
+define <4 x float> @test_v4f32_v2i64(<2 x i64> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vadd.i64 q{{[0-9]+}}, q0
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4f32_v4i32:
+define <4 x float> @test_v4f32_v4i32(<4 x i32> %p) {
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4f32_v8i16:
+define <4 x float> @test_v4f32_v8i16(<8 x i16> %p) {
+; HARD: vrev64.16 q{{[0-9]+}}, q0
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4f32_v16i8:
+define <4 x float> @test_v4f32_v16i8(<16 x i8> %p) {
+; HARD: vrev64.8 q{{[0-9]+}}, q0
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <4 x float>
+ %3 = fadd <4 x float> %2, %2
+ ret <4 x float> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4i32_f128:
+define <4 x i32> @test_v4i32_f128(fp128 %p) {
+; CHECK: vmov.32 [[REG1:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG1]][1], r1
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r2
+; CHECK: vmov.32 [[REG2]][1], r3
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4i32_v2f64:
+define <4 x i32> @test_v4i32_v2f64(<2 x double> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG1]]
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG2]]
+; HARD: vadd.f64 d{{[0-9]+}}, d1
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4i32_v2i64:
+define <4 x i32> @test_v4i32_v2i64(<2 x i64> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vadd.i64 q{{[0-9]+}}, q0
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4i32_v4f32:
+define <4 x i32> @test_v4i32_v4f32(<4 x float> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4i32_v8i16:
+define <4 x i32> @test_v4i32_v8i16(<8 x i16> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.16 q{{[0-9]+}}, q0
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v4i32_v16i8:
+define <4 x i32> @test_v4i32_v16i8(<16 x i8> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.8 q{{[0-9]+}}, q0
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <4 x i32>
+ %3 = add <4 x i32> %2, %2
+ ret <4 x i32> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+}
+
+; CHECK-LABEL: test_v8i16_f128:
+define <8 x i16> @test_v8i16_f128(fp128 %p) {
+; CHECK: vmov.32 [[REG1:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG1]][1], r1
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r2
+; CHECK: vmov.32 [[REG2]][1], r3
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+}
+
+; CHECK-LABEL: test_v8i16_v2f64:
+define <8 x i16> @test_v8i16_v2f64(<2 x double> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG1]]
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG2]]
+; HARD: vadd.f64 d{{[0-9]+}}, d1
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+}
+
+; CHECK-LABEL: test_v8i16_v2i64:
+define <8 x i16> @test_v8i16_v2i64(<2 x i64> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vadd.i64 q{{[0-9]+}}, q0
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+}
+
+; CHECK-LABEL: test_v8i16_v4f32:
+define <8 x i16> @test_v8i16_v4f32(<4 x float> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+}
+
+; CHECK-LABEL: test_v8i16_v4i32:
+define <8 x i16> @test_v8i16_v4i32(<4 x i32> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+}
+
+; CHECK-LABEL: test_v8i16_v16i8:
+define <8 x i16> @test_v8i16_v16i8(<16 x i8> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.8 q{{[0-9]+}}, q0
+ %1 = add <16 x i8> %p, %p
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = add <8 x i16> %2, %2
+ ret <8 x i16> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+}
+
+; CHECK-LABEL: test_v16i8_f128:
+define <16 x i8> @test_v16i8_f128(fp128 %p) {
+; CHECK: vmov.32 [[REG1:d[0-9]+]][0], r0
+; CHECK: vmov.32 [[REG1]][1], r1
+; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r2
+; CHECK: vmov.32 [[REG2]][1], r3
+ %1 = fadd fp128 %p, %p
+ %2 = bitcast fp128 %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+}
+
+; CHECK-LABEL: test_v16i8_v2f64:
+define <16 x i8> @test_v16i8_v2f64(<2 x double> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG1]]
+; SOFT: vadd.f64 d{{[0-9]+}}, [[REG2]]
+; HARD: vadd.f64 d{{[0-9]+}}, d1
+; HARD: vadd.f64 d{{[0-9]+}}, d0
+ %1 = fadd <2 x double> %p, %p
+ %2 = bitcast <2 x double> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+}
+
+; CHECK-LABEL: test_v16i8_v2i64:
+define <16 x i8> @test_v16i8_v2i64(<2 x i64> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vadd.i64 q{{[0-9]+}}, q0
+ %1 = add <2 x i64> %p, %p
+ %2 = bitcast <2 x i64> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+}
+
+; CHECK-LABEL: test_v16i8_v4f32:
+define <16 x i8> @test_v16i8_v4f32(<4 x float> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = fadd <4 x float> %p, %p
+ %2 = bitcast <4 x float> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+}
+
+; CHECK-LABEL: test_v16i8_v4i32:
+define <16 x i8> @test_v16i8_v4i32(<4 x i32> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.32 q{{[0-9]+}}, q0
+ %1 = add <4 x i32> %p, %p
+ %2 = bitcast <4 x i32> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+}
+
+; CHECK-LABEL: test_v16i8_v8i16:
+define <16 x i8> @test_v16i8_v8i16(<8 x i16> %p) {
+; SOFT: vmov [[REG1:d[0-9]+]], r3, r2
+; SOFT: vmov [[REG2:d[0-9]+]], r1, r0
+; HARD: vrev64.16 q{{[0-9]+}}, q0
+ %1 = add <8 x i16> %p, %p
+ %2 = bitcast <8 x i16> %1 to <16 x i8>
+ %3 = add <16 x i8> %2, %2
+ ret <16 x i8> %3
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+}
diff --git a/test/CodeGen/ARM/big-endian-vector-caller.ll b/test/CodeGen/ARM/big-endian-vector-caller.ll
new file mode 100644
index 0000000..d01b0a7
--- /dev/null
+++ b/test/CodeGen/ARM/big-endian-vector-caller.ll
@@ -0,0 +1,1369 @@
+; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT
+; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD
+
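+; Same coverage as big-endian-vector-callee.ll, but exercised from the caller's
+; side: each value is bitcast, passed to an external helper, and the returned
+; value is bitcast back.
+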
+; CHECK-LABEL: test_i64_f64:
+declare i64 @test_i64_f64_helper(double %p)
+define void @test_i64_f64(double* %p, i64* %q) {
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call i64 @test_i64_f64_helper(double %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v1i64:
+declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
+define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v2f32:
+declare i64 @test_i64_v2f32_helper(<2 x float> %p)
+define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v2i32:
+declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
+define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v4i16:
+declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
+define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_i64_v8i8:
+declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
+define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
+ %4 = add i64 %3, %3
+ store i64 %4, i64* %q
+ ret void
+; CHECK: adds r1
+; CHECK: adc r0
+}
+
+; CHECK-LABEL: test_f64_i64:
+declare double @test_f64_i64_helper(i64 %p)
+define void @test_f64_i64(i64* %p, double* %q) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call double @test_f64_i64_helper(i64 %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_f64_v1i64:
+declare double @test_f64_v1i64_helper(<1 x i64> %p)
+define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_f64_v2f32:
+declare double @test_f64_v2f32_helper(<2 x float> %p)
+define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_f64_v2i32:
+declare double @test_f64_v2i32_helper(<2 x i32> %p)
+define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_f64_v4i16:
+declare double @test_f64_v4i16_helper(<4 x i16> %p)
+define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_f64_v8i8:
+declare double @test_f64_v8i8_helper(<8 x i8> %p)
+define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
+ %4 = fadd double %3, %3
+ store double %4, double* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.f64 [[REG]]
+; HARD: vadd.f64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v1i64_i64:
+declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
+define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v1i64_f64:
+declare <1 x i64> @test_v1i64_f64_helper(double %p)
+define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v1i64_v2f32:
+declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
+define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
+; HARD: vrev64.32 d0
+; SOFT: vadd.f32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v1i64_v2i32:
+declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
+define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
+; HARD: vrev64.32 d0
+; SOFT: vadd.i32 [[REG:d[0-9]+]]
+; SOFT: vrev64.32 [[REG]]
+; SOFT: vmov r1, r0, [[REG]]
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v1i64_v4i16:
+declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
+define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v1i64_v8i8:
+declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
+define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
+ %4 = add <1 x i64> %3, %3
+ store <1 x i64> %4, <1 x i64>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vadd.i64 [[REG]]
+; HARD: vadd.i64 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2f32_i64:
+declare <2 x float> @test_v2f32_i64_helper(i64 %p)
+define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2f32_f64:
+declare <2 x float> @test_v2f32_f64_helper(double %p)
+define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2f32_v1i64:
+declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
+define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2f32_v2i32:
+declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
+define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
+; HARD: vrev64.32 d0
+; SOFT: vadd.i32 [[REG:d[0-9]+]]
+; SOFT: vrev64.32 [[REG]]
+; SOFT: vmov r1, r0, [[REG]]
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2f32_v4i16:
+declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
+define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2f32_v8i8:
+declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
+define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
+ %4 = fadd <2 x float> %3, %3
+ store <2 x float> %4, <2 x float>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2i32_i64:
+declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
+define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2i32_f64:
+declare <2 x i32> @test_v2i32_f64_helper(double %p)
+define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2i32_v1i64:
+declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
+define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2i32_v2f32:
+declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
+define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
+; HARD: vadd.f32 [[REG:d[0-9]+]]
+; HARD: vrev64.32 d0, [[REG]]
+; SOFT: vadd.f32 [[REG:d[0-9]+]]
+; SOFT: vrev64.32 [[REG]]
+; SOFT: vmov r1, r0, [[REG]]
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2i32_v4i16:
+declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
+define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v2i32_v8i8:
+declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
+define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
+ %4 = add <2 x i32> %3, %3
+ store <2 x i32> %4, <2 x i32>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.32 [[REG]]
+; HARD: vrev64.32 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v4i16_i64:
+declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
+define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v4i16_f64:
+declare <4 x i16> @test_v4i16_f64_helper(double %p)
+define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v4i16_v1i64:
+declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
+define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v4i16_v2f32:
+declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
+define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
+; HARD: vadd.f32 [[REG:d[0-9]+]]
+; HARD: vrev64.32 d0, [[REG]]
+; SOFT: vadd.f32 [[REG:d[0-9]+]]
+; SOFT: vrev64.32 [[REG]]
+; SOFT: vmov r1, r0, [[REG]]
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v4i16_v2i32:
+declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
+define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
+; HARD: vadd.i32 [[REG:d[0-9]+]]
+; HARD: vrev64.32 d0, [[REG]]
+; SOFT: vadd.i32 [[REG:d[0-9]+]]
+; SOFT: vrev64.32 [[REG]]
+; SOFT: vmov r1, r0, [[REG]]
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v4i16_v8i8:
+declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
+define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
+; SOFT: vrev64.8 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.8 d0
+ %1 = load <8 x i8>* %p
+ %2 = add <8 x i8> %1, %1
+ %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
+ %4 = add <4 x i16> %3, %3
+ store <4 x i16> %4, <4 x i16>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.16 [[REG]]
+; HARD: vrev64.16 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v8i8_i64:
+declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
+define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
+; CHECK: adds r1
+; CHECK: adc r0
+ %1 = load i64* %p
+ %2 = add i64 %1, %1
+ %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v8i8_f64:
+declare <8 x i8> @test_v8i8_f64_helper(double %p)
+define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
+; SOFT: vadd.f64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.f64 d0
+ %1 = load double* %p
+ %2 = fadd double %1, %1
+ %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v8i8_v1i64:
+declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
+define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
+; SOFT: vadd.i64 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vadd.i64 d0
+ %1 = load <1 x i64>* %p
+ %2 = add <1 x i64> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v8i8_v2f32:
+declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
+define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+ %1 = load <2 x float>* %p
+ %2 = fadd <2 x float> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v8i8_v2i32:
+declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
+define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
+; SOFT: vrev64.32 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.32 d0
+ %1 = load <2 x i32>* %p
+ %2 = add <2 x i32> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_v8i8_v4i16:
+declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
+define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
+; SOFT: vrev64.16 [[REG:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG]]
+; HARD: vrev64.16 d0
+ %1 = load <4 x i16>* %p
+ %2 = add <4 x i16> %1, %1
+ %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
+ %4 = add <8 x i8> %3, %3
+ store <8 x i8> %4, <8 x i8>* %q
+ ret void
+; SOFT: vmov [[REG:d[0-9]+]], r1, r0
+; SOFT: vrev64.8 [[REG]]
+; HARD: vrev64.8 {{d[0-9]+}}, d0
+}
+
+; CHECK-LABEL: test_f128_v2f64:
+declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
+define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
+; SOFT: vadd.f64 [[REG2:d[0-9]+]]
+; SOFT: vadd.f64 [[REG1:d[0-9]+]]
+; SOFT: vmov r1, r0, [[REG1]]
+; SOFT: vmov r3, r2, [[REG2]]
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+; CHECK: stm sp, {r0, r1, r2, r3}
+}
+
+; CHECK-LABEL: test_f128_v2i64:
+declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
+define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+; CHECK: stm sp, {r0, r1, r2, r3}
+}
+
+; CHECK-LABEL: test_f128_v4f32:
+declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
+define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+; CHECK: stm sp, {r0, r1, r2, r3}
+}
+
+; CHECK-LABEL: test_f128_v4i32:
+declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
+define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+; CHECK: stm sp, {r0, r1, r2, r3}
+}
+
+; CHECK-LABEL: test_f128_v8i16:
+declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
+define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+; CHECK: stm sp, {r0, r1, r2, r3}
+}
+
+; CHECK-LABEL: test_f128_v16i8:
+declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
+define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
+ %4 = fadd fp128 %3, %3
+ store fp128 %4, fp128* %q
+ ret void
+; CHECK: stm sp, {r0, r1, r2, r3}
+}
+
+; CHECK-LABEL: test_v2f64_f128:
+declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
+define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+
+}
+
+; CHECK-LABEL: test_v2f64_v2i64:
+declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
+define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2f64_v4f32:
+declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
+define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2f64_v4i32:
+declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
+define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2f64_v8i16:
+declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
+define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2f64_v16i8:
+declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
+define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
+ %4 = fadd <2 x double> %3, %3
+ store <2 x double> %4, <2 x double>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2i64_f128:
+declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
+define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2i64_v2f64:
+declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
+define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2i64_v4f32:
+declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
+define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2i64_v4i32:
+declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
+define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2i64_v8i16:
+declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
+define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v2i64_v16i8:
+declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
+define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
+ %4 = add <2 x i64> %3, %3
+ store <2 x i64> %4, <2 x i64>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4f32_f128:
+declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
+define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4f32_v2f64:
+declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
+define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4f32_v2i64:
+declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
+define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4f32_v4i32:
+declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
+define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4f32_v8i16:
+declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
+define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4f32_v16i8:
+declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
+define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
+ %4 = fadd <4 x float> %3, %3
+ store <4 x float> %4, <4 x float>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4i32_f128:
+declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
+define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4i32_v2f64:
+declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
+define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4i32_v2i64:
+declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
+define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4i32_v4f32:
+declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
+define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4i32_v8i16:
+declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
+define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v4i32_v16i8:
+declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
+define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
+ %4 = add <4 x i32> %3, %3
+ store <4 x i32> %4, <4 x i32>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v8i16_f128:
+declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
+define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v8i16_v2f64:
+declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
+define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v8i16_v2i64:
+declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
+define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v8i16_v4f32:
+declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
+define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v8i16_v4i32:
+declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
+define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v8i16_v16i8:
+declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
+define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.8 q0
+ %1 = load <16 x i8>* %p
+ %2 = add <16 x i8> %1, %1
+ %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
+ %4 = add <8 x i16> %3, %3
+ store <8 x i16> %4, <8 x i16>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v16i8_f128:
+declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
+define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
+ %1 = load fp128* %p
+ %2 = fadd fp128 %1, %1
+ %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v16i8_v2f64:
+declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
+define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.f64 d1
+; HARD: vadd.f64 d0
+ %1 = load <2 x double>* %p
+ %2 = fadd <2 x double> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v16i8_v2i64:
+declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
+define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vadd.i64 q0
+ %1 = load <2 x i64>* %p
+ %2 = add <2 x i64> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v16i8_v4f32:
+declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
+define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x float>* %p
+ %2 = fadd <4 x float> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v16i8_v4i32:
+declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
+define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.32 q0
+ %1 = load <4 x i32>* %p
+ %2 = add <4 x i32> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
+
+; CHECK-LABEL: test_v16i8_v8i16:
+declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
+define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
+; SOFT: vmov r1, r0
+; SOFT: vmov r3, r2
+; HARD: vrev64.16 q0
+ %1 = load <8 x i16>* %p
+ %2 = add <8 x i16> %1, %1
+ %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
+ %4 = add <16 x i8> %3, %3
+ store <16 x i8> %4, <16 x i8>* %q
+ ret void
+; SOFT: vmov {{d[0-9]+}}, r3, r2
+; SOFT: vmov {{d[0-9]+}}, r1, r0
+}
diff --git a/test/CodeGen/ARM/bswap16.ll b/test/CodeGen/ARM/bswap16.ll
new file mode 100644
index 0000000..70c62d2
--- /dev/null
+++ b/test/CodeGen/ARM/bswap16.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=arm-darwin -mattr=v6 < %s | FileCheck %s
+; RUN: llc -mtriple=thumb-darwin -mattr=v6 < %s | FileCheck %s
+
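+; An i16 byte swap should lower to a single rev16 on v6 and later, in both
+; ARM and Thumb modes.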
+
+define void @test1(i16* nocapture %data) {
+entry:
+ %0 = load i16* %data, align 2
+ %1 = tail call i16 @llvm.bswap.i16(i16 %0)
+ store i16 %1, i16* %data, align 2
+ ret void
+
+ ; CHECK-LABEL: test1:
+ ; CHECK: ldrh r[[R1:[0-9]+]], [r0]
+ ; CHECK: rev16 r[[R1]], r[[R1]]
+ ; CHECK: strh r[[R1]], [r0]
+}
+
+
+define void @test2(i16* nocapture %data, i16 zeroext %in) {
+entry:
+ %0 = tail call i16 @llvm.bswap.i16(i16 %in)
+ store i16 %0, i16* %data, align 2
+ ret void
+
+ ; CHECK-LABEL: test2:
+ ; CHECK: rev16 r[[R1:[0-9]+]], r1
+ ; CHECK: strh r[[R1]], [r0]
+}
+
+
+define i16 @test3(i16* nocapture %data) {
+entry:
+ %0 = load i16* %data, align 2
+ %1 = tail call i16 @llvm.bswap.i16(i16 %0)
+ ret i16 %1
+
+ ; CHECK-LABEL: test3:
+ ; CHECK: ldrh r[[R0:[0-9]+]], [r0]
+ ; CHECK: rev16 r[[R0]], r0
+}
+
+declare i16 @llvm.bswap.i16(i16)
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 3e825e8..d75d55d 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -33,6 +33,11 @@
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=default | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=RELOC-OTHER
; XSCALE: .eabi_attribute 6, 5
; XSCALE: .eabi_attribute 8, 1
@@ -453,6 +458,11 @@
; CORTEX-A57-NOT: .eabi_attribute 44
; CORTEX-A57: .eabi_attribute 68, 3
+; RELOC-PIC: .eabi_attribute 15, 1
+; RELOC-PIC: .eabi_attribute 16, 1
+; RELOC-PIC: .eabi_attribute 17, 2
+; RELOC-OTHER: .eabi_attribute 17, 1
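+; (Tags per the ARM EABI addenda: 15 is Tag_ABI_PCS_RW_data, 16 is
+; Tag_ABI_PCS_RO_data, and 17 is Tag_ABI_PCS_GOT_use; value 1 on tags 15/16
+; means PC-relative addressing, and value 2 on tag 17 means indirection
+; through the GOT, as expected for PIC.)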
+
define i32 @f(i64 %z) {
ret i32 0
}
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
index 2927ea2..62ed87f 100644
--- a/test/CodeGen/ARM/dagcombine-concatvector.ll
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 -mcpu=generic | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=thumbeb -mattr=v7,neon | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
; PR15525
; CHECK-LABEL: test1:
; CHECK: ldr.w [[REG:r[0-9]+]], [sp]
-; CHECK-NEXT: vmov {{d[0-9]+}}, r1, r2
-; CHECK-NEXT: vmov {{d[0-9]+}}, r3, [[REG]]
-; CHECK-NEXT: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+; CHECK-LE-NEXT: vmov {{d[0-9]+}}, r1, r2
+; CHECK-LE-NEXT: vmov {{d[0-9]+}}, r3, [[REG]]
+; CHECK-BE-NEXT: vmov {{d[0-9]+}}, r2, r1
+; CHECK-BE: vmov {{d[0-9]+}}, [[REG]], r3
+; CHECK: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
; CHECK-NEXT: bx lr
define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
bb:
diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll
index 9b39525..42ff82d 100644
--- a/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -75,12 +75,13 @@
; CHECK-FP-ELIM: .cfi_startproc
; CHECK-FP-ELIM: sub sp, sp, #16
; CHECK-FP-ELIM: .cfi_def_cfa_offset 16
-; CHECK-FP-ELIM: push {r4, r11, lr}
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-FP-ELIM: push {r4, r10, r11, lr}
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 32
; CHECK-FP-ELIM: .cfi_offset lr, -20
; CHECK-FP-ELIM: .cfi_offset r11, -24
-; CHECK-FP-ELIM: .cfi_offset r4, -28
-; CHECK-FP-ELIM: add r11, sp, #4
+; CHECK-FP-ELIM: .cfi_offset r10, -28
+; CHECK-FP-ELIM: .cfi_offset r4, -32
+; CHECK-FP-ELIM: add r11, sp, #8
; CHECK-FP-ELIM: .cfi_def_cfa r11, 24
; CHECK-THUMB-FP-LABEL: sum
diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll
index cf68767..cb54aa8 100644
--- a/test/CodeGen/ARM/debug-frame.ll
+++ b/test/CodeGen/ARM/debug-frame.ll
@@ -201,12 +201,13 @@ declare void @_ZSt9terminatev()
; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd:
; CHECK-V7-FP: .cfi_startproc
-; CHECK-V7-FP: push {r4, r11, lr}
-; CHECK-V7-FP: .cfi_def_cfa_offset 12
+; CHECK-V7-FP: push {r4, r10, r11, lr}
+; CHECK-V7-FP: .cfi_def_cfa_offset 16
; CHECK-V7-FP: .cfi_offset lr, -4
; CHECK-V7-FP: .cfi_offset r11, -8
-; CHECK-V7-FP: .cfi_offset r4, -12
-; CHECK-V7-FP: add r11, sp, #4
+; CHECK-V7-FP: .cfi_offset r10, -12
+; CHECK-V7-FP: .cfi_offset r4, -16
+; CHECK-V7-FP: add r11, sp, #8
; CHECK-V7-FP: .cfi_def_cfa r11, 8
; CHECK-V7-FP: vpush {d8, d9, d10, d11, d12}
; CHECK-V7-FP: .cfi_offset d12, -24
@@ -214,7 +215,7 @@ declare void @_ZSt9terminatev()
; CHECK-V7-FP: .cfi_offset d10, -40
; CHECK-V7-FP: .cfi_offset d9, -48
; CHECK-V7-FP: .cfi_offset d8, -56
-; CHECK-V7-FP: sub sp, sp, #28
+; CHECK-V7-FP: sub sp, sp, #24
; CHECK-V7-FP: .cfi_endproc
; CHECK-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd:
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
index b0dc467..e866b4e 100644
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-define void @test_basic() {
+define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -78,3 +78,5 @@ define void @test_basic() {
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/ARM/dwarf-eh.ll b/test/CodeGen/ARM/dwarf-eh.ll
new file mode 100644
index 0000000..0b8a072
--- /dev/null
+++ b/test/CodeGen/ARM/dwarf-eh.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mtriple=arm-netbsd-eabi -o - -filetype=asm %s | \
+; RUN: FileCheck %s
+; RUN: llc -mtriple=arm-netbsd-eabi -o - -filetype=asm %s \
+; RUN: -relocation-model=pic | FileCheck -check-prefix=CHECK-PIC %s
+
+; ModuleID = 'test.cc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv5e--netbsd-eabi"
+
+%struct.exception = type { i8 }
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS9exception = linkonce_odr constant [11 x i8] c"9exception\00"
+@_ZTI9exception = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([11 x i8]* @_ZTS9exception, i32 0, i32 0) }
+
+define void @f() uwtable {
+ %1 = alloca i8*
+ %2 = alloca i32
+ %e = alloca %struct.exception*, align 4
+ invoke void @g()
+ to label %3 unwind label %4
+
+ br label %16
+
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)
+ %6 = extractvalue { i8*, i32 } %5, 0
+ store i8* %6, i8** %1
+ %7 = extractvalue { i8*, i32 } %5, 1
+ store i32 %7, i32* %2
+ br label %8
+
+ %9 = load i32* %2
+ %10 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)) nounwind
+ %11 = icmp eq i32 %9, %10
+ br i1 %11, label %12, label %17
+
+ %13 = load i8** %1
+ %14 = call i8* @__cxa_begin_catch(i8* %13) #3
+ %15 = bitcast i8* %14 to %struct.exception*
+ store %struct.exception* %15, %struct.exception** %e
+ call void @__cxa_end_catch()
+ br label %16
+
+ ret void
+
+ %18 = load i8** %1
+ %19 = load i32* %2
+ %20 = insertvalue { i8*, i32 } undef, i8* %18, 0
+ %21 = insertvalue { i8*, i32 } %20, i32 %19, 1
+ resume { i8*, i32 } %21
+}
+
+declare void @g()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+; CHECK: .cfi_personality 0,
+; CHECK: .cfi_lsda 0,
+; CHECK: @TType Encoding = absptr
+; CHECK: @ Call site Encoding = udata4
+; CHECK-PIC: .cfi_personality 155,
+; CHECK-PIC: .cfi_lsda 27,
+; CHECK-PIC: @TType Encoding = indirect pcrel sdata4
+; CHECK-PIC: @ Call site Encoding = udata4
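+;
+; (Encoding bytes, for reference: 0 is DW_EH_PE_absptr; 155 is 0x9b, i.e.
+; DW_EH_PE_indirect|pcrel|sdata4; 27 is 0x1b, i.e. DW_EH_PE_pcrel|sdata4.)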
diff --git a/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll b/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll
new file mode 100644
index 0000000..42ca988
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-handlerdata-nounwind.ll
@@ -0,0 +1,61 @@
+; Test for handlerdata when the function has landingpad and nounwind.
+
+; This test case checks whether the handlerdata is generated for a function
+; with a landingpad instruction, even if the function has the "nounwind"
+; attribute.
+;
+; For example, although the following function never throws any exception,
+; we are still required to generate the LSDA; otherwise, we can't catch the
+; exception properly.
+;
+; void test1() noexcept {
+; try {
+; throw_exception();
+; } catch (...) {
+; }
+; }
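+;
+; The CHECK-NOT for .cantunwind below guards exactly this: marking the
+; function .cantunwind would tell the EHABI unwinder that it never unwinds,
+; and no LSDA would be emitted for it.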
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype=asm -o - %s \
+; RUN: | FileCheck %s
+
+declare void @throw_exception()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+define void @test1() nounwind {
+entry:
+ invoke void @throw_exception() to label %try.cont unwind label %lpad
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1)
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+ ret void
+}
+
+; CHECK: .globl test1
+; CHECK: .align 2
+; CHECK: .type test1,%function
+; CHECK-LABEL: test1:
+; CHECK: .fnstart
+
+; CHECK-NOT: .cantunwind
+
+; CHECK: .personality __gxx_personality_v0
+; CHECK: .handlerdata
+; CHECK: .align 2
+; CHECK-LABEL: GCC_except_table0:
+; CHECK-LABEL: .Lexception0:
+; CHECK: .byte 255 @ @LPStart Encoding = omit
+; CHECK: .byte 0 @ @TType Encoding = absptr
+; CHECK: .asciz
+; CHECK: .byte 3 @ Call site Encoding = udata4
+; CHECK: .fnend
diff --git a/test/CodeGen/ARM/ehabi-handlerdata.ll b/test/CodeGen/ARM/ehabi-handlerdata.ll
new file mode 100644
index 0000000..7045902
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-handlerdata.ll
@@ -0,0 +1,59 @@
+; ARM EHABI test for the handlerdata.
+
+; This test case checks whether the handlerdata for exception
+; handling is generated properly.
+;
+; (1) The handlerdata must not be empty.
+; (2) LPStartEncoding == DW_EH_PE_omit
+; (3) TTypeEncoding == DW_EH_PE_absptr
+; (4) CallSiteEncoding == DW_EH_PE_udata4
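+;
+; For reference, these encodings correspond to the byte values checked
+; below: DW_EH_PE_omit is 0xff (255), DW_EH_PE_absptr is 0x00, and
+; DW_EH_PE_udata4 is 0x03.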
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype=asm -o - %s \
+; RUN: | FileCheck %s
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi -filetype=asm -o - %s \
+; RUN: -relocation-model=pic \
+; RUN: | FileCheck %s
+
+declare void @throw_exception()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+define void @test1() {
+entry:
+ invoke void @throw_exception() to label %try.cont unwind label %lpad
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1)
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+ ret void
+}
+
+; CHECK: .globl test1
+; CHECK: .align 2
+; CHECK: .type test1,%function
+; CHECK-LABEL: test1:
+; CHECK: .fnstart
+; CHECK: .personality __gxx_personality_v0
+; CHECK: .handlerdata
+; CHECK: .align 2
+; CHECK-LABEL: GCC_except_table0:
+; CHECK-LABEL: .Lexception0:
+; CHECK: .byte 255 @ @LPStart Encoding = omit
+; CHECK: .byte 0 @ @TType Encoding = absptr
+; CHECK: .asciz
+; CHECK: .byte 3 @ Call site Encoding = udata4
+; CHECK: .long
+; CHECK: .long
+; CHECK: .long
+; CHECK: .fnend
diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll
index 720cc3c..ebf0c2a 100644
--- a/test/CodeGen/ARM/ehabi.ll
+++ b/test/CodeGen/ARM/ehabi.ll
@@ -50,6 +50,22 @@
; RUN: -filetype=asm -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
+; RUN: llc -mtriple arm-unknown-netbsd-eabi \
+; RUN: -disable-fp-elim -filetype=asm -o - %s \
+; RUN: | FileCheck %s --check-prefix=DWARF-FP
+
+; RUN: llc -mtriple arm-unknown-netbsd-eabi \
+; RUN: -filetype=asm -o - %s \
+; RUN: | FileCheck %s --check-prefix=DWARF-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-netbsd-eabi \
+; RUN: -disable-fp-elim -filetype=asm -o - %s \
+; RUN: | FileCheck %s --check-prefix=DWARF-V7-FP
+
+; RUN: llc -mtriple armv7-unknown-netbsd-eabi \
+; RUN: -filetype=asm -o - %s \
+; RUN: | FileCheck %s --check-prefix=DWARF-V7-FP-ELIM
+
;-------------------------------------------------------------------------------
; Test 1
;-------------------------------------------------------------------------------
@@ -148,14 +164,14 @@ declare void @_ZSt9terminatev()
; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd:
; CHECK-V7-FP: .fnstart
-; CHECK-V7-FP: .save {r4, r11, lr}
-; CHECK-V7-FP: push {r4, r11, lr}
-; CHECK-V7-FP: .setfp r11, sp, #4
-; CHECK-V7-FP: add r11, sp, #4
+; CHECK-V7-FP: .save {r4, r10, r11, lr}
+; CHECK-V7-FP: push {r4, r10, r11, lr}
+; CHECK-V7-FP: .setfp r11, sp, #8
+; CHECK-V7-FP: add r11, sp, #8
; CHECK-V7-FP: .vsave {d8, d9, d10, d11, d12}
; CHECK-V7-FP: vpush {d8, d9, d10, d11, d12}
-; CHECK-V7-FP: .pad #28
-; CHECK-V7-FP: sub sp, sp, #28
+; CHECK-V7-FP: .pad #24
+; CHECK-V7-FP: sub sp, sp, #24
; CHECK-V7-FP: .personality __gxx_personality_v0
; CHECK-V7-FP: .handlerdata
; CHECK-V7-FP: .fnend
@@ -172,6 +188,93 @@ declare void @_ZSt9terminatev()
; CHECK-V7-FP-ELIM: .handlerdata
; CHECK-V7-FP-ELIM: .fnend
+; DWARF-FP-LABEL: _Z4testiiiiiddddd:
+; DWARF-FP: .cfi_startproc
+; DWARF-FP: .cfi_personality 0, __gxx_personality_v0
+; DWARF-FP: .cfi_lsda 0, .Lexception0
+; DWARF-FP: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; DWARF-FP: .cfi_def_cfa_offset 36
+; DWARF-FP: .cfi_offset lr, -4
+; DWARF-FP: .cfi_offset r11, -8
+; DWARF-FP: .cfi_offset r10, -12
+; DWARF-FP: .cfi_offset r9, -16
+; DWARF-FP: .cfi_offset r8, -20
+; DWARF-FP: .cfi_offset r7, -24
+; DWARF-FP: .cfi_offset r6, -28
+; DWARF-FP: .cfi_offset r5, -32
+; DWARF-FP: .cfi_offset r4, -36
+; DWARF-FP: add r11, sp, #28
+; DWARF-FP: .cfi_def_cfa r11, 8
+; DWARF-FP: sub sp, sp, #28
+; DWARF-FP: sub sp, r11, #28
+; DWARF-FP: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; DWARF-FP: mov pc, lr
+; DWARF-FP: .cfi_endproc
+
+; DWARF-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; DWARF-FP-ELIM: .cfi_startproc
+; DWARF-FP-ELIM: .cfi_personality 0, __gxx_personality_v0
+; DWARF-FP-ELIM: .cfi_lsda 0, .Lexception0
+; DWARF-FP-ELIM: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; DWARF-FP-ELIM: .cfi_def_cfa_offset 36
+; DWARF-FP-ELIM: .cfi_offset lr, -4
+; DWARF-FP-ELIM: .cfi_offset r11, -8
+; DWARF-FP-ELIM: .cfi_offset r10, -12
+; DWARF-FP-ELIM: .cfi_offset r9, -16
+; DWARF-FP-ELIM: .cfi_offset r8, -20
+; DWARF-FP-ELIM: .cfi_offset r7, -24
+; DWARF-FP-ELIM: .cfi_offset r6, -28
+; DWARF-FP-ELIM: .cfi_offset r5, -32
+; DWARF-FP-ELIM: .cfi_offset r4, -36
+; DWARF-FP-ELIM: sub sp, sp, #28
+; DWARF-FP-ELIM: .cfi_def_cfa_offset 64
+; DWARF-FP-ELIM: add sp, sp, #28
+; DWARF-FP-ELIM: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; DWARF-FP-ELIM: mov pc, lr
+; DWARF-FP-ELIM: .cfi_endproc
+
+; DWARF-V7-FP-LABEL: _Z4testiiiiiddddd:
+; DWARF-V7-FP: .cfi_startproc
+; DWARF-V7-FP: .cfi_personality 0, __gxx_personality_v0
+; DWARF-V7-FP: .cfi_lsda 0, .Lexception0
+; DWARF-V7-FP: push {r4, r10, r11, lr}
+; DWARF-V7-FP: .cfi_def_cfa_offset 16
+; DWARF-V7-FP: .cfi_offset lr, -4
+; DWARF-V7-FP: .cfi_offset r11, -8
+; DWARF-V7-FP: .cfi_offset r10, -12
+; DWARF-V7-FP: .cfi_offset r4, -16
+; DWARF-V7-FP: add r11, sp, #8
+; DWARF-V7-FP: .cfi_def_cfa r11, 8
+; DWARF-V7-FP: vpush {d8, d9, d10, d11, d12}
+; DWARF-V7-FP: .cfi_offset d12, -24
+; DWARF-V7-FP: .cfi_offset d11, -32
+; DWARF-V7-FP: .cfi_offset d10, -40
+; DWARF-V7-FP: .cfi_offset d9, -48
+; DWARF-V7-FP: sub sp, sp, #24
+; DWARF-V7-FP: sub sp, r11, #48
+; DWARF-V7-FP: vpop {d8, d9, d10, d11, d12}
+; DWARF-V7-FP: pop {r4, r10, r11, pc}
+; DWARF-V7-FP: .cfi_endproc
+
+; DWARF-V7-FP-ELIM-LABEL: _Z4testiiiiiddddd:
+; DWARF-V7-FP-ELIM: .cfi_startproc
+; DWARF-V7-FP-ELIM: .cfi_personality 0, __gxx_personality_v0
+; DWARF-V7-FP-ELIM: .cfi_lsda 0, .Lexception0
+; DWARF-V7-FP-ELIM: push {r4, lr}
+; DWARF-V7-FP-ELIM: .cfi_def_cfa_offset 8
+; DWARF-V7-FP-ELIM: .cfi_offset lr, -4
+; DWARF-V7-FP-ELIM: .cfi_offset r4, -8
+; DWARF-V7-FP-ELIM: vpush {d8, d9, d10, d11, d12}
+; DWARF-V7-FP-ELIM: .cfi_offset d12, -16
+; DWARF-V7-FP-ELIM: .cfi_offset d11, -24
+; DWARF-V7-FP-ELIM: .cfi_offset d10, -32
+; DWARF-V7-FP-ELIM: .cfi_offset d9, -40
+; DWARF-V7-FP-ELIM: sub sp, sp, #24
+; DWARF-V7-FP-ELIM: .cfi_def_cfa_offset 72
+; DWARF-V7-FP-ELIM: add sp, sp, #24
+; DWARF-V7-FP-ELIM: vpop {d8, d9, d10, d11, d12}
+; DWARF-V7-FP-ELIM: pop {r4, pc}
+; DWARF-V7-FP-ELIM: .cfi_endproc
;-------------------------------------------------------------------------------
; Test 2
@@ -219,6 +322,48 @@ entry:
; CHECK-V7-FP-ELIM: pop {r11, pc}
; CHECK-V7-FP-ELIM: .fnend
+; DWARF-FP-LABEL: test2:
+; DWARF-FP: .cfi_startproc
+; DWARF-FP: push {r11, lr}
+; DWARF-FP: .cfi_def_cfa_offset 8
+; DWARF-FP: .cfi_offset lr, -4
+; DWARF-FP: .cfi_offset r11, -8
+; DWARF-FP: mov r11, sp
+; DWARF-FP: .cfi_def_cfa_register r11
+; DWARF-FP: pop {r11, lr}
+; DWARF-FP: mov pc, lr
+; DWARF-FP: .cfi_endproc
+
+; DWARF-FP-ELIM-LABEL: test2:
+; DWARF-FP-ELIM: .cfi_startproc
+; DWARF-FP-ELIM: push {r11, lr}
+; DWARF-FP-ELIM: .cfi_def_cfa_offset 8
+; DWARF-FP-ELIM: .cfi_offset lr, -4
+; DWARF-FP-ELIM: .cfi_offset r11, -8
+; DWARF-FP-ELIM: pop {r11, lr}
+; DWARF-FP-ELIM: mov pc, lr
+; DWARF-FP-ELIM: .cfi_endproc
+
+; DWARF-V7-FP-LABEL: test2:
+; DWARF-V7-FP: .cfi_startproc
+; DWARF-V7-FP: push {r11, lr}
+; DWARF-V7-FP: .cfi_def_cfa_offset 8
+; DWARF-V7-FP: .cfi_offset lr, -4
+; DWARF-V7-FP: .cfi_offset r11, -8
+; DWARF-V7-FP: mov r11, sp
+; DWARF-V7-FP: .cfi_def_cfa_register r11
+; DWARF-V7-FP: pop {r11, pc}
+; DWARF-V7-FP: .cfi_endproc
+
+; DWARF-V7-FP-ELIM-LABEL: test2:
+; DWARF-V7-FP-ELIM: .cfi_startproc
+; DWARF-V7-FP-ELIM: push {r11, lr}
+; DWARF-V7-FP-ELIM: .cfi_def_cfa_offset 8
+; DWARF-V7-FP-ELIM: .cfi_offset lr, -4
+; DWARF-V7-FP-ELIM: .cfi_offset r11, -8
+; DWARF-V7-FP-ELIM: pop {r11, pc}
+; DWARF-V7-FP-ELIM: .cfi_endproc
+
;-------------------------------------------------------------------------------
; Test 3
@@ -275,6 +420,56 @@ entry:
; CHECK-V7-FP-ELIM: pop {r4, r5, r11, pc}
; CHECK-V7-FP-ELIM: .fnend
+; DWARF-FP-LABEL: test3:
+; DWARF-FP: .cfi_startproc
+; DWARF-FP: push {r4, r5, r11, lr}
+; DWARF-FP: .cfi_def_cfa_offset 16
+; DWARF-FP: .cfi_offset lr, -4
+; DWARF-FP: .cfi_offset r11, -8
+; DWARF-FP: .cfi_offset r5, -12
+; DWARF-FP: .cfi_offset r4, -16
+; DWARF-FP: add r11, sp, #8
+; DWARF-FP: .cfi_def_cfa r11, 8
+; DWARF-FP: pop {r4, r5, r11, lr}
+; DWARF-FP: mov pc, lr
+; DWARF-FP: .cfi_endproc
+
+; DWARF-FP-ELIM-LABEL: test3:
+; DWARF-FP-ELIM: .cfi_startproc
+; DWARF-FP-ELIM: push {r4, r5, r11, lr}
+; DWARF-FP-ELIM: .cfi_def_cfa_offset 16
+; DWARF-FP-ELIM: .cfi_offset lr, -4
+; DWARF-FP-ELIM: .cfi_offset r11, -8
+; DWARF-FP-ELIM: .cfi_offset r5, -12
+; DWARF-FP-ELIM: .cfi_offset r4, -16
+; DWARF-FP-ELIM: pop {r4, r5, r11, lr}
+; DWARF-FP-ELIM: mov pc, lr
+; DWARF-FP-ELIM: .cfi_endproc
+
+; DWARF-V7-FP-LABEL: test3:
+; DWARF-V7-FP: .cfi_startproc
+; DWARF-V7-FP: push {r4, r5, r11, lr}
+; DWARF-V7-FP: .cfi_def_cfa_offset 16
+; DWARF-V7-FP: .cfi_offset lr, -4
+; DWARF-V7-FP: .cfi_offset r11, -8
+; DWARF-V7-FP: .cfi_offset r5, -12
+; DWARF-V7-FP: .cfi_offset r4, -16
+; DWARF-V7-FP: add r11, sp, #8
+; DWARF-V7-FP: .cfi_def_cfa r11, 8
+; DWARF-V7-FP: pop {r4, r5, r11, pc}
+; DWARF-V7-FP: .cfi_endproc
+
+; DWARF-V7-FP-ELIM-LABEL: test3:
+; DWARF-V7-FP-ELIM: .cfi_startproc
+; DWARF-V7-FP-ELIM: push {r4, r5, r11, lr}
+; DWARF-V7-FP-ELIM: .cfi_def_cfa_offset 16
+; DWARF-V7-FP-ELIM: .cfi_offset lr, -4
+; DWARF-V7-FP-ELIM: .cfi_offset r11, -8
+; DWARF-V7-FP-ELIM: .cfi_offset r5, -12
+; DWARF-V7-FP-ELIM: .cfi_offset r4, -16
+; DWARF-V7-FP-ELIM: pop {r4, r5, r11, pc}
+; DWARF-V7-FP-ELIM: .cfi_endproc
+
;-------------------------------------------------------------------------------
; Test 4
@@ -308,3 +503,27 @@ entry:
; CHECK-V7-FP-ELIM: bx lr
; CHECK-V7-FP-ELIM: .cantunwind
; CHECK-V7-FP-ELIM: .fnend
+
+; DWARF-FP-LABEL: test4:
+; DWARF-FP-NOT: .cfi_startproc
+; DWARF-FP: mov pc, lr
+; DWARF-FP-NOT: .cfi_endproc
+; DWARF-FP: .size test4,
+
+; DWARF-FP-ELIM-LABEL: test4:
+; DWARF-FP-ELIM-NOT: .cfi_startproc
+; DWARF-FP-ELIM: mov pc, lr
+; DWARF-FP-ELIM-NOT: .cfi_endproc
+; DWARF-FP-ELIM: .size test4,
+
+; DWARF-V7-FP-LABEL: test4:
+; DWARF-V7-FP-NOT: .cfi_startproc
+; DWARF-V7-FP: bx lr
+; DWARF-V7-FP-NOT: .cfi_endproc
+; DWARF-V7-FP: .size test4,
+
+; DWARF-V7-FP-ELIM-LABEL: test4:
+; DWARF-V7-FP-ELIM-NOT: .cfi_startproc
+; DWARF-V7-FP-ELIM: bx lr
+; DWARF-V7-FP-ELIM-NOT: .cfi_endproc
+; DWARF-V7-FP-ELIM: .size test4,
diff --git a/test/CodeGen/ARM/frame-register.ll b/test/CodeGen/ARM/frame-register.ll
new file mode 100644
index 0000000..e6a55bd
--- /dev/null
+++ b/test/CodeGen/ARM/frame-register.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple arm-eabi -disable-fp-elim -filetype asm -o - %s \
+; RUN: | FileCheck -check-prefix CHECK-ARM %s
+
+; RUN: llc -mtriple thumb-eabi -disable-fp-elim -filetype asm -o - %s \
+; RUN: | FileCheck -check-prefix CHECK-THUMB %s
+
+; RUN: llc -mtriple arm-darwin -disable-fp-elim -filetype asm -o - %s \
+; RUN: | FileCheck -check-prefix CHECK-DARWIN-ARM %s
+
+; RUN: llc -mtriple thumb-darwin -disable-fp-elim -filetype asm -o - %s \
+; RUN: | FileCheck -check-prefix CHECK-DARWIN-THUMB %s
+
+declare void @callee(i32)
+
+define i32 @calleer(i32 %i) {
+entry:
+ %i.addr = alloca i32, align 4
+ %j = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32* %i.addr, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %j, align 4
+ %1 = load i32* %j, align 4
+ call void @callee(i32 %1)
+ %2 = load i32* %j, align 4
+ %add1 = add nsw i32 %2, 1
+ ret i32 %add1
+}
+
+; CHECK-ARM: push {r11, lr}
+; CHECK-ARM: mov r11, sp
+
+; CHECK-THUMB: push {r4, r6, r7, lr}
+; CHECK-THUMB: add r7, sp, #8
+
+; CHECK-DARWIN-ARM: push {r7, lr}
+; CHECK-DARWIN-THUMB: push {r4, r7, lr}
+
diff --git a/test/CodeGen/ARM/func-argpassing-endian.ll b/test/CodeGen/ARM/func-argpassing-endian.ll
new file mode 100644
index 0000000..26f0597
--- /dev/null
+++ b/test/CodeGen/ARM/func-argpassing-endian.ll
@@ -0,0 +1,122 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm-eabi -mattr=v7,neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=armeb-eabi -mattr=v7,neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+
+@var32 = global i32 0
+@vardouble = global double 0.0
+
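+; Under AAPCS an i64 or double in core registers occupies an even/odd
+; register pair, but the halves swap with endianness: little-endian keeps
+; the low word in the lower-numbered register, while big-endian puts the
+; high word there, as the CHECK-LE/CHECK-BE pairs below show.
+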
+define void @arg_longint( i64 %val ) {
+; CHECK-LABEL: arg_longint:
+; CHECK-LE: str r0, [r1]
+; CHECK-BE: str r1, [r0]
+ %tmp = trunc i64 %val to i32
+ store i32 %tmp, i32* @var32
+ ret void
+}
+
+define void @arg_double( double %val ) {
+; CHECK-LABEL: arg_double:
+; CHECK: strd r0, r1, [r2]
+ store double %val, double* @vardouble
+ ret void
+}
+
+define void @arg_v4i32(<4 x i32> %vec ) {
+; CHECK-LABEL: arg_v4i32:
+; CHECK-LE: vmov {{d[0-9]+}}, r2, r3
+; CHECK-LE: vmov [[ARG_V4I32_REG:d[0-9]+]], r0, r1
+; CHECK-BE: vmov {{d[0-9]+}}, r3, r2
+; CHECK-BE: vmov [[ARG_V4I32_REG:d[0-9]+]], r1, r0
+; CHECK: vst1.32 {[[ARG_V4I32_REG]][0]}, [r0:32]
+ %tmp = extractelement <4 x i32> %vec, i32 0
+ store i32 %tmp, i32* @var32
+ ret void
+}
+
+define void @arg_v2f64(<2 x double> %vec ) {
+; CHECK-LABEL: arg_v2f64:
+; CHECK: strd r0, r1, [r2]
+ %tmp = extractelement <2 x double> %vec, i32 0
+ store double %tmp, double* @vardouble
+ ret void
+}
+
+define i64 @return_longint() {
+; CHECK-LABEL: return_longint:
+; CHECK-LE: mov r0, #42
+; CHECK-LE: mov r1, #0
+; CHECK-BE: mov r0, #0
+; CHECK-BE: mov r1, #42
+ ret i64 42
+}
+
+define double @return_double() {
+; CHECK-LABEL: return_double:
+; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
+; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
+ ret double 1.0
+}
+
+define <4 x i32> @return_v4i32() {
+; CHECK-LABEL: return_v4i32:
+; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
+; CHECK-LE: vmov r2, r3, {{d[0-9]+}}
+; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
+; CHECK-BE: vmov r3, r2, {{d[0-9]+}}
+ ret < 4 x i32> < i32 42, i32 43, i32 44, i32 45 >
+}
+
+define <2 x double> @return_v2f64() {
+; CHECK-LABEL: return_v2f64:
+; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
+; CHECK-LE: vmov r2, r3, {{d[0-9]+}}
+; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
+; CHECK-BE: vmov r3, r2, {{d[0-9]+}}
+ ret <2 x double> < double 3.14, double 6.28 >
+}
+
+define void @caller_arg_longint() {
+; CHECK-LABEL: caller_arg_longint:
+; CHECK-LE: mov r0, #42
+; CHECK-LE: mov r1, #0
+; CHECK-BE: mov r0, #0
+; CHECK-BE: mov r1, #42
+ call void @arg_longint( i64 42 )
+ ret void
+}
+
+define void @caller_arg_double() {
+; CHECK-LABEL: caller_arg_double:
+; CHECK-LE: vmov r0, r1, {{d[0-9]+}}
+; CHECK-BE: vmov r1, r0, {{d[0-9]+}}
+ call void @arg_double( double 1.0 )
+ ret void
+}
+
+define void @caller_return_longint() {
+; CHECK-LABEL: caller_return_longint:
+; CHECK-LE: str r0, [r1]
+; CHECK-BE: str r1, [r0]
+ %val = call i64 @return_longint()
+ %tmp = trunc i64 %val to i32
+ store i32 %tmp, i32* @var32
+ ret void
+}
+
+define void @caller_return_double() {
+; CHECK-LABEL: caller_return_double:
+; CHECK-LE: vmov {{d[0-9]+}}, r0, r1
+; CHECK-BE: vmov {{d[0-9]+}}, r1, r0
+ %val = call double @return_double( )
+ %tmp = fadd double %val, 3.14
+ store double %tmp, double* @vardouble
+ ret void
+}
+
+define void @caller_return_v2f64() {
+; CHECK-LABEL: caller_return_v2f64:
+; CHECK: strd r0, r1, [r2]
+ %val = call <2 x double> @return_v2f64( )
+ %tmp = extractelement <2 x double> %val, i32 0
+ store double %tmp, double* @vardouble
+ ret void
+}
diff --git a/test/CodeGen/ARM/hfa-in-contiguous-registers.ll b/test/CodeGen/ARM/hfa-in-contiguous-registers.ll
new file mode 100644
index 0000000..f9ec6e0
--- /dev/null
+++ b/test/CodeGen/ARM/hfa-in-contiguous-registers.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7-none--gnueabihf"
+
+%struct.s = type { float, float }
+%union.t = type { [4 x float] }
+
+; Equivalent C code:
+; struct s { float a; float b; };
+; float foo(float a, double b, struct s c) { return c.a; }
+; Argument allocation:
+; a -> s0
+; b -> d1
+; c -> s4, s5
+; s1 is unused
+; return in s0
+define float @test1(float %a, double %b, %struct.s %c) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: vmov.f32 s0, s4
+; CHECK-NOT: vmov.f32 s0, s1
+
+ %result = extractvalue %struct.s %c, 0
+ ret float %result
+}
+
+; Equivalent C code:
+; union t { float a[4]; };
+; float foo(float a, double b, union t c) { return c.a[0]; }
+; Argument allocation:
+; a -> s0
+; b -> d1
+; c -> s4..s7
+define float @test2(float %a, double %b, %union.t %c) {
+entry:
+; CHECK-LABEL: test2
+; CHECK: vmov.f32 s0, s4
+; CHECK-NOT: vmov.f32 s0, s1
+
+ %result = extractvalue %union.t %c, 0, 0
+ ret float %result
+}
+
+; Equivalent C code:
+; struct s { float a; float b; };
+; float foo(float a, double b, struct s c, float d) { return d; }
+; Argument allocation:
+; a -> s0
+; b -> d1
+; c -> s4, s5
+; d -> s1
+; return in s0
+define float @test3(float %a, double %b, %struct.s %c, float %d) {
+entry:
+; CHECK-LABEL: test3
+; CHECK: vmov.f32 s0, s1
+; CHECK-NOT: vmov.f32 s0, s5
+
+ ret float %d
+}
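+
+; Note on test3: the HFA %c must occupy contiguous registers s4-s5, so s1
+; is left as a back-fill hole; the later single-precision %d is then
+; allowed to drop back into s1 under the AAPCS VFP allocation rules.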
+
+; Equivalent C code:
+; struct s { float a; float b; };
+; float foo(struct s a, struct s b) { return b.b; }
+; Argument allocation:
+; a -> s0, s1
+; b -> s2, s3
+; return in s0
+define float @test4(%struct.s %a, %struct.s %b) {
+entry:
+; CHECK-LABEL: test4
+; CHECK: vmov.f32 s0, s3
+
+ %result = extractvalue %struct.s %b, 1
+ ret float %result
+}
+
+; Equivalent C code:
+; struct s { float a; float b; };
+; float foo(struct s a, float b, struct s c) { return c.a; }
+; Argument allocation:
+; a -> s0, s1
+; b -> s2
+; c -> s3, s4
+; return in s0
+define float @test5(%struct.s %a, float %b, %struct.s %c) {
+entry:
+; CHECK-LABEL: test5
+; CHECK: vmov.f32 s0, s3
+
+ %result = extractvalue %struct.s %c, 0
+ ret float %result
+}
diff --git a/test/CodeGen/ARM/hints.ll b/test/CodeGen/ARM/hints.ll
new file mode 100644
index 0000000..18abbbe
--- /dev/null
+++ b/test/CodeGen/ARM/hints.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mtriple armv7-eabi -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv6m-eabi -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv7-eabi -o - %s | FileCheck %s
+
+declare void @llvm.arm.hint(i32) nounwind
+
+define void @hint_nop() {
+entry:
+ tail call void @llvm.arm.hint(i32 0) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_nop
+; CHECK: nop
+
+define void @hint_yield() {
+entry:
+ tail call void @llvm.arm.hint(i32 1) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_yield
+; CHECK: yield
+
+define void @hint_wfe() {
+entry:
+ tail call void @llvm.arm.hint(i32 2) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_wfe
+; CHECK: wfe
+
+define void @hint_wfi() {
+entry:
+ tail call void @llvm.arm.hint(i32 3) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_wfi
+; CHECK: wfi
+
+define void @hint_sev() {
+entry:
+ tail call void @llvm.arm.hint(i32 4) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_sev
+; CHECK: sev
+
+define void @hint_sevl() {
+entry:
+ tail call void @llvm.arm.hint(i32 5) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_sevl
+; CHECK: hint #5
+
+define void @hint_undefined() {
+entry:
+ tail call void @llvm.arm.hint(i32 8) nounwind
+ ret void
+}
+
+; CHECK-LABEL: hint_undefined
+; CHECK: hint #8
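+
+; Values 0 through 4 have dedicated mnemonics (nop, yield, wfe, wfi, sev)
+; on all three targets above; sevl (5) is ARMv8-only and 8 is unallocated,
+; so both are expected to print as the generic "hint #n" form.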
+
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
index 86ed5b2..5d8e477 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
@@ -24,7 +24,7 @@ entry:
; CHECK: BB#1: derived from LLVM BB %for.body
; CHECK: Successors according to CFG: BB#2(130023362) BB#4(62)
for.body:
- br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i
+ br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1
for.cond.backedge:
%tobool = icmp eq %classL* undef, null
@@ -60,3 +60,4 @@ declare void @_ZN1F10handleMoveEb(%classF*, i1 zeroext)
declare void @_Z3fn1v()
!0 = metadata !{metadata !"clang version 3.5"}
+!1 = metadata !{metadata !"branch_weights", i32 62, i32 62}
diff --git a/test/CodeGen/ARM/indirect-hidden.ll b/test/CodeGen/ARM/indirect-hidden.ll
new file mode 100644
index 0000000..ae1c505
--- /dev/null
+++ b/test/CodeGen/ARM/indirect-hidden.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s | FileCheck %s
+
+@var = external global i32
+@var_hidden = external hidden global i32
+
+define i32* @get_var() {
+ ret i32* @var
+}
+
+define i32* @get_var_hidden() {
+ ret i32* @var_hidden
+}
+
+; CHECK: .section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+
+; CHECK: .indirect_symbol _var
+; CHECK-NEXT: .long 0
+
+; CHECK-NOT: __DATA,__data
+
+; CHECK: .indirect_symbol _var_hidden
+; CHECK-NEXT: .long 0
\ No newline at end of file
diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll
index 9b7b41b..c5be667 100644
--- a/test/CodeGen/ARM/interrupt-attr.ll
+++ b/test/CodeGen/ARM/interrupt-attr.ll
@@ -12,13 +12,13 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; Also need special function return setting pc and CPSR simultaneously.
; CHECK-A-LABEL: irq_fn:
-; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr}
-; CHECK-A: add r11, sp, #16
-; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: push {r0, r1, r2, r3, r10, r11, r12, lr}
+; CHECK-A: add r11, sp, #20
+; CHECK-A-NOT: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; CHECK-A: bl bar
-; CHECK-A: sub sp, r11, #16
-; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr}
+; CHECK-A: sub sp, r11, #20
+; CHECK-A: pop {r0, r1, r2, r3, r10, r11, r12, lr}
; CHECK-A: subs pc, lr, #4
; CHECK-A-THUMB-LABEL: irq_fn:
@@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
; appropriate sentinel so no special return needed).
; CHECK-M-LABEL: irq_fn:
-; CHECK-M: push {r4, r7, lr}
-; CHECK-M: add r7, sp, #4
+; CHECK-M: push {r4, r6, r7, lr}
+; CHECK-M: add r7, sp, #8
; CHECK-M: mov r4, sp
; CHECK-M: bic r4, r4, #7
; CHECK-M: mov sp, r4
; CHECK-M: blx _bar
-; CHECK-M: subs r4, r7, #4
+; CHECK-M: sub.w r4, r7, #8
; CHECK-M: mov sp, r4
-; CHECK-M: pop {r4, r7, pc}
+; CHECK-M: pop {r4, r6, r7, pc}
call arm_aapcscc void @bar()
ret void
@@ -88,13 +88,13 @@ define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
; CHECK-A-LABEL: undef_fn:
-; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr}
-; CHECK-A: add r11, sp, #16
-; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: push {r0, r1, r2, r3, r10, r11, r12, lr}
+; CHECK-A: add r11, sp, #20
+; CHECK-A-NOT: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; [...]
-; CHECK-A: sub sp, r11, #16
-; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr}
+; CHECK-A: sub sp, r11, #20
+; CHECK-A: pop {r0, r1, r2, r3, r10, r11, r12, lr}
; CHECK-A: subs pc, lr, #0
call void @bar()
@@ -103,13 +103,13 @@ define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" {
; CHECK-A-LABEL: abort_fn:
-; CHECK-A: push {r0, r1, r2, r3, r11, r12, lr}
-; CHECK-A: add r11, sp, #16
-; CHECK-A: sub sp, sp, #{{[0-9]+}}
+; CHECK-A: push {r0, r1, r2, r3, r10, r11, r12, lr}
+; CHECK-A: add r11, sp, #20
+; CHECK-A-NOT: sub sp, sp, #{{[0-9]+}}
; CHECK-A: bic sp, sp, #7
; [...]
-; CHECK-A: sub sp, r11, #16
-; CHECK-A: pop {r0, r1, r2, r3, r11, r12, lr}
+; CHECK-A: sub sp, r11, #20
+; CHECK-A: pop {r0, r1, r2, r3, r10, r11, r12, lr}
; CHECK-A: subs pc, lr, #4
call void @bar()
diff --git a/test/CodeGen/ARM/intrinsics-overflow.ll b/test/CodeGen/ARM/intrinsics-overflow.ll
new file mode 100644
index 0000000..af3dd9d
--- /dev/null
+++ b/test/CodeGen/ARM/intrinsics-overflow.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s
+
+define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
+ %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %sadd, 1
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+
+ ; CHECK-LABEL: uadd_overflow:
+ ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+ ; CHECK: mov r[[R1]], #1
+ ; CHECK: cmp r[[R2]], r[[R0]]
+ ; CHECK: movhs r[[R1]], #0
+}
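+
+; The overflow bit is recovered without reading the flags directly: after
+; "add r2, r0, r1", unsigned overflow occurred iff the result is less than
+; either operand, so "cmp r2, r0" followed by the predicated "movhs r1, #0"
+; clears the preset 1 exactly when no wrap happened.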
+
+
+define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
+ %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %sadd, 1
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+
+ ; CHECK-LABEL: sadd_overflow:
+ ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+ ; CHECK: mov r[[R1]], #1
+ ; CHECK: cmp r[[R2]], r[[R0]]
+ ; CHECK: movvc r[[R1]], #0
+}
+
+define i32 @usub_overflow(i32 %a, i32 %b) #0 {
+ %sadd = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %sadd, 1
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+
+ ; CHECK-LABEL: usub_overflow:
+ ; CHECK: mov r[[R2]], #1
+ ; CHECK: cmp r[[R0]], r[[R1]]
+ ; CHECK: movhs r[[R2]], #0
+}
+
+define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
+ %sadd = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+ %1 = extractvalue { i32, i1 } %sadd, 1
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+
+ ; CHECK-LABEL: ssub_overflow:
+ ; CHECK: mov r[[R2]], #1
+ ; CHECK: cmp r[[R0]], r[[R1]]
+ ; CHECK: movvc r[[R2]], #0
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #2
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #3
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #4
diff --git a/test/CodeGen/ARM/intrinsics-v8.ll b/test/CodeGen/ARM/intrinsics-v8.ll
index 247bfc1..ab1c3c0 100644
--- a/test/CodeGen/ARM/intrinsics-v8.ll
+++ b/test/CodeGen/ARM/intrinsics-v8.ll
@@ -10,10 +10,10 @@ define void @test() {
; CHECK: dsb ishld
call void @llvm.arm.dsb(i32 9)
; CHECK: sevl
- tail call void @llvm.arm.sevl() nounwind
+ tail call void @llvm.arm.hint(i32 5) nounwind
ret void
}
declare void @llvm.arm.dmb(i32)
declare void @llvm.arm.dsb(i32)
-declare void @llvm.arm.sevl() nounwind
+declare void @llvm.arm.hint(i32) nounwind
diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll
index 5636a12..fed6ec0 100644
--- a/test/CodeGen/ARM/longMAC.ll
+++ b/test/CodeGen/ARM/longMAC.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
-; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7-LE
+; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE
; Check generated signed and unsigned multiply accumulate long.
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
@@ -53,13 +55,18 @@ define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
; function, both after the umlal. With it, *some* move has to happen
; before the umlal.
define i64 @MACLongTest5(i64 %c, i32 %a, i32 %b) {
-; CHECK-V7-LABEL: MACLongTest5:
-; CHECK-V7-LABEL: umlal r0, r1, r0, r0
+; CHECK-V7-LE-LABEL: MACLongTest5:
+; CHECK-V7-LE: umlal r0, r1, r0, r0
+; CHECK-V7-BE-LABEL: MACLongTest5:
+; CHECK-V7-BE: umlal r1, r0, r1, r1
; CHECK-LABEL: MACLongTest5:
-; CHECK: mov [[RDLO:r[0-9]+]], r0
-; CHECK: umlal [[RDLO]], r1, r0, r0
-; CHECK: mov r0, [[RDLO]]
+; CHECK-LE: mov [[RDLO:r[0-9]+]], r0
+; CHECK-LE: umlal [[RDLO]], r1, r0, r0
+; CHECK-LE: mov r0, [[RDLO]]
+; CHECK-BE: mov [[RDLO:r[0-9]+]], r1
+; CHECK-BE: umlal [[RDLO]], r0, r1, r1
+; CHECK-BE: mov r1, [[RDLO]]
%conv.trunc = trunc i64 %c to i32
%conv = zext i32 %conv.trunc to i64
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 48b0ba7..3ec5fa4 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,11 +1,16 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
define i64 @f0(i64 %A, i64 %B) {
; CHECK-LABEL: f0:
-; CHECK: lsrs r3, r3, #1
-; CHECK-NEXT: rrx r2, r2
-; CHECK-NEXT: subs r0, r0, r2
-; CHECK-NEXT: sbc r1, r1, r3
+; CHECK-LE: lsrs r3, r3, #1
+; CHECK-LE-NEXT: rrx r2, r2
+; CHECK-LE-NEXT: subs r0, r0, r2
+; CHECK-LE-NEXT: sbc r1, r1, r3
+; CHECK-BE: lsrs r2, r2, #1
+; CHECK-BE-NEXT: rrx r3, r3
+; CHECK-BE-NEXT: subs r1, r1, r3
+; CHECK-BE-NEXT: sbc r0, r0, r2
%tmp = bitcast i64 %A to i64
%tmp2 = lshr i64 %B, 1
%tmp3 = sub i64 %tmp, %tmp2
@@ -14,7 +19,8 @@ define i64 @f0(i64 %A, i64 %B) {
define i32 @f1(i64 %x, i64 %y) {
; CHECK-LABEL: f1:
-; CHECK: lsl{{.*}}r2
+; CHECK-LE: lsl{{.*}}r2
+; CHECK-BE: lsl{{.*}}r3
%a = shl i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
@@ -22,12 +28,20 @@ define i32 @f1(i64 %x, i64 %y) {
define i32 @f2(i64 %x, i64 %y) {
; CHECK-LABEL: f2:
-; CHECK: lsr{{.*}}r2
-; CHECK-NEXT: rsb r3, r2, #32
-; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: orr r0, r0, r1, lsl r3
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: asrge r0, r1, r2
+; CHECK-LE: lsr{{.*}}r2
+; CHECK-LE-NEXT: rsb r3, r2, #32
+; CHECK-LE-NEXT: sub r2, r2, #32
+; CHECK-LE-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-LE-NEXT: cmp r2, #0
+; CHECK-LE-NEXT: asrge r0, r1, r2
+
+; CHECK-BE: lsr{{.*}}r3
+; CHECK-BE-NEXT: rsb r2, r3, #32
+; CHECK-BE-NEXT: orr r1, r1, r0, lsl r2
+; CHECK-BE-NEXT: sub r2, r3, #32
+; CHECK-BE-NEXT: cmp r2, #0
+; CHECK-BE-NEXT: asrge r1, r0, r2
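+; In both layouts this is the standard two-register shift expansion:
+; low = (low >> n) | (high << (32 - n)), with the predicated asrge
+; supplying high >> (n - 32) whenever the amount reaches 32 or more.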
+
%a = ashr i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
@@ -35,12 +49,20 @@ define i32 @f2(i64 %x, i64 %y) {
define i32 @f3(i64 %x, i64 %y) {
; CHECK-LABEL: f3:
-; CHECK: lsr{{.*}}r2
-; CHECK-NEXT: rsb r3, r2, #32
-; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: orr r0, r0, r1, lsl r3
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: lsrge r0, r1, r2
+; CHECK-LE: lsr{{.*}}r2
+; CHECK-LE-NEXT: rsb r3, r2, #32
+; CHECK-LE-NEXT: sub r2, r2, #32
+; CHECK-LE-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-LE-NEXT: cmp r2, #0
+; CHECK-LE-NEXT: lsrge r0, r1, r2
+
+; CHECK-BE: lsr{{.*}}r3
+; CHECK-BE-NEXT: rsb r2, r3, #32
+; CHECK-BE-NEXT: orr r1, r1, r0, lsl r2
+; CHECK-BE-NEXT: sub r2, r3, #32
+; CHECK-BE-NEXT: cmp r2, #0
+; CHECK-BE-NEXT: lsrge r1, r0, r2
+
%a = lshr i64 %x, %y
%b = trunc i64 %a to i32
ret i32 %b
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 14d84de..84ce4a7 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
-
+; RUN: llc < %s -mtriple=thumbv6m-apple-ios -mcpu=cortex-m0 -pre-RA-sched=source -disable-post-ra | FileCheck %s -check-prefix=CHECK-T1
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
@@ -17,7 +17,12 @@ define i32 @t0() {
entry:
; CHECK-LABEL: t0:
; CHECK: vldr [[REG1:d[0-9]+]],
-; CHECK: vstr [[REG1]],
+; CHECK: vstr [[REG1]],
+; CHECK-T1-LABEL: t0:
+; CHECK-T1: ldrb [[TREG1:r[0-9]]],
+; CHECK-T1: strb [[TREG1]],
+; CHECK-T1: ldrh [[TREG2:r[0-9]]],
+; CHECK-T1: strh [[TREG2]]
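+; Cortex-M0 has no VFP or NEON, so the Thumb1 expansion falls back to
+; integer ldrb/strb and ldrh/strh pairs instead of the vldr/vstr above.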
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
ret i32 0
}
@@ -83,6 +88,11 @@ entry:
; CHECK: movw [[REG7:r[0-9]+]], #18500
; CHECK: movt [[REG7:r[0-9]+]], #22866
; CHECK: str [[REG7]]
+; CHECK-T1-LABEL: t5:
+; CHECK-T1: movs [[TREG3:r[0-9]]],
+; CHECK-T1: strb [[TREG3]],
+; CHECK-T1: movs [[TREG4:r[0-9]]],
+; CHECK-T1: strb [[TREG4]],
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
ret void
}
@@ -90,12 +100,17 @@ entry:
define void @t6() nounwind {
entry:
; CHECK-LABEL: t6:
-; CHECK: vld1.8 {[[REG8:d[0-9]+]]}, [r0]
-; CHECK: vstr [[REG8]], [r1]
+; CHECK: vld1.8 {[[REG9:d[0-9]+]]}, [r0]
+; CHECK: vstr [[REG9]], [r1]
; CHECK: adds r1, #6
; CHECK: adds r0, #6
; CHECK: vld1.8
; CHECK: vst1.16
+; CHECK-T1-LABEL: t6:
+; CHECK-T1: movs [[TREG5:r[0-9]]],
+; CHECK-T1: strh [[TREG5]],
+; CHECK-T1: ldr [[TREG6:r[0-9]]],
+; CHECK-T1: str [[TREG6]]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
ret void
}
@@ -104,9 +119,12 @@ entry:
define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
entry:
-; CHECK: t7
+; CHECK-LABEL: t7:
; CHECK: vld1.32
; CHECK: vst1.32
+; CHECK-T1-LABEL: t7:
+; CHECK-T1: ldr
+; CHECK-T1: str
%0 = bitcast %struct.Foo* %a to i8*
%1 = bitcast %struct.Foo* %b to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll
index 5da335f..26adf0c 100644
--- a/test/CodeGen/ARM/misched-copy-arm.ll
+++ b/test/CodeGen/ARM/misched-copy-arm.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=thumb -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched %s -o - 2>&1 | FileCheck %s
;
; Loop counter copies should be eliminated.
; There is also a MUL here, but we don't care where it is scheduled.
diff --git a/test/CodeGen/ARM/movt.ll b/test/CodeGen/ARM/movt.ll
index 735d949..94c022e 100644
--- a/test/CodeGen/ARM/movt.ll
+++ b/test/CodeGen/ARM/movt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; rdar://7317664
define i32 @t(i32 %X) nounwind {
diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll
index 466a802..5e150b0 100644
--- a/test/CodeGen/ARM/mul.ll
+++ b/test/CodeGen/ARM/mul.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=arm | grep mul | count 2
-; RUN: llc < %s -march=arm | grep lsl | count 2
+; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
define i32 @f1(i32 %u) {
%tmp = mul i32 %u, %u
ret i32 %tmp
}
+; CHECK: mul
+
define i32 @f2(i32 %u, i32 %v) {
%tmp = mul i32 %u, %v
ret i32 %tmp
@@ -16,7 +17,16 @@ define i32 @f3(i32 %u) {
ret i32 %tmp
}
+; CHECK: mul
+; CHECK: lsl
+
define i32 @f4(i32 %u) {
%tmp = mul i32 %u, 4
ret i32 %tmp
}
+
+; CHECK-NOT: mul
+
+; CHECK: lsl
+; CHECK-NOT: lsl
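+
+; f4 multiplies by a power of two, which is strength-reduced to a single
+; left shift: no mul instruction and no second lsl may follow f3's code.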
+
diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll
index 489f247..e40ab1e 100644
--- a/test/CodeGen/ARM/mvn.ll
+++ b/test/CodeGen/ARM/mvn.ll
@@ -73,7 +73,8 @@ entry:
ret i1 %tmp102
}
-; CHECK-LABEL: f1
+; CHECK-LABEL: mvn.ll
+; CHECK-LABEL: @f1
; CHECK: mvn
; CHECK: mvn
; CHECK: mvn
diff --git a/test/CodeGen/ARM/named-reg-alloc.ll b/test/CodeGen/ARM/named-reg-alloc.ll
new file mode 100644
index 0000000..3c27d22
--- /dev/null
+++ b/test/CodeGen/ARM/named-reg-alloc.ll
@@ -0,0 +1,14 @@
+; RUN: not llc < %s -mtriple=arm-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=arm-linux-gnueabi 2>&1 | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK: Invalid register name global variable
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"r5\00"}
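+
+; r5 is a general-purpose, allocatable register; reading it through
+; llvm.read_register is rejected, unlike the reserved sp exercised in
+; stackpointer.ll.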
diff --git a/test/CodeGen/ARM/named-reg-notareg.ll b/test/CodeGen/ARM/named-reg-notareg.ll
new file mode 100644
index 0000000..af38b60
--- /dev/null
+++ b/test/CodeGen/ARM/named-reg-notareg.ll
@@ -0,0 +1,13 @@
+; RUN: not llc < %s -mtriple=arm-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=arm-linux-gnueabi 2>&1 | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; CHECK: Invalid register name global variable
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"notareg\00"}
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
index 94bced5..5a8f623 100644
--- a/test/CodeGen/ARM/phi.ll
+++ b/test/CodeGen/ARM/phi.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s
; <rdar://problem/8686347>
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
index c51d2b8..5313600 100644
--- a/test/CodeGen/ARM/ret_i64_arg2.ll
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm -mattr=+vfp2 %s -o /dev/null
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
define i64 @test_i64(i64 %a1, i64 %a2) {
ret i64 %a2
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
index 602997e..ce8da0a 100644
--- a/test/CodeGen/ARM/ret_i64_arg3.ll
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm -mattr=+vfp2 %s -o /dev/null
+; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o /dev/null
define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
ret i64 %a3
diff --git a/test/CodeGen/ARM/segmented-stacks-dynamic.ll b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
index 13b5bcf..86f8ff8 100644
--- a/test/CodeGen/ARM/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/ARM/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
%mem = alloca i32, i32 %l
call void @dummy_use (i32* %mem, i32 %l)
%terminate = icmp eq i32 %l, 0
@@ -29,7 +29,7 @@ false:
; ARM-linux-NEXT: cmp r4, r5
; ARM-linux-NEXT: blo .LBB0_2
-; ARM-linux: mov r4, #24
+; ARM-linux: mov r4, #16
; ARM-linux-NEXT: mov r5, #0
; ARM-linux-NEXT: stmdb sp!, {lr}
; ARM-linux-NEXT: bl __morestack
@@ -49,7 +49,7 @@ false:
; ARM-android-NEXT: cmp r4, r5
; ARM-android-NEXT: blo .LBB0_2
-; ARM-android: mov r4, #24
+; ARM-android: mov r4, #16
; ARM-android-NEXT: mov r5, #0
; ARM-android-NEXT: stmdb sp!, {lr}
; ARM-android-NEXT: bl __morestack
@@ -60,3 +60,5 @@ false:
; ARM-android: pop {r4, r5}
}
+
+attributes #0 = { "split-stack" }
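+
+; Segmented stacks are now requested per function through the
+; "split-stack" attribute rather than the old -segmented-stacks backend
+; flag, which is why the RUN lines above no longer pass that option.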
diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll
index 5eff633..9873bf3 100644
--- a/test/CodeGen/ARM/segmented-stacks.ll
+++ b/test/CodeGen/ARM/segmented-stacks.ll
@@ -1,15 +1,15 @@
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
+; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
; We used to crash with filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
+; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define void @test_basic() {
+define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -54,9 +54,11 @@ define void @test_basic() {
}
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
%addend = load i32 * %closure
%result = add i32 %other, %addend
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
ret i32 %result
; ARM-linux: test_nested:
@@ -68,7 +70,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; ARM-linux-NEXT: cmp r4, r5
; ARM-linux-NEXT: blo .LBB1_2
-; ARM-linux: mov r4, #0
+; ARM-linux: mov r4, #56
; ARM-linux-NEXT: mov r5, #0
; ARM-linux-NEXT: stmdb sp!, {lr}
; ARM-linux-NEXT: bl __morestack
@@ -87,7 +89,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; ARM-android-NEXT: cmp r4, r5
; ARM-android-NEXT: blo .LBB1_2
-; ARM-android: mov r4, #0
+; ARM-android: mov r4, #56
; ARM-android-NEXT: mov r5, #0
; ARM-android-NEXT: stmdb sp!, {lr}
; ARM-android-NEXT: bl __morestack
@@ -99,7 +101,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
}
-define void @test_large() {
+define void @test_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
@@ -144,7 +146,7 @@ define void @test_large() {
}
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -189,7 +191,7 @@ define fastcc void @test_fastcc() {
}
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
@@ -233,3 +235,15 @@ define fastcc void @test_fastcc_large() {
; ARM-android: pop {r4, r5}
}
+
+define void @test_nostack() #0 {
+ ret void
+
+; ARM-linux-LABEL: test_nostack:
+; ARM-linux-NOT: bl __morestack
+
+; ARM-android-LABEL: test_nostack:
+; ARM-android-NOT: bl __morestack
+}
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll
index 99df0d4..fc73eb7 100644
--- a/test/CodeGen/ARM/smml.ll
+++ b/test/CodeGen/ARM/smml.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+
define i32 @f(i32 %a, i32 %b, i32 %c) nounwind readnone ssp {
entry:
; CHECK-NOT: smmls
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index a419074..a3b0b66 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,14 +1,14 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi < %s -o - | FileCheck %s
define void @f1() {
%c = alloca i8, align 1
ret void
}
+; CHECK-LABEL: f1:
+; CHECK: add
define i32 @f2() {
ret i32 1
}
-
-; CHECK: add
+; CHECK-LABEL: f2:
; CHECK-NOT: add
-
diff --git a/test/CodeGen/ARM/stackpointer.ll b/test/CodeGen/ARM/stackpointer.ll
new file mode 100644
index 0000000..420a916
--- /dev/null
+++ b/test/CodeGen/ARM/stackpointer.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; CHECK-LABEL: get_stack:
+; CHECK: mov r0, sp
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+define void @set_stack(i32 %val) nounwind {
+entry:
+; CHECK-LABEL: set_stack:
+; CHECK: mov sp, r0
+ call void @llvm.write_register.i32(metadata !0, i32 %val)
+ ret void
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+declare void @llvm.write_register.i32(metadata, i32) nounwind
+
+; register unsigned long current_stack_pointer asm("sp");
+; CHECK-NOT: .asciz "sp"
+!0 = metadata !{metadata !"sp\00"}
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 67bde2a..9ac314d 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -1,10 +1,13 @@
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+; RUN: llc -mtriple=armeb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
; 171 = 0x000000ab
define i64 @f1(i64 %a) {
; CHECK: f1
-; CHECK: subs r0, r0, #171
-; CHECK: sbc r1, r1, #0
+; CHECK-LE: subs r0, r0, #171
+; CHECK-LE: sbc r1, r1, #0
+; CHECK-BE: subs r1, r1, #171
+; CHECK-BE: sbc r0, r0, #0
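+; The i64 subtraction splits into subs on the low word plus a
+; carry-propagating sbc on the high word; endianness only swaps which
+; register of the r0/r1 pair holds the low word.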
%tmp = sub i64 %a, 171
ret i64 %tmp
}
@@ -12,8 +15,10 @@ define i64 @f1(i64 %a) {
; 66846720 = 0x03fc0000
define i64 @f2(i64 %a) {
; CHECK: f2
-; CHECK: subs r0, r0, #66846720
-; CHECK: sbc r1, r1, #0
+; CHECK-LE: subs r0, r0, #66846720
+; CHECK-LE: sbc r1, r1, #0
+; CHECK-BE: subs r1, r1, #66846720
+; CHECK-BE: sbc r0, r0, #0
%tmp = sub i64 %a, 66846720
ret i64 %tmp
}
@@ -21,8 +26,10 @@ define i64 @f2(i64 %a) {
; 734439407618 = 0x000000ab00000002
define i64 @f3(i64 %a) {
; CHECK: f3
-; CHECK: subs r0, r0, #2
-; CHECK: sbc r1, r1, #171
+; CHECK-LE: subs r0, r0, #2
+; CHECK-LE: sbc r1, r1, #171
+; CHECK-BE: subs r1, r1, #2
+; CHECK-BE: sbc r0, r0, #171
%tmp = sub i64 %a, 734439407618
ret i64 %tmp
}
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
index dd75cd1..23463b8 100644
--- a/test/CodeGen/ARM/t2-imm.ll
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f6(i32 %a) {
; CHECK:f6
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index d954760..c5e699c 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s
; PR11107
define i32 @test(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
index 6cb26e3..0baf50b 100644
--- a/test/CodeGen/ARM/trap.ll
+++ b/test/CodeGen/ARM/trap.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR
; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC
+; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap -O0 | FileCheck %s -check-prefix=FUNC
; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
; RUN: | llvm-objdump -disassemble -triple armv7-unknown-nacl - \
; RUN: | FileCheck %s -check-prefix=ENCODING-NACL
diff --git a/test/CodeGen/ARM/undefined.ll b/test/CodeGen/ARM/undefined.ll
new file mode 100644
index 0000000..86422fb
--- /dev/null
+++ b/test/CodeGen/ARM/undefined.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple armv7-eabi -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv6m-eabi -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv7-eabi -o - %s | FileCheck %s
+
+declare void @llvm.arm.undefined(i32) nounwind
+
+define void @undefined_trap() {
+entry:
+ tail call void @llvm.arm.undefined(i32 254)
+ ret void
+}
+
+; CHECK-LABEL: undefined_trap
+; CHECK: udf #254
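+
+; udf is the permanently undefined encoding, so the intrinsic's immediate
+; is emitted verbatim as the udf operand on all three targets.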
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
index d611267..33aa71d 100644
--- a/test/CodeGen/ARM/vcombine.ll
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -1,9 +1,12 @@
-; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK: vcombine8
-; CHECK: vmov r0, r1, d16
-; CHECK: vmov r2, r3, d17
+; CHECK-LE: vmov r0, r1, d16
+; CHECK-LE: vmov r2, r3, d17
+; CHECK-BE: vmov r1, r0, d16
+; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -12,8 +15,10 @@ define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK: vcombine16
-; CHECK: vmov r0, r1, d16
-; CHECK: vmov r2, r3, d17
+; CHECK-LE: vmov r0, r1, d16
+; CHECK-LE: vmov r2, r3, d17
+; CHECK-BE: vmov r1, r0, d16
+; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -22,8 +27,10 @@ define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK: vcombine32
-; CHECK: vmov r0, r1, d16
-; CHECK: vmov r2, r3, d17
+; CHECK-LE: vmov r0, r1, d16
+; CHECK-LE: vmov r2, r3, d17
+; CHECK-BE: vmov r1, r0, d16
+; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -32,8 +39,10 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK: vcombinefloat
-; CHECK: vmov r0, r1, d16
-; CHECK: vmov r2, r3, d17
+; CHECK-LE: vmov r0, r1, d16
+; CHECK-LE: vmov r2, r3, d17
+; CHECK-BE: vmov r1, r0, d16
+; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -42,8 +51,10 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK: vcombine64
-; CHECK: vmov r0, r1, d16
-; CHECK: vmov r2, r3, d17
+; CHECK-LE: vmov r0, r1, d16
+; CHECK-LE: vmov r2, r3, d17
+; CHECK-BE: vmov r1, r0, d16
+; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
@@ -56,7 +67,8 @@ define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
; CHECK: vget_low16
; CHECK-NOT: vst
-; CHECK: vmov r0, r1, d16
+; CHECK-LE: vmov r0, r1, d16
+; CHECK-BE: vmov r1, r0, d16
%tmp1 = load <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %tmp2
@@ -65,7 +77,8 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK: vget_high8
; CHECK-NOT: vst
-; CHECK: vmov r0, r1, d17
+; CHECK-LE: vmov r0, r1, d17
+; CHECK-BE: vmov r1, r0, d16
%tmp1 = load <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %tmp2
diff --git a/test/CodeGen/ARM/vfp-libcalls.ll b/test/CodeGen/ARM/vfp-libcalls.ll
new file mode 100644
index 0000000..9d4e194
--- /dev/null
+++ b/test/CodeGen/ARM/vfp-libcalls.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=armv6-apple-ios -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=thumbv6-apple-ios -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-SOFTISH
+; RUN: llc -mtriple=armv7s-apple-ios -soft-float -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-SOFT
+
+define float @test_call(float %a, float %b) {
+; CHECK-HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-SOFTISH: blx ___addsf3vfp
+; CHECK-SOFT: bl ___addsf3{{$}}
+ %sum = fadd float %a, %b
+ ret float %sum
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index eb76ba6..7215ad6 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -178,3 +178,11 @@ entry:
ret void
}
+define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
+; CHECK-LABEL: test_vrev32_bswap:
+; CHECK: vrev32.8
+ %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
+ ret <4 x i32> %bswap
+}
+
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/zextload_demandedbits.ll b/test/CodeGen/ARM/zextload_demandedbits.ll
index 3d3269c..6b6ce97 100644
--- a/test/CodeGen/ARM/zextload_demandedbits.ll
+++ b/test/CodeGen/ARM/zextload_demandedbits.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
%struct.spam = type { [3 x i32] }
%struct.barney = type { [2 x i32], [2 x i32] }
-; Make sure that the sext op does not get lost due to ComputeMaskedBits.
+; Make sure that the sext op does not get lost due to computeKnownBits.
; CHECK: quux
; CHECK: lsl
; CHECK: asr
diff --git a/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll b/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll
deleted file mode 100644
index ea1cd02..0000000
--- a/test/CodeGen/ARM64/2011-10-18-LdStOptBug.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
-
-; Can't fold the increment by 1<<12 into a post-increment load
-; rdar://10301335
-
-@test_data = common global i32 0, align 4
-
-define void @t() nounwind ssp {
-; CHECK-LABEL: t:
-entry:
- br label %for.body
-
-for.body:
-; CHECK: for.body
-; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
-; CHECK: add x[[REG:[0-9]+]],
-; CHECK: x[[REG]], #4096
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %0 = shl nsw i64 %indvars.iv, 12
- %add = add nsw i64 %0, 34628173824
- %1 = inttoptr i64 %add to i32*
- %2 = load volatile i32* %1, align 4096
- store volatile i32 %2, i32* @test_data, align 4
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 200
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
- ret void
-}
diff --git a/test/CodeGen/ARM64/2012-06-06-FPToUI.ll b/test/CodeGen/ARM64/2012-06-06-FPToUI.ll
deleted file mode 100644
index dda4ff5..0000000
--- a/test/CodeGen/ARM64/2012-06-06-FPToUI.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: llc -march=arm64 -O0 < %s | FileCheck %s
-; RUN: llc -march=arm64 -O3 < %s | FileCheck %s
-
-@.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1
-@.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1
-@.str2 = private unnamed_addr constant [8 x i8] c"%f %lu\0A\00", align 1
-@.str3 = private unnamed_addr constant [7 x i8] c"%f %u\0A\00", align 1
-
-define void @testDouble(double %d) ssp {
-; CHECK: fcvtzu x{{.}}, d{{.}}
-; CHECK: fcvtzu w{{.}}, d{{.}}
-entry:
- %d.addr = alloca double, align 8
- store double %d, double* %d.addr, align 8
- %0 = load double* %d.addr, align 8
- %1 = load double* %d.addr, align 8
- %conv = fptoui double %1 to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
- %2 = load double* %d.addr, align 8
- %3 = load double* %d.addr, align 8
- %conv1 = fptoui double %3 to i32
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
- ret void
-}
-
-declare i32 @printf(i8*, ...)
-
-define void @testFloat(float %f) ssp {
-; CHECK: fcvtzu x{{.}}, s{{.}}
-; CHECK: fcvtzu w{{.}}, s{{.}}
-entry:
- %f.addr = alloca float, align 4
- store float %f, float* %f.addr, align 4
- %0 = load float* %f.addr, align 4
- %conv = fpext float %0 to double
- %1 = load float* %f.addr, align 4
- %conv1 = fptoui float %1 to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
- %2 = load float* %f.addr, align 4
- %conv2 = fpext float %2 to double
- %3 = load float* %f.addr, align 4
- %conv3 = fptoui float %3 to i32
- %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
- ret void
-}
-
-define i32 @main(i32 %argc, i8** %argv) ssp {
-entry:
- %retval = alloca i32, align 4
- %argc.addr = alloca i32, align 4
- %argv.addr = alloca i8**, align 8
- store i32 0, i32* %retval
- store i32 %argc, i32* %argc.addr, align 4
- store i8** %argv, i8*** %argv.addr, align 8
- call void @testDouble(double 1.159198e+01)
- call void @testFloat(float 0x40272F1800000000)
- ret i32 0
-}
-
-!llvm.module.flags = !{!0, !1, !2, !3}
-
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll b/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll
deleted file mode 100644
index b40a581..0000000
--- a/test/CodeGen/ARM64/2013-01-13-ffast-fcmp.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -fp-contract=fast | FileCheck %s --check-prefix=FAST
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios7.0.0"
-
-;FAST-LABEL: _Z9example25v:
-;FAST: fcmgt.4s
-;FAST: ret
-
-;CHECK-LABEL: _Z9example25v:
-;CHECK: fcmgt.4s
-;CHECK: ret
-
-define <4 x i32> @_Z9example25v( <4 x float> %N0, <4 x float> %N1) {
- %A = fcmp olt <4 x float> %N0, %N1
- %B = zext <4 x i1> %A to <4 x i32>
- ret <4 x i32> %B
-}
diff --git a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll b/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll
deleted file mode 100644
index 70e745f..0000000
--- a/test/CodeGen/ARM64/2013-02-12-shufv8i8.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple
-
-;CHECK-LABEL: Shuff:
-;CHECK: tbl.8b
-;CHECK: ret
-define <8 x i8 > @Shuff(<8 x i8> %in, <8 x i8>* %out) nounwind ssp {
- %value = shufflevector <8 x i8> %in, <8 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i8> %value
-}
-
-
diff --git a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll b/test/CodeGen/ARM64/AdvSIMD-Scalar.ll
deleted file mode 100644
index 6397ac5..0000000
--- a/test/CodeGen/ARM64/AdvSIMD-Scalar.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -arm64-simd-scalar=true -asm-verbose=false | FileCheck %s
-;
-define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: bar:
-; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
-; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
-; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
- %add = add <2 x i64> %a, %b
- %vgetq_lane = extractelement <2 x i64> %add, i32 0
- %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
- %add3 = add i64 %vgetq_lane, %vgetq_lane2
- %sub = sub i64 %vgetq_lane, %vgetq_lane2
- %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
- %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
- ret <2 x i64> %vecinit8
-}
-
-define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: subdd_su64:
-; CHECK: sub d0, d1, d0
-; CHECK-NEXT: ret
- %vecext = extractelement <2 x i64> %a, i32 0
- %vecext1 = extractelement <2 x i64> %b, i32 0
- %sub.i = sub nsw i64 %vecext1, %vecext
- %retval = bitcast i64 %sub.i to double
- ret double %retval
-}
-
-define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vaddd_su64:
-; CHECK: add d0, d1, d0
-; CHECK-NEXT: ret
- %vecext = extractelement <2 x i64> %a, i32 0
- %vecext1 = extractelement <2 x i64> %b, i32 0
- %add.i = add nsw i64 %vecext1, %vecext
- %retval = bitcast i64 %add.i to double
- ret double %retval
-}
diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/ARM64/aapcs.ll
deleted file mode 100644
index 27d2aa7..0000000
--- a/test/CodeGen/ARM64/aapcs.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s
-
-@var = global i32 0, align 4
-
-define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
- store i32 %after, i32* @var, align 4
-; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
-
- ret i128 %arg
-; CHECK: mov x0, x2
-; CHECK: mov x1, x3
-}
-
-@var64 = global i64 0, align 8
-
- ; Check stack slots are 64-bit at all times.
-define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
- i32 %int, i64 %long) {
- ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
- %ext_bool = zext i1 %bool to i64
- store volatile i64 %ext_bool, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp]
-; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
-; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
-
- %ext_char = zext i8 %char to i64
- store volatile i64 %ext_char, i64* @var64, align 8
-; CHECK: ldrb w[[EXT:[0-9]+]], [sp, #8]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- %ext_short = zext i16 %short to i64
- store volatile i64 %ext_short, i64* @var64, align 8
-; CHECK: ldrh w[[EXT:[0-9]+]], [sp, #16]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- %ext_int = zext i32 %int to i64
- store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24]
-; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- store volatile i64 %long, i64* @var64, align 8
-; CHECK: str [[LONG]], [{{x[0-9]+}}, :lo12:var64]
-
- ret void
-}
-
-; Make sure the callee does the extensions (in the absence of zext/sext
-; keywords on the arguments) while we're here.
-
-define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
- %ext_bool = zext i1 %bool to i64
- store volatile i64 %ext_bool, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- %ext_char = sext i8 %char to i64
- store volatile i64 %ext_char, i64* @var64
-; CHECK: sxtb [[EXT:x[0-9]+]], x1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- %ext_short = zext i16 %short to i64
- store volatile i64 %ext_short, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- %ext_int = zext i32 %int to i64
- store volatile i64 %ext_int, i64* @var64
-; CHECK: uxtw [[EXT:x[0-9]+]], x3
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
-
- ret void
-}
-
-declare void @variadic(i32 %a, ...)
-
- ; Under AAPCS, variadic functions have the same calling convention as
- ; others. The extra arguments should go in registers rather than on the stack.
-define void @test_variadic() {
- call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0)
-; CHECK: fmov d0, #2.0
-; CHECK: orr x1, xzr, #0x1
-; CHECK: bl variadic
- ret void
-}
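
The deleted test above leans on the callee performing argument extension when the IR carries no zext/sext keywords. As a hedged counterpart (a sketch with hypothetical names, not part of the original diff): once the parameter carries zeroext, the extension becomes the caller's job, so the callee should need no and/uxtb before using the value.

declare void @use64(i64)

; Sketch, assuming the same arm64-linux-gnu triple as the RUN line above.
define void @callee_gets_extended(i8 zeroext %c) {
  ; %c is guaranteed zero-extended by the caller, so the zext below should
  ; lower to at most a register move rather than an explicit mask.
  %ext = zext i8 %c to i64
  call void @use64(i64 %ext)
  ret void
}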
diff --git a/test/CodeGen/ARM64/abi.ll b/test/CodeGen/ARM64/abi.ll
deleted file mode 100644
index a7693b6..0000000
--- a/test/CodeGen/ARM64/abi.ll
+++ /dev/null
@@ -1,236 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
-target triple = "arm64-apple-darwin"
-
-; rdar://9932559
-define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline {
-entry:
-; CHECK-LABEL: i8i16callee:
-; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
-; They are i8, i16, i8 and i8.
-; CHECK: ldrsb {{w[0-9]+}}, [sp, #5]
-; CHECK: ldrsh {{w[0-9]+}}, [sp, #2]
-; CHECK: ldrsb {{w[0-9]+}}, [sp]
-; CHECK: ldrsb {{w[0-9]+}}, [sp, #4]
-; FAST-LABEL: i8i16callee:
-; FAST: ldrb {{w[0-9]+}}, [sp, #5]
-; FAST: ldrb {{w[0-9]+}}, [sp, #4]
-; FAST: ldrh {{w[0-9]+}}, [sp, #2]
-; FAST: ldrb {{w[0-9]+}}, [sp]
- %conv = sext i8 %a4 to i64
- %conv3 = sext i16 %a5 to i64
- %conv8 = sext i8 %b1 to i64
- %conv9 = sext i16 %b2 to i64
- %conv11 = sext i8 %b3 to i64
- %conv13 = sext i8 %b4 to i64
- %add10 = add i64 %a2, %a1
- %add12 = add i64 %add10, %a3
- %add14 = add i64 %add12, %conv
- %add = add i64 %add14, %conv3
- %add1 = add i64 %add, %a6
- %add2 = add i64 %add1, %a7
- %add4 = add i64 %add2, %a8
- %add5 = add i64 %add4, %conv8
- %add6 = add i64 %add5, %conv9
- %add7 = add i64 %add6, %conv11
- %add15 = add i64 %add7, %conv13
- %sext = shl i64 %add15, 32
- %conv17 = ashr exact i64 %sext, 32
- ret i64 %conv17
-}
-
-define i32 @i8i16caller() nounwind readnone {
-entry:
-; CHECK: i8i16caller
-; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
-; They are i8, i16, i8 and i8.
-; CHECK: strb {{w[0-9]+}}, [sp, #5]
-; CHECK: strb {{w[0-9]+}}, [sp, #4]
-; CHECK: strh {{w[0-9]+}}, [sp, #2]
-; CHECK: strb {{w[0-9]+}}, [sp]
-; CHECK: bl
-; FAST: i8i16caller
-; FAST: strb {{w[0-9]+}}, [sp]
-; FAST: strh {{w[0-9]+}}, [sp, #2]
-; FAST: strb {{w[0-9]+}}, [sp, #4]
-; FAST: strb {{w[0-9]+}}, [sp, #5]
-; FAST: bl
- %call = tail call i64 @i8i16callee(i64 0, i64 1, i64 2, i8 signext 3, i16 signext 4, i64 5, i64 6, i64 7, i8 signext 97, i16 signext 98, i8 signext 99, i8 signext 100)
- %conv = trunc i64 %call to i32
- ret i32 %conv
-}
-
-; rdar://12651543
-define double @circle_center([2 x float] %a) nounwind ssp {
- %call = tail call double @ext([2 x float] %a) nounwind
-; CHECK: circle_center
-; CHECK: bl
- ret double %call
-}
-declare double @ext([2 x float])
-
-; rdar://12656141
-; A 16-byte vector should be 16-byte aligned when passed on the stack.
-; A double argument will be passed on the stack first, so the vector should be at sp+16.
-define double @fixed_4i(<4 x i32>* nocapture %in) nounwind {
-entry:
-; CHECK: fixed_4i
-; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
-; FAST: fixed_4i
-; FAST: mov x[[ADDR:[0-9]+]], sp
-; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
- %0 = load <4 x i32>* %in, align 16
- %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
- ret double %call
-}
-declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, double, <4 x i32>, i8 signext)
-
-; rdar://12695237
-; d8 at sp, i in register w0.
-@g_d = common global double 0.000000e+00, align 8
-define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4,
- double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp {
-entry:
-; CHECK: test1
-; CHECK: ldr [[REG_1:d[0-9]+]], [sp]
-; CHECK: scvtf [[REG_2:s[0-9]+]], w0
-; CHECK: fadd s0, [[REG_2]], s0
- %conv = sitofp i32 %i to float
- %add = fadd float %conv, %f1
- %conv1 = fpext float %add to double
- %add2 = fadd double %conv1, %d7
- %add3 = fadd double %add2, %d8
- store double %add3, double* @g_d, align 8
- ret void
-}
-
-; i9 at sp, d1 in register s0.
-define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp {
-entry:
-; CHECK: test2
-; CHECK: scvtf [[REG_2:s[0-9]+]], w0
-; CHECK: fadd s0, [[REG_2]], s0
-; CHECK: ldr [[REG_1:s[0-9]+]], [sp]
- %conv = sitofp i32 %i1 to float
- %add = fadd float %conv, %d1
- %conv1 = fpext float %add to double
- %conv2 = sitofp i32 %i8 to double
- %add3 = fadd double %conv2, %conv1
- %conv4 = sitofp i32 %i9 to double
- %add5 = fadd double %conv4, %add3
- store double %add5, double* @g_d, align 8
- ret void
-}
-
-; rdar://12648441
-; Check alignment on stack for v64, f64, i64, f32, i32.
-define double @test3(<2 x i32>* nocapture %in) nounwind {
-entry:
-; CHECK: test3
-; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
-; FAST: test3
-; FAST: mov x[[ADDR:[0-9]+]], sp
-; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
- %0 = load <2 x i32>* %in, align 8
- %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0,
- <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0,
- <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3)
- ret double %call
-}
-declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>,
- <2 x i32>, <2 x i32>, <2 x i32>, float, <2 x i32>, i8 signext)
-
-define double @test4(double* nocapture %in) nounwind {
-entry:
-; CHECK: test4
-; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
-; CHECK: str [[REG_2:w[0-9]+]], [sp]
-; CHECK: orr w0, wzr, #0x3
- %0 = load double* %in, align 8
- %call = tail call double @args_f64(double 3.000000e+00, double %0, double %0,
- double %0, double %0, double %0, double %0, double %0,
- float 3.000000e+00, double %0, i8 signext 3)
- ret double %call
-}
-declare double @args_f64(double, double, double, double, double, double, double,
- double, float, double, i8 signext)
-
-define i64 @test5(i64* nocapture %in) nounwind {
-entry:
-; CHECK: test5
-; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
-; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
-; CHECK: str [[REG_2:w[0-9]+]], [sp]
- %0 = load i64* %in, align 8
- %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0,
- i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3)
- ret i64 %call
-}
-declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64,
- i8 signext)
-
-define i32 @test6(float* nocapture %in) nounwind {
-entry:
-; CHECK: test6
-; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
-; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
-; CHECK: strh [[REG_3:w[0-9]+]], [sp]
- %0 = load float* %in, align 4
- %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
- i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
- float 6.0, float 7.0, float 8.0, i16 signext 3, float %0,
- i8 signext 3)
- ret i32 %call
-}
-declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32,
- float, float, float, float, float, float, float, float,
- i16 signext, float, i8 signext)
-
-define i32 @test7(i32* nocapture %in) nounwind {
-entry:
-; CHECK: test7
-; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
-; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
-; CHECK: strh [[REG_3:w[0-9]+]], [sp]
- %0 = load i32* %in, align 4
- %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0,
- i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4)
- ret i32 %call
-}
-declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32,
- i8 signext)
-
-define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
-entry:
-; CHECK: test8
-; CHECK: strb {{w[0-9]+}}, [sp, #3]
-; CHECK: strb wzr, [sp, #2]
-; CHECK: strb {{w[0-9]+}}, [sp, #1]
-; CHECK: strb wzr, [sp]
-; CHECK: bl
-; FAST: test8
-; FAST: strb {{w[0-9]+}}, [sp]
-; FAST: strb {{w[0-9]+}}, [sp, #1]
-; FAST: strb {{w[0-9]+}}, [sp, #2]
-; FAST: strb {{w[0-9]+}}, [sp, #3]
-; FAST: bl
- tail call void @args_i1(i1 zeroext false, i1 zeroext true, i1 zeroext false,
- i1 zeroext true, i1 zeroext false, i1 zeroext true,
- i1 zeroext false, i1 zeroext true, i1 zeroext false,
- i1 zeroext true, i1 zeroext false, i1 zeroext true)
- ret i32 0
-}
-
-declare void @args_i1(i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
- i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext,
- i1 zeroext, i1 zeroext, i1 zeroext, i1 zeroext)
-
-define i32 @i1_stack_incoming(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
- i64 %g, i64 %h, i64 %i, i1 zeroext %j) {
-; CHECK-LABEL: i1_stack_incoming:
-; CHECK: ldrb w0, [sp, #8]
-; CHECK: ret
- %v = zext i1 %j to i32
- ret i32 %v
-}
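
In contrast to AAPCS's 64-bit stack slots, the Darwin convention exercised above packs stack arguments at their natural size and alignment. A minimal sketch of that packing, with hypothetical names, and offsets taken from the comments in the deleted test rather than re-measured:

target triple = "arm64-apple-darwin"

declare i32 @takes_narrow(i64, i64, i64, i64, i64, i64, i64, i64,
                          i8 signext, i16 signext)

define i32 @pack_small_args() {
  ; With x0-x7 consumed by the eight i64s, the i8 should land at [sp] and
  ; the i16 at [sp, #2] (two-byte alignment), as in i8i16callee above.
  %r = call i32 @takes_narrow(i64 0, i64 1, i64 2, i64 3, i64 4, i64 5,
                              i64 6, i64 7, i8 signext 42, i16 signext 43)
  ret i32 %r
}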
diff --git a/test/CodeGen/ARM64/abi_align.ll b/test/CodeGen/ARM64/abi_align.ll
deleted file mode 100644
index 61c661e..0000000
--- a/test/CodeGen/ARM64/abi_align.ll
+++ /dev/null
@@ -1,529 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
-target triple = "arm64-apple-darwin"
-
-; rdar://12648441
-; Generated from arm64-arguments.c with -O2.
-; Test passing structs of size < 8, < 16 and > 16 bytes,
-; both with and without 16-byte alignment.
-
-; Structs with size < 8
-%struct.s38 = type { i32, i16 }
-; With alignment of 16, the size will be padded to a multiple of 16 bytes.
-%struct.s39 = type { i32, i16, [10 x i8] }
-; Structs with size < 16
-%struct.s40 = type { i32, i16, i32, i16 }
-%struct.s41 = type { i32, i16, i32, i16 }
-; Structs with size > 16
-%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
-%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
-
-@g38 = common global %struct.s38 zeroinitializer, align 4
-@g38_2 = common global %struct.s38 zeroinitializer, align 4
-@g39 = common global %struct.s39 zeroinitializer, align 16
-@g39_2 = common global %struct.s39 zeroinitializer, align 16
-@g40 = common global %struct.s40 zeroinitializer, align 4
-@g40_2 = common global %struct.s40 zeroinitializer, align 4
-@g41 = common global %struct.s41 zeroinitializer, align 16
-@g41_2 = common global %struct.s41 zeroinitializer, align 16
-@g42 = common global %struct.s42 zeroinitializer, align 4
-@g42_2 = common global %struct.s42 zeroinitializer, align 4
-@g43 = common global %struct.s43 zeroinitializer, align 16
-@g43_2 = common global %struct.s43 zeroinitializer, align 16
-
-; structs with size < 8 bytes, passed via i64 in x1 and x2
-define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
-entry:
-; CHECK: f38
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w2
- %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
- %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
- %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
- %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
- %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
- %sext = trunc i64 %sext8 to i32
- %conv = ashr exact i32 %sext, 16
- %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
- %sext10 = trunc i64 %sext1011 to i32
- %conv6 = ashr exact i32 %sext10, 16
- %add = add i32 %s1.sroa.0.0.extract.trunc, %i
- %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
- %add4 = add i32 %add3, %conv
- %add7 = add i32 %add4, %conv6
- ret i32 %add7
-}
-
-define i32 @caller38() #1 {
-entry:
-; CHECK: caller38
-; CHECK: ldr x1,
-; CHECK: ldr x2,
- %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
- %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
- %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
- ret i32 %call
-}
-
-declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
-
-; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
-; i9 at [sp]
-define i32 @caller38_stack() #1 {
-entry:
-; CHECK: caller38_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #9
-; CHECK: str w[[C]], [sp]
- %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
- %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
- %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
- i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
- ret i32 %call
-}
-
-; structs with size < 8 bytes, alignment of 16
-; passed via i128 in x1 and x3
-define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
-entry:
-; CHECK: f39
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w3
- %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
- %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
- %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
- %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
- %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
- %sext = trunc i128 %sext8 to i32
- %conv = ashr exact i32 %sext, 16
- %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
- %sext10 = trunc i128 %sext1011 to i32
- %conv6 = ashr exact i32 %sext10, 16
- %add = add i32 %s1.sroa.0.0.extract.trunc, %i
- %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
- %add4 = add i32 %add3, %conv
- %add7 = add i32 %add4, %conv6
- ret i32 %add7
-}
-
-define i32 @caller39() #1 {
-entry:
-; CHECK: caller39
-; CHECK: ldp x1, x2,
-; CHECK: ldp x3, x4,
- %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
- %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
- %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
- ret i32 %call
-}
-
-declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
-
-; structs with size < 8 bytes, alignment 16
-; passed on stack at [sp+16] and [sp+32]
-define i32 @caller39_stack() #1 {
-entry:
-; CHECK: caller39_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: movz w[[C:[0-9]+]], #9
-; CHECK: str w[[C]], [sp]
- %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
- %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
- %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
- i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
- ret i32 %call
-}
-
-; structs with size < 16 bytes
-; passed via i128 in x1 and x3
-define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
-entry:
-; CHECK: f40
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w3
- %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
- %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
- %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
- %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
- %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
- %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
- %sext = trunc i64 %sext8 to i32
- %conv = ashr exact i32 %sext, 16
- %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
- %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
- %sext10 = trunc i64 %sext1011 to i32
- %conv6 = ashr exact i32 %sext10, 16
- %add = add i32 %s1.sroa.0.0.extract.trunc, %i
- %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
- %add4 = add i32 %add3, %conv
- %add7 = add i32 %add4, %conv6
- ret i32 %add7
-}
-
-define i32 @caller40() #1 {
-entry:
-; CHECK: caller40
-; CHECK: ldp x1, x2,
-; CHECK: ldp x3, x4,
- %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
- %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
- %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
- ret i32 %call
-}
-
-declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
-
-; structs with size < 16 bytes
-; passed on stack at [sp+8] and [sp+24]
-define i32 @caller40_stack() #1 {
-entry:
-; CHECK: caller40_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #9
-; CHECK: str w[[C]], [sp]
- %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
- %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
- %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
- i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
- ret i32 %call
-}
-
-; structs with size < 16 bytes, alignment of 16
-; passed via i128 in x1 and x3
-define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
-entry:
-; CHECK: f41
-; CHECK: add w[[A:[0-9]+]], w1, w0
-; CHECK: add {{w[0-9]+}}, w[[A]], w3
- %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
- %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
- %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
- %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
- %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
- %sext = trunc i128 %sext8 to i32
- %conv = ashr exact i32 %sext, 16
- %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
- %sext10 = trunc i128 %sext1011 to i32
- %conv6 = ashr exact i32 %sext10, 16
- %add = add i32 %s1.sroa.0.0.extract.trunc, %i
- %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
- %add4 = add i32 %add3, %conv
- %add7 = add i32 %add4, %conv6
- ret i32 %add7
-}
-
-define i32 @caller41() #1 {
-entry:
-; CHECK: caller41
-; CHECK: ldp x1, x2,
-; CHECK: ldp x3, x4,
- %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
- %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
- %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
- ret i32 %call
-}
-
-declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
-
-; structs with size < 16 bytes, alignment of 16
-; passed on stack at [sp+16] and [sp+32]
-define i32 @caller41_stack() #1 {
-entry:
-; CHECK: caller41_stack
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: movz w[[C:[0-9]+]], #9
-; CHECK: str w[[C]], [sp]
- %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
- %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
- %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
- i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
- ret i32 %call
-}
-
-; structs with size of 22 bytes, passed indirectly in x1 and x2
-define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
-entry:
-; CHECK: f42
-; CHECK: ldr w[[A:[0-9]+]], [x1]
-; CHECK: ldr w[[B:[0-9]+]], [x2]
-; CHECK: add w[[C:[0-9]+]], w[[A]], w0
-; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
-; FAST: f42
-; FAST: ldr w[[A:[0-9]+]], [x1]
-; FAST: ldr w[[B:[0-9]+]], [x2]
-; FAST: add w[[C:[0-9]+]], w[[A]], w0
-; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
- %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
- %0 = load i32* %i1, align 4, !tbaa !0
- %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
- %1 = load i32* %i2, align 4, !tbaa !0
- %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
- %2 = load i16* %s, align 2, !tbaa !3
- %conv = sext i16 %2 to i32
- %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
- %3 = load i16* %s5, align 2, !tbaa !3
- %conv6 = sext i16 %3 to i32
- %add = add i32 %0, %i
- %add3 = add i32 %add, %1
- %add4 = add i32 %add3, %conv
- %add7 = add i32 %add4, %conv6
- ret i32 %add7
-}
-
-; For s1, we allocate a 22-byte space and pass its address via x1.
-define i32 @caller42() #3 {
-entry:
-; CHECK: caller42
-; CHECK: str {{x[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK: str {{q[0-9]+}}, [sp]
-; CHECK: add x1, sp, #32
-; CHECK: mov x2, sp
-; Space for s1 is allocated at sp+32
-; Space for s2 is allocated at sp
-
-; FAST: caller42
-; FAST: sub sp, sp, #96
-; Space for s1 is allocated at fp-24 = sp+72
-; Space for s2 is allocated at sp+48
-; FAST: sub x[[A:[0-9]+]], fp, #24
-; FAST: add x[[A:[0-9]+]], sp, #48
-; Call memcpy with size = 24 (0x18)
-; FAST: orr {{x[0-9]+}}, xzr, #0x18
- %tmp = alloca %struct.s42, align 4
- %tmp1 = alloca %struct.s42, align 4
- %0 = bitcast %struct.s42* %tmp to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
- %1 = bitcast %struct.s42* %tmp1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
- %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
- ret i32 %call
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4
-
-declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
- %struct.s42* nocapture %s2) #2
-
-define i32 @caller42_stack() #3 {
-entry:
-; CHECK: caller42_stack
-; CHECK: mov fp, sp
-; CHECK: sub sp, sp, #96
-; CHECK: stur {{x[0-9]+}}, [fp, #-16]
-; CHECK: stur {{q[0-9]+}}, [fp, #-32]
-; CHECK: str {{x[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; Space for s1 is allocated at fp-32 = sp+64
-; Space for s2 is allocated at sp+32
-; CHECK: add x[[B:[0-9]+]], sp, #32
-; CHECK: str x[[B]], [sp, #16]
-; CHECK: sub x[[A:[0-9]+]], fp, #32
-; Address of s1 is passed on stack at sp+8
-; CHECK: str x[[A]], [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #9
-; CHECK: str w[[C]], [sp]
-
-; FAST: caller42_stack
-; Space for s1 is allocated at fp-24
-; Space for s2 is allocated at fp-48
-; FAST: sub x[[A:[0-9]+]], fp, #24
-; FAST: sub x[[B:[0-9]+]], fp, #48
-; Call memcpy with size = 24 (0x18)
-; FAST: orr {{x[0-9]+}}, xzr, #0x18
-; FAST: str {{w[0-9]+}}, [sp]
-; Address of s1 is passed on stack at sp+8
-; FAST: str {{x[0-9]+}}, [sp, #8]
-; FAST: str {{x[0-9]+}}, [sp, #16]
- %tmp = alloca %struct.s42, align 4
- %tmp1 = alloca %struct.s42, align 4
- %0 = bitcast %struct.s42* %tmp to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
- %1 = bitcast %struct.s42* %tmp1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
- %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
- i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
- ret i32 %call
-}
-
-; structs with size of 22 bytes, alignment of 16
-; passed indirectly in x1 and x2
-define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
-entry:
-; CHECK: f43
-; CHECK: ldr w[[A:[0-9]+]], [x1]
-; CHECK: ldr w[[B:[0-9]+]], [x2]
-; CHECK: add w[[C:[0-9]+]], w[[A]], w0
-; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
-; FAST: f43
-; FAST: ldr w[[A:[0-9]+]], [x1]
-; FAST: ldr w[[B:[0-9]+]], [x2]
-; FAST: add w[[C:[0-9]+]], w[[A]], w0
-; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
- %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
- %0 = load i32* %i1, align 4, !tbaa !0
- %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
- %1 = load i32* %i2, align 4, !tbaa !0
- %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
- %2 = load i16* %s, align 2, !tbaa !3
- %conv = sext i16 %2 to i32
- %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
- %3 = load i16* %s5, align 2, !tbaa !3
- %conv6 = sext i16 %3 to i32
- %add = add i32 %0, %i
- %add3 = add i32 %add, %1
- %add4 = add i32 %add3, %conv
- %add7 = add i32 %add4, %conv6
- ret i32 %add7
-}
-
-define i32 @caller43() #3 {
-entry:
-; CHECK: caller43
-; CHECK: str {{q[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; CHECK: str {{q[0-9]+}}, [sp, #16]
-; CHECK: str {{q[0-9]+}}, [sp]
-; CHECK: add x1, sp, #32
-; CHECK: mov x2, sp
-; Space for s1 is allocated at sp+32
-; Space for s2 is allocated at sp
-
-; FAST: caller43
-; FAST: mov fp, sp
-; Space for s1 is allocated at sp+32
-; Space for s2 is allocated at sp
-; FAST: add x1, sp, #32
-; FAST: mov x2, sp
-; FAST: str {{x[0-9]+}}, [sp, #32]
-; FAST: str {{x[0-9]+}}, [sp, #40]
-; FAST: str {{x[0-9]+}}, [sp, #48]
-; FAST: str {{x[0-9]+}}, [sp, #56]
-; FAST: str {{x[0-9]+}}, [sp]
-; FAST: str {{x[0-9]+}}, [sp, #8]
-; FAST: str {{x[0-9]+}}, [sp, #16]
-; FAST: str {{x[0-9]+}}, [sp, #24]
- %tmp = alloca %struct.s43, align 16
- %tmp1 = alloca %struct.s43, align 16
- %0 = bitcast %struct.s43* %tmp to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
- %1 = bitcast %struct.s43* %tmp1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
- %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
- ret i32 %call
-}
-
-declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
- i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
- %struct.s43* nocapture %s2) #2
-
-define i32 @caller43_stack() #3 {
-entry:
-; CHECK: caller43_stack
-; CHECK: mov fp, sp
-; CHECK: sub sp, sp, #96
-; CHECK: stur {{q[0-9]+}}, [fp, #-16]
-; CHECK: stur {{q[0-9]+}}, [fp, #-32]
-; CHECK: str {{q[0-9]+}}, [sp, #48]
-; CHECK: str {{q[0-9]+}}, [sp, #32]
-; Space for s1 is allocated at fp-32 = sp+64
-; Space for s2 is allocated at sp+32
-; CHECK: add x[[B:[0-9]+]], sp, #32
-; CHECK: str x[[B]], [sp, #16]
-; CHECK: sub x[[A:[0-9]+]], fp, #32
-; Address of s1 is passed on stack at sp+8
-; CHECK: str x[[A]], [sp, #8]
-; CHECK: movz w[[C:[0-9]+]], #9
-; CHECK: str w[[C]], [sp]
-
-; FAST: caller43_stack
-; FAST: sub sp, sp, #96
-; Space for s1 is allocated at fp-32 = sp+64
-; Space for s2 is allocated at sp+32
-; FAST: sub x[[A:[0-9]+]], fp, #32
-; FAST: add x[[B:[0-9]+]], sp, #32
-; FAST: stur {{x[0-9]+}}, [fp, #-32]
-; FAST: stur {{x[0-9]+}}, [fp, #-24]
-; FAST: stur {{x[0-9]+}}, [fp, #-16]
-; FAST: stur {{x[0-9]+}}, [fp, #-8]
-; FAST: str {{x[0-9]+}}, [sp, #32]
-; FAST: str {{x[0-9]+}}, [sp, #40]
-; FAST: str {{x[0-9]+}}, [sp, #48]
-; FAST: str {{x[0-9]+}}, [sp, #56]
-; FAST: str {{w[0-9]+}}, [sp]
-; Address of s1 is passed on stack at sp+8
-; FAST: str {{x[0-9]+}}, [sp, #8]
-; FAST: str {{x[0-9]+}}, [sp, #16]
- %tmp = alloca %struct.s43, align 16
- %tmp1 = alloca %struct.s43, align 16
- %0 = bitcast %struct.s43* %tmp to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
- %1 = bitcast %struct.s43* %tmp1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
- %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
- i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
- ret i32 %call
-}
-
-; rdar://13668927
-; Check that we don't split an i128.
-declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
- i32 %i6, i32 %i7, i128 %s1, i32 %i8)
-
-define i32 @i128_split() {
-entry:
-; CHECK: i128_split
-; "i128 %0" should be on stack at [sp].
-; "i32 8" should be on stack at [sp, #16].
-; CHECK: str {{w[0-9]+}}, [sp, #16]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
-; FAST: i128_split
-; FAST: mov x[[ADDR:[0-9]+]], sp
-; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
-; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]]
- %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
- %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
- i32 6, i32 7, i128 %0, i32 8) #5
- ret i32 %call
-}
-
-declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
- i32 %i6, i32 %i7, i64 %s1, i32 %i8)
-
-define i32 @i64_split() {
-entry:
-; CHECK: i64_split
-; "i64 %0" should be in register x7.
-; "i32 8" should be on stack at [sp].
-; CHECK: ldr x7, [{{x[0-9]+}}]
-; CHECK: str {{w[0-9]+}}, [sp]
-; FAST: i64_split
-; FAST: ldr x7, [{{x[0-9]+}}]
-; FAST: str {{w[0-9]+}}, [sp]
- %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
- %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
- i32 6, i32 7, i64 %0, i32 8) #5
- ret i32 %call
-}
-
-attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-attributes #4 = { nounwind }
-attributes #5 = { nobuiltin }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}
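
A hedged recap of the coercion scheme the tests above exercise, written as a standalone sketch (the struct and function names are hypothetical):

; size <  8, align  4  -> i64        (one GPR)
; size <  8, align 16  -> i128       (a GPR pair)
; size < 16, align  4  -> [2 x i64]  (two GPRs)
; size > 16            -> passed indirectly through a pointer
%struct.big = type { i32, i16, i32, i16, i32, i16 }

declare i32 @consume(i32, %struct.big*)

define i32 @pass_big(%struct.big* %s) {
  ; For the indirect case the caller normally copies into a temporary first
  ; (see caller42); forwarding an existing object is the degenerate form.
  %r = call i32 @consume(i32 1, %struct.big* %s)
  ret i32 %r
}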
diff --git a/test/CodeGen/ARM64/addp.ll b/test/CodeGen/ARM64/addp.ll
deleted file mode 100644
index 8283a00..0000000
--- a/test/CodeGen/ARM64/addp.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define double @foo(<2 x double> %a) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: faddp.2d d0, v0
-; CHECK-NEXT: ret
- %lane0.i = extractelement <2 x double> %a, i32 0
- %lane1.i = extractelement <2 x double> %a, i32 1
- %vpaddd.i = fadd double %lane0.i, %lane1.i
- ret double %vpaddd.i
-}
-
-define i64 @foo0(<2 x i64> %a) nounwind {
-; CHECK-LABEL: foo0:
-; CHECK: addp.2d d0, v0
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
- %lane0.i = extractelement <2 x i64> %a, i32 0
- %lane1.i = extractelement <2 x i64> %a, i32 1
- %vpaddd.i = add i64 %lane0.i, %lane1.i
- ret i64 %vpaddd.i
-}
-
-define float @foo1(<2 x float> %a) nounwind {
-; CHECK-LABEL: foo1:
-; CHECK: faddp.2s
-; CHECK-NEXT: ret
- %lane0.i = extractelement <2 x float> %a, i32 0
- %lane1.i = extractelement <2 x float> %a, i32 1
- %vpaddd.i = fadd float %lane0.i, %lane1.i
- ret float %vpaddd.i
-}
diff --git a/test/CodeGen/ARM64/addr-mode-folding.ll b/test/CodeGen/ARM64/addr-mode-folding.ll
deleted file mode 100644
index dff2331..0000000
--- a/test/CodeGen/ARM64/addr-mode-folding.ll
+++ /dev/null
@@ -1,171 +0,0 @@
-; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
-; <rdar://problem/13621857>
-
-@block = common global i8* null, align 8
-
-define i32 @fct(i32 %i1, i32 %i2) {
-; CHECK: @fct
-; The sign extension is used more than once, so it should not be folded into
-; the addressing mode. CodeGenPrepare does not share the sext across uses,
-; though, so this currently gets folded anyway.
-; _CHECK-NOT_: , sxtw]
-entry:
- %idxprom = sext i32 %i1 to i64
- %0 = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
- %1 = load i8* %arrayidx, align 1
- %idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
- %2 = load i8* %arrayidx2, align 1
- %cmp = icmp eq i8 %1, %2
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %cmp7 = icmp ugt i8 %1, %2
- %conv8 = zext i1 %cmp7 to i32
- br label %return
-
-if.end: ; preds = %entry
- %inc = add nsw i32 %i1, 1
- %inc9 = add nsw i32 %i2, 1
- %idxprom10 = sext i32 %inc to i64
- %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
- %3 = load i8* %arrayidx11, align 1
- %idxprom12 = sext i32 %inc9 to i64
- %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
- %4 = load i8* %arrayidx13, align 1
- %cmp16 = icmp eq i8 %3, %4
- br i1 %cmp16, label %if.end23, label %if.then18
-
-if.then18: ; preds = %if.end
- %cmp21 = icmp ugt i8 %3, %4
- %conv22 = zext i1 %cmp21 to i32
- br label %return
-
-if.end23: ; preds = %if.end
- %inc24 = add nsw i32 %i1, 2
- %inc25 = add nsw i32 %i2, 2
- %idxprom26 = sext i32 %inc24 to i64
- %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
- %5 = load i8* %arrayidx27, align 1
- %idxprom28 = sext i32 %inc25 to i64
- %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
- %6 = load i8* %arrayidx29, align 1
- %cmp32 = icmp eq i8 %5, %6
- br i1 %cmp32, label %return, label %if.then34
-
-if.then34: ; preds = %if.end23
- %cmp37 = icmp ugt i8 %5, %6
- %conv38 = zext i1 %cmp37 to i32
- br label %return
-
-return: ; preds = %if.end23, %if.then34, %if.then18, %if.then
- %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
- ret i32 %retval.0
-}
-
-define i32 @fct1(i32 %i1, i32 %i2) optsize {
-; CHECK: @fct1
-; Addressing modes are folded when optimizing for code size.
-; CHECK: , sxtw]
-; CHECK: , sxtw]
-entry:
- %idxprom = sext i32 %i1 to i64
- %0 = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
- %1 = load i8* %arrayidx, align 1
- %idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
- %2 = load i8* %arrayidx2, align 1
- %cmp = icmp eq i8 %1, %2
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %cmp7 = icmp ugt i8 %1, %2
- %conv8 = zext i1 %cmp7 to i32
- br label %return
-
-if.end: ; preds = %entry
- %inc = add nsw i32 %i1, 1
- %inc9 = add nsw i32 %i2, 1
- %idxprom10 = sext i32 %inc to i64
- %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
- %3 = load i8* %arrayidx11, align 1
- %idxprom12 = sext i32 %inc9 to i64
- %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
- %4 = load i8* %arrayidx13, align 1
- %cmp16 = icmp eq i8 %3, %4
- br i1 %cmp16, label %if.end23, label %if.then18
-
-if.then18: ; preds = %if.end
- %cmp21 = icmp ugt i8 %3, %4
- %conv22 = zext i1 %cmp21 to i32
- br label %return
-
-if.end23: ; preds = %if.end
- %inc24 = add nsw i32 %i1, 2
- %inc25 = add nsw i32 %i2, 2
- %idxprom26 = sext i32 %inc24 to i64
- %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
- %5 = load i8* %arrayidx27, align 1
- %idxprom28 = sext i32 %inc25 to i64
- %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
- %6 = load i8* %arrayidx29, align 1
- %cmp32 = icmp eq i8 %5, %6
- br i1 %cmp32, label %return, label %if.then34
-
-if.then34: ; preds = %if.end23
- %cmp37 = icmp ugt i8 %5, %6
- %conv38 = zext i1 %cmp37 to i32
- br label %return
-
-return: ; preds = %if.end23, %if.then34, %if.then18, %if.then
- %retval.0 = phi i32 [ %conv8, %if.then ], [ %conv22, %if.then18 ], [ %conv38, %if.then34 ], [ 1, %if.end23 ]
- ret i32 %retval.0
-}
-
-; CHECK: @test
-; CHECK-NOT: , uxtw #2]
-define i32 @test(i32* %array, i8 zeroext %c, i32 %arg) {
-entry:
- %conv = zext i8 %c to i32
- %add = sub i32 0, %arg
- %tobool = icmp eq i32 %conv, %add
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %idxprom = zext i8 %c to i64
- %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
- %0 = load volatile i32* %arrayidx, align 4
- %1 = load volatile i32* %arrayidx, align 4
- %add3 = add nsw i32 %1, %0
- br label %if.end
-
-if.end: ; preds = %entry, %if.then
- %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
- ret i32 %res.0
-}
-
-
-; CHECK: @test2
-; CHECK: , uxtw #2]
-; CHECK: , uxtw #2]
-define i32 @test2(i32* %array, i8 zeroext %c, i32 %arg) optsize {
-entry:
- %conv = zext i8 %c to i32
- %add = sub i32 0, %arg
- %tobool = icmp eq i32 %conv, %add
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %idxprom = zext i8 %c to i64
- %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
- %0 = load volatile i32* %arrayidx, align 4
- %1 = load volatile i32* %arrayidx, align 4
- %add3 = add nsw i32 %1, %0
- br label %if.end
-
-if.end: ; preds = %entry, %if.then
- %res.0 = phi i32 [ %add3, %if.then ], [ 0, %entry ]
- ret i32 %res.0
-}
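
For completeness alongside the multi-use cases above: when the sign extension has a single use, folding it into the load's addressing mode costs nothing extra, so an [x, w, sxtw] form would be the expected lowering at any optimization level. A sketch (hypothetical name, not a checked-in test):

define i8 @single_use_sext(i8* %base, i32 %i) {
  ; The sext has one use, so it should fold into the ldrb addressing mode,
  ; e.g. something like: ldrb {{w[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
  %idxprom = sext i32 %i to i64
  %arrayidx = getelementptr inbounds i8* %base, i64 %idxprom
  %val = load i8* %arrayidx, align 1
  ret i8 %val
}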
diff --git a/test/CodeGen/ARM64/addr-type-promotion.ll b/test/CodeGen/ARM64/addr-type-promotion.ll
deleted file mode 100644
index 0677603..0000000
--- a/test/CodeGen/ARM64/addr-type-promotion.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; RUN: llc -march arm64 < %s | FileCheck %s
-; rdar://13452552
-; ModuleID = 'reduced_test.ll'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios3.0.0"
-
-@block = common global i8* null, align 8
-
-define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
-; CHECK: fullGtU
-; CHECK: adrp [[PAGE:x[0-9]+]], _block@GOTPAGE
-; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block@GOTPAGEOFF]
-; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
-; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], x0, sxtw]
-; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], x1, sxtw]
-; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
-; CHECK-NEXT: b.ne
-; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
-; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
-; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
-; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
-; CHECK-NEXT: b.ne
-; Next BB
-; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2]
-; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2]
-; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
-entry:
- %idxprom = sext i32 %i1 to i64
- %tmp = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom
- %tmp1 = load i8* %arrayidx, align 1
- %idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1
- %tmp2 = load i8* %arrayidx2, align 1
- %cmp = icmp eq i8 %tmp1, %tmp2
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %cmp7 = icmp ugt i8 %tmp1, %tmp2
- %conv9 = zext i1 %cmp7 to i8
- br label %return
-
-if.end: ; preds = %entry
- %inc = add nsw i32 %i1, 1
- %inc10 = add nsw i32 %i2, 1
- %idxprom11 = sext i32 %inc to i64
- %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11
- %tmp3 = load i8* %arrayidx12, align 1
- %idxprom13 = sext i32 %inc10 to i64
- %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13
- %tmp4 = load i8* %arrayidx14, align 1
- %cmp17 = icmp eq i8 %tmp3, %tmp4
- br i1 %cmp17, label %if.end25, label %if.then19
-
-if.then19: ; preds = %if.end
- %cmp22 = icmp ugt i8 %tmp3, %tmp4
- %conv24 = zext i1 %cmp22 to i8
- br label %return
-
-if.end25: ; preds = %if.end
- %inc26 = add nsw i32 %i1, 2
- %inc27 = add nsw i32 %i2, 2
- %idxprom28 = sext i32 %inc26 to i64
- %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28
- %tmp5 = load i8* %arrayidx29, align 1
- %idxprom30 = sext i32 %inc27 to i64
- %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30
- %tmp6 = load i8* %arrayidx31, align 1
- %cmp34 = icmp eq i8 %tmp5, %tmp6
- br i1 %cmp34, label %return, label %if.then36
-
-if.then36: ; preds = %if.end25
- %cmp39 = icmp ugt i8 %tmp5, %tmp6
- %conv41 = zext i1 %cmp39 to i8
- br label %return
-
-return: ; preds = %if.then36, %if.end25, %if.then19, %if.then
- %retval.0 = phi i8 [ %conv9, %if.then ], [ %conv24, %if.then19 ], [ %conv41, %if.then36 ], [ 0, %if.end25 ]
- ret i8 %retval.0
-}
diff --git a/test/CodeGen/ARM64/addrmode.ll b/test/CodeGen/ARM64/addrmode.ll
deleted file mode 100644
index e131237..0000000
--- a/test/CodeGen/ARM64/addrmode.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
-; rdar://10232252
-
-@object = external hidden global i64, section "__DATA, __objc_ivar", align 8
-
-; base + offset (imm9)
-; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
-; CHECK: ret
-define void @t1() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 1
- %tmp = load volatile i64* %incdec.ptr, align 8
- ret void
-}
-
-; base + offset (> imm9)
-; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
-; CHECK: ret
-define void @t2() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 -33
- %tmp = load volatile i64* %incdec.ptr, align 8
- ret void
-}
-
-; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
-; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
-; CHECK: ret
-define void @t3() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 4095
- %tmp = load volatile i64* %incdec.ptr, align 8
- ret void
-}
-
-; base + unsigned offset (> imm12 * size of type in bytes)
-; CHECK: @t4
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #32768
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
-; CHECK: ret
-define void @t4() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
- %tmp = load volatile i64* %incdec.ptr, align 8
- ret void
-}
-
-; base + reg
-; CHECK: @t5
-; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3]
-; CHECK: ret
-define void @t5(i64 %a) {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 %a
- %tmp = load volatile i64* %incdec.ptr, align 8
- ret void
-}
-
-; base + reg + imm
-; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
-; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #32768
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
-; CHECK: ret
-define void @t6(i64 %a) {
- %tmp1 = getelementptr inbounds i64* @object, i64 %a
- %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096
- %tmp = load volatile i64* %incdec.ptr, align 8
- ret void
-}
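
The offset boundaries above fall out of the encodings: the signed 9-bit form covers [-256, 255] bytes, while the unsigned 12-bit form is scaled by the access size, covering [0, 4095] * sizeof(type). For the i64 loads above that is 4095 * 8 = 32760 bytes, which is why index 4095 folds and 4096 does not. A sketch of the same boundary for a 4-byte access (hypothetical name):

define void @t3_i32(i32* %p) {
  ; 4095 * 4 = 16380 still fits the scaled unsigned-offset form, so this
  ; should lower to a single ldr with an immediate offset.
  %incdec.ptr = getelementptr inbounds i32* %p, i64 4095
  %tmp = load volatile i32* %incdec.ptr, align 4
  ret void
}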
diff --git a/test/CodeGen/ARM64/arith-saturating.ll b/test/CodeGen/ARM64/arith-saturating.ll
deleted file mode 100644
index 437ebb8..0000000
--- a/test/CodeGen/ARM64/arith-saturating.ll
+++ /dev/null
@@ -1,153 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qadds:
-; CHECK: sqadd s0, s0, s1
- %vecext = extractelement <4 x i32> %b, i32 0
- %vecext1 = extractelement <4 x i32> %c, i32 0
- %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
- ret i32 %vqadd.i
-}
-
-define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qaddd:
-; CHECK: sqadd d0, d0, d1
- %vecext = extractelement <2 x i64> %b, i32 0
- %vecext1 = extractelement <2 x i64> %c, i32 0
- %vqadd.i = tail call i64 @llvm.arm64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
- ret i64 %vqadd.i
-}
-
-define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqadds:
-; CHECK: uqadd s0, s0, s1
- %vecext = extractelement <4 x i32> %b, i32 0
- %vecext1 = extractelement <4 x i32> %c, i32 0
- %vqadd.i = tail call i32 @llvm.arm64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
- ret i32 %vqadd.i
-}
-
-define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqaddd:
-; CHECK: uqadd d0, d0, d1
- %vecext = extractelement <2 x i64> %b, i32 0
- %vecext1 = extractelement <2 x i64> %c, i32 0
- %vqadd.i = tail call i64 @llvm.arm64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
- ret i64 %vqadd.i
-}
-
-declare i64 @llvm.arm64.neon.uqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.uqadd.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone
-
-define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qsubs:
-; CHECK: sqsub s0, s0, s1
- %vecext = extractelement <4 x i32> %b, i32 0
- %vecext1 = extractelement <4 x i32> %c, i32 0
- %vqsub.i = tail call i32 @llvm.arm64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
- ret i32 %vqsub.i
-}
-
-define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: qsubd:
-; CHECK: sqsub d0, d0, d1
- %vecext = extractelement <2 x i64> %b, i32 0
- %vecext1 = extractelement <2 x i64> %c, i32 0
- %vqsub.i = tail call i64 @llvm.arm64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
- ret i64 %vqsub.i
-}
-
-define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqsubs:
-; CHECK: uqsub s0, s0, s1
- %vecext = extractelement <4 x i32> %b, i32 0
- %vecext1 = extractelement <4 x i32> %c, i32 0
- %vqsub.i = tail call i32 @llvm.arm64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
- ret i32 %vqsub.i
-}
-
-define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: uqsubd:
-; CHECK: uqsub d0, d0, d1
- %vecext = extractelement <2 x i64> %b, i32 0
- %vecext1 = extractelement <2 x i64> %c, i32 0
- %vqsub.i = tail call i64 @llvm.arm64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
- ret i64 %vqsub.i
-}
-
-declare i64 @llvm.arm64.neon.uqsub.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.uqsub.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32) nounwind readnone
-
-define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
-; CHECK-LABEL: qabss:
-; CHECK: sqabs s0, s0
-; CHECK: ret
- %vecext = extractelement <4 x i32> %b, i32 0
- %vqabs.i = tail call i32 @llvm.arm64.neon.sqabs.i32(i32 %vecext) nounwind
- ret i32 %vqabs.i
-}
-
-define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
-; CHECK-LABEL: qabsd:
-; CHECK: sqabs d0, d0
-; CHECK: ret
- %vecext = extractelement <2 x i64> %b, i32 0
- %vqabs.i = tail call i64 @llvm.arm64.neon.sqabs.i64(i64 %vecext) nounwind
- ret i64 %vqabs.i
-}
-
-define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
-; CHECK-LABEL: qnegs:
-; CHECK: sqneg s0, s0
-; CHECK: ret
- %vecext = extractelement <4 x i32> %b, i32 0
- %vqneg.i = tail call i32 @llvm.arm64.neon.sqneg.i32(i32 %vecext) nounwind
- ret i32 %vqneg.i
-}
-
-define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
-; CHECK-LABEL: qnegd:
-; CHECK: sqneg d0, d0
-; CHECK: ret
- %vecext = extractelement <2 x i64> %b, i32 0
- %vqneg.i = tail call i64 @llvm.arm64.neon.sqneg.i64(i64 %vecext) nounwind
- ret i64 %vqneg.i
-}
-
-declare i64 @llvm.arm64.neon.sqneg.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqneg.i32(i32) nounwind readnone
-declare i64 @llvm.arm64.neon.sqabs.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.sqabs.i32(i32) nounwind readnone
-
-
-define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vqmovund:
-; CHECK: sqxtun s0, d0
- %vecext = extractelement <2 x i64> %b, i32 0
- %vqmovun.i = tail call i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
- ret i32 %vqmovun.i
-}
-
-define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vqmovnd_s:
-; CHECK: sqxtn s0, d0
- %vecext = extractelement <2 x i64> %b, i32 0
- %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
- ret i32 %vqmovn.i
-}
-
-define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
-; CHECK-LABEL: vqmovnd_u:
-; CHECK: uqxtn s0, d0
- %vecext = extractelement <2 x i64> %b, i32 0
- %vqmovn.i = tail call i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
- ret i32 %vqmovn.i
-}
-
-declare i32 @llvm.arm64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
-declare i32 @llvm.arm64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone
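
The same intrinsics also take plain scalar operands with no vector extract. Since the inputs then start in general-purpose registers, the lowering would presumably bracket the sqadd with fmov moves into and out of the s registers; a sketch (hypothetical name, codegen expectation unverified):

declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32) nounwind readnone

define i32 @qadds_scalar(i32 %a, i32 %b) nounwind readnone {
  %vqadd.i = tail call i32 @llvm.arm64.neon.sqadd.i32(i32 %a, i32 %b) nounwind
  ret i32 %vqadd.i
}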
diff --git a/test/CodeGen/ARM64/arith.ll b/test/CodeGen/ARM64/arith.ll
deleted file mode 100644
index b6ff0da..0000000
--- a/test/CodeGen/ARM64/arith.ll
+++ /dev/null
@@ -1,262 +0,0 @@
-; RUN: llc < %s -march=arm64 -asm-verbose=false | FileCheck %s
-
-define i32 @t1(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: add w0, w1, w0
-; CHECK: ret
- %add = add i32 %b, %a
- ret i32 %add
-}
-
-define i32 @t2(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: udiv w0, w0, w1
-; CHECK: ret
- %udiv = udiv i32 %a, %b
- ret i32 %udiv
-}
-
-define i64 @t3(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t3:
-; CHECK: udiv x0, x0, x1
-; CHECK: ret
- %udiv = udiv i64 %a, %b
- ret i64 %udiv
-}
-
-define i32 @t4(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t4:
-; CHECK: sdiv w0, w0, w1
-; CHECK: ret
- %sdiv = sdiv i32 %a, %b
- ret i32 %sdiv
-}
-
-define i64 @t5(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t5:
-; CHECK: sdiv x0, x0, x1
-; CHECK: ret
- %sdiv = sdiv i64 %a, %b
- ret i64 %sdiv
-}
-
-define i32 @t6(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t6:
-; CHECK: lslv w0, w0, w1
-; CHECK: ret
- %shl = shl i32 %a, %b
- ret i32 %shl
-}
-
-define i64 @t7(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t7:
-; CHECK: lslv x0, x0, x1
-; CHECK: ret
- %shl = shl i64 %a, %b
- ret i64 %shl
-}
-
-define i32 @t8(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t8:
-; CHECK: lsrv w0, w0, w1
-; CHECK: ret
- %lshr = lshr i32 %a, %b
- ret i32 %lshr
-}
-
-define i64 @t9(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t9:
-; CHECK: lsrv x0, x0, x1
-; CHECK: ret
- %lshr = lshr i64 %a, %b
- ret i64 %lshr
-}
-
-define i32 @t10(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t10:
-; CHECK: asrv w0, w0, w1
-; CHECK: ret
- %ashr = ashr i32 %a, %b
- ret i32 %ashr
-}
-
-define i64 @t11(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t11:
-; CHECK: asrv x0, x0, x1
-; CHECK: ret
- %ashr = ashr i64 %a, %b
- ret i64 %ashr
-}
-
-define i32 @t12(i16 %a, i32 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t12:
-; CHECK: add w0, w1, w0, sxth
-; CHECK: ret
- %c = sext i16 %a to i32
- %e = add i32 %x, %c
- ret i32 %e
-}
-
-define i32 @t13(i16 %a, i32 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t13:
-; CHECK: add w0, w1, w0, sxth #2
-; CHECK: ret
- %c = sext i16 %a to i32
- %d = shl i32 %c, 2
- %e = add i32 %x, %d
- ret i32 %e
-}
-
-define i64 @t14(i16 %a, i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t14:
-; CHECK: add x0, x1, w0, uxth #3
-; CHECK: ret
- %c = zext i16 %a to i64
- %d = shl i64 %c, 3
- %e = add i64 %x, %d
- ret i64 %e
-}
-
-; rdar://9160598
-define i64 @t15(i64 %a, i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t15:
-; CHECK: add x0, x1, w0, uxtw
-; CHECK: ret
- %b = and i64 %a, 4294967295
- %c = add i64 %x, %b
- ret i64 %c
-}
-
-define i64 @t16(i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t16:
-; CHECK: lsl x0, x0, #1
-; CHECK: ret
- %a = shl i64 %x, 1
- ret i64 %a
-}
-
-; rdar://9166974
-define i64 @t17(i16 %a, i64 %x) nounwind ssp {
-entry:
-; CHECK-LABEL: t17:
-; CHECK: sxth [[REG:x[0-9]+]], x0
-; CHECK: sub x0, xzr, [[REG]], lsl #32
-; CHECK: ret
- %tmp16 = sext i16 %a to i64
- %tmp17 = mul i64 %tmp16, -4294967296
- ret i64 %tmp17
-}
-
-define i32 @t18(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t18:
-; CHECK: sdiv w0, w0, w1
-; CHECK: ret
- %sdiv = call i32 @llvm.arm64.sdiv.i32(i32 %a, i32 %b)
- ret i32 %sdiv
-}
-
-define i64 @t19(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t19:
-; CHECK: sdiv x0, x0, x1
-; CHECK: ret
- %sdiv = call i64 @llvm.arm64.sdiv.i64(i64 %a, i64 %b)
- ret i64 %sdiv
-}
-
-define i32 @t20(i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t20:
-; CHECK: udiv w0, w0, w1
-; CHECK: ret
- %udiv = call i32 @llvm.arm64.udiv.i32(i32 %a, i32 %b)
- ret i32 %udiv
-}
-
-define i64 @t21(i64 %a, i64 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: t21:
-; CHECK: udiv x0, x0, x1
-; CHECK: ret
- %udiv = call i64 @llvm.arm64.udiv.i64(i64 %a, i64 %b)
- ret i64 %udiv
-}
-
-declare i32 @llvm.arm64.sdiv.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.sdiv.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.udiv.i32(i32, i32) nounwind readnone
-declare i64 @llvm.arm64.udiv.i64(i64, i64) nounwind readnone
-
-; 32-bit not.
-define i32 @inv_32(i32 %x) nounwind ssp {
-entry:
-; CHECK: inv_32
-; CHECK: mvn w0, w0
-; CHECK: ret
- %inv = xor i32 %x, -1
- ret i32 %inv
-}
-
-; 64-bit not.
-define i64 @inv_64(i64 %x) nounwind ssp {
-entry:
-; CHECK: inv_64
-; CHECK: mvn x0, x0
-; CHECK: ret
- %inv = xor i64 %x, -1
- ret i64 %inv
-}
-
-; Multiplying by a power of two plus or minus one is better done with a shift
-; plus an add/sub than with the madd/msub instructions: the latter take 4+
-; cycles, while the former takes two cycles in total even for the
-; two-instruction subtract sequence.
-define i32 @f0(i32 %a) nounwind readnone ssp {
-; CHECK-LABEL: f0:
-; CHECK-NEXT: add w0, w0, w0, lsl #3
-; CHECK-NEXT: ret
- %res = mul i32 %a, 9
- ret i32 %res
-}
-
-define i64 @f1(i64 %a) nounwind readnone ssp {
-; CHECK-LABEL: f1:
-; CHECK-NEXT: lsl x8, x0, #4
-; CHECK-NEXT: sub x0, x8, x0
-; CHECK-NEXT: ret
- %res = mul i64 %a, 15
- ret i64 %res
-}
-
-define i32 @f2(i32 %a) nounwind readnone ssp {
-; CHECK-LABEL: f2:
-; CHECK-NEXT: lsl w8, w0, #3
-; CHECK-NEXT: sub w0, w8, w0
-; CHECK-NEXT: ret
- %res = mul nsw i32 %a, 7
- ret i32 %res
-}
-
-define i64 @f3(i64 %a) nounwind readnone ssp {
-; CHECK-LABEL: f3:
-; CHECK-NEXT: add x0, x0, x0, lsl #4
-; CHECK-NEXT: ret
- %res = mul nsw i64 %a, 17
- ret i64 %res
-}
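
Extending the f0-f3 pattern above: 5 = 4 + 1 is also a power of two plus one, so a multiply by 5 should come out as a single shifted add, e.g. add w0, w0, w0, lsl #2, rather than a mov plus madd. A sketch (hypothetical name, not a checked-in test):

define i32 @mul_by_5(i32 %a) nounwind readnone ssp {
  %res = mul i32 %a, 5
  ret i32 %res
}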
diff --git a/test/CodeGen/ARM64/atomic-128.ll b/test/CodeGen/ARM64/atomic-128.ll
deleted file mode 100644
index a0039a3..0000000
--- a/test/CodeGen/ARM64/atomic-128.ll
+++ /dev/null
@@ -1,213 +0,0 @@
-; RUN: llc < %s -march=arm64 -mtriple=arm64-linux-gnu -verify-machineinstrs | FileCheck %s
-
-@var = global i128 0
-
-define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
-; CHECK-LABEL: val_compare_and_swap:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x0]
-; CHECK: cmp [[RESULTLO]], x2
-; CHECK: sbc xzr, [[RESULTHI]], x3
-; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-; CHECK: [[LABEL2]]:
- %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
- ret i128 %val
-}
-
-define void @fetch_and_nand(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_nand:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: bic [[SCRATCH_REGLO:x[0-9]+]], x2, [[DEST_REGLO]]
-; CHECK: bic [[SCRATCH_REGHI:x[0-9]+]], x3, [[DEST_REGHI]]
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw nand i128* %p, i128 %bits release
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_or(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_or:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: orr [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: orr [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw or i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_add(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_add:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: adds [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: adc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw add i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_sub(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_sub:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: subs [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: sbc [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw sub i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_min(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_min:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp [[DEST_REGLO]], x2
-; CHECK: sbc xzr, [[DEST_REGHI]], x3
-; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, lt
-; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, lt
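-; After the borrow-propagating compare, the csel pair keeps the smaller
-; (signed) 128-bit value; max/umin/umax below use gt/cc/hi the same way.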
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw min i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_max(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_max:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp [[DEST_REGLO]], x2
-; CHECK: sbc xzr, [[DEST_REGHI]], x3
-; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, gt
-; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, gt
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw max i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_umin(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_umin:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp [[DEST_REGLO]], x2
-; CHECK: sbc xzr, [[DEST_REGHI]], x3
-; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, cc
-; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, cc
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw umin i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define void @fetch_and_umax(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_umax:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: cmp [[DEST_REGLO]], x2
-; CHECK: sbc xzr, [[DEST_REGHI]], x3
-; CHECK: csel [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, hi
-; CHECK: csel [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, hi
-; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
-; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
-
-; CHECK: str [[DEST_REGHI]]
-; CHECK: str [[DEST_REGLO]]
- %val = atomicrmw umax i128* %p, i128 %bits seq_cst
- store i128 %val, i128* @var, align 16
- ret void
-}
-
-define i128 @atomic_load_seq_cst(i128* %p) {
-; CHECK-LABEL: atomic_load_seq_cst:
-; CHECK-NOT: dmb
-; CHECK: ldaxp
-; CHECK-NOT: dmb
- %r = load atomic i128* %p seq_cst, align 16
- ret i128 %r
-}
-
-define i128 @atomic_load_relaxed(i128* %p) {
-; CHECK-LABEL: atomic_load_relaxed:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
-; CHECK: orr [[SAMELO:x[0-9]+]], [[LO]], xzr
-; CHECK: orr [[SAMEHI:x[0-9]+]], [[HI]], xzr
-; CHECK: stxp [[SUCCESS:w[0-9]+]], [[SAMELO]], [[SAMEHI]], [x0]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
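-; Even a monotonic 128-bit load is lowered to an ldxp/stxp loop, since a
-; plain ldp would not be single-copy atomic.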
- %r = load atomic i128* %p monotonic, align 16
- ret i128 %r
-}
-
-
-define void @atomic_store_seq_cst(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_seq_cst:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp xzr, xzr, [x2]
-; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
- store atomic i128 %in, i128* %p seq_cst, align 16
- ret void
-}
-
-define void @atomic_store_release(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_release:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp xzr, xzr, [x2]
-; CHECK: stlxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
- store atomic i128 %in, i128* %p release, align 16
- ret void
-}
-
-define void @atomic_store_relaxed(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_relaxed:
-; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxp xzr, xzr, [x2]
-; CHECK: stxp [[SUCCESS:w[0-9]+]], x0, x1, [x2]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
-; CHECK-NOT: dmb
- store atomic i128 %in, i128* %p unordered, align 16
- ret void
-}
diff --git a/test/CodeGen/ARM64/atomic.ll b/test/CodeGen/ARM64/atomic.ll
deleted file mode 100644
index cf8cf7d..0000000
--- a/test/CodeGen/ARM64/atomic.ll
+++ /dev/null
@@ -1,343 +0,0 @@
-; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
-
-define i32 @val_compare_and_swap(i32* %p) {
-; CHECK-LABEL: val_compare_and_swap:
-; CHECK: orr [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
-; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
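-; orr against wzr is the mov alias used to materialize these small logical
-; immediates.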
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[RESULT:w[0-9]+]], [x0]
-; CHECK: cmp [[RESULT]], [[OLDVAL_REG]]
-; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: [[LABEL2]]:
- %val = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
- ret i32 %val
-}
-
-define i64 @val_compare_and_swap_64(i64* %p) {
-; CHECK-LABEL: val_compare_and_swap_64:
-; CHECK: orr [[NEWVAL_REG:x[0-9]+]], xzr, #0x4
-; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr [[RESULT:x[0-9]+]], [x0]
-; CHECK: cmp [[RESULT]], [[OLDVAL_REG]]
-; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NOT: stxr [[NEWVAL_REG]], [[NEWVAL_REG]]
-; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: [[LABEL2]]:
- %val = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
- ret i64 %val
-}
-
-define i32 @fetch_and_nand(i32* %p) {
-; CHECK-LABEL: fetch_and_nand:
-; CHECK: orr [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: bic [[SCRATCH2_REG:w[0-9]+]], [[OLDVAL_REG]], w[[DEST_REG]]
-; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
-; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov x0, x[[DEST_REG]]
- %val = atomicrmw nand i32* %p, i32 7 release
- ret i32 %val
-}
-
-define i64 @fetch_and_nand_64(i64* %p) {
-; CHECK-LABEL: fetch_and_nand_64:
-; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[DEST_REG:x[0-9]+]], [x0]
-; CHECK: bic [[SCRATCH2_REG:x[0-9]+]], [[OLDVAL_REG]], [[DEST_REG]]
-; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov x0, [[DEST_REG]]
- %val = atomicrmw nand i64* %p, i64 7 acq_rel
- ret i64 %val
-}
-
-define i32 @fetch_and_or(i32* %p) {
-; CHECK-LABEL: fetch_and_or:
-; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #5
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
-; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
-; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov x0, x[[DEST_REG]]
- %val = atomicrmw or i32* %p, i32 5 seq_cst
- ret i32 %val
-}
-
-define i64 @fetch_and_or_64(i64* %p) {
-; CHECK: fetch_and_or_64:
-; CHECK: orr [[OLDVAL_REG:x[0-9]+]], xzr, #0x7
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x0]
-; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], [[OLDVAL_REG]]
-; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov x0, [[DEST_REG]]
- %val = atomicrmw or i64* %p, i64 7 monotonic
- ret i64 %val
-}
-
-define void @acquire_fence() {
- fence acquire
- ret void
- ; CHECK-LABEL: acquire_fence:
- ; CHECK: dmb ishld
-}
-
-define void @release_fence() {
- fence release
- ret void
- ; CHECK-LABEL: release_fence:
- ; CHECK: dmb ish{{$}}
-}
-
-define void @seq_cst_fence() {
- fence seq_cst
- ret void
- ; CHECK-LABEL: seq_cst_fence:
- ; CHECK: dmb ish{{$}}
-}
-
-define i32 @atomic_load(i32* %p) {
- %r = load atomic i32* %p seq_cst, align 4
- ret i32 %r
- ; CHECK-LABEL: atomic_load:
- ; CHECK: ldar
-}
-
-define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_8:
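-; The four loads below exercise the unsigned-immediate, register-offset,
-; unscaled (ldurb), and add-materialized addressing modes in turn.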
- %ptr_unsigned = getelementptr i8* %p, i32 4095
- %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1
-; CHECK: ldrb {{w[0-9]+}}, [x0, #4095]
-
- %ptr_regoff = getelementptr i8* %p, i32 %off32
- %val_regoff = load atomic i8* %ptr_regoff unordered, align 1
- %tot1 = add i8 %val_unsigned, %val_regoff
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: ldrb {{w[0-9]+}}, [x0, x1, sxtw]
-
- %ptr_unscaled = getelementptr i8* %p, i32 -256
- %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1
- %tot2 = add i8 %tot1, %val_unscaled
-; CHECK: ldurb {{w[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
- %val_random = load atomic i8* %ptr_random unordered, align 1
- %tot3 = add i8 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]]
-
- ret i8 %tot3
-}
-
-define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_16:
- %ptr_unsigned = getelementptr i16* %p, i32 4095
- %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2
-; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
-
- %ptr_regoff = getelementptr i16* %p, i32 %off32
- %val_regoff = load atomic i16* %ptr_regoff unordered, align 2
- %tot1 = add i16 %val_unsigned, %val_regoff
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: ldrh {{w[0-9]+}}, [x0, x1, sxtw #1]
-
- %ptr_unscaled = getelementptr i16* %p, i32 -128
- %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2
- %tot2 = add i16 %tot1, %val_unscaled
-; CHECK: ldurh {{w[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
- %val_random = load atomic i16* %ptr_random unordered, align 2
- %tot3 = add i16 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]]
-
- ret i16 %tot3
-}
-
-define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_32:
- %ptr_unsigned = getelementptr i32* %p, i32 4095
- %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4
-; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
-
- %ptr_regoff = getelementptr i32* %p, i32 %off32
- %val_regoff = load atomic i32* %ptr_regoff unordered, align 4
- %tot1 = add i32 %val_unsigned, %val_regoff
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: ldr {{w[0-9]+}}, [x0, x1, sxtw #2]
-
- %ptr_unscaled = getelementptr i32* %p, i32 -64
- %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4
- %tot2 = add i32 %tot1, %val_unscaled
-; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
- %val_random = load atomic i32* %ptr_random unordered, align 4
- %tot3 = add i32 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]]
-
- ret i32 %tot3
-}
-
-define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
-; CHECK-LABEL: atomic_load_relaxed_64:
- %ptr_unsigned = getelementptr i64* %p, i32 4095
- %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8
-; CHECK: ldr {{x[0-9]+}}, [x0, #32760]
-
- %ptr_regoff = getelementptr i64* %p, i32 %off32
- %val_regoff = load atomic i64* %ptr_regoff unordered, align 8
- %tot1 = add i64 %val_unsigned, %val_regoff
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: ldr {{x[0-9]+}}, [x0, x1, sxtw #3]
-
- %ptr_unscaled = getelementptr i64* %p, i32 -32
- %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8
- %tot2 = add i64 %tot1, %val_unscaled
-; CHECK: ldur {{x[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
- %val_random = load atomic i64* %ptr_random unordered, align 8
- %tot3 = add i64 %tot2, %val_random
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]]
-
- ret i64 %tot3
-}
-
-
-define void @atomic_store(i32* %p) {
- store atomic i32 4, i32* %p seq_cst, align 4
- ret void
- ; CHECK-LABEL: atomic_store:
- ; CHECK: stlr
-}
-
-define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) {
-; CHECK-LABEL: atomic_store_relaxed_8:
- %ptr_unsigned = getelementptr i8* %p, i32 4095
- store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
-; CHECK: strb {{w[0-9]+}}, [x0, #4095]
-
- %ptr_regoff = getelementptr i8* %p, i32 %off32
- store atomic i8 %val, i8* %ptr_regoff unordered, align 1
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: strb {{w[0-9]+}}, [x0, x1, sxtw]
-
- %ptr_unscaled = getelementptr i8* %p, i32 -256
- store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1
-; CHECK: sturb {{w[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
- store atomic i8 %val, i8* %ptr_random unordered, align 1
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]]
-
- ret void
-}
-
-define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) {
-; CHECK-LABEL: atomic_store_relaxed_16:
- %ptr_unsigned = getelementptr i16* %p, i32 4095
- store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
-; CHECK: strh {{w[0-9]+}}, [x0, #8190]
-
- %ptr_regoff = getelementptr i16* %p, i32 %off32
- store atomic i16 %val, i16* %ptr_regoff unordered, align 2
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: strh {{w[0-9]+}}, [x0, x1, sxtw #1]
-
- %ptr_unscaled = getelementptr i16* %p, i32 -128
- store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2
-; CHECK: sturh {{w[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
- store atomic i16 %val, i16* %ptr_random unordered, align 2
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]]
-
- ret void
-}
-
-define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) {
-; CHECK-LABEL: atomic_store_relaxed_32:
- %ptr_unsigned = getelementptr i32* %p, i32 4095
- store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
-; CHECK: str {{w[0-9]+}}, [x0, #16380]
-
- %ptr_regoff = getelementptr i32* %p, i32 %off32
- store atomic i32 %val, i32* %ptr_regoff unordered, align 4
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: str {{w[0-9]+}}, [x0, x1, sxtw #2]
-
- %ptr_unscaled = getelementptr i32* %p, i32 -64
- store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4
-; CHECK: stur {{w[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
- store atomic i32 %val, i32* %ptr_random unordered, align 4
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: str {{w[0-9]+}}, [x[[ADDR]]]
-
- ret void
-}
-
-define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) {
-; CHECK-LABEL: atomic_store_relaxed_64:
- %ptr_unsigned = getelementptr i64* %p, i32 4095
- store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
-; CHECK: str {{x[0-9]+}}, [x0, #32760]
-
- %ptr_regoff = getelementptr i64* %p, i32 %off32
- store atomic i64 %val, i64* %ptr_regoff unordered, align 8
- ; FIXME: syntax is incorrect: "sxtw" should not be able to go with an x-reg.
-; CHECK: str {{x[0-9]+}}, [x0, x1, sxtw #3]
-
- %ptr_unscaled = getelementptr i64* %p, i32 -32
- store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8
-; CHECK: stur {{x[0-9]+}}, [x0, #-256]
-
- %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
- store atomic i64 %val, i64* %ptr_random unordered, align 8
-; CHECK: add x[[ADDR:[0-9]+]], x0, #1191936
-; CHECK: str {{x[0-9]+}}, [x[[ADDR]]]
-
- ret void
-}
-
-; rdar://11531169
-; rdar://11531308
-
-%"class.X::Atomic" = type { %struct.x_atomic_t }
-%struct.x_atomic_t = type { i32 }
-
-@counter = external hidden global %"class.X::Atomic", align 4
-
-define i32 @next_id() nounwind optsize ssp align 2 {
-entry:
- %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
- %add.i = add i32 %0, 1
- %tobool = icmp eq i32 %add.i, 0
- br i1 %tobool, label %if.else, label %return
-
-if.else: ; preds = %entry
- %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
- %add.i2 = add i32 %1, 1
- br label %return
-
-return: ; preds = %if.else, %entry
- %retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ]
- ret i32 %retval.0
-}
diff --git a/test/CodeGen/ARM64/big-stack.ll b/test/CodeGen/ARM64/big-stack.ll
deleted file mode 100644
index 56ca30c..0000000
--- a/test/CodeGen/ARM64/big-stack.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-target triple = "arm64-apple-macosx10"
-
-; Check that big stacks are generated correctly.
-; Currently, this is done by a sequence of sub instructions,
-; each of which can encode an immediate with a 12-bit mask,
-; optionally shifted left (by up to 12). I.e., 16773120 is the biggest value.
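-; For reference: 16773120 == 0xFFF << 12, and 33554432 == 2 * 16773120 + 8192,
-; matching the three subs below.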
-; <rdar://12513931>
-; CHECK-LABEL: foo:
-; CHECK: sub sp, sp, #16773120
-; CHECK: sub sp, sp, #16773120
-; CHECK: sub sp, sp, #8192
-define void @foo() nounwind ssp {
-entry:
- %buffer = alloca [33554432 x i8], align 1
- %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0, i64 0
- call void @doit(i8* %arraydecay) nounwind
- ret void
-}
-
-declare void @doit(i8*)
diff --git a/test/CodeGen/ARM64/bitfield-extract.ll b/test/CodeGen/ARM64/bitfield-extract.ll
deleted file mode 100644
index 96b6967..0000000
--- a/test/CodeGen/ARM64/bitfield-extract.ll
+++ /dev/null
@@ -1,406 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-%struct.X = type { i8, i8, [2 x i8] }
-%struct.Y = type { i32, i8 }
-%struct.Z = type { i8, i8, [2 x i8], i16 }
-%struct.A = type { i64, i8 }
-
-define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind optsize ssp {
-; CHECK-LABEL: foo:
-; CHECK: ubfm
-; CHECK-NOT: and
-; CHECK: ret
-
- %tmp = bitcast %struct.X* %x to i32*
- %tmp1 = load i32* %tmp, align 4
- %b = getelementptr inbounds %struct.Y* %y, i64 0, i32 1
- %bf.clear = lshr i32 %tmp1, 3
- %bf.clear.lobit = and i32 %bf.clear, 1
- %frombool = trunc i32 %bf.clear.lobit to i8
- store i8 %frombool, i8* %b, align 1
- ret void
-}
-
-define i32 @baz(i64 %cav1.coerce) nounwind {
-; CHECK-LABEL: baz:
-; CHECK: sbfm w0, w0, #0, #3
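-; sbfm #0, #3 is an sbfx of width 4: it sign-extends the low 4 bits.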
- %tmp = trunc i64 %cav1.coerce to i32
- %tmp1 = shl i32 %tmp, 28
- %bf.val.sext = ashr exact i32 %tmp1, 28
- ret i32 %bf.val.sext
-}
-
-define i32 @bar(i64 %cav1.coerce) nounwind {
-; CHECK-LABEL: bar:
-; CHECK: sbfm w0, w0, #4, #9
- %tmp = trunc i64 %cav1.coerce to i32
- %cav1.sroa.0.1.insert = shl i32 %tmp, 22
- %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26
- ret i32 %tmp1
-}
-
-define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind optsize ssp {
-; CHECK-LABEL: fct1:
-; CHECK: ubfm
-; CHECK-NOT: and
-; CHECK: ret
-
- %tmp = bitcast %struct.Z* %x to i64*
- %tmp1 = load i64* %tmp, align 4
- %b = getelementptr inbounds %struct.A* %y, i64 0, i32 0
- %bf.clear = lshr i64 %tmp1, 3
- %bf.clear.lobit = and i64 %bf.clear, 1
- store i64 %bf.clear.lobit, i64* %b, align 8
- ret void
-}
-
-define i64 @fct2(i64 %cav1.coerce) nounwind {
-; CHECK-LABEL: fct2:
-; CHECK: sbfm x0, x0, #0, #35
- %tmp = shl i64 %cav1.coerce, 28
- %bf.val.sext = ashr exact i64 %tmp, 28
- ret i64 %bf.val.sext
-}
-
-define i64 @fct3(i64 %cav1.coerce) nounwind {
-; CHECK-LABEL: fct3:
-; CHECK: sbfm x0, x0, #4, #41
- %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22
- %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26
- ret i64 %tmp1
-}
-
-define void @fct4(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct4:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], x1, #16, #39
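-; With immr=16 and imms=39, this bfm is a bfxil copying bits [39:16] of x1
-; into bits [23:0] of the destination.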
-; CHECK-NEXT: str [[REG1]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, -16777216
- %shr = lshr i64 %x, 16
- %and1 = and i64 %shr, 16777215
- %or = or i64 %and, %and1
- store i64 %or, i64* %y, align 8
- ret void
-}
-
-define void @fct5(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct5:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
-; CHECK-NEXT: str [[REG1]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, -8
- %shr = lshr i32 %x, 16
- %and1 = and i32 %shr, 7
- %or = or i32 %and, %and1
- store i32 %or, i32* %y, align 8
- ret void
-}
-
-; Check if we can still catch the bfm instruction when we drop some low bits
-define void @fct6(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct6:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
-; lsr is an alias of ubfm
-; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, -8
- %shr = lshr i32 %x, 16
- %and1 = and i32 %shr, 7
- %or = or i32 %and, %and1
- %shr1 = lshr i32 %or, 2
- store i32 %shr1, i32* %y, align 8
- ret void
-}
-
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-define void @fct7(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct7:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
-; lsl is an alias of ubfm
-; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, -8
- %shr = lshr i32 %x, 16
- %and1 = and i32 %shr, 7
- %or = or i32 %and, %and1
- %shl = shl i32 %or, 2
- store i32 %shl, i32* %y, align 8
- ret void
-}
-
-
-; Check if we can still catch the bfm instruction when we drop some low bits
-; (i64 version)
-define void @fct8(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct8:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], x1, #16, #18
-; lsr is an alias of ubfm
-; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, -8
- %shr = lshr i64 %x, 16
- %and1 = and i64 %shr, 7
- %or = or i64 %and, %and1
- %shr1 = lshr i64 %or, 2
- store i64 %shr1, i64* %y, align 8
- ret void
-}
-
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; (i64 version)
-define void @fct9(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct9:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], x1, #16, #18
-; lsl is an alias of ubfm
-; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, -8
- %shr = lshr i64 %x, 16
- %and1 = and i64 %shr, 7
- %or = or i64 %and, %and1
- %shl = shl i64 %or, 2
- store i64 %shl, i64* %y, align 8
- ret void
-}
-
-; Check if we can catch the bfm instruction when the lsb is 0 (i.e., no lshr)
-; (i32 version)
-define void @fct10(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct10:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], w1, #0, #2
-; lsl is an alias of ubfm
-; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, -8
- %and1 = and i32 %x, 7
- %or = or i32 %and, %and1
- %shl = shl i32 %or, 2
- store i32 %shl, i32* %y, align 8
- ret void
-}
-
-; Check if we can catch the bfm instruction when the lsb is 0 (i.e., no lshr)
-; (i64 version)
-define void @fct11(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct11:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], x1, #0, #2
-; lsl is an alias of ubfm
-; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, -8
- %and1 = and i64 %x, 7
- %or = or i64 %and, %and1
- %shl = shl i64 %or, 2
- store i64 %shl, i64* %y, align 8
- ret void
-}
-
-define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 {
-; CHECK-LABEL: fct12bis:
-; CHECK-NOT: and
-; CHECK: ubfm w0, w0, #11, #11
- %and.i.i = and i32 %tmp2, 2048
- %tobool.i.i = icmp ne i32 %and.i.i, 0
- ret i1 %tobool.i.i
-}
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; and some low bits
-define void @fct12(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct12:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], w1, #16, #18
-; lsr is an alias of ubfm
-; CHECK-NEXT: ubfm [[REG2:w[0-9]+]], [[REG1]], #2, #29
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, -8
- %shr = lshr i32 %x, 16
- %and1 = and i32 %shr, 7
- %or = or i32 %and, %and1
- %shl = shl i32 %or, 2
- %shr2 = lshr i32 %shl, 4
- store i32 %shr2, i32* %y, align 8
- ret void
-}
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; and some low bits
-; (i64 version)
-define void @fct13(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct13:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], x1, #16, #18
-; lsr is an alias of ubfm
-; CHECK-NEXT: ubfm [[REG2:x[0-9]+]], [[REG1]], #2, #61
-; CHECK-NEXT: str [[REG2]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, -8
- %shr = lshr i64 %x, 16
- %and1 = and i64 %shr, 7
- %or = or i64 %and, %and1
- %shl = shl i64 %or, 2
- %shr2 = lshr i64 %shl, 4
- store i64 %shr2, i64* %y, align 8
- ret void
-}
-
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; and some low bits
-define void @fct14(i32* nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct14:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], w1, #16, #23
-; lsr is an alias of ubfm
-; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #4
-; CHECK-NEXT: bfm [[REG2]], w2, #5, #7
-; lsl is an alias of ubfm
-; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
-; CHECK-NEXT: str [[REG3]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, -256
- %shr = lshr i32 %x, 16
- %and1 = and i32 %shr, 255
- %or = or i32 %and, %and1
- %shl = lshr i32 %or, 4
- %and2 = and i32 %shl, -8
- %shr1 = lshr i32 %x1, 5
- %and3 = and i32 %shr1, 7
- %or1 = or i32 %and2, %and3
- %shl1 = shl i32 %or1, 2
- store i32 %shl1, i32* %y, align 8
- ret void
-}
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; and some low bits
-; (i64 version)
-define void @fct15(i64* nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct15:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; CHECK-NEXT: bfm [[REG1]], x1, #16, #23
-; lsr is an alias of ubfm
-; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #4
-; CHECK-NEXT: bfm [[REG2]], x2, #5, #7
-; lsl is an alias of ubfm
-; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
-; CHECK-NEXT: str [[REG3]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, -256
- %shr = lshr i64 %x, 16
- %and1 = and i64 %shr, 255
- %or = or i64 %and, %and1
- %shl = lshr i64 %or, 4
- %and2 = and i64 %shl, -8
- %shr1 = lshr i64 %x1, 5
- %and3 = and i64 %shr1, 7
- %or1 = or i64 %and2, %and3
- %shl1 = shl i64 %or1, 2
- store i64 %shl1, i64* %y, align 8
- ret void
-}
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; and some low bits and a masking operation has to be kept
-define void @fct16(i32* nocapture %y, i32 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct16:
-; CHECK: ldr [[REG1:w[0-9]+]],
-; Create the constant
-; CHECK: movz [[REGCST:w[0-9]+]], #26, lsl #16
-; CHECK: movk [[REGCST]], #33120
-; Do the masking
-; CHECK: and [[REG2:w[0-9]+]], [[REG1]], [[REGCST]]
-; CHECK-NEXT: bfm [[REG2]], w1, #16, #18
-; lsr is an alias of ubfm
-; CHECK-NEXT: ubfm [[REG3:w[0-9]+]], [[REG2]], #2, #29
-; CHECK-NEXT: str [[REG3]],
-; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
- %and = and i32 %0, 1737056
- %shr = lshr i32 %x, 16
- %and1 = and i32 %shr, 7
- %or = or i32 %and, %and1
- %shl = shl i32 %or, 2
- %shr2 = lshr i32 %shl, 4
- store i32 %shr2, i32* %y, align 8
- ret void
-}
-
-
-; Check if we can still catch the bfm instruction when we drop some high bits
-; and some low bits and a masking operation has to be kept
-; (i64 version)
-define void @fct17(i64* nocapture %y, i64 %x) nounwind optsize inlinehint ssp {
-entry:
-; CHECK-LABEL: fct17:
-; CHECK: ldr [[REG1:x[0-9]+]],
-; Create the constant
-; CHECK: movz [[REGCST:x[0-9]+]], #26, lsl #16
-; CHECK: movk [[REGCST]], #33120
-; Do the masking
-; CHECK: and [[REG2:x[0-9]+]], [[REG1]], [[REGCST]]
-; CHECK-NEXT: bfm [[REG2]], x1, #16, #18
-; lsr is an alias of ubfm
-; CHECK-NEXT: ubfm [[REG3:x[0-9]+]], [[REG2]], #2, #61
-; CHECK-NEXT: str [[REG3]],
-; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
- %and = and i64 %0, 1737056
- %shr = lshr i64 %x, 16
- %and1 = and i64 %shr, 7
- %or = or i64 %and, %and1
- %shl = shl i64 %or, 2
- %shr2 = lshr i64 %shl, 4
- store i64 %shr2, i64* %y, align 8
- ret void
-}
-
-define i64 @fct18(i32 %xor72) nounwind ssp {
-; CHECK-LABEL: fct18:
-; CHECK: ubfm x0, x0, #9, #16
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %result = and i64 %conv82, 255
- ret i64 %result
-}
diff --git a/test/CodeGen/ARM64/build-vector.ll b/test/CodeGen/ARM64/build-vector.ll
deleted file mode 100644
index 1d137ae..0000000
--- a/test/CodeGen/ARM64/build-vector.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-; Check that a vector built up with only one non-zero lane is initialized
-; intelligently.
-define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind {
-; CHECK-LABEL: one_lane:
-; CHECK: dup.16b v[[REG:[0-9]+]], wzr
-; CHECK-NEXT: ins.b v[[REG]][0], w1
-; v and q are aliases, and str is preferred over st.16b when possible
-; rdar://11246289
-; CHECK: str q[[REG]], [x0]
-; CHECK: ret
- %conv = trunc i32 %skip0 to i8
- %vset_lane = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv, i32 0
- %tmp = bitcast i32* %out_int to <4 x i32>*
- %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32>
- store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16
- ret void
-}
-
-; Check that building a vector from floats doesn't insert an unnecessary
-; copy for lane zero.
-define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
-; CHECK-LABEL: foo:
-; CHECK-NOT: ins.s v0[0], v0[0]
-; CHECK: ins.s v0[1], v1[0]
-; CHECK: ins.s v0[2], v2[0]
-; CHECK: ins.s v0[3], v3[0]
-; CHECK: ret
- %1 = insertelement <4 x float> undef, float %a, i32 0
- %2 = insertelement <4 x float> %1, float %b, i32 1
- %3 = insertelement <4 x float> %2, float %c, i32 2
- %4 = insertelement <4 x float> %3, float %d, i32 3
- ret <4 x float> %4
-}
diff --git a/test/CodeGen/ARM64/cast-opt.ll b/test/CodeGen/ARM64/cast-opt.ll
deleted file mode 100644
index 3d7f257..0000000
--- a/test/CodeGen/ARM64/cast-opt.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc -O3 -march=arm64 -mtriple arm64-apple-ios5.0.0 < %s | FileCheck %s
-; <rdar://problem/15992732>
-; Zero truncation is not necessary when the values are already
-; properly extended.
-
-@block = common global i8* null, align 8
-
-define zeroext i8 @foo(i32 %i1, i32 %i2) {
-; CHECK-LABEL: foo:
-; CHECK: csinc
-; CHECK-NOT: and
-entry:
- %idxprom = sext i32 %i1 to i64
- %0 = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
- %1 = load i8* %arrayidx, align 1
- %idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
- %2 = load i8* %arrayidx2, align 1
- %cmp = icmp eq i8 %1, %2
- br i1 %cmp, label %return, label %if.then
-
-if.then: ; preds = %entry
- %cmp7 = icmp ugt i8 %1, %2
- %conv9 = zext i1 %cmp7 to i8
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i8 [ %conv9, %if.then ], [ 1, %entry ]
- ret i8 %retval.0
-}
diff --git a/test/CodeGen/ARM64/ccmp-heuristics.ll b/test/CodeGen/ARM64/ccmp-heuristics.ll
deleted file mode 100644
index 5575997..0000000
--- a/test/CodeGen/ARM64/ccmp-heuristics.ll
+++ /dev/null
@@ -1,190 +0,0 @@
-; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp | FileCheck %s
-target triple = "arm64-apple-ios7.0.0"
-
-@channelColumns = external global i64
-@channelTracks = external global i64
-@mazeRoute = external hidden unnamed_addr global i8*, align 8
-@TOP = external global i64*
-@BOT = external global i64*
-@netsAssign = external global i64*
-
-; Function from yacr2/maze.c
-; The branch at the end of %if.then is driven by %cmp5 and %cmp6.
-; Isel converts the and i1 into two branches, and arm64-ccmp should not convert
-; it back again. %cmp6 has much higher latency than %cmp5.
-; CHECK: Maze1
-; CHECK: %if.then
-; CHECK: cmp x{{[0-9]+}}, #2
-; CHECK-NEXT: b.cc
-; CHECK: %if.then
-; CHECK: cmp x{{[0-9]+}}, #2
-; CHECK-NEXT: b.cc
-define i32 @Maze1() nounwind ssp {
-entry:
- %0 = load i64* @channelColumns, align 8, !tbaa !0
- %cmp90 = icmp eq i64 %0, 0
- br i1 %cmp90, label %for.end, label %for.body
-
-for.body: ; preds = %for.inc, %entry
- %1 = phi i64 [ %0, %entry ], [ %37, %for.inc ]
- %i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ]
- %numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
- %2 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx = getelementptr inbounds i8* %2, i64 %i.092
- %3 = load i8* %arrayidx, align 1, !tbaa !1
- %tobool = icmp eq i8 %3, 0
- br i1 %tobool, label %for.inc, label %if.then
-
-if.then: ; preds = %for.body
- %4 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx1 = getelementptr inbounds i64* %4, i64 %i.092
- %5 = load i64* %arrayidx1, align 8, !tbaa !0
- %6 = load i64** @netsAssign, align 8, !tbaa !3
- %arrayidx2 = getelementptr inbounds i64* %6, i64 %5
- %7 = load i64* %arrayidx2, align 8, !tbaa !0
- %8 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx3 = getelementptr inbounds i64* %8, i64 %i.092
- %9 = load i64* %arrayidx3, align 8, !tbaa !0
- %arrayidx4 = getelementptr inbounds i64* %6, i64 %9
- %10 = load i64* %arrayidx4, align 8, !tbaa !0
- %cmp5 = icmp ugt i64 %i.092, 1
- %cmp6 = icmp ugt i64 %10, 1
- %or.cond = and i1 %cmp5, %cmp6
- br i1 %or.cond, label %land.lhs.true7, label %if.else
-
-land.lhs.true7: ; preds = %if.then
- %11 = load i64* @channelTracks, align 8, !tbaa !0
- %add = add i64 %11, 1
- %call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64 %10, i64 0, i64 %7, i32 -1, i32 -1)
- %tobool8 = icmp eq i32 %call, 0
- br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label %if.then9
-
-land.lhs.true7.if.else_crit_edge: ; preds = %land.lhs.true7
- %.pre = load i64* @channelColumns, align 8, !tbaa !0
- br label %if.else
-
-if.then9: ; preds = %land.lhs.true7
- %12 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx10 = getelementptr inbounds i8* %12, i64 %i.092
- store i8 0, i8* %arrayidx10, align 1, !tbaa !1
- %13 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx11 = getelementptr inbounds i64* %13, i64 %i.092
- %14 = load i64* %arrayidx11, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %14)
- %15 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx12 = getelementptr inbounds i64* %15, i64 %i.092
- %16 = load i64* %arrayidx12, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %16)
- br label %for.inc
-
-if.else: ; preds = %land.lhs.true7.if.else_crit_edge, %if.then
- %17 = phi i64 [ %.pre, %land.lhs.true7.if.else_crit_edge ], [ %1, %if.then ]
- %cmp13 = icmp ult i64 %i.092, %17
- %or.cond89 = and i1 %cmp13, %cmp6
- br i1 %or.cond89, label %land.lhs.true16, label %if.else24
-
-land.lhs.true16: ; preds = %if.else
- %18 = load i64* @channelTracks, align 8, !tbaa !0
- %add17 = add i64 %18, 1
- %call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64 %10, i64 0, i64 %7, i32 1, i32 -1)
- %tobool19 = icmp eq i32 %call18, 0
- br i1 %tobool19, label %if.else24, label %if.then20
-
-if.then20: ; preds = %land.lhs.true16
- %19 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx21 = getelementptr inbounds i8* %19, i64 %i.092
- store i8 0, i8* %arrayidx21, align 1, !tbaa !1
- %20 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx22 = getelementptr inbounds i64* %20, i64 %i.092
- %21 = load i64* %arrayidx22, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %21)
- %22 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx23 = getelementptr inbounds i64* %22, i64 %i.092
- %23 = load i64* %arrayidx23, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %23)
- br label %for.inc
-
-if.else24: ; preds = %land.lhs.true16, %if.else
- br i1 %cmp5, label %land.lhs.true26, label %if.else36
-
-land.lhs.true26: ; preds = %if.else24
- %24 = load i64* @channelTracks, align 8, !tbaa !0
- %cmp27 = icmp ult i64 %7, %24
- br i1 %cmp27, label %land.lhs.true28, label %if.else36
-
-land.lhs.true28: ; preds = %land.lhs.true26
- %add29 = add i64 %24, 1
- %call30 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add29, i64 %10, i32 -1, i32 1)
- %tobool31 = icmp eq i32 %call30, 0
- br i1 %tobool31, label %if.else36, label %if.then32
-
-if.then32: ; preds = %land.lhs.true28
- %25 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx33 = getelementptr inbounds i8* %25, i64 %i.092
- store i8 0, i8* %arrayidx33, align 1, !tbaa !1
- %26 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx34 = getelementptr inbounds i64* %26, i64 %i.092
- %27 = load i64* %arrayidx34, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %27)
- %28 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx35 = getelementptr inbounds i64* %28, i64 %i.092
- %29 = load i64* %arrayidx35, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %29)
- br label %for.inc
-
-if.else36: ; preds = %land.lhs.true28, %land.lhs.true26, %if.else24
- %30 = load i64* @channelColumns, align 8, !tbaa !0
- %cmp37 = icmp ult i64 %i.092, %30
- br i1 %cmp37, label %land.lhs.true38, label %if.else48
-
-land.lhs.true38: ; preds = %if.else36
- %31 = load i64* @channelTracks, align 8, !tbaa !0
- %cmp39 = icmp ult i64 %7, %31
- br i1 %cmp39, label %land.lhs.true40, label %if.else48
-
-land.lhs.true40: ; preds = %land.lhs.true38
- %add41 = add i64 %31, 1
- %call42 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 0, i64 %7, i64 %add41, i64 %10, i32 1, i32 1)
- %tobool43 = icmp eq i32 %call42, 0
- br i1 %tobool43, label %if.else48, label %if.then44
-
-if.then44: ; preds = %land.lhs.true40
- %32 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx45 = getelementptr inbounds i8* %32, i64 %i.092
- store i8 0, i8* %arrayidx45, align 1, !tbaa !1
- %33 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx46 = getelementptr inbounds i64* %33, i64 %i.092
- %34 = load i64* %arrayidx46, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %34)
- %35 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx47 = getelementptr inbounds i64* %35, i64 %i.092
- %36 = load i64* %arrayidx47, align 8, !tbaa !0
- tail call fastcc void @CleanNet(i64 %36)
- br label %for.inc
-
-if.else48: ; preds = %land.lhs.true40, %land.lhs.true38, %if.else36
- %inc = add nsw i32 %numLeft.091, 1
- br label %for.inc
-
-for.inc: ; preds = %if.else48, %if.then44, %if.then32, %if.then20, %if.then9, %for.body
- %numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091, %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [ %inc, %if.else48 ], [ %numLeft.091, %for.body ]
- %inc53 = add i64 %i.092, 1
- %37 = load i64* @channelColumns, align 8, !tbaa !0
- %cmp = icmp ugt i64 %inc53, %37
- br i1 %cmp, label %for.end, label %for.body
-
-for.end: ; preds = %for.inc, %entry
- %numLeft.0.lcssa = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
- ret i32 %numLeft.0.lcssa
-}
-
-; Materializable
-declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32) nounwind ssp
-
-; Materializable
-declare hidden fastcc void @CleanNet(i64) nounwind ssp
-
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/ARM64/ccmp.ll b/test/CodeGen/ARM64/ccmp.ll
deleted file mode 100644
index 79e6f94..0000000
--- a/test/CodeGen/ARM64/ccmp.ll
+++ /dev/null
@@ -1,289 +0,0 @@
-; RUN: llc < %s -mcpu=cyclone -verify-machineinstrs -arm64-ccmp -arm64-stress-ccmp | FileCheck %s
-target triple = "arm64-apple-ios"
-
-; CHECK: single_same
-; CHECK: cmp w0, #5
-; CHECK-NEXT: ccmp w1, #17, #4, ne
-; CHECK-NEXT: b.ne
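-; When the "ne" predicate holds, ccmp compares w1 with #17; otherwise it
-; loads NZCV with the immediate #4 (Z set), forcing the eq outcome.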
-; CHECK: %if.then
-; CHECK: bl _foo
-; CHECK: %if.end
-define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 5
- %cmp1 = icmp eq i32 %b, 17
- %or.cond = or i1 %cmp, %cmp1
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Different condition codes for the two compares.
-; CHECK: single_different
-; CHECK: cmp w0, #6
-; CHECK-NEXT: ccmp w1, #17, #0, ge
-; CHECK-NEXT: b.eq
-; CHECK: %if.then
-; CHECK: bl _foo
-; CHECK: %if.end
-define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp sle i32 %a, 5
- %cmp1 = icmp ne i32 %b, 17
- %or.cond = or i1 %cmp, %cmp1
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Second block clobbers the flags, so we can't convert (easily).
-; CHECK: single_flagclobber
-; CHECK: cmp
-; CHECK: b.eq
-; CHECK: cmp
-; CHECK: b.gt
-define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 5
- br i1 %cmp, label %if.then, label %lor.lhs.false
-
-lor.lhs.false: ; preds = %entry
- %cmp1 = icmp slt i32 %b, 7
- %mul = shl nsw i32 %b, 1
- %add = add nsw i32 %b, 1
- %cond = select i1 %cmp1, i32 %mul, i32 %add
- %cmp2 = icmp slt i32 %cond, 17
- br i1 %cmp2, label %if.then, label %if.end
-
-if.then: ; preds = %lor.lhs.false, %entry
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end: ; preds = %if.then, %lor.lhs.false
- ret i32 7
-}
-
-; Second block clobbers the flags and ends with a tbz terminator.
-; CHECK: single_flagclobber_tbz
-; CHECK: cmp
-; CHECK: b.eq
-; CHECK: cmp
-; CHECK: tbz
-define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 5
- br i1 %cmp, label %if.then, label %lor.lhs.false
-
-lor.lhs.false: ; preds = %entry
- %cmp1 = icmp slt i32 %b, 7
- %mul = shl nsw i32 %b, 1
- %add = add nsw i32 %b, 1
- %cond = select i1 %cmp1, i32 %mul, i32 %add
- %and = and i32 %cond, 8
- %cmp2 = icmp ne i32 %and, 0
- br i1 %cmp2, label %if.then, label %if.end
-
-if.then: ; preds = %lor.lhs.false, %entry
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end: ; preds = %if.then, %lor.lhs.false
- ret i32 7
-}
-
-; Speculatively execute division by zero.
-; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
-; safe to speculate.
-; CHECK: speculate_division
-; CHECK-NOT: cmp
-; CHECK: sdiv
-; CHECK: cmp
-; CHECK-NEXT: ccmp
-define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp sgt i32 %a, 0
- br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:
- %div = sdiv i32 %b, %a
- %cmp1 = icmp slt i32 %div, 17
- br i1 %cmp1, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Floating point compare.
-; CHECK: single_fcmp
-; CHECK: cmp
-; CHECK-NOT: b.
-; CHECK: fccmp {{.*}}, #8, ge
-; CHECK: b.lt
-define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
-entry:
- %cmp = icmp sgt i32 %a, 0
- br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:
- %conv = sitofp i32 %a to float
- %div = fdiv float %b, %conv
- %cmp1 = fcmp oge float %div, 1.700000e+01
- br i1 %cmp1, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Chain multiple compares.
-; CHECK: multi_different
-; CHECK: cmp
-; CHECK: ccmp
-; CHECK: ccmp
-; CHECK: b.
-define void @multi_different(i32 %a, i32 %b, i32 %c) nounwind ssp {
-entry:
- %cmp = icmp sgt i32 %a, %b
- br i1 %cmp, label %land.lhs.true, label %if.end
-
-land.lhs.true:
- %div = sdiv i32 %b, %a
- %cmp1 = icmp eq i32 %div, 5
- %cmp4 = icmp sgt i32 %div, %c
- %or.cond = and i1 %cmp1, %cmp4
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret void
-}
-
-; Convert a cbz in the head block.
-; CHECK: cbz_head
-; CHECK: cmp w0, #0
-; CHECK: ccmp
-define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 0
- %cmp1 = icmp ne i32 %b, 17
- %or.cond = or i1 %cmp, %cmp1
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Check that the immediate operand is in range. The ccmp instruction encodes a
-; smaller range of immediates than subs/adds.
-; The ccmp immediates must be in the range 0-31.
-; CHECK: immediate_range
-; CHECK-NOT: ccmp
-define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 5
- %cmp1 = icmp eq i32 %b, 32
- %or.cond = or i1 %cmp, %cmp1
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Convert a cbz in the second block.
-; CHECK: cbz_second
-; CHECK: cmp w0, #0
-; CHECK: ccmp w1, #0, #0, ne
-; CHECK: b.eq
-define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 0
- %cmp1 = icmp ne i32 %b, 0
- %or.cond = or i1 %cmp, %cmp1
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-
-; Convert a cbnz in the second block.
-; CHECK: cbnz_second
-; CHECK: cmp w0, #0
-; CHECK: ccmp w1, #0, #4, ne
-; CHECK: b.ne
-define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
-entry:
- %cmp = icmp eq i32 %a, 0
- %cmp1 = icmp eq i32 %b, 0
- %or.cond = or i1 %cmp, %cmp1
- br i1 %or.cond, label %if.then, label %if.end
-
-if.then:
- %call = tail call i32 @foo() nounwind
- br label %if.end
-
-if.end:
- ret i32 7
-}
-declare i32 @foo()
-
-%str1 = type { %str2 }
-%str2 = type { [24 x i8], i8*, i32, %str1*, i32, [4 x i8], %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, %str1*, i8*, i8, i8*, %str1*, i8* }
-
-; Test case distilled from 126.gcc.
-; The phi in sw.bb.i.i gets multiple operands for the %entry predecessor.
-; CHECK: build_modify_expr
-define void @build_modify_expr() nounwind ssp {
-entry:
- switch i32 undef, label %sw.bb.i.i [
- i32 69, label %if.end85
- i32 70, label %if.end85
- i32 71, label %if.end85
- i32 72, label %if.end85
- i32 73, label %if.end85
- i32 105, label %if.end85
- i32 106, label %if.end85
- ]
-
-if.end85:
- ret void
-
-sw.bb.i.i:
- %ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ]
- %operands.i.i = getelementptr inbounds %str1* %ref.tr.i.i, i64 0, i32 0, i32 2
- %arrayidx.i.i = bitcast i32* %operands.i.i to %str1**
- %0 = load %str1** %arrayidx.i.i, align 8
- %code1.i.i.phi.trans.insert = getelementptr inbounds %str1* %0, i64 0, i32 0, i32 0, i64 16
- br label %sw.bb.i.i
-}
diff --git a/test/CodeGen/ARM64/coalesce-ext.ll b/test/CodeGen/ARM64/coalesce-ext.ll
deleted file mode 100644
index 9e8d08e..0000000
--- a/test/CodeGen/ARM64/coalesce-ext.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; Check that the peephole optimizer knows about sext and zext instructions.
-; CHECK: test1sext
-define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
- %C = add i64 %A, %B
- ; CHECK: add x[[SUM:[0-9]+]], x0, x1
- %D = trunc i64 %C to i32
- %E = shl i64 %C, 32
- %F = ashr i64 %E, 32
- ; CHECK: sxtw x[[EXT:[0-9]+]], x[[SUM]]
- store volatile i64 %F, i64 *%P2
- ; CHECK: str x[[EXT]]
- store volatile i32 %D, i32* %P
- ; Reuse low bits of extended register, don't extend live range of SUM.
- ; CHECK: str w[[SUM]]
- ret i32 %D
-}
diff --git a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll b/test/CodeGen/ARM64/collect-loh-garbage-crash.ll
deleted file mode 100644
index 98cb625..0000000
--- a/test/CodeGen/ARM64/collect-loh-garbage-crash.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios -O3 -arm64-collect-loh -arm64-collect-loh-bb-only=true -arm64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s
-; Check that the LOH analysis does not crash when the analyzed chain
-; contains instructions that are filtered out.
-;
-; Before the fix for <rdar://problem/16041712>, these cases were removed
-; from the main container. Now, the deterministic container does not allow
-; removing arbitrary values, so we have to live with garbage values.
-; <rdar://problem/16041712>
-
-%"class.H4ISP::H4ISPDevice" = type { i32 (%"class.H4ISP::H4ISPDevice"*, i32, i8*, i8*)*, i8*, i32*, %"class.H4ISP::H4ISPCameraManager"* }
-
-%"class.H4ISP::H4ISPCameraManager" = type opaque
-
-declare i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"*)
-
-@pH4ISPDevice = hidden global %"class.H4ISP::H4ISPDevice"* null, align 8
-
-; CHECK-LABEL: _foo:
-; CHECK: ret
-; CHECK-NOT: .loh AdrpLdrGotLdr
-define void @foo() {
-entry:
- br label %if.then83
-if.then83: ; preds = %if.end81
- %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
- %call84 = call i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"* %tmp) #19
- tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"()
- %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
- tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"()
- %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
- %tmp3 = load %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
- %tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null
- br i1 %tobool.i269, label %if.then83, label %end
-end:
- ret void
-}
-
diff --git a/test/CodeGen/ARM64/collect-loh-str.ll b/test/CodeGen/ARM64/collect-loh-str.ll
deleted file mode 100644
index fc63f8b..0000000
--- a/test/CodeGen/ARM64/collect-loh-str.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s
-; Test case for <rdar://problem/15942912>.
-; AdrpAddStr cannot be used when the store uses the same
-; register for both address and value. Indeed, the related
-; optimization, if applied, may completely remove the definition or
-; at least provide a wrong one (with the offset folded
-; into the definition).
-
-%struct.anon = type { i32*, i32** }
-
-@pptp_wan_head = internal global %struct.anon zeroinitializer, align 8
-
-; CHECK-LABEL: _pptp_wan_init
-; CHECK: ret
-; CHECK-NOT: AdrpAddStr
-define i32 @pptp_wan_init() {
-entry:
- store i32* null, i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), align 8
- store i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), i32*** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 1), align 8
- ret i32 0
-}
-
-
diff --git a/test/CodeGen/ARM64/collect-loh.ll b/test/CodeGen/ARM64/collect-loh.ll
deleted file mode 100644
index 08ab062..0000000
--- a/test/CodeGen/ARM64/collect-loh.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios -O2 -arm64-collect-loh -arm64-collect-loh-bb-only=false < %s -o - | FileCheck %s
-
-@a = internal unnamed_addr global i32 0, align 4
-@b = external global i32
-
-; Function Attrs: noinline nounwind ssp
-define void @foo(i32 %t) {
-entry:
- %tmp = load i32* @a, align 4
- %add = add nsw i32 %tmp, %t
- store i32 %add, i32* @a, align 4
- ret void
-}
-
-; Function Attrs: nounwind ssp
-; Testcase for <rdar://problem/15438605>: AdrpAdrp reuse is valid only when the first adrp
-; dominates the second.
-; The first adrp comes from the loading of 'a' and the second from the loading of 'b'.
-; 'a' is loaded in if.then, 'b' in if.end4; if.then does not dominate if.end4.
-; CHECK-LABEL: _test
-; CHECK: ret
-; CHECK-NOT: .loh AdrpAdrp
-define i32 @test(i32 %t) {
-entry:
- %cmp = icmp sgt i32 %t, 5
- br i1 %cmp, label %if.then, label %if.end4
-
-if.then: ; preds = %entry
- %tmp = load i32* @a, align 4
- %add = add nsw i32 %tmp, %t
- %cmp1 = icmp sgt i32 %add, 12
- br i1 %cmp1, label %if.then2, label %if.end4
-
-if.then2: ; preds = %if.then
- tail call void @foo(i32 %add)
- %tmp1 = load i32* @a, align 4
- br label %if.end4
-
-if.end4: ; preds = %if.then2, %if.then, %entry
- %t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t, %entry ]
- %tmp2 = load i32* @b, align 4
- %add5 = add nsw i32 %tmp2, %t.addr.0
- tail call void @foo(i32 %add5)
- %tmp3 = load i32* @b, align 4
- %add6 = add nsw i32 %tmp3, %t.addr.0
- ret i32 %add6
-}
diff --git a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S b/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S
deleted file mode 100644
index 250732d..0000000
--- a/test/CodeGen/ARM64/compact-unwind-unhandled-cfi.S
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o /dev/null %s
-
- .text
- .globl _foo
- .cfi_startproc
-_foo:
- stp x29, x30, [sp, #-16]!
- .cfi_adjust_cfa_offset 16
-
- ldp x29, x30, [sp], #16
- .cfi_adjust_cfa_offset -16
- .cfi_restore x29
- .cfi_restore x30
-
- ret
-
- .cfi_endproc
diff --git a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll b/test/CodeGen/ARM64/convert-v2f64-v2i32.ll
deleted file mode 100644
index 1a07c98..0000000
--- a/test/CodeGen/ARM64/convert-v2f64-v2i32.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-; CHECK: fptosi_1
-; CHECK: fcvtzs.2d
-; CHECK: xtn.2s
-; CHECK: ret
-define void @fptosi_1() nounwind noinline ssp {
-entry:
- %0 = fptosi <2 x double> undef to <2 x i32>
- store <2 x i32> %0, <2 x i32>* undef, align 8
- ret void
-}
-
-; CHECK: fptoui_1
-; CHECK: fcvtzu.2d
-; CHECK: xtn.2s
-; CHECK: ret
-define void @fptoui_1() nounwind noinline ssp {
-entry:
- %0 = fptoui <2 x double> undef to <2 x i32>
- store <2 x i32> %0, <2 x i32>* undef, align 8
- ret void
-}
-
diff --git a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll b/test/CodeGen/ARM64/convert-v2i32-v2f64.ll
deleted file mode 100644
index 63129a4..0000000
--- a/test/CodeGen/ARM64/convert-v2i32-v2f64.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: f1:
-; CHECK: sshll.2d v0, v0, #0
-; CHECK-NEXT: scvtf.2d v0, v0
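-; The i32 lanes are first widened to i64 (sshll #0), then converted in
-; 64-bit lanes.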
-; CHECK-NEXT: ret
- %conv = sitofp <2 x i32> %v to <2 x double>
- ret <2 x double> %conv
-}
-define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
-; CHECK-LABEL: f2:
-; CHECK: ushll.2d v0, v0, #0
-; CHECK-NEXT: ucvtf.2d v0, v0
-; CHECK-NEXT: ret
- %conv = uitofp <2 x i32> %v to <2 x double>
- ret <2 x double> %conv
-}
-
-; CHECK: autogen_SD19655
-; CHECK: scvtf
-; CHECK: ret
-define void @autogen_SD19655() {
- %T = load <2 x i64>* undef
- %F = sitofp <2 x i64> undef to <2 x float>
- store <2 x float> %F, <2 x float>* undef
- ret void
-}
-
diff --git a/test/CodeGen/ARM64/copy-tuple.ll b/test/CodeGen/ARM64/copy-tuple.ll
deleted file mode 100644
index 6325c3f..0000000
--- a/test/CodeGen/ARM64/copy-tuple.ll
+++ /dev/null
@@ -1,146 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
-
-; The main purpose of this test is to find out whether copyPhysReg can deal with
-; the memmove-like situation arising in tuples, where an early copy can clobber
-; the value needed by a later one if the tuples overlap.
-
-; We use dummy inline asm to force LLVM to generate a COPY between the registers
-; we want by clobbering all the others.
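-; Each expected COPY appears below as "orr.8b vD, vS" (the NEON mov alias),
-; with the moves ordered so no source is clobbered before it is read.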
-
-define void @test_D1D2_from_D0D1(i8* %addr) #0 {
-; CHECK-LABEL: test_D1D2_from_D0D1:
-; CHECK: orr.8b v2, v1
-; CHECK: orr.8b v1, v0
-entry:
- %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
- %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
- %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
- %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
- tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
-
- tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
- ret void
-}
-
-define void @test_D0D1_from_D1D2(i8* %addr) #0 {
-; CHECK-LABEL: test_D0D1_from_D1D2:
-; CHECK: orr.8b v0, v1
-; CHECK: orr.8b v1, v2
-entry:
- %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
- %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
- %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
- %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
- tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
-
- tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
- ret void
-}
-
-define void @test_D0D1_from_D31D0(i8* %addr) #0 {
-; CHECK-LABEL: test_D0D1_from_D31D0:
-; CHECK: orr.8b v1, v0
-; CHECK: orr.8b v0, v31
-entry:
- %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
- %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
- %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
- %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
- tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
-
- tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
- ret void
-}
-
-define void @test_D31D0_from_D0D1(i8* %addr) #0 {
-; CHECK-LABEL: test_D31D0_from_D0D1:
-; CHECK: orr.8b v31, v0
-; CHECK: orr.8b v0, v1
-entry:
- %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
- %vec = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
- %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
- %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
- tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
-
- tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
- tail call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr)
- ret void
-}
-
-define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 {
-; CHECK-LABEL: test_D2D3D4_from_D0D1D2:
-; CHECK: orr.8b v4, v2
-; CHECK: orr.8b v3, v1
-; CHECK: orr.8b v2, v0
-entry:
- %addr_v8i8 = bitcast i8* %addr to <8 x i8>*
- %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8)
- %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0
- %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1
- %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2
-
- tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr)
-
- tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr)
- ret void
-}
-
-define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 {
-; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3:
-; CHECK: orr.16b v0, v1
-; CHECK: orr.16b v1, v2
-; CHECK: orr.16b v2, v3
-entry:
- %addr_v16i8 = bitcast i8* %addr to <16 x i8>*
- %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8)
- %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
- %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
- %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
- tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr)
-
- tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr)
- ret void
-}
-
-define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 {
-; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1:
-; CHECK: orr.16b v4, v1
-; CHECK: orr.16b v3, v0
-; CHECK: orr.16b v2, v31
-; CHECK: orr.16b v1, v30
- %addr_v16i8 = bitcast i8* %addr to <16 x i8>*
- %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8)
- %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
- %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
- %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
- %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3
-
- tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"()
- tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr)
-
- tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
- tail call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr)
- ret void
-}
-
-declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0v8i8(<8 x i8>*)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0v8i8(<8 x i8>*)
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0v16i8(<16 x i8>*)
-declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0v16i8(<16 x i8>*)
-
-declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
-declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
-declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
-declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
diff --git a/test/CodeGen/ARM64/crc32.ll b/test/CodeGen/ARM64/crc32.ll
deleted file mode 100644
index 609eb44..0000000
--- a/test/CodeGen/ARM64/crc32.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc -march=arm64 -o - %s | FileCheck %s
-
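-; Check that each llvm.arm64.crc32* intrinsic selects to the matching CRC32
-; instruction, with the i64 variants taking their data operand in an x
-; register.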
-define i32 @test_crc32b(i32 %cur, i8 %next) {
-; CHECK-LABEL: test_crc32b:
-; CHECK: crc32b w0, w0, w1
- %bits = zext i8 %next to i32
- %val = call i32 @llvm.arm64.crc32b(i32 %cur, i32 %bits)
- ret i32 %val
-}
-
-define i32 @test_crc32h(i32 %cur, i16 %next) {
-; CHECK-LABEL: test_crc32h:
-; CHECK: crc32h w0, w0, w1
- %bits = zext i16 %next to i32
- %val = call i32 @llvm.arm64.crc32h(i32 %cur, i32 %bits)
- ret i32 %val
-}
-
-define i32 @test_crc32w(i32 %cur, i32 %next) {
-; CHECK-LABEL: test_crc32w:
-; CHECK: crc32w w0, w0, w1
- %val = call i32 @llvm.arm64.crc32w(i32 %cur, i32 %next)
- ret i32 %val
-}
-
-define i32 @test_crc32x(i32 %cur, i64 %next) {
-; CHECK-LABEL: test_crc32x:
-; CHECK: crc32x w0, w0, x1
- %val = call i32 @llvm.arm64.crc32x(i32 %cur, i64 %next)
- ret i32 %val
-}
-
-define i32 @test_crc32cb(i32 %cur, i8 %next) {
-; CHECK-LABEL: test_crc32cb:
-; CHECK: crc32cb w0, w0, w1
- %bits = zext i8 %next to i32
- %val = call i32 @llvm.arm64.crc32cb(i32 %cur, i32 %bits)
- ret i32 %val
-}
-
-define i32 @test_crc32ch(i32 %cur, i16 %next) {
-; CHECK-LABEL: test_crc32ch:
-; CHECK: crc32ch w0, w0, w1
- %bits = zext i16 %next to i32
- %val = call i32 @llvm.arm64.crc32ch(i32 %cur, i32 %bits)
- ret i32 %val
-}
-
-define i32 @test_crc32cw(i32 %cur, i32 %next) {
-; CHECK-LABEL: test_crc32cw:
-; CHECK: crc32cw w0, w0, w1
- %val = call i32 @llvm.arm64.crc32cw(i32 %cur, i32 %next)
- ret i32 %val
-}
-
-define i32 @test_crc32cx(i32 %cur, i64 %next) {
-; CHECK-LABEL: test_crc32cx:
-; CHECK: crc32cx w0, w0, x1
- %val = call i32 @llvm.arm64.crc32cx(i32 %cur, i64 %next)
- ret i32 %val
-}
-
-declare i32 @llvm.arm64.crc32b(i32, i32)
-declare i32 @llvm.arm64.crc32h(i32, i32)
-declare i32 @llvm.arm64.crc32w(i32, i32)
-declare i32 @llvm.arm64.crc32x(i32, i64)
-
-declare i32 @llvm.arm64.crc32cb(i32, i32)
-declare i32 @llvm.arm64.crc32ch(i32, i32)
-declare i32 @llvm.arm64.crc32cw(i32, i32)
-declare i32 @llvm.arm64.crc32cx(i32, i64)
diff --git a/test/CodeGen/ARM64/crypto.ll b/test/CodeGen/ARM64/crypto.ll
deleted file mode 100644
index 3804310..0000000
--- a/test/CodeGen/ARM64/crypto.ll
+++ /dev/null
@@ -1,138 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s | FileCheck %s
-
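-; Check that the AES, SHA1 and SHA256 intrinsics select to the corresponding
-; Crypto-extension instructions, and that scalar hash_e arguments reach the
-; FP register file via fmov.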
-declare <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
-declare <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
-declare <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data)
-declare <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data)
-
-define <16 x i8> @test_aese(<16 x i8> %data, <16 x i8> %key) {
-; CHECK-LABEL: test_aese:
-; CHECK: aese.16b v0, v1
- %res = call <16 x i8> @llvm.arm64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_aesd(<16 x i8> %data, <16 x i8> %key) {
-; CHECK-LABEL: test_aesd:
-; CHECK: aesd.16b v0, v1
- %res = call <16 x i8> @llvm.arm64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_aesmc(<16 x i8> %data) {
-; CHECK-LABEL: test_aesmc:
-; CHECK: aesmc.16b v0, v0
- %res = call <16 x i8> @llvm.arm64.crypto.aesmc(<16 x i8> %data)
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_aesimc(<16 x i8> %data) {
-; CHECK-LABEL: test_aesimc:
-; CHECK: aesimc.16b v0, v0
- %res = call <16 x i8> @llvm.arm64.crypto.aesimc(<16 x i8> %data)
- ret <16 x i8> %res
-}
-
-declare <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
-declare <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
-declare <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
-declare i32 @llvm.arm64.crypto.sha1h(i32 %hash_e)
-declare <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11)
-declare <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15)
-
-define <4 x i32> @test_sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK-LABEL: test_sha1c:
-; CHECK: fmov [[HASH_E:s[0-9]+]], w0
-; CHECK: sha1c.4s q0, [[HASH_E]], v1
- %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- ret <4 x i32> %res
-}
-
-; <rdar://problem/14742333> Incomplete removal of unnecessary FMOV instructions in intrinsic SHA1
-define <4 x i32> @test_sha1c_in_a_row(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK-LABEL: test_sha1c_in_a_row:
-; CHECK: fmov [[HASH_E:s[0-9]+]], w0
-; CHECK: sha1c.4s q[[SHA1RES:[0-9]+]], [[HASH_E]], v1
-; CHECK-NOT: fmov
-; CHECK: sha1c.4s q0, s[[SHA1RES]], v1
- %res = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- %extract = extractelement <4 x i32> %res, i32 0
- %res2 = call <4 x i32> @llvm.arm64.crypto.sha1c(<4 x i32> %hash_abcd, i32 %extract, <4 x i32> %wk)
- ret <4 x i32> %res2
-}
-
-define <4 x i32> @test_sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK-LABEL: test_sha1p:
-; CHECK: fmov [[HASH_E:s[0-9]+]], w0
-; CHECK: sha1p.4s q0, [[HASH_E]], v1
- %res = call <4 x i32> @llvm.arm64.crypto.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) {
-; CHECK-LABEL: test_sha1m:
-; CHECK: fmov [[HASH_E:s[0-9]+]], w0
-; CHECK: sha1m.4s q0, [[HASH_E]], v1
- %res = call <4 x i32> @llvm.arm64.crypto.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
- ret <4 x i32> %res
-}
-
-define i32 @test_sha1h(i32 %hash_e) {
-; CHECK-LABEL: test_sha1h:
-; CHECK: fmov [[HASH_E:s[0-9]+]], w0
-; CHECK: sha1h [[RES:s[0-9]+]], [[HASH_E]]
-; CHECK: fmov w0, [[RES]]
- %res = call i32 @llvm.arm64.crypto.sha1h(i32 %hash_e)
- ret i32 %res
-}
-
-define <4 x i32> @test_sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11) {
-; CHECK-LABEL: test_sha1su0:
-; CHECK: sha1su0.4s v0, v1, v2
- %res = call <4 x i32> @llvm.arm64.crypto.sha1su0(<4 x i32> %wk0_3, <4 x i32> %wk4_7, <4 x i32> %wk8_11)
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15) {
-; CHECK-LABEL: test_sha1su1:
-; CHECK: sha1su1.4s v0, v1
- %res = call <4 x i32> @llvm.arm64.crypto.sha1su1(<4 x i32> %wk0_3, <4 x i32> %wk12_15)
- ret <4 x i32> %res
-}
-
-declare <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
-declare <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
-declare <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
-declare <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
-
-define <4 x i32> @test_sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) {
-; CHECK-LABEL: test_sha256h:
-; CHECK: sha256h.4s q0, q1, v2
- %res = call <4 x i32> @llvm.arm64.crypto.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) {
-; CHECK-LABEL: test_sha256h2:
-; CHECK: sha256h2.4s q0, q1, v2
-
- %res = call <4 x i32> @llvm.arm64.crypto.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) {
-; CHECK-LABEL: test_sha256su0:
-; CHECK: sha256su0.4s v0, v1
- %res = call <4 x i32> @llvm.arm64.crypto.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) {
-; CHECK-LABEL: test_sha256su1:
-; CHECK: sha256su1.4s v0, v1, v2
- %res = call <4 x i32> @llvm.arm64.crypto.sha256su1(<4 x i32> %w0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
- ret <4 x i32> %res
-}
diff --git a/test/CodeGen/ARM64/cse.ll b/test/CodeGen/ARM64/cse.ll
deleted file mode 100644
index d98bfd6..0000000
--- a/test/CodeGen/ARM64/cse.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: llc -O3 < %s | FileCheck %s
-target triple = "arm64-apple-ios"
-
-; rdar://12462006
-; CSE between "icmp reg reg" and "sub reg reg".
-; Both can be in the same basic block or in different basic blocks.
-define i8* @t1(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: subs
-; CHECK-NOT: cmp
-; CHECK-NOT: sub
-; CHECK: b.ge
-; CHECK: sub
-; CHECK: sub
-; CHECK-NOT: sub
-; CHECK: ret
- %0 = load i32* %offset, align 4
- %cmp = icmp slt i32 %0, %size
- %s = sub nsw i32 %0, %size
- br i1 %cmp, label %return, label %if.end
-
-if.end:
- %sub = sub nsw i32 %0, %size
- %s2 = sub nsw i32 %s, %size
- %s3 = sub nsw i32 %sub, %s2
- store i32 %s3, i32* %offset, align 4
- %add.ptr = getelementptr inbounds i8* %base, i32 %sub
- br label %return
-
-return:
- %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
- ret i8* %retval.0
-}
-
-; CSE between "icmp reg imm" and "sub reg imm".
-define i8* @t2(i8* %base, i32* nocapture %offset) nounwind {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: subs
-; CHECK-NOT: cmp
-; CHECK-NOT: sub
-; CHECK: b.lt
-; CHECK-NOT: sub
-; CHECK: ret
- %0 = load i32* %offset, align 4
- %cmp = icmp slt i32 %0, 1
- br i1 %cmp, label %return, label %if.end
-
-if.end:
- %sub = sub nsw i32 %0, 1
- store i32 %sub, i32* %offset, align 4
- %add.ptr = getelementptr inbounds i8* %base, i32 %sub
- br label %return
-
-return:
- %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
- ret i8* %retval.0
-}
diff --git a/test/CodeGen/ARM64/csel.ll b/test/CodeGen/ARM64/csel.ll
deleted file mode 100644
index cbf1769..0000000
--- a/test/CodeGen/ARM64/csel.ll
+++ /dev/null
@@ -1,225 +0,0 @@
-; RUN: llc -O3 < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
-target triple = "arm64-unknown-unknown"
-
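-; foo1-foo3 check that selecting between a value and its increment, negation
-; or inversion becomes csinc, csneg or csinv rather than a branch; the later
-; tests cover the immediate and register operand forms.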
-; CHECK: foo1
-; CHECK: csinc w{{[0-9]+}}, w[[REG:[0-9]+]],
-; CHECK: w[[REG]], eq
-define i32 @foo1(i32 %b, i32 %c) nounwind readnone ssp {
-entry:
- %not.tobool = icmp ne i32 %c, 0
- %add = zext i1 %not.tobool to i32
- %b.add = add i32 %c, %b
- %add1 = add i32 %b.add, %add
- ret i32 %add1
-}
-
-; CHECK: foo2
-; CHECK: csneg w{{[0-9]+}}, w[[REG:[0-9]+]],
-; CHECK: w[[REG]], eq
-define i32 @foo2(i32 %b, i32 %c) nounwind readnone ssp {
-entry:
- %mul = sub i32 0, %b
- %tobool = icmp eq i32 %c, 0
- %b.mul = select i1 %tobool, i32 %b, i32 %mul
- %add = add nsw i32 %b.mul, %c
- ret i32 %add
-}
-
-; CHECK: foo3
-; CHECK: csinv w{{[0-9]+}}, w[[REG:[0-9]+]],
-; CHECK: w[[REG]], eq
-define i32 @foo3(i32 %b, i32 %c) nounwind readnone ssp {
-entry:
- %not.tobool = icmp ne i32 %c, 0
- %xor = sext i1 %not.tobool to i32
- %b.xor = xor i32 %xor, %b
- %add = add nsw i32 %b.xor, %c
- ret i32 %add
-}
-
-; rdar://11632325
-define i32 @foo4(i32 %a) nounwind ssp {
-; CHECK: foo4
-; CHECK: csneg
-; CHECK-NEXT: ret
- %cmp = icmp sgt i32 %a, -1
- %neg = sub nsw i32 0, %a
- %cond = select i1 %cmp, i32 %a, i32 %neg
- ret i32 %cond
-}
-
-define i32 @foo5(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: foo5
-; CHECK: subs
-; CHECK-NEXT: csneg
-; CHECK-NEXT: ret
- %sub = sub nsw i32 %a, %b
- %cmp = icmp sgt i32 %sub, -1
- %sub3 = sub nsw i32 0, %sub
- %cond = select i1 %cmp, i32 %sub, i32 %sub3
- ret i32 %cond
-}
-
-; Make sure we can handle a branch instruction in optimizeCompare.
-define i32 @foo6(i32 %a, i32 %b) nounwind ssp {
-; CHECK: foo6
-; CHECK: b
- %sub = sub nsw i32 %a, %b
- %cmp = icmp sgt i32 %sub, 0
- br i1 %cmp, label %l.if, label %l.else
-
-l.if:
- ret i32 1
-
-l.else:
- ret i32 %sub
-}
-
-; If CPSR is used multiple times and the V flag is used, we don't remove the cmp.
-define i32 @foo7(i32 %a, i32 %b) nounwind {
-entry:
-; CHECK-LABEL: foo7:
-; CHECK: sub
-; CHECK-NEXT: adds
-; CHECK-NEXT: csneg
-; CHECK-NEXT: b
- %sub = sub nsw i32 %a, %b
- %cmp = icmp sgt i32 %sub, -1
- %sub3 = sub nsw i32 0, %sub
- %cond = select i1 %cmp, i32 %sub, i32 %sub3
- br i1 %cmp, label %if.then, label %if.else
-
-if.then:
- %cmp2 = icmp slt i32 %sub, -1
- %sel = select i1 %cmp2, i32 %cond, i32 %a
- ret i32 %sel
-
-if.else:
- ret i32 %cond
-}
-
-define i32 @foo8(i32 %v, i32 %a, i32 %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: foo8:
-; CHECK: cmp w0, #0
-; CHECK: csinv w0, w1, w2, ne
- %tobool = icmp eq i32 %v, 0
- %neg = xor i32 -1, %b
- %cond = select i1 %tobool, i32 %neg, i32 %a
- ret i32 %cond
-}
-
-define i32 @foo9(i32 %v) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo9:
-; CHECK: cmp w0, #0
-; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
-; CHECK: csinv w0, w[[REG]], w[[REG]], ne
- %tobool = icmp ne i32 %v, 0
- %cond = select i1 %tobool, i32 4, i32 -5
- ret i32 %cond
-}
-
-define i64 @foo10(i64 %v) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo10:
-; CHECK: cmp x0, #0
-; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4
-; CHECK: csinv x0, x[[REG]], x[[REG]], ne
- %tobool = icmp ne i64 %v, 0
- %cond = select i1 %tobool, i64 4, i64 -5
- ret i64 %cond
-}
-
-define i32 @foo11(i32 %v) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo11:
-; CHECK: cmp w0, #0
-; CHECK: orr w[[REG:[0-9]+]], wzr, #0x4
-; CHECK: csneg w0, w[[REG]], w[[REG]], ne
- %tobool = icmp ne i32 %v, 0
- %cond = select i1 %tobool, i32 4, i32 -4
- ret i32 %cond
-}
-
-define i64 @foo12(i64 %v) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo12:
-; CHECK: cmp x0, #0
-; CHECK: orr x[[REG:[0-9]+]], xzr, #0x4
-; CHECK: csneg x0, x[[REG]], x[[REG]], ne
- %tobool = icmp ne i64 %v, 0
- %cond = select i1 %tobool, i64 4, i64 -4
- ret i64 %cond
-}
-
-define i32 @foo13(i32 %v, i32 %a, i32 %b) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo13:
-; CHECK: cmp w0, #0
-; CHECK: csneg w0, w1, w2, ne
- %tobool = icmp eq i32 %v, 0
- %sub = sub i32 0, %b
- %cond = select i1 %tobool, i32 %sub, i32 %a
- ret i32 %cond
-}
-
-define i64 @foo14(i64 %v, i64 %a, i64 %b) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo14:
-; CHECK: cmp x0, #0
-; CHECK: csneg x0, x1, x2, ne
- %tobool = icmp eq i64 %v, 0
- %sub = sub i64 0, %b
- %cond = select i1 %tobool, i64 %sub, i64 %a
- ret i64 %cond
-}
-
-define i32 @foo15(i32 %a, i32 %b) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo15:
-; CHECK: cmp w0, w1
-; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
-; CHECK: csinc w0, w[[REG]], w[[REG]], le
- %cmp = icmp sgt i32 %a, %b
- %. = select i1 %cmp, i32 2, i32 1
- ret i32 %.
-}
-
-define i32 @foo16(i32 %a, i32 %b) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo16:
-; CHECK: cmp w0, w1
-; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
-; CHECK: csinc w0, w[[REG]], w[[REG]], gt
- %cmp = icmp sgt i32 %a, %b
- %. = select i1 %cmp, i32 1, i32 2
- ret i32 %.
-}
-
-define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo17:
-; CHECK: cmp x0, x1
-; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1
-; CHECK: csinc x0, x[[REG]], x[[REG]], le
- %cmp = icmp sgt i64 %a, %b
- %. = select i1 %cmp, i64 2, i64 1
- ret i64 %.
-}
-
-define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: foo18:
-; CHECK: cmp x0, x1
-; CHECK: orr x[[REG:[0-9]+]], xzr, #0x1
-; CHECK: csinc x0, x[[REG]], x[[REG]], gt
- %cmp = icmp sgt i64 %a, %b
- %. = select i1 %cmp, i64 1, i64 2
- ret i64 %.
-}
diff --git a/test/CodeGen/ARM64/cvt.ll b/test/CodeGen/ARM64/cvt.ll
deleted file mode 100644
index b55a42f..0000000
--- a/test/CodeGen/ARM64/cvt.ll
+++ /dev/null
@@ -1,401 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-;
-; Floating-point scalar convert to signed integer (to nearest with ties to away)
-;
-define i32 @fcvtas_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtas_1w1s:
-;CHECK: fcvtas w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtas_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtas_1x1s:
-;CHECK: fcvtas x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtas_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtas_1w1d:
-;CHECK: fcvtas w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtas.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtas_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtas_1x1d:
-;CHECK: fcvtas x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtas.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtas.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtas.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtas.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtas.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to unsigned integer (to nearest with ties to away)
-;
-define i32 @fcvtau_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtau_1w1s:
-;CHECK: fcvtau w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtau_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtau_1x1s:
-;CHECK: fcvtau x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtau_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtau_1w1d:
-;CHECK: fcvtau w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtau.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtau_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtau_1x1d:
-;CHECK: fcvtau x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtau.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtau.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtau.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtau.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtau.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to signed integer (toward -Inf)
-;
-define i32 @fcvtms_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtms_1w1s:
-;CHECK: fcvtms w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtms_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtms_1x1s:
-;CHECK: fcvtms x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtms_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtms_1w1d:
-;CHECK: fcvtms w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtms.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtms_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtms_1x1d:
-;CHECK: fcvtms x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtms.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtms.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtms.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtms.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtms.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to unsigned integer (toward -Inf)
-;
-define i32 @fcvtmu_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtmu_1w1s:
-;CHECK: fcvtmu w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtmu_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtmu_1x1s:
-;CHECK: fcvtmu x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtmu_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtmu_1w1d:
-;CHECK: fcvtmu w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtmu.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtmu_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtmu_1x1d:
-;CHECK: fcvtmu x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtmu.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtmu.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtmu.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtmu.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtmu.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to signed integer (to nearest with ties to even)
-;
-define i32 @fcvtns_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtns_1w1s:
-;CHECK: fcvtns w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtns_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtns_1x1s:
-;CHECK: fcvtns x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtns_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtns_1w1d:
-;CHECK: fcvtns w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtns.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtns_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtns_1x1d:
-;CHECK: fcvtns x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtns.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtns.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtns.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtns.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtns.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to unsigned integer (to nearest with ties to even)
-;
-define i32 @fcvtnu_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtnu_1w1s:
-;CHECK: fcvtnu w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtnu_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtnu_1x1s:
-;CHECK: fcvtnu x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtnu_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtnu_1w1d:
-;CHECK: fcvtnu w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtnu.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtnu_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtnu_1x1d:
-;CHECK: fcvtnu x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtnu.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtnu.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtnu.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtnu.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtnu.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to signed integer (toward +Inf)
-;
-define i32 @fcvtps_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtps_1w1s:
-;CHECK: fcvtps w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtps_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtps_1x1s:
-;CHECK: fcvtps x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtps_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtps_1w1d:
-;CHECK: fcvtps w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtps.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtps_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtps_1x1d:
-;CHECK: fcvtps x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtps.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtps.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtps.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtps.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtps.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to unsigned integer (toward +Inf)
-;
-define i32 @fcvtpu_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtpu_1w1s:
-;CHECK: fcvtpu w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtpu_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtpu_1x1s:
-;CHECK: fcvtpu x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtpu_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtpu_1w1d:
-;CHECK: fcvtpu w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtpu.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtpu_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtpu_1x1d:
-;CHECK: fcvtpu x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtpu.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtpu.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtpu.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtpu.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtpu.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to signed integer (toward zero)
-;
-define i32 @fcvtzs_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtzs_1w1s:
-;CHECK: fcvtzs w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtzs_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtzs_1x1s:
-;CHECK: fcvtzs x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtzs_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtzs_1w1d:
-;CHECK: fcvtzs w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtzs.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtzs_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtzs_1x1d:
-;CHECK: fcvtzs x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtzs.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtzs.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtzs.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtzs.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtzs.i64.f64(double) nounwind readnone
-
-;
-; Floating-point scalar convert to unsigned integer (toward zero)
-;
-define i32 @fcvtzu_1w1s(float %A) nounwind {
-;CHECK-LABEL: fcvtzu_1w1s:
-;CHECK: fcvtzu w0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f32(float %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtzu_1x1s(float %A) nounwind {
-;CHECK-LABEL: fcvtzu_1x1s:
-;CHECK: fcvtzu x0, s0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f32(float %A)
- ret i64 %tmp3
-}
-
-define i32 @fcvtzu_1w1d(double %A) nounwind {
-;CHECK-LABEL: fcvtzu_1w1d:
-;CHECK: fcvtzu w0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i32 @llvm.arm64.neon.fcvtzu.i32.f64(double %A)
- ret i32 %tmp3
-}
-
-define i64 @fcvtzu_1x1d(double %A) nounwind {
-;CHECK-LABEL: fcvtzu_1x1d:
-;CHECK: fcvtzu x0, d0
-;CHECK-NEXT: ret
- %tmp3 = call i64 @llvm.arm64.neon.fcvtzu.i64.f64(double %A)
- ret i64 %tmp3
-}
-
-declare i32 @llvm.arm64.neon.fcvtzu.i32.f32(float) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtzu.i64.f32(float) nounwind readnone
-declare i32 @llvm.arm64.neon.fcvtzu.i32.f64(double) nounwind readnone
-declare i64 @llvm.arm64.neon.fcvtzu.i64.f64(double) nounwind readnone
diff --git a/test/CodeGen/ARM64/dup.ll b/test/CodeGen/ARM64/dup.ll
deleted file mode 100644
index e659575..0000000
--- a/test/CodeGen/ARM64/dup.ll
+++ /dev/null
@@ -1,324 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
-
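-; Check that repeated-scalar build vectors, splat shuffles and lane
-; broadcasts all select to a single DUP of the right arrangement.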
-define <8 x i8> @v_dup8(i8 %A) nounwind {
-;CHECK-LABEL: v_dup8:
-;CHECK: dup.8b
- %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
- %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
- %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
- %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
- %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
- %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
- %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
- %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
- ret <8 x i8> %tmp8
-}
-
-define <4 x i16> @v_dup16(i16 %A) nounwind {
-;CHECK-LABEL: v_dup16:
-;CHECK: dup.4h
- %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
- %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
- %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
- %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @v_dup32(i32 %A) nounwind {
-;CHECK-LABEL: v_dup32:
-;CHECK: dup.2s
- %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
- %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @v_dupfloat(float %A) nounwind {
-;CHECK-LABEL: v_dupfloat:
-;CHECK: dup.2s
- %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
- %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
- ret <2 x float> %tmp2
-}
-
-define <16 x i8> @v_dupQ8(i8 %A) nounwind {
-;CHECK-LABEL: v_dupQ8:
-;CHECK: dup.16b
- %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
- %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
- %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
- %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
- %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
- %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
- %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
- %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
- %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
- %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
- %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
- %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
- %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
- %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
- %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
- %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
- ret <16 x i8> %tmp16
-}
-
-define <8 x i16> @v_dupQ16(i16 %A) nounwind {
-;CHECK-LABEL: v_dupQ16:
-;CHECK: dup.8h
- %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
- %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
- %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
- %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
- %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
- %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
- %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
- %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
- ret <8 x i16> %tmp8
-}
-
-define <4 x i32> @v_dupQ32(i32 %A) nounwind {
-;CHECK-LABEL: v_dupQ32:
-;CHECK: dup.4s
- %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
- %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
- %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
- %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
- ret <4 x i32> %tmp4
-}
-
-define <4 x float> @v_dupQfloat(float %A) nounwind {
-;CHECK-LABEL: v_dupQfloat:
-;CHECK: dup.4s
- %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
- %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
- %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
- %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
- ret <4 x float> %tmp4
-}
-
-; Check to make sure it works with shuffles, too.
-
-define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
-;CHECK-LABEL: v_shuffledup8:
-;CHECK: dup.8b
- %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
-;CHECK-LABEL: v_shuffledup16:
-;CHECK: dup.4h
- %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
-;CHECK-LABEL: v_shuffledup32:
-;CHECK: dup.2s
- %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @v_shuffledupfloat(float %A) nounwind {
-;CHECK-LABEL: v_shuffledupfloat:
-;CHECK: dup.2s
- %tmp1 = insertelement <2 x float> undef, float %A, i32 0
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
- ret <2 x float> %tmp2
-}
-
-define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
-;CHECK-LABEL: v_shuffledupQ8:
-;CHECK: dup.16b
- %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
- %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
-;CHECK-LABEL: v_shuffledupQ16:
-;CHECK: dup.8h
- %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
- %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
-;CHECK-LABEL: v_shuffledupQ32:
-;CHECK: dup.4s
- %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
- %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
- ret <4 x i32> %tmp2
-}
-
-define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
-;CHECK-LABEL: v_shuffledupQfloat:
-;CHECK: dup.4s
- %tmp1 = insertelement <4 x float> undef, float %A, i32 0
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
- ret <4 x float> %tmp2
-}
-
-define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: vduplane8:
-;CHECK: dup.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: vduplane16:
-;CHECK: dup.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: vduplane32:
-;CHECK: dup.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
-;CHECK-LABEL: vduplanefloat:
-;CHECK: dup.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
- ret <2 x float> %tmp2
-}
-
-define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: vduplaneQ8:
-;CHECK: dup.16b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: vduplaneQ16:
-;CHECK: dup.8h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: vduplaneQ32:
-;CHECK: dup.4s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
-;CHECK-LABEL: vduplaneQfloat:
-;CHECK: dup.4s
- %tmp1 = load <2 x float>* %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
- ret <4 x float> %tmp2
-}
-
-define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
-;CHECK-LABEL: foo:
-;CHECK: dup.2d
-entry:
- %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
- ret <2 x i64> %0
-}
-
-define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
-;CHECK-LABEL: bar:
-;CHECK: dup.2d
-entry:
- %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
- ret <2 x i64> %0
-}
-
-define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
-;CHECK-LABEL: baz:
-;CHECK: dup.2d
-entry:
- %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- ret <2 x double> %0
-}
-
-define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
-;CHECK-LABEL: qux:
-;CHECK: dup.2d
-entry:
- %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
- ret <2 x double> %0
-}
-
-define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone {
-; CHECK-LABEL: f:
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: ins.s v0[1], w1
-; CHECK-NEXT: ret
- %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
- %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
- ret <2 x i32> %vecinit1
-}
-
-define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
-; CHECK-LABEL: g:
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: ins.s v0[1], w1
-; CHECK-NEXT: ins.s v0[2], w1
-; CHECK-NEXT: ins.s v0[3], w0
-; CHECK-NEXT: ret
- %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
- %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
- %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
- %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3
- ret <4 x i32> %vecinit3
-}
-
-define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
-; CHECK-LABEL: h:
-; CHECK-NEXT: fmov d0, x0
-; CHECK-NEXT: ins.d v0[1], x1
-; CHECK-NEXT: ret
- %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
- %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
- ret <2 x i64> %vecinit1
-}
-
-; We used to spot this as a BUILD_VECTOR implementable by dup, but assumed that
-; the single value needed was of the same type as the vector. This is false if
-; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16>
-; BUILD_VECTOR will have an i32 as its source). In that case, the operation is
-; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
-define <4 x i16> @test_build_illegal(<4 x i32> %in) {
-; CHECK-LABEL: test_build_illegal:
-; CHECK: umov.s [[WTMP:w[0-9]+]], v0[3]
-; CHECK: dup.4h v0, [[WTMP]]
- %val = extractelement <4 x i32> %in, i32 3
- %smallval = trunc i32 %val to i16
- %vec = insertelement <4 x i16> undef, i16 %smallval, i32 3
-
- ret <4 x i16> %vec
-}
-
-; We used to inherit a v4i16 that SelectionDAGBuilder had already
-; extract_subvectored here. We then added a DUPLANE on top of that, preventing
-; the formation of an indexed-by-7 MLS.
-define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
-; CHECK-LABEL: test_high_splat:
-; CHECK: mls.4h v0, v1, v2[7]
-entry:
- %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- %mul = mul <4 x i16> %shuffle, %b
- %sub = sub <4 x i16> %a, %mul
- ret <4 x i16> %sub
-}
diff --git a/test/CodeGen/ARM64/early-ifcvt.ll b/test/CodeGen/ARM64/early-ifcvt.ll
deleted file mode 100644
index a5c1e26..0000000
--- a/test/CodeGen/ARM64/early-ifcvt.ll
+++ /dev/null
@@ -1,426 +0,0 @@
-; RUN: llc < %s -stress-early-ifcvt | FileCheck %s
-target triple = "arm64-apple-macosx"
-
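-; With -stress-early-ifcvt, each hammock below should be if-converted: the
-; fold_* tests expect the inc/inv/neg leg to fold into csinc/csinv/csneg, and
-; the cb*/tb* tests expect a plain csel after the appropriate compare.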
-; CHECK: mm2
-define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp {
-entry:
- br label %do.body
-
-; CHECK: do.body
-; Loop body has no branches before the backedge.
-; CHECK-NOT: LBB
-do.body:
- %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ]
- %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
- %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
- %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
- %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
- %0 = load i32* %p.addr.0, align 4
- %cmp = icmp sgt i32 %0, %max.0
- br i1 %cmp, label %do.cond, label %if.else
-
-if.else:
- %cmp1 = icmp slt i32 %0, %min.0
- %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0
- br label %do.cond
-
-do.cond:
- %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ]
- %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ]
-; CHECK: cbnz
- %dec = add i32 %n.addr.0, -1
- %tobool = icmp eq i32 %dec, 0
- br i1 %tobool, label %do.end, label %do.body
-
-do.end:
- %sub = sub nsw i32 %max.1, %min.1
- ret i32 %sub
-}
-
-; CHECK-LABEL: fold_inc_true_32:
-; CHECK: {{subs.*wzr,|cmp}} w2, #1
-; CHECK-NEXT: csinc w0, w1, w0, eq
-; CHECK-NEXT: ret
-define i32 @fold_inc_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 1
- %inc = add nsw i32 %x, 1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %y, %eq_bb ], [ %inc, %entry ]
- ret i32 %cond
-}
-
-; CHECK-LABEL: fold_inc_true_64:
-; CHECK: {{subs.*xzr,|cmp}} x2, #1
-; CHECK-NEXT: csinc x0, x1, x0, eq
-; CHECK-NEXT: ret
-define i64 @fold_inc_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 1
- %inc = add nsw i64 %x, 1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %y, %eq_bb ], [ %inc, %entry ]
- ret i64 %cond
-}
-
-; CHECK-LABEL: fold_inc_false_32:
-; CHECK: {{subs.*wzr,|cmp}} w2, #1
-; CHECK-NEXT: csinc w0, w1, w0, ne
-; CHECK-NEXT: ret
-define i32 @fold_inc_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 1
- %inc = add nsw i32 %x, 1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %inc, %eq_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK-LABEL: fold_inc_false_64:
-; CHECK: {{subs.*xzr,|cmp}} x2, #1
-; CHECK-NEXT: csinc x0, x1, x0, ne
-; CHECK-NEXT: ret
-define i64 @fold_inc_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 1
- %inc = add nsw i64 %x, 1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %inc, %eq_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; CHECK-LABEL: fold_inv_true_32:
-; CHECK: {{subs.*wzr,|cmp}} w2, #1
-; CHECK-NEXT: csinv w0, w1, w0, eq
-; CHECK-NEXT: ret
-define i32 @fold_inv_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 1
- %inv = xor i32 %x, -1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %y, %eq_bb ], [ %inv, %entry ]
- ret i32 %cond
-}
-
-; CHECK-LABEL: fold_inv_true_64:
-; CHECK: {{subs.*xzr,|cmp}} x2, #1
-; CHECK-NEXT: csinv x0, x1, x0, eq
-; CHECK-NEXT: ret
-define i64 @fold_inv_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 1
- %inv = xor i64 %x, -1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %y, %eq_bb ], [ %inv, %entry ]
- ret i64 %cond
-}
-
-; CHECK-LABEL: fold_inv_false_32:
-; CHECK: {{subs.*wzr,|cmp}} w2, #1
-; CHECK-NEXT: csinv w0, w1, w0, ne
-; CHECK-NEXT: ret
-define i32 @fold_inv_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 1
- %inv = xor i32 %x, -1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %inv, %eq_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK-LABEL: fold_inv_false_64:
-; CHECK: {{subs.*xzr,|cmp}} x2, #1
-; CHECK-NEXT: csinv x0, x1, x0, ne
-; CHECK-NEXT: ret
-define i64 @fold_inv_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 1
- %inv = xor i64 %x, -1
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %inv, %eq_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; CHECK-LABEL: fold_neg_true_32:
-; CHECK: {{subs.*wzr,|cmp}} w2, #1
-; CHECK-NEXT: csneg w0, w1, w0, eq
-; CHECK-NEXT: ret
-define i32 @fold_neg_true_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 1
- %neg = sub nsw i32 0, %x
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %y, %eq_bb ], [ %neg, %entry ]
- ret i32 %cond
-}
-
-; CHECK-LABEL: fold_neg_true_64:
-; CHECK: {{subs.*xzr,|cmp}} x2, #1
-; CHECK-NEXT: csneg x0, x1, x0, eq
-; CHECK-NEXT: ret
-define i64 @fold_neg_true_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 1
- %neg = sub nsw i64 0, %x
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %y, %eq_bb ], [ %neg, %entry ]
- ret i64 %cond
-}
-
-; CHECK-LABEL: fold_neg_false_32:
-; CHECK: {{subs.*wzr,|cmp}} w2, #1
-; CHECK-NEXT: csneg w0, w1, w0, ne
-; CHECK-NEXT: ret
-define i32 @fold_neg_false_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 1
- %neg = sub nsw i32 0, %x
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %neg, %eq_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK-LABEL: fold_neg_false_64:
-; CHECK: {{subs.*xzr,|cmp}} x2, #1
-; CHECK-NEXT: csneg x0, x1, x0, ne
-; CHECK-NEXT: ret
-define i64 @fold_neg_false_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 1
- %neg = sub nsw i64 0, %x
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %neg, %eq_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; CHECK: cbnz_32
-; CHECK: {{subs.*wzr,|cmp}} w2, #0
-; CHECK-NEXT: csel w0, w1, w0, ne
-; CHECK-NEXT: ret
-define i32 @cbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i32 %c, 0
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK: cbnz_64
-; CHECK: {{subs.*xzr,|cmp}} x2, #0
-; CHECK-NEXT: csel x0, x1, x0, ne
-; CHECK-NEXT: ret
-define i64 @cbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp eq i64 %c, 0
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; CHECK: cbz_32
-; CHECK: {{subs.*wzr,|cmp}} w2, #0
-; CHECK-NEXT: csel w0, w1, w0, eq
-; CHECK-NEXT: ret
-define i32 @cbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %tobool = icmp ne i32 %c, 0
- br i1 %tobool, label %ne_bb, label %done
-
-ne_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK: cbz_64
-; CHECK: {{subs.*xzr,|cmp}} x2, #0
-; CHECK-NEXT: csel x0, x1, x0, eq
-; CHECK-NEXT: ret
-define i64 @cbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %tobool = icmp ne i64 %c, 0
- br i1 %tobool, label %ne_bb, label %done
-
-ne_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; CHECK: tbnz_32
-; CHECK: {{ands.*xzr,|tst}} x2, #0x80
-; CHECK-NEXT: csel w0, w1, w0, ne
-; CHECK-NEXT: ret
-define i32 @tbnz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %mask = and i32 %c, 128
- %tobool = icmp eq i32 %mask, 0
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %x, %eq_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK: tbnz_64
-; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000
-; CHECK-NEXT: csel x0, x1, x0, ne
-; CHECK-NEXT: ret
-define i64 @tbnz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %mask = and i64 %c, 9223372036854775808
- %tobool = icmp eq i64 %mask, 0
- br i1 %tobool, label %eq_bb, label %done
-
-eq_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %x, %eq_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; CHECK: tbz_32
-; CHECK: {{ands.*xzr,|tst}} x2, #0x80
-; CHECK-NEXT: csel w0, w1, w0, eq
-; CHECK-NEXT: ret
-define i32 @tbz_32(i32 %x, i32 %y, i32 %c) nounwind ssp {
-entry:
- %mask = and i32 %c, 128
- %tobool = icmp ne i32 %mask, 0
- br i1 %tobool, label %ne_bb, label %done
-
-ne_bb:
- br label %done
-
-done:
- %cond = phi i32 [ %x, %ne_bb ], [ %y, %entry ]
- ret i32 %cond
-}
-
-; CHECK: tbz_64
-; CHECK: {{ands.*xzr,|tst}} x2, #0x8000000000000000
-; CHECK-NEXT: csel x0, x1, x0, eq
-; CHECK-NEXT: ret
-define i64 @tbz_64(i64 %x, i64 %y, i64 %c) nounwind ssp {
-entry:
- %mask = and i64 %c, 9223372036854775808
- %tobool = icmp ne i64 %mask, 0
- br i1 %tobool, label %ne_bb, label %done
-
-ne_bb:
- br label %done
-
-done:
- %cond = phi i64 [ %x, %ne_bb ], [ %y, %entry ]
- ret i64 %cond
-}
-
-; This function from 175.vpr folds an ADDWri into a CSINC.
-; Remember to clear the kill flag on the ADDWri.
-define i32 @get_ytrack_to_xtracks() nounwind ssp {
-entry:
- br label %for.body
-
-for.body:
- %x0 = load i32* undef, align 4
- br i1 undef, label %if.then.i146, label %is_sbox.exit155
-
-if.then.i146:
- %add8.i143 = add nsw i32 0, %x0
- %rem.i144 = srem i32 %add8.i143, %x0
- %add9.i145 = add i32 %rem.i144, 1
- br label %is_sbox.exit155
-
-is_sbox.exit155: ; preds = %if.then.i146, %for.body
- %seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef, %for.body ]
- %idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64
- %arrayidx18.i154 = getelementptr inbounds i32* null, i64 %idxprom15.i152
- %x1 = load i32* %arrayidx18.i154, align 4
- br i1 undef, label %for.body51, label %for.body
-
-for.body51: ; preds = %is_sbox.exit155
- call fastcc void @get_switch_type(i32 %x1, i32 undef, i16 signext undef, i16 signext undef, i16* undef)
- unreachable
-}
-declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, i16* nocapture) nounwind ssp
diff --git a/test/CodeGen/ARM64/elf-globals.ll b/test/CodeGen/ARM64/elf-globals.ll
deleted file mode 100644
index 598c96a..0000000
--- a/test/CodeGen/ARM64/elf-globals.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
-; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 | FileCheck %s --check-prefix=CHECK-FAST
-; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-PIC
-; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-FAST-PIC
-
-@var8 = external global i8, align 1
-@var16 = external global i16, align 2
-@var32 = external global i32, align 4
-@var64 = external global i64, align 8
-
-define i8 @test_i8(i8 %new) {
- %val = load i8* @var8, align 1
- store i8 %new, i8* @var8
- ret i8 %val
-; CHECK-LABEL: test_i8:
-; CHECK: adrp x[[HIREG:[0-9]+]], var8
-; CHECK: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
-; CHECK: strb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
-
-; CHECK-PIC-LABEL: test_i8:
-; CHECK-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
-; CHECK-PIC: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
-; CHECK-PIC: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]]
-
-; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var8
-; CHECK-FAST: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
-
-; CHECK-FAST-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
-; CHECK-FAST-PIC: ldr x[[VARADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
-; CHECK-FAST-PIC: ldr {{w[0-9]+}}, [x[[VARADDR]]]
-}
-
-define i16 @test_i16(i16 %new) {
- %val = load i16* @var16, align 2
- store i16 %new, i16* @var16
- ret i16 %val
-; CHECK-LABEL: test_i16:
-; CHECK: adrp x[[HIREG:[0-9]+]], var16
-; CHECK: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
-; CHECK: strh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
-
-; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var16
-; CHECK-FAST: ldrh {{w[0-9]+}}, [x[[HIREG]], :lo12:var16]
-}
-
-define i32 @test_i32(i32 %new) {
- %val = load i32* @var32, align 4
- store i32 %new, i32* @var32
- ret i32 %val
-; CHECK-LABEL: test_i32:
-; CHECK: adrp x[[HIREG:[0-9]+]], var32
-; CHECK: ldr {{w[0-9]+}}, [x[[HIREG]], :lo12:var32]
-; CHECK: str {{w[0-9]+}}, [x[[HIREG]], :lo12:var32]
-
-; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var32
-; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var32
-}
-
-define i64 @test_i64(i64 %new) {
- %val = load i64* @var64, align 8
- store i64 %new, i64* @var64
- ret i64 %val
-; CHECK-LABEL: test_i64:
-; CHECK: adrp x[[HIREG:[0-9]+]], var64
-; CHECK: ldr {{x[0-9]+}}, [x[[HIREG]], :lo12:var64]
-; CHECK: str {{x[0-9]+}}, [x[[HIREG]], :lo12:var64]
-
-; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var64
-; CHECK-FAST: add {{x[0-9]+}}, x[[HIREG]], :lo12:var64
-}
-
-define i64* @test_addr() {
- ret i64* @var64
-; CHECK-LABEL: test_addr:
-; CHECK: adrp [[HIREG:x[0-9]+]], var64
-; CHECK: add x0, [[HIREG]], :lo12:var64
-
-; CHECK-FAST: adrp [[HIREG:x[0-9]+]], var64
-; CHECK-FAST: add x0, [[HIREG]], :lo12:var64
-}
-
-@hiddenvar = hidden global i32 0, align 4
-@protectedvar = protected global i32 0, align 4
-
-define i32 @test_vis() {
- %lhs = load i32* @hiddenvar, align 4
- %rhs = load i32* @protectedvar, align 4
- %ret = add i32 %lhs, %rhs
- ret i32 %ret
-; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar
-; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:hiddenvar]
-; CHECK-PIC: adrp {{x[0-9]+}}, protectedvar
-; CHECK-PIC: ldr {{w[0-9]+}}, [{{x[0-9]+}}, :lo12:protectedvar]
-}
-
-@var_default = external global [2 x i32]
-
-define i32 @test_default_align() {
- %addr = getelementptr [2 x i32]* @var_default, i32 0, i32 0
- %val = load i32* %addr
- ret i32 %val
-; CHECK-LABEL: test_default_align:
-; CHECK: adrp x[[HIREG:[0-9]+]], var_default
-; CHECK: ldr w0, [x[[HIREG]], :lo12:var_default]
-}
-
-define i64 @test_default_unaligned() {
- %addr = bitcast [2 x i32]* @var_default to i64*
- %val = load i64* %addr
- ret i64 %val
-; CHECK-LABEL: test_default_unaligned:
-; CHECK: adrp [[HIREG:x[0-9]+]], var_default
-; CHECK: add x[[ADDR:[0-9]+]], [[HIREG]], :lo12:var_default
-; CHECK: ldr x0, [x[[ADDR]]]
-}
diff --git a/test/CodeGen/ARM64/ext.ll b/test/CodeGen/ARM64/ext.ll
deleted file mode 100644
index 57d6e0c..0000000
--- a/test/CodeGen/ARM64/ext.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: test_vextd:
-;CHECK: {{ext.8b.*#3}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
- ret <8 x i8> %tmp3
-}
-
-define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: test_vextRd:
-;CHECK: {{ext.8b.*#5}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: test_vextq:
-;CHECK: {{ext.16b.*3}}
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
- ret <16 x i8> %tmp3
-}
-
-define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: test_vextRq:
-;CHECK: {{ext.16b.*7}}
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: test_vextd16:
-;CHECK: {{ext.8b.*#6}}
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
- ret <4 x i16> %tmp3
-}
-
-define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: test_vextq32:
-;CHECK: {{ext.16b.*12}}
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
- ret <4 x i32> %tmp3
-}
-
-; Undef shuffle indices should not prevent matching to VEXT:
-
-define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: test_vextd_undef:
-;CHECK: {{ext.8b.*}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: test_vextRq_undef:
-;CHECK: {{ext.16b.*#7}}
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
- ret <16 x i8> %tmp3
-}
-
-; Tests for the ReconstructShuffle function. Indices have to be carefully
-; chosen to reach the lowering phase as a BUILD_VECTOR.
-
-; One vector needs vext; the other can be handled by extract_subvector.
-; Also checks that interleaving of the sources is handled correctly.
-; Essence: a vext is used on %A, and something saner than a stack load/store is used for the final result.
-define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: test_interleaved:
-;CHECK: ext.8b
-;CHECK: zip1.4h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
- ret <4 x i16> %tmp3
-}
-
-; An undef in the shuffle list should still be optimizable
-define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: test_undef:
-;CHECK: zip1.4h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
- ret <4 x i16> %tmp3
-}
diff --git a/test/CodeGen/ARM64/extend-int-to-fp.ll b/test/CodeGen/ARM64/extend-int-to-fp.ll
deleted file mode 100644
index 599a697..0000000
--- a/test/CodeGen/ARM64/extend-int-to-fp.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <4 x float> @foo(<4 x i16> %a) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: ushll.4s v0, v0, #0
-; CHECK-NEXT: ucvtf.4s v0, v0
-; CHECK-NEXT: ret
- %vcvt.i = uitofp <4 x i16> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @bar(<4 x i16> %a) nounwind {
-; CHECK-LABEL: bar:
-; CHECK: sshll.4s v0, v0, #0
-; CHECK-NEXT: scvtf.4s v0, v0
-; CHECK-NEXT: ret
- %vcvt.i = sitofp <4 x i16> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
diff --git a/test/CodeGen/ARM64/extend.ll b/test/CodeGen/ARM64/extend.ll
deleted file mode 100644
index 4d20543..0000000
--- a/test/CodeGen/ARM64/extend.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s
-@array = external global [0 x i32]
-
-define i64 @foo(i32 %i) {
-; CHECK: foo
-; CHECK: adrp x[[REG:[0-9]+]], _array@GOTPAGE
-; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _array@GOTPAGEOFF]
-; CHECK: ldrsw x0, [x[[REG1]], x0, sxtw #2]
-; CHECK: ret
- %idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds [0 x i32]* @array, i64 0, i64 %idxprom
- %tmp1 = load i32* %arrayidx, align 4
- %conv = sext i32 %tmp1 to i64
- ret i64 %conv
-}
diff --git a/test/CodeGen/ARM64/extract.ll b/test/CodeGen/ARM64/extract.ll
deleted file mode 100644
index 119751c..0000000
--- a/test/CodeGen/ARM64/extract.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc -arm64-extr-generation=true -verify-machineinstrs < %s \
-; RUN: -march=arm64 | FileCheck %s
-
-define i64 @ror_i64(i64 %in) {
-; CHECK-LABEL: ror_i64:
- %left = shl i64 %in, 19
- %right = lshr i64 %in, 45
- %val5 = or i64 %left, %right
-; CHECK: extr {{x[0-9]+}}, x0, x0, #45
- ret i64 %val5
-}
-
-define i32 @ror_i32(i32 %in) {
-; CHECK-LABEL: ror_i32:
- %left = shl i32 %in, 9
- %right = lshr i32 %in, 23
- %val5 = or i32 %left, %right
-; CHECK: extr {{w[0-9]+}}, w0, w0, #23
- ret i32 %val5
-}
-
-define i32 @extr_i32(i32 %lhs, i32 %rhs) {
-; CHECK-LABEL: extr_i32:
- %left = shl i32 %lhs, 6
- %right = lshr i32 %rhs, 26
- %val = or i32 %left, %right
- ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
- ; something other than w0 and w1.
-; CHECK: extr {{w[0-9]+}}, w0, w1, #26
-
- ret i32 %val
-}
-
-define i64 @extr_i64(i64 %lhs, i64 %rhs) {
-; CHECK-LABEL: extr_i64:
- %right = lshr i64 %rhs, 40
- %left = shl i64 %lhs, 24
- %val = or i64 %right, %left
- ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
- ; something other than x0 and x1.
-; CHECK: extr {{x[0-9]+}}, x0, x1, #40
-
- ret i64 %val
-}
-
-; Regression test: a bad experimental pattern crept into git which optimised
-; this pattern to a single EXTR.
-define i32 @extr_regress(i32 %a, i32 %b) {
-; CHECK-LABEL: extr_regress:
-
- %sh1 = shl i32 %a, 14
- %sh2 = lshr i32 %b, 14
- %val = or i32 %sh2, %sh1
-; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
-
- ret i32 %val
-; CHECK: ret
-}
diff --git a/test/CodeGen/ARM64/extract_subvector.ll b/test/CodeGen/ARM64/extract_subvector.ll
deleted file mode 100644
index 20c05fb..0000000
--- a/test/CodeGen/ARM64/extract_subvector.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-; Extracting the upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.
-
-define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
-; CHECK: v8i8
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: ret
- %ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i8> %ret
-}
-
-define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
-; CHECK-LABEL: v4i16:
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: ret
- %ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- ret <4 x i16> %ret
-}
-
-define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
-; CHECK-LABEL: v2i32:
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: ret
- %ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
- ret <2 x i32> %ret
-}
-
-define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
-; CHECK-LABEL: v1i64:
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: ret
- %ret = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
- ret <1 x i64> %ret
-}
-
-define <2 x float> @v2f32(<4 x float> %a) nounwind {
-; CHECK-LABEL: v2f32:
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: ret
- %ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
- ret <2 x float> %ret
-}
-
-define <1 x double> @v1f64(<2 x double> %a) nounwind {
-; CHECK-LABEL: v1f64:
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: ret
- %ret = shufflevector <2 x double> %a, <2 x double> %a, <1 x i32> <i32 1>
- ret <1 x double> %ret
-}
diff --git a/test/CodeGen/ARM64/fast-isel-addr-offset.ll b/test/CodeGen/ARM64/fast-isel-addr-offset.ll
deleted file mode 100644
index a4326dc..0000000
--- a/test/CodeGen/ARM64/fast-isel-addr-offset.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-@sortlist = common global [5001 x i32] zeroinitializer, align 16
-@sortlist2 = common global [5001 x i64] zeroinitializer, align 16
-
-; Load an address with an offset larger than the LDR immediate can handle.
-define i32 @foo() nounwind {
-entry:
-; CHECK: @foo
-; CHECK: adrp x[[REG:[0-9]+]], _sortlist@GOTPAGE
-; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist@GOTPAGEOFF]
-; CHECK: movz x[[REG2:[0-9]+]], #20000
-; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
-; CHECK: ldr w0, [x[[REG3]]]
-; CHECK: ret
- %0 = load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4
- ret i32 %0
-}
-
-define i64 @foo2() nounwind {
-entry:
-; CHECK: @foo2
-; CHECK: adrp x[[REG:[0-9]+]], _sortlist2@GOTPAGE
-; CHECK: ldr x[[REG1:[0-9]+]], [x[[REG]], _sortlist2@GOTPAGEOFF]
-; CHECK: movz x[[REG2:[0-9]+]], #40000
-; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
-; CHECK: ldr x0, [x[[REG3]]]
-; CHECK: ret
- %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
- ret i64 %0
-}
-
-; Load an address with a ridiculously large offset.
-; rdar://12505553
-@pd2 = common global i8* null, align 8
-
-define signext i8 @foo3() nounwind ssp {
-entry:
-; CHECK: @foo3
-; CHECK: movz x[[REG:[0-9]+]], #2874, lsl #32
-; CHECK: movk x[[REG]], #29646, lsl #16
-; CHECK: movk x[[REG]], #12274
- %0 = load i8** @pd2, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 12345678901234
- %1 = load i8* %arrayidx, align 1
- ret i8 %1
-}
diff --git a/test/CodeGen/ARM64/fast-isel-alloca.ll b/test/CodeGen/ARM64/fast-isel-alloca.ll
deleted file mode 100644
index 8bbee16..0000000
--- a/test/CodeGen/ARM64/fast-isel-alloca.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; This test should cause TargetMaterializeAlloca to be invoked.
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-%struct.S1Ty = type { i64 }
-%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty }
-
-define void @takeS1(%struct.S1Ty* %V) nounwind {
-entry:
- %V.addr = alloca %struct.S1Ty*, align 8
- store %struct.S1Ty* %V, %struct.S1Ty** %V.addr, align 8
- ret void
-}
-
-define void @main() nounwind {
-entry:
-; CHECK: main
-; CHECK: mov x[[REG:[0-9]+]], sp
-; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
-; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
- %E = alloca %struct.S2Ty, align 4
- %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
- call void @takeS1(%struct.S1Ty* %B)
- ret void
-}
diff --git a/test/CodeGen/ARM64/fast-isel-br.ll b/test/CodeGen/ARM64/fast-isel-br.ll
deleted file mode 100644
index 8fd32fd..0000000
--- a/test/CodeGen/ARM64/fast-isel-br.ll
+++ /dev/null
@@ -1,155 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define void @branch1() nounwind uwtable ssp {
- %x = alloca i32, align 4
- store i32 0, i32* %x, align 4
- %1 = load i32* %x, align 4
- %2 = icmp ne i32 %1, 0
- br i1 %2, label %3, label %4
-
-; <label>:3 ; preds = %0
- br label %4
-
-; <label>:4 ; preds = %3, %0
- ret void
-}
-
-define void @branch2() nounwind uwtable ssp {
- %1 = alloca i32, align 4
- %x = alloca i32, align 4
- %y = alloca i32, align 4
- %z = alloca i32, align 4
- store i32 0, i32* %1
- store i32 1, i32* %y, align 4
- store i32 1, i32* %x, align 4
- store i32 0, i32* %z, align 4
- %2 = load i32* %x, align 4
- %3 = icmp ne i32 %2, 0
- br i1 %3, label %4, label %5
-
-; <label>:4 ; preds = %0
- store i32 0, i32* %1
- br label %14
-
-; <label>:5 ; preds = %0
- %6 = load i32* %y, align 4
- %7 = icmp ne i32 %6, 0
- br i1 %7, label %8, label %13
-
-; <label>:8 ; preds = %5
- %9 = load i32* %z, align 4
- %10 = icmp ne i32 %9, 0
- br i1 %10, label %11, label %12
-
-; <label>:11 ; preds = %8
- store i32 1, i32* %1
- br label %14
-
-; <label>:12 ; preds = %8
- store i32 0, i32* %1
- br label %14
-
-; <label>:13 ; preds = %5
- br label %14
-
-; <label>:14 ; preds = %4, %11, %12, %13
- %15 = load i32* %1
- ret void
-}
-
-define void @true_() nounwind uwtable ssp {
-; CHECK: @true_
-; CHECK: b LBB2_1
- br i1 true, label %1, label %2
-
-; <label>:1
-; CHECK: LBB2_1
- br label %2
-
-; <label>:2
- ret void
-}
-
-define void @false_() nounwind uwtable ssp {
-; CHECK: @false_
-; CHECK: b LBB3_2
- br i1 false, label %1, label %2
-
-; <label>:1
- br label %2
-
-; <label>:2
-; CHECK: LBB3_2
- ret void
-}
-
-define zeroext i8 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) {
-entry:
- %a.addr = alloca i8, align 1
- %b.addr = alloca i16, align 2
- %c.addr = alloca i32, align 4
- %d.addr = alloca i64, align 8
- store i8 %a, i8* %a.addr, align 1
- store i16 %b, i16* %b.addr, align 2
- store i32 %c, i32* %c.addr, align 4
- store i64 %d, i64* %d.addr, align 8
- %0 = load i16* %b.addr, align 2
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: b.eq LBB4_2
- %conv = trunc i16 %0 to i1
- br i1 %conv, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- call void @foo1()
- br label %if.end
-
-if.end: ; preds = %if.then, %entry
- %1 = load i32* %c.addr, align 4
-; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
-; CHECK: subs w{{[0-9]+}}, w[[REG]], #0
-; CHECK: b.eq LBB4_4
- %conv1 = trunc i32 %1 to i1
- br i1 %conv1, label %if.then3, label %if.end4
-
-if.then3: ; preds = %if.end
- call void @foo1()
- br label %if.end4
-
-if.end4: ; preds = %if.then3, %if.end
- %2 = load i64* %d.addr, align 8
-; CHECK: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
-; CHECK: b.eq LBB4_6
- %conv5 = trunc i64 %2 to i1
- br i1 %conv5, label %if.then7, label %if.end8
-
-if.then7: ; preds = %if.end4
- call void @foo1()
- br label %if.end8
-
-if.end8: ; preds = %if.then7, %if.end4
- %3 = load i8* %a.addr, align 1
- ret i8 %3
-}
-
-declare void @foo1()
-
-; rdar://15174028
-define i32 @trunc64(i64 %foo) nounwind {
-; CHECK: trunc64
-; CHECK: orr [[REG:x[0-9]+]], xzr, #0x1
-; CHECK: and [[REG2:x[0-9]+]], x0, [[REG]]
-; CHECK: mov x[[REG3:[0-9]+]], [[REG2]]
-; CHECK: and [[REG4:w[0-9]+]], w[[REG3]], #0x1
-; CHECK: subs {{w[0-9]+}}, [[REG4]], #0
-; CHECK: b.eq LBB5_2
- %a = and i64 %foo, 1
- %b = trunc i64 %a to i1
- br i1 %b, label %if.then, label %if.else
-
-if.then:
- ret i32 1
-
-if.else:
- ret i32 0
-}
diff --git a/test/CodeGen/ARM64/fast-isel-call.ll b/test/CodeGen/ARM64/fast-isel-call.ll
deleted file mode 100644
index be0ca68..0000000
--- a/test/CodeGen/ARM64/fast-isel-call.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define void @call0() nounwind {
-entry:
- ret void
-}
-
-define void @foo0() nounwind {
-entry:
-; CHECK: foo0
-; CHECK: bl _call0
- call void @call0()
- ret void
-}
-
-define i32 @call1(i32 %a) nounwind {
-entry:
- %a.addr = alloca i32, align 4
- store i32 %a, i32* %a.addr, align 4
- %tmp = load i32* %a.addr, align 4
- ret i32 %tmp
-}
-
-define i32 @foo1(i32 %a) nounwind {
-entry:
-; CHECK: foo1
-; CHECK: stur w0, [fp, #-4]
-; CHECK-NEXT: ldur w0, [fp, #-4]
-; CHECK-NEXT: bl _call1
- %a.addr = alloca i32, align 4
- store i32 %a, i32* %a.addr, align 4
- %tmp = load i32* %a.addr, align 4
- %call = call i32 @call1(i32 %tmp)
- ret i32 %call
-}
-
-define i32 @sext_(i8 %a, i16 %b) nounwind {
-entry:
-; CHECK: @sext_
-; CHECK: sxtb w0, w0
-; CHECK: sxth w1, w1
-; CHECK: bl _foo_sext_
- call void @foo_sext_(i8 signext %a, i16 signext %b)
- ret i32 0
-}
-
-declare void @foo_sext_(i8 %a, i16 %b)
-
-define i32 @zext_(i8 %a, i16 %b) nounwind {
-entry:
-; CHECK: @zext_
-; CHECK: uxtb w0, w0
-; CHECK: uxth w1, w1
- call void @foo_zext_(i8 zeroext %a, i16 zeroext %b)
- ret i32 0
-}
-
-declare void @foo_zext_(i8 %a, i16 %b)
-
-define i32 @t1(i32 %argc, i8** nocapture %argv) {
-entry:
-; CHECK: @t1
-; The last parameter will be passed on the stack as an i8.
-; CHECK: strb w{{[0-9]+}}, [sp]
-; CHECK-NEXT: bl _bar
- %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70, i8 zeroext 28, i8 zeroext 39, i8 zeroext -41)
- ret i32 0
-}
-
-declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
-
-; Test materialization of integers. The target-independent selector handles this.
-define i32 @t2() {
-entry:
-; CHECK: @t2
-; CHECK: movz x0, #0
-; CHECK: orr w1, wzr, #0xfffffff8
-; CHECK: orr w[[REG:[0-9]+]], wzr, #0x3ff
-; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x2
-; CHECK: movz w[[REG3:[0-9]+]], #0
-; CHECK: orr w[[REG4:[0-9]+]], wzr, #0x1
-; CHECK: uxth w2, w[[REG]]
-; CHECK: sxtb w3, w[[REG2]]
-; CHECK: and w4, w[[REG3]], #0x1
-; CHECK: and w5, w[[REG4]], #0x1
-; CHECK: bl _func2
- %call = call i32 @func2(i64 zeroext 0, i32 signext -8, i16 zeroext 1023, i8 signext -254, i1 zeroext 0, i1 zeroext 1)
- ret i32 0
-}
-
-declare i32 @func2(i64 zeroext, i32 signext, i16 zeroext, i8 signext, i1 zeroext, i1 zeroext)
diff --git a/test/CodeGen/ARM64/fast-isel-conversion.ll b/test/CodeGen/ARM64/fast-isel-conversion.ll
deleted file mode 100644
index 4e62e33..0000000
--- a/test/CodeGen/ARM64/fast-isel-conversion.ll
+++ /dev/null
@@ -1,416 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-;; Test various conversions.
-define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
-entry:
-; CHECK: trunc_
-; CHECK: sub sp, sp, #16
-; CHECK: strb w0, [sp, #15]
-; CHECK: strh w1, [sp, #12]
-; CHECK: str w2, [sp, #8]
-; CHECK: str x3, [sp]
-; CHECK: ldr x3, [sp]
-; CHECK: mov x0, x3
-; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: strb w0, [sp, #15]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
-; CHECK: add sp, sp, #16
-; CHECK: ret
- %a.addr = alloca i8, align 1
- %b.addr = alloca i16, align 2
- %c.addr = alloca i32, align 4
- %d.addr = alloca i64, align 8
- store i8 %a, i8* %a.addr, align 1
- store i16 %b, i16* %b.addr, align 2
- store i32 %c, i32* %c.addr, align 4
- store i64 %d, i64* %d.addr, align 8
- %tmp = load i64* %d.addr, align 8
- %conv = trunc i64 %tmp to i32
- store i32 %conv, i32* %c.addr, align 4
- %tmp1 = load i32* %c.addr, align 4
- %conv2 = trunc i32 %tmp1 to i16
- store i16 %conv2, i16* %b.addr, align 2
- %tmp3 = load i16* %b.addr, align 2
- %conv4 = trunc i16 %tmp3 to i8
- store i8 %conv4, i8* %a.addr, align 1
- %tmp5 = load i8* %a.addr, align 1
- %conv6 = zext i8 %tmp5 to i32
- ret i32 %conv6
-}
-
-define i64 @zext_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
-entry:
-; CHECK: zext_
-; CHECK: sub sp, sp, #16
-; CHECK: strb w0, [sp, #15]
-; CHECK: strh w1, [sp, #12]
-; CHECK: str w2, [sp, #8]
-; CHECK: str x3, [sp]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
-; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: uxth w0, w0
-; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: uxtw x3, w0
-; CHECK: str x3, [sp]
-; CHECK: ldr x0, [sp], #16
-; CHECK: ret
- %a.addr = alloca i8, align 1
- %b.addr = alloca i16, align 2
- %c.addr = alloca i32, align 4
- %d.addr = alloca i64, align 8
- store i8 %a, i8* %a.addr, align 1
- store i16 %b, i16* %b.addr, align 2
- store i32 %c, i32* %c.addr, align 4
- store i64 %d, i64* %d.addr, align 8
- %tmp = load i8* %a.addr, align 1
- %conv = zext i8 %tmp to i16
- store i16 %conv, i16* %b.addr, align 2
- %tmp1 = load i16* %b.addr, align 2
- %conv2 = zext i16 %tmp1 to i32
- store i32 %conv2, i32* %c.addr, align 4
- %tmp3 = load i32* %c.addr, align 4
- %conv4 = zext i32 %tmp3 to i64
- store i64 %conv4, i64* %d.addr, align 8
- %tmp5 = load i64* %d.addr, align 8
- ret i64 %tmp5
-}
-
-define i32 @zext_i1_i32(i1 zeroext %a) nounwind ssp {
-entry:
-; CHECK: @zext_i1_i32
-; CHECK: and w0, w0, #0x1
- %conv = zext i1 %a to i32
- ret i32 %conv;
-}
-
-define i64 @zext_i1_i64(i1 zeroext %a) nounwind ssp {
-entry:
-; CHECK: @zext_i1_i64
-; CHECK: and w0, w0, #0x1
- %conv = zext i1 %a to i64
- ret i64 %conv;
-}
-
-define i64 @sext_(i8 signext %a, i16 signext %b, i32 %c, i64 %d) nounwind ssp {
-entry:
-; CHECK: sext_
-; CHECK: sub sp, sp, #16
-; CHECK: strb w0, [sp, #15]
-; CHECK: strh w1, [sp, #12]
-; CHECK: str w2, [sp, #8]
-; CHECK: str x3, [sp]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: sxtb w0, w0
-; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: sxth w0, w0
-; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: sxtw x3, w0
-; CHECK: str x3, [sp]
-; CHECK: ldr x0, [sp], #16
-; CHECK: ret
- %a.addr = alloca i8, align 1
- %b.addr = alloca i16, align 2
- %c.addr = alloca i32, align 4
- %d.addr = alloca i64, align 8
- store i8 %a, i8* %a.addr, align 1
- store i16 %b, i16* %b.addr, align 2
- store i32 %c, i32* %c.addr, align 4
- store i64 %d, i64* %d.addr, align 8
- %tmp = load i8* %a.addr, align 1
- %conv = sext i8 %tmp to i16
- store i16 %conv, i16* %b.addr, align 2
- %tmp1 = load i16* %b.addr, align 2
- %conv2 = sext i16 %tmp1 to i32
- store i32 %conv2, i32* %c.addr, align 4
- %tmp3 = load i32* %c.addr, align 4
- %conv4 = sext i32 %tmp3 to i64
- store i64 %conv4, i64* %d.addr, align 8
- %tmp5 = load i64* %d.addr, align 8
- ret i64 %tmp5
-}
-
-; Test sext i8 to i64
-define i64 @sext_2(i8 signext %a) nounwind ssp {
-entry:
-; CHECK: sext_2
-; CHECK: sxtb x0, w0
- %conv = sext i8 %a to i64
- ret i64 %conv
-}
-
-; Test sext i1 to i32
-define i32 @sext_i1_i32(i1 signext %a) nounwind ssp {
-entry:
-; CHECK: sext_i1_i32
-; CHECK: sbfm w0, w0, #0, #0
- %conv = sext i1 %a to i32
- ret i32 %conv
-}
-
-; Test sext i1 to i16
-define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
-entry:
-; CHECK: sext_i1_i16
-; CHECK: sbfm w0, w0, #0, #0
- %conv = sext i1 %a to i16
- ret i16 %conv
-}
-
-; Test sext i1 to i8
-define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
-entry:
-; CHECK: sext_i1_i8
-; CHECK: sbfm w0, w0, #0, #0
- %conv = sext i1 %a to i8
- ret i8 %conv
-}
-
-; Test fpext
-define double @fpext_(float %a) nounwind ssp {
-entry:
-; CHECK: fpext_
-; CHECK: fcvt d0, s0
- %conv = fpext float %a to double
- ret double %conv
-}
-
-; Test fptrunc
-define float @fptrunc_(double %a) nounwind ssp {
-entry:
-; CHECK: fptrunc_
-; CHECK: fcvt s0, d0
- %conv = fptrunc double %a to float
- ret float %conv
-}
-
-; Test fptosi
-define i32 @fptosi_ws(float %a) nounwind ssp {
-entry:
-; CHECK: fptosi_ws
-; CHECK: fcvtzs w0, s0
- %conv = fptosi float %a to i32
- ret i32 %conv
-}
-
-; Test fptosi
-define i32 @fptosi_wd(double %a) nounwind ssp {
-entry:
-; CHECK: fptosi_wd
-; CHECK: fcvtzs w0, d0
- %conv = fptosi double %a to i32
- ret i32 %conv
-}
-
-; Test fptoui
-define i32 @fptoui_ws(float %a) nounwind ssp {
-entry:
-; CHECK: fptoui_ws
-; CHECK: fcvtzu w0, s0
- %conv = fptoui float %a to i32
- ret i32 %conv
-}
-
-; Test fptoui
-define i32 @fptoui_wd(double %a) nounwind ssp {
-entry:
-; CHECK: fptoui_wd
-; CHECK: fcvtzu w0, d0
- %conv = fptoui double %a to i32
- ret i32 %conv
-}
-
-; Test sitofp
-define float @sitofp_sw_i1(i1 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_sw_i1
-; CHECK: sbfm w0, w0, #0, #0
-; CHECK: scvtf s0, w0
- %conv = sitofp i1 %a to float
- ret float %conv
-}
-
-; Test sitofp
-define float @sitofp_sw_i8(i8 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_sw_i8
-; CHECK: sxtb w0, w0
-; CHECK: scvtf s0, w0
- %conv = sitofp i8 %a to float
- ret float %conv
-}
-
-; Test sitofp
-define float @sitofp_sw_i16(i16 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_sw_i16
-; CHECK: sxth w0, w0
-; CHECK: scvtf s0, w0
- %conv = sitofp i16 %a to float
- ret float %conv
-}
-
-; Test sitofp
-define float @sitofp_sw(i32 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_sw
-; CHECK: scvtf s0, w0
- %conv = sitofp i32 %a to float
- ret float %conv
-}
-
-; Test sitofp
-define float @sitofp_sx(i64 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_sx
-; CHECK: scvtf s0, x0
- %conv = sitofp i64 %a to float
- ret float %conv
-}
-
-; Test sitofp
-define double @sitofp_dw(i32 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_dw
-; CHECK: scvtf d0, w0
- %conv = sitofp i32 %a to double
- ret double %conv
-}
-
-; Test sitofp
-define double @sitofp_dx(i64 %a) nounwind ssp {
-entry:
-; CHECK: sitofp_dx
-; CHECK: scvtf d0, x0
- %conv = sitofp i64 %a to double
- ret double %conv
-}
-
-; Test uitofp
-define float @uitofp_sw_i1(i1 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_sw_i1
-; CHECK: and w0, w0, #0x1
-; CHECK: ucvtf s0, w0
- %conv = uitofp i1 %a to float
- ret float %conv
-}
-
-; Test uitofp
-define float @uitofp_sw_i8(i8 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_sw_i8
-; CHECK: uxtb w0, w0
-; CHECK: ucvtf s0, w0
- %conv = uitofp i8 %a to float
- ret float %conv
-}
-
-; Test uitofp
-define float @uitofp_sw_i16(i16 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_sw_i16
-; CHECK: uxth w0, w0
-; CHECK: ucvtf s0, w0
- %conv = uitofp i16 %a to float
- ret float %conv
-}
-
-; Test uitofp
-define float @uitofp_sw(i32 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_sw
-; CHECK: ucvtf s0, w0
- %conv = uitofp i32 %a to float
- ret float %conv
-}
-
-; Test uitofp
-define float @uitofp_sx(i64 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_sx
-; CHECK: ucvtf s0, x0
- %conv = uitofp i64 %a to float
- ret float %conv
-}
-
-; Test uitofp
-define double @uitofp_dw(i32 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_dw
-; CHECK: ucvtf d0, w0
- %conv = uitofp i32 %a to double
- ret double %conv
-}
-
-; Test uitofp
-define double @uitofp_dx(i64 %a) nounwind ssp {
-entry:
-; CHECK: uitofp_dx
-; CHECK: ucvtf d0, x0
- %conv = uitofp i64 %a to double
- ret double %conv
-}
-
-define i32 @i64_trunc_i32(i64 %a) nounwind ssp {
-entry:
-; CHECK: i64_trunc_i32
-; CHECK: mov x1, x0
- %conv = trunc i64 %a to i32
- ret i32 %conv
-}
-
-define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
-entry:
-; CHECK: i64_trunc_i16
-; CHECK: mov x[[REG:[0-9]+]], x0
-; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xffff
-; CHECK: uxth w0, [[REG2]]
- %conv = trunc i64 %a to i16
- ret i16 %conv
-}
-
-define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
-entry:
-; CHECK: i64_trunc_i8
-; CHECK: mov x[[REG:[0-9]+]], x0
-; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xff
-; CHECK: uxtb w0, [[REG2]]
- %conv = trunc i64 %a to i8
- ret i8 %conv
-}
-
-define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
-entry:
-; CHECK: i64_trunc_i1
-; CHECK: mov x[[REG:[0-9]+]], x0
-; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0x1
-; CHECK: and w0, [[REG2]], #0x1
- %conv = trunc i64 %a to i1
- ret i1 %conv
-}
-
-; rdar://15101939
-define void @stack_trunc() nounwind {
-; CHECK: stack_trunc
-; CHECK: sub sp, sp, #16
-; CHECK: ldr [[REG:x[0-9]+]], [sp]
-; CHECK: mov x[[REG2:[0-9]+]], [[REG]]
-; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0xff
-; CHECK: strb [[REG3]], [sp, #15]
-; CHECK: add sp, sp, #16
- %a = alloca i8, align 1
- %b = alloca i64, align 8
- %c = load i64* %b, align 8
- %d = trunc i64 %c to i8
- store i8 %d, i8* %a, align 1
- ret void
-}
diff --git a/test/CodeGen/ARM64/fast-isel-fcmp.ll b/test/CodeGen/ARM64/fast-isel-fcmp.ll
deleted file mode 100644
index cf71fab..0000000
--- a/test/CodeGen/ARM64/fast-isel-fcmp.ll
+++ /dev/null
@@ -1,146 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define zeroext i1 @fcmp_float1(float %a) nounwind ssp {
-entry:
-; CHECK: @fcmp_float1
-; CHECK: fcmp s0, #0.0
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
- %cmp = fcmp une float %a, 0.000000e+00
- ret i1 %cmp
-}
-
-define zeroext i1 @fcmp_float2(float %a, float %b) nounwind ssp {
-entry:
-; CHECK: @fcmp_float2
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
- %cmp = fcmp une float %a, %b
- ret i1 %cmp
-}
-
-define zeroext i1 @fcmp_double1(double %a) nounwind ssp {
-entry:
-; CHECK: @fcmp_double1
-; CHECK: fcmp d0, #0.0
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
- %cmp = fcmp une double %a, 0.000000e+00
- ret i1 %cmp
-}
-
-define zeroext i1 @fcmp_double2(double %a, double %b) nounwind ssp {
-entry:
-; CHECK: @fcmp_double2
-; CHECK: fcmp d0, d1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
- %cmp = fcmp une double %a, %b
- ret i1 %cmp
-}
-
-; Check each fcmp condition
-define float @fcmp_oeq(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_oeq
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ne
- %cmp = fcmp oeq float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ogt(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ogt
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, le
- %cmp = fcmp ogt float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_oge(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_oge
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, lt
- %cmp = fcmp oge float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_olt(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_olt
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, pl
- %cmp = fcmp olt float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ole(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ole
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, hi
- %cmp = fcmp ole float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ord(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ord
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, vs
- %cmp = fcmp ord float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_uno(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_uno
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, vc
- %cmp = fcmp uno float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ugt(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ugt
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ls
- %cmp = fcmp ugt float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_uge(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_uge
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, mi
- %cmp = fcmp uge float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ult(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ult
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ge
- %cmp = fcmp ult float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ule(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ule
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, gt
- %cmp = fcmp ule float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_une(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_une
-; CHECK: fcmp s0, s1
-; CHECK: csinc {{w[0-9]+}}, wzr, wzr, eq
- %cmp = fcmp une float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
diff --git a/test/CodeGen/ARM64/fast-isel-gv.ll b/test/CodeGen/ARM64/fast-isel-gv.ll
deleted file mode 100644
index cb3df14..0000000
--- a/test/CodeGen/ARM64/fast-isel-gv.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-; Test load/store of a global value via the global offset table.
-@seed = common global i64 0, align 8
-
-define void @Initrand() nounwind {
-entry:
-; CHECK: @Initrand
-; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
-; CHECK: str x{{[0-9]+}}, [x[[REG2]]]
- store i64 74755, i64* @seed, align 8
- ret void
-}
-
-define i32 @Rand() nounwind {
-entry:
-; CHECK: @Rand
-; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
-; CHECK: movz x[[REG3:[0-9]+]], #1309
-; CHECK: ldr x[[REG4:[0-9]+]], [x[[REG2]]]
-; CHECK: mul x[[REG5:[0-9]+]], x[[REG4]], x[[REG3]]
-; CHECK: movz x[[REG6:[0-9]+]], #13849
-; CHECK: add x[[REG7:[0-9]+]], x[[REG5]], x[[REG6]]
-; CHECK: orr x[[REG8:[0-9]+]], xzr, #0xffff
-; CHECK: and x[[REG9:[0-9]+]], x[[REG7]], x[[REG8]]
-; CHECK: str x[[REG9]], [x[[REG]]]
-; CHECK: ldr x{{[0-9]+}}, [x[[REG]]]
- %0 = load i64* @seed, align 8
- %mul = mul nsw i64 %0, 1309
- %add = add nsw i64 %mul, 13849
- %and = and i64 %add, 65535
- store i64 %and, i64* @seed, align 8
- %1 = load i64* @seed, align 8
- %conv = trunc i64 %1 to i32
- ret i32 %conv
-}
diff --git a/test/CodeGen/ARM64/fast-isel-icmp.ll b/test/CodeGen/ARM64/fast-isel-icmp.ll
deleted file mode 100644
index 22af542..0000000
--- a/test/CodeGen/ARM64/fast-isel-icmp.ll
+++ /dev/null
@@ -1,214 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define i32 @icmp_eq_imm(i32 %a) nounwind ssp {
-entry:
-; CHECK: icmp_eq_imm
-; CHECK: cmp w0, #31
-; CHECK: csinc w0, wzr, wzr, ne
- %cmp = icmp eq i32 %a, 31
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_eq_neg_imm(i32 %a) nounwind ssp {
-entry:
-; CHECK: icmp_eq_neg_imm
-; CHECK: cmn w0, #7
-; CHECK: csinc w0, wzr, wzr, ne
- %cmp = icmp eq i32 %a, -7
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_eq(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_eq
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, ne
- %cmp = icmp eq i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_ne(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_ne
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, eq
- %cmp = icmp ne i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_ugt(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_ugt
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, ls
- %cmp = icmp ugt i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_uge(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_uge
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, cc
- %cmp = icmp uge i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_ult(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_ult
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, cs
- %cmp = icmp ult i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_ule(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_ule
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, hi
- %cmp = icmp ule i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_sgt(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_sgt
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, le
- %cmp = icmp sgt i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_sge(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_sge
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, lt
- %cmp = icmp sge i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_slt(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_slt
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, ge
- %cmp = icmp slt i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_sle(i32 %a, i32 %b) nounwind ssp {
-entry:
-; CHECK: icmp_sle
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, gt
- %cmp = icmp sle i32 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define i32 @icmp_i64(i64 %a, i64 %b) nounwind ssp {
-entry:
-; CHECK: icmp_i64
-; CHECK: cmp x0, x1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, gt
- %cmp = icmp sle i64 %a, %b
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
-
-define zeroext i1 @icmp_eq_i16(i16 %a, i16 %b) nounwind ssp {
-entry:
-; CHECK: icmp_eq_i16
-; CHECK: sxth w0, w0
-; CHECK: sxth w1, w1
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, ne
- %cmp = icmp eq i16 %a, %b
- ret i1 %cmp
-}
-
-define zeroext i1 @icmp_eq_i8(i8 %a, i8 %b) nounwind ssp {
-entry:
-; CHECK: icmp_eq_i8
-; CHECK: sxtb w0, w0
-; CHECK: sxtb w1, w1
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, ne
- %cmp = icmp eq i8 %a, %b
- ret i1 %cmp
-}
-
-define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
-entry:
-; CHECK: icmp_i16_unsigned
-; CHECK: uxth w0, w0
-; CHECK: uxth w1, w1
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, cs
- %cmp = icmp ult i16 %a, %b
- %conv2 = zext i1 %cmp to i32
- ret i32 %conv2
-}
-
-define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
-entry:
-; CHECK: @icmp_i8_signed
-; CHECK: sxtb w0, w0
-; CHECK: sxtb w1, w1
-; CHECK: cmp w0, w1
-; CHECK: csinc w0, wzr, wzr, le
- %cmp = icmp sgt i8 %a, %b
- %conv2 = zext i1 %cmp to i32
- ret i32 %conv2
-}
-
-
-define i32 @icmp_i16_signed_const(i16 %a) nounwind {
-entry:
-; CHECK: icmp_i16_signed_const
-; CHECK: sxth w0, w0
-; CHECK: cmn w0, #233
-; CHECK: csinc w0, wzr, wzr, ge
-; CHECK: and w0, w0, #0x1
- %cmp = icmp slt i16 %a, -233
- %conv2 = zext i1 %cmp to i32
- ret i32 %conv2
-}
-
-define i32 @icmp_i8_signed_const(i8 %a) nounwind {
-entry:
-; CHECK: icmp_i8_signed_const
-; CHECK: sxtb w0, w0
-; CHECK: cmp w0, #124
-; CHECK: csinc w0, wzr, wzr, le
-; CHECK: and w0, w0, #0x1
- %cmp = icmp sgt i8 %a, 124
- %conv2 = zext i1 %cmp to i32
- ret i32 %conv2
-}
-
-define i32 @icmp_i1_unsigned_const(i1 %a) nounwind {
-entry:
-; CHECK: icmp_i1_unsigned_const
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: csinc w0, wzr, wzr, cs
-; CHECK: and w0, w0, #0x1
- %cmp = icmp ult i1 %a, 0
- %conv2 = zext i1 %cmp to i32
- ret i32 %conv2
-}
diff --git a/test/CodeGen/ARM64/fast-isel-intrinsic.ll b/test/CodeGen/ARM64/fast-isel-intrinsic.ll
deleted file mode 100644
index 6443d82..0000000
--- a/test/CodeGen/ARM64/fast-isel-intrinsic.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios | FileCheck %s --check-prefix=ARM64
-
-@message = global [80 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 16
-@temp = common global [80 x i8] zeroinitializer, align 16
-
-define void @t1() {
-; ARM64: t1
-; ARM64: adrp x8, _message@PAGE
-; ARM64: add x0, x8, _message@PAGEOFF
-; ARM64: movz w9, #0
-; ARM64: movz x2, #80
-; ARM64: uxtb w1, w9
-; ARM64: bl _memset
- call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
- ret void
-}
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-
-define void @t2() {
-; ARM64: t2
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x8, _message@PAGE
-; ARM64: add x1, x8, _message@PAGEOFF
-; ARM64: movz x2, #80
-; ARM64: bl _memcpy
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
-
-define void @t3() {
-; ARM64: t3
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x0, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x8, _message@PAGE
-; ARM64: add x1, x8, _message@PAGEOFF
-; ARM64: movz x2, #20
-; ARM64: bl _memmove
- call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
- ret void
-}
-
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
-
-define void @t4() {
-; ARM64: t4
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x9, _message@PAGE
-; ARM64: add x9, x9, _message@PAGEOFF
-; ARM64: ldr x10, [x9]
-; ARM64: str x10, [x8]
-; ARM64: ldr x10, [x9, #8]
-; ARM64: str x10, [x8, #8]
-; ARM64: ldrb w11, [x9, #16]
-; ARM64: strb w11, [x8, #16]
-; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false)
- ret void
-}
-
-define void @t5() {
-; ARM64: t5
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x9, _message@PAGE
-; ARM64: add x9, x9, _message@PAGEOFF
-; ARM64: ldr x10, [x9]
-; ARM64: str x10, [x8]
-; ARM64: ldr x10, [x9, #8]
-; ARM64: str x10, [x8, #8]
-; ARM64: ldrb w11, [x9, #16]
-; ARM64: strb w11, [x8, #16]
-; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false)
- ret void
-}
-
-define void @t6() {
-; ARM64: t6
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x9, _message@PAGE
-; ARM64: add x9, x9, _message@PAGEOFF
-; ARM64: ldr w10, [x9]
-; ARM64: str w10, [x8]
-; ARM64: ldr w10, [x9, #4]
-; ARM64: str w10, [x8, #4]
-; ARM64: ldrb w10, [x9, #8]
-; ARM64: strb w10, [x8, #8]
-; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false)
- ret void
-}
-
-define void @t7() {
-; ARM64: t7
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x9, _message@PAGE
-; ARM64: add x9, x9, _message@PAGEOFF
-; ARM64: ldrh w10, [x9]
-; ARM64: strh w10, [x8]
-; ARM64: ldrh w10, [x9, #2]
-; ARM64: strh w10, [x8, #2]
-; ARM64: ldrh w10, [x9, #4]
-; ARM64: strh w10, [x8, #4]
-; ARM64: ldrb w10, [x9, #6]
-; ARM64: strb w10, [x8, #6]
-; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false)
- ret void
-}
-
-define void @t8() {
-; ARM64: t8
-; ARM64: adrp x8, _temp@GOTPAGE
-; ARM64: ldr x8, [x8, _temp@GOTPAGEOFF]
-; ARM64: adrp x9, _message@PAGE
-; ARM64: add x9, x9, _message@PAGEOFF
-; ARM64: ldrb w10, [x9]
-; ARM64: strb w10, [x8]
-; ARM64: ldrb w10, [x9, #1]
-; ARM64: strb w10, [x8, #1]
-; ARM64: ldrb w10, [x9, #2]
-; ARM64: strb w10, [x8, #2]
-; ARM64: ldrb w10, [x9, #3]
-; ARM64: strb w10, [x8, #3]
-; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
- ret void
-}
diff --git a/test/CodeGen/ARM64/fast-isel-materialize.ll b/test/CodeGen/ARM64/fast-isel-materialize.ll
deleted file mode 100644
index fa2daf7..0000000
--- a/test/CodeGen/ARM64/fast-isel-materialize.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-; Materialize using fmov
-define void @float_(float* %value) {
-; CHECK: @float_
-; CHECK: fmov s0, #1.250000e+00
- store float 1.250000e+00, float* %value, align 4
- ret void
-}
-
-define void @double_(double* %value) {
-; CHECK: @double_
-; CHECK: fmov d0, #1.250000e+00
- store double 1.250000e+00, double* %value, align 8
- ret void
-}
-
-; Materialize from constant pool
-define float @float_cp() {
-; CHECK: @float_cp
- ret float 0x400921FB60000000
-}
-
-define double @double_cp() {
-; CHECK: @double_cp
- ret double 0x400921FB54442D18
-}
diff --git a/test/CodeGen/ARM64/fast-isel-noconvert.ll b/test/CodeGen/ARM64/fast-isel-noconvert.ll
deleted file mode 100644
index 3517970..0000000
--- a/test/CodeGen/ARM64/fast-isel-noconvert.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios -O0 %s -o - | FileCheck %s
-
-; Fast-isel can't do vector conversions yet, but it was emitting some highly
-; suspect UCVTFUWDri MachineInstrs.
-define <4 x float> @test_uitofp(<4 x i32> %in) {
-; CHECK-LABEL: test_uitofp:
-; CHECK: ucvtf.4s v0, v0
-
- %res = uitofp <4 x i32> %in to <4 x float>
- ret <4 x float> %res
-}
-
-define <2 x double> @test_sitofp(<2 x i32> %in) {
-; CHECK-LABEL: test_sitofp:
-; CHECK: sshll.2d [[EXT:v[0-9]+]], v0, #0
-; CHECK: scvtf.2d v0, [[EXT]]
-
- %res = sitofp <2 x i32> %in to <2 x double>
- ret <2 x double> %res
-}
-
-define <2 x i32> @test_fptoui(<2 x float> %in) {
-; CHECK-LABEL: test_fptoui:
-; CHECK: fcvtzu.2s v0, v0
-
- %res = fptoui <2 x float> %in to <2 x i32>
- ret <2 x i32> %res
-}
-
-define <2 x i64> @test_fptosi(<2 x double> %in) {
-; CHECK-LABEL: test_fptosi:
-; CHECK: fcvtzs.2d v0, v0
-
- %res = fptosi <2 x double> %in to <2 x i64>
- ret <2 x i64> %res
-} \ No newline at end of file
diff --git a/test/CodeGen/ARM64/fast-isel-rem.ll b/test/CodeGen/ARM64/fast-isel-rem.ll
deleted file mode 100644
index 0c68401..0000000
--- a/test/CodeGen/ARM64/fast-isel-rem.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define i32 @t1(i32 %a, i32 %b) {
-; CHECK: @t1
-; CHECK: sdiv w2, w0, w1
-; CHECK: msub w2, w2, w1, w0
- %1 = srem i32 %a, %b
- ret i32 %1
-}
-
-define i64 @t2(i64 %a, i64 %b) {
-; CHECK: @t2
-; CHECK: sdiv x2, x0, x1
-; CHECK: msub x2, x2, x1, x0
- %1 = srem i64 %a, %b
- ret i64 %1
-}
-
-define i32 @t3(i32 %a, i32 %b) {
-; CHECK: @t3
-; CHECK: udiv w2, w0, w1
-; CHECK: msub w2, w2, w1, w0
- %1 = urem i32 %a, %b
- ret i32 %1
-}
-
-define i64 @t4(i64 %a, i64 %b) {
-; CHECK: @t4
-; CHECK: udiv x2, x0, x1
-; CHECK: msub x2, x2, x1, x0
- %1 = urem i64 %a, %b
- ret i64 %1
-}
diff --git a/test/CodeGen/ARM64/fast-isel.ll b/test/CodeGen/ARM64/fast-isel.ll
deleted file mode 100644
index ba718d3..0000000
--- a/test/CodeGen/ARM64/fast-isel.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define void @t0(i32 %a) nounwind {
-entry:
-; CHECK: t0
-; CHECK: str {{w[0-9]+}}, [sp, #12]
-; CHECK-NEXT: ldr [[REGISTER:w[0-9]+]], [sp, #12]
-; CHECK-NEXT: str [[REGISTER]], [sp, #12]
-; CHECK: ret
- %a.addr = alloca i32, align 4
- store i32 %a, i32* %a.addr
- %tmp = load i32* %a.addr
- store i32 %tmp, i32* %a.addr
- ret void
-}
-
-define void @t1(i64 %a) nounwind {
-; CHECK: t1
-; CHECK: str {{x[0-9]+}}, [sp, #8]
-; CHECK-NEXT: ldr [[REGISTER:x[0-9]+]], [sp, #8]
-; CHECK-NEXT: str [[REGISTER]], [sp, #8]
-; CHECK: ret
- %a.addr = alloca i64, align 4
- store i64 %a, i64* %a.addr
- %tmp = load i64* %a.addr
- store i64 %tmp, i64* %a.addr
- ret void
-}
-
-define zeroext i1 @i1(i1 %a) nounwind {
-entry:
-; CHECK: @i1
-; CHECK: and w0, w0, #0x1
-; CHECK: strb w0, [sp, #15]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: and w0, w0, #0x1
-; CHECK: and w0, w0, #0x1
-; CHECK: add sp, sp, #16
-; CHECK: ret
- %a.addr = alloca i1, align 1
- store i1 %a, i1* %a.addr, align 1
- %0 = load i1* %a.addr, align 1
- ret i1 %0
-}
-
-define i32 @t2(i32 *%ptr) nounwind {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: ldur w0, [x0, #-4]
-; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -1
- %1 = load i32* %0, align 4
- ret i32 %1
-}
-
-define i32 @t3(i32 *%ptr) nounwind {
-entry:
-; CHECK-LABEL: t3:
-; CHECK: ldur w0, [x0, #-256]
-; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -64
- %1 = load i32* %0, align 4
- ret i32 %1
-}
-
-define void @t4(i32 *%ptr) nounwind {
-entry:
-; CHECK-LABEL: t4:
-; CHECK: movz w8, #0
-; CHECK: stur w8, [x0, #-4]
-; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -1
- store i32 0, i32* %0, align 4
- ret void
-}
-
-define void @t5(i32 *%ptr) nounwind {
-entry:
-; CHECK-LABEL: t5:
-; CHECK: movz w8, #0
-; CHECK: stur w8, [x0, #-256]
-; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -64
- store i32 0, i32* %0, align 4
- ret void
-}
-
-define void @t6() nounwind {
-; CHECK: t6
-; CHECK: brk #1
- tail call void @llvm.trap()
- ret void
-}
-
-declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM64/fcmp-opt.ll b/test/CodeGen/ARM64/fcmp-opt.ll
deleted file mode 100644
index 17412dd..0000000
--- a/test/CodeGen/ARM64/fcmp-opt.ll
+++ /dev/null
@@ -1,173 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
-; rdar://10263824
-
-define i1 @fcmp_float1(float %a) nounwind ssp {
-entry:
-; CHECK: @fcmp_float1
-; CHECK: fcmp s0, #0.0
-; CHECK: csinc w0, wzr, wzr, eq
- %cmp = fcmp une float %a, 0.000000e+00
- ret i1 %cmp
-}
-
-define i1 @fcmp_float2(float %a, float %b) nounwind ssp {
-entry:
-; CHECK: @fcmp_float2
-; CHECK: fcmp s0, s1
-; CHECK: csinc w0, wzr, wzr, eq
- %cmp = fcmp une float %a, %b
- ret i1 %cmp
-}
-
-define i1 @fcmp_double1(double %a) nounwind ssp {
-entry:
-; CHECK: @fcmp_double1
-; CHECK: fcmp d0, #0.0
-; CHECK: csinc w0, wzr, wzr, eq
- %cmp = fcmp une double %a, 0.000000e+00
- ret i1 %cmp
-}
-
-define i1 @fcmp_double2(double %a, double %b) nounwind ssp {
-entry:
-; CHECK: @fcmp_double2
-; CHECK: fcmp d0, d1
-; CHECK: csinc w0, wzr, wzr, eq
- %cmp = fcmp une double %a, %b
- ret i1 %cmp
-}
-
-; Check each fcmp condition
-define float @fcmp_oeq(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_oeq
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ne
- %cmp = fcmp oeq float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ogt(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ogt
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, le
- %cmp = fcmp ogt float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_oge(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_oge
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, lt
- %cmp = fcmp oge float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_olt(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_olt
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, pl
- %cmp = fcmp olt float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ole(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ole
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, hi
- %cmp = fcmp ole float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ord(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ord
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, vs
- %cmp = fcmp ord float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_uno(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_uno
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, vc
- %cmp = fcmp uno float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ugt(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ugt
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ls
- %cmp = fcmp ugt float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_uge(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_uge
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, mi
- %cmp = fcmp uge float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ult(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ult
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, ge
- %cmp = fcmp ult float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_ule(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ule
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, gt
- %cmp = fcmp ule float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-define float @fcmp_une(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_une
-; CHECK: fcmp s0, s1
-; CHECK: csinc w{{[0-9]+}}, wzr, wzr, eq
- %cmp = fcmp une float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-; Possible opportunity for improvement. See comment in
-; ARM64TargetLowering::LowerSETCC()
-define float @fcmp_one(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_one
-; fcmp s0, s1
-; orr w0, wzr, #0x1
-; csel w1, w0, wzr, mi
-; csel w0, w0, wzr, gt
- %cmp = fcmp one float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
-
-; Possible opportunity for improvement. See comment in
-; ARM64TargetLowering::LowerSETCC()
-define float @fcmp_ueq(float %a, float %b) nounwind ssp {
-; CHECK: @fcmp_ueq
-; CHECK: fcmp s0, s1
-; orr w0, wzr, #0x1
-; CHECK: csel [[REG1:w[0-9]]], [[REG2:w[0-9]+]], wzr, eq
-; CHECK: csel {{w[0-9]+}}, [[REG2]], [[REG1]], vs
- %cmp = fcmp ueq float %a, %b
- %conv = uitofp i1 %cmp to float
- ret float %conv
-}
diff --git a/test/CodeGen/ARM64/fcopysign.ll b/test/CodeGen/ARM64/fcopysign.ll
deleted file mode 100644
index 094ce7a..0000000
--- a/test/CodeGen/ARM64/fcopysign.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-
-; rdar://9332258
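-;
-; movi.4s vN, #128, lsl #24 materializes the 0x80000000 sign-bit mask per
-; lane, and bit.16b then copies just the sign bit of the second operand into
-; the first, implementing copysign without a GPR round trip.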
-
-define float @test1(float %x, float %y) nounwind {
-entry:
-; CHECK-LABEL: test1:
-; CHECK: movi.4s v2, #128, lsl #24
-; CHECK: bit.16b v0, v1, v2
- %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
- ret float %0
-}
-
-define double @test2(double %x, double %y) nounwind {
-entry:
-; CHECK-LABEL: test2:
-; CHECK: movi.2d v2, #0
-; CHECK: fneg.2d v2, v2
-; CHECK: bit.16b v0, v1, v2
- %0 = tail call double @copysign(double %x, double %y) nounwind readnone
- ret double %0
-}
-
-; rdar://9545768
-define double @test3(double %a, float %b, float %c) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: fcvt d1, s1
-; CHECK: fneg.2d v2, v{{[0-9]+}}
-; CHECK: bit.16b v0, v1, v2
- %tmp1 = fadd float %b, %c
- %tmp2 = fpext float %tmp1 to double
- %tmp = tail call double @copysign( double %a, double %tmp2 ) nounwind readnone
- ret double %tmp
-}
-
-define float @test4() nounwind {
-entry:
-; CHECK-LABEL: test4:
-; CHECK: fcvt s0, d0
-; CHECK: movi.4s v[[CONST:[0-9]+]], #128, lsl #24
-; CHECK: bit.16b v{{[0-9]+}}, v0, v[[CONST]]
- %0 = tail call double (...)* @bar() nounwind
- %1 = fptrunc double %0 to float
- %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
- %3 = fadd float %1, %2
- ret float %3
-}
-
-declare double @bar(...)
-declare double @copysign(double, double) nounwind readnone
-declare float @copysignf(float, float) nounwind readnone
diff --git a/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
deleted file mode 100644
index 77981f2..0000000
--- a/test/CodeGen/ARM64/fixed-point-scalar-cvt-dagcombine.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-; DAGCombine to transform a conversion of an extract_vector_elt to an
-; extract_vector_elt of a conversion, which saves a round trip of copies
-; of the value to a GPR and back to an FPR.
-; rdar://11855286
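-; The "#9" on scvtf.2d is the fixed-point scale: it matches the i32 9 passed
-; to @llvm.arm64.neon.vcvtfxs2fp, i.e. a signed fixed-point value with 9
-; fractional bits being converted to floating point.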
-define double @foo0(<2 x i64> %a) nounwind {
-; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9
-; CHECK-NEXT: ins.d v0[0], [[REG]][1]
- %vecext = extractelement <2 x i64> %a, i32 1
- %fcvt_n = tail call double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
- ret double %fcvt_n
-}
-
-declare double @llvm.arm64.neon.vcvtfxs2fp.f64.i64(i64, i32) nounwind readnone
diff --git a/test/CodeGen/ARM64/fmadd.ll b/test/CodeGen/ARM64/fmadd.ll
deleted file mode 100644
index d00aaef..0000000
--- a/test/CodeGen/ARM64/fmadd.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: llc -march=arm64 < %s | FileCheck %s
-
-define float @fma32(float %a, float %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fma32:
-; CHECK: fmadd
- %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c)
- ret float %0
-}
-
-define float @fnma32(float %a, float %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fnma32:
-; CHECK: fnmadd
- %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c)
- %mul = fmul float %0, -1.000000e+00
- ret float %mul
-}
-
-define float @fms32(float %a, float %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fms32:
-; CHECK: fmsub
- %mul = fmul float %b, -1.000000e+00
- %0 = tail call float @llvm.fma.f32(float %a, float %mul, float %c)
- ret float %0
-}
-
-define float @fms32_com(float %a, float %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fms32_com:
-; CHECK: fmsub
- %mul = fmul float %b, -1.000000e+00
- %0 = tail call float @llvm.fma.f32(float %mul, float %a, float %c)
- ret float %0
-}
-
-define float @fnms32(float %a, float %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fnms32:
-; CHECK: fnmsub
- %mul = fmul float %c, -1.000000e+00
- %0 = tail call float @llvm.fma.f32(float %a, float %b, float %mul)
- ret float %0
-}
-
-define double @fma64(double %a, double %b, double %c) nounwind readnone ssp {
-; CHECK-LABEL: fma64:
-; CHECK: fmadd
-entry:
- %0 = tail call double @llvm.fma.f64(double %a, double %b, double %c)
- ret double %0
-}
-
-define double @fnma64(double %a, double %b, double %c) nounwind readnone ssp {
-; CHECK-LABEL: fnma64:
-; CHECK: fnmadd
-entry:
- %0 = tail call double @llvm.fma.f64(double %a, double %b, double %c)
- %mul = fmul double %0, -1.000000e+00
- ret double %mul
-}
-
-define double @fms64(double %a, double %b, double %c) nounwind readnone ssp {
-; CHECK-LABEL: fms64:
-; CHECK: fmsub
-entry:
- %mul = fmul double %b, -1.000000e+00
- %0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)
- ret double %0
-}
-
-define double @fms64_com(double %a, double %b, double %c) nounwind readnone ssp {
-; CHECK-LABEL: fms64_com:
-; CHECK: fmsub
-entry:
- %mul = fmul double %b, -1.000000e+00
- %0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c)
- ret double %0
-}
-
-define double @fnms64(double %a, double %b, double %c) nounwind readnone ssp {
-; CHECK-LABEL: fnms64:
-; CHECK: fnmsub
-entry:
- %mul = fmul double %c, -1.000000e+00
- %0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)
- ret double %0
-}
-
-declare float @llvm.fma.f32(float, float, float) nounwind readnone
-declare double @llvm.fma.f64(double, double, double) nounwind readnone
diff --git a/test/CodeGen/ARM64/fmax.ll b/test/CodeGen/ARM64/fmax.ll
deleted file mode 100644
index 53ecf86..0000000
--- a/test/CodeGen/ARM64/fmax.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
-
-define double @test_direct(float %in) #1 {
-entry:
- %cmp = fcmp olt float %in, 0.000000e+00
- %longer = fpext float %in to double
- %val = select i1 %cmp, double 0.000000e+00, double %longer
- ret double %val
-
-; CHECK: fmax
-}
-
-define double @test_cross(float %in) #1 {
-entry:
- %cmp = fcmp olt float %in, 0.000000e+00
- %longer = fpext float %in to double
- %val = select i1 %cmp, double %longer, double 0.000000e+00
- ret double %val
-
-; CHECK: fmin
-}
diff --git a/test/CodeGen/ARM64/fminv.ll b/test/CodeGen/ARM64/fminv.ll
deleted file mode 100644
index ca706d8..0000000
--- a/test/CodeGen/ARM64/fminv.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
-
-define float @test_fminv_v2f32(<2 x float> %in) {
-; CHECK: test_fminv_v2f32:
-; CHECK: fminp s0, v0.2s
- %min = call float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float> %in)
- ret float %min
-}
-
-define float @test_fminv_v4f32(<4 x float> %in) {
-; CHECK: test_fminv_v4f32:
-; CHECK: fminv s0, v0.4s
- %min = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %in)
- ret float %min
-}
-
-define double @test_fminv_v2f64(<2 x double> %in) {
-; CHECK: test_fminv_v2f64:
-; CHECK: fminp d0, v0.2d
- %min = call double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double> %in)
- ret double %min
-}
-
-declare float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float>)
-declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>)
-declare double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double>)
-
-define float @test_fmaxv_v2f32(<2 x float> %in) {
-; CHECK: test_fmaxv_v2f32:
-; CHECK: fmaxp s0, v0.2s
- %max = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %in)
- ret float %max
-}
-
-define float @test_fmaxv_v4f32(<4 x float> %in) {
-; CHECK: test_fmaxv_v4f32:
-; CHECK: fmaxv s0, v0.4s
- %max = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %in)
- ret float %max
-}
-
-define double @test_fmaxv_v2f64(<2 x double> %in) {
-; CHECK: test_fmaxv_v2f64:
-; CHECK: fmaxp d0, v0.2d
- %max = call double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double> %in)
- ret double %max
-}
-
-declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>)
-declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>)
-declare double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double>)
-
-define float @test_fminnmv_v2f32(<2 x float> %in) {
-; CHECK: test_fminnmv_v2f32:
-; CHECK: fminnmp s0, v0.2s
- %minnm = call float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float> %in)
- ret float %minnm
-}
-
-define float @test_fminnmv_v4f32(<4 x float> %in) {
-; CHECK: test_fminnmv_v4f32:
-; CHECK: fminnmv s0, v0.4s
- %minnm = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %in)
- ret float %minnm
-}
-
-define double @test_fminnmv_v2f64(<2 x double> %in) {
-; CHECK: test_fminnmv_v2f64:
-; CHECK: fminnmp d0, v0.2d
- %minnm = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in)
- ret double %minnm
-}
-
-declare float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float>)
-declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>)
-declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>)
-
-define float @test_fmaxnmv_v2f32(<2 x float> %in) {
-; CHECK: test_fmaxnmv_v2f32:
-; CHECK: fmaxnmp s0, v0.2s
- %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
- ret float %maxnm
-}
-
-define float @test_fmaxnmv_v4f32(<4 x float> %in) {
-; CHECK: test_fmaxnmv_v4f32:
-; CHECK: fmaxnmv s0, v0.4s
- %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
- ret float %maxnm
-}
-
-define double @test_fmaxnmv_v2f64(<2 x double> %in) {
-; CHECK: test_fmaxnmv_v2f64:
-; CHECK: fmaxnmp d0, v0.2d
- %maxnm = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
- ret double %maxnm
-}
-
-declare float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float>)
-declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>)
-declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>)
diff --git a/test/CodeGen/ARM64/fmuladd.ll b/test/CodeGen/ARM64/fmuladd.ll
deleted file mode 100644
index 174d830..0000000
--- a/test/CodeGen/ARM64/fmuladd.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define float @test_f32(float* %A, float* %B, float* %C) nounwind {
-;CHECK-LABEL: test_f32:
-;CHECK: fmadd
-;CHECK-NOT: fmadd
- %tmp1 = load float* %A
- %tmp2 = load float* %B
- %tmp3 = load float* %C
- %tmp4 = call float @llvm.fmuladd.f32(float %tmp1, float %tmp2, float %tmp3)
- ret float %tmp4
-}
-
-define <2 x float> @test_v2f32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
-;CHECK-LABEL: test_v2f32:
-;CHECK: fmla.2s
-;CHECK-NOT: fmla.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
- %tmp4 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
- ret <2 x float> %tmp4
-}
-
-define <4 x float> @test_v4f32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-;CHECK-LABEL: test_v4f32:
-;CHECK: fmla.4s
-;CHECK-NOT: fmla.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
- %tmp4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
- ret <4 x float> %tmp4
-}
-
-define <8 x float> @test_v8f32(<8 x float>* %A, <8 x float>* %B, <8 x float>* %C) nounwind {
-;CHECK-LABEL: test_v8f32:
-;CHECK: fmla.4s
-;CHECK: fmla.4s
-;CHECK-NOT: fmla.4s
- %tmp1 = load <8 x float>* %A
- %tmp2 = load <8 x float>* %B
- %tmp3 = load <8 x float>* %C
- %tmp4 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %tmp1, <8 x float> %tmp2, <8 x float> %tmp3)
- ret <8 x float> %tmp4
-}
-
-define double @test_f64(double* %A, double* %B, double* %C) nounwind {
-;CHECK-LABEL: test_f64:
-;CHECK: fmadd
-;CHECK-NOT: fmadd
- %tmp1 = load double* %A
- %tmp2 = load double* %B
- %tmp3 = load double* %C
- %tmp4 = call double @llvm.fmuladd.f64(double %tmp1, double %tmp2, double %tmp3)
- ret double %tmp4
-}
-
-define <2 x double> @test_v2f64(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
-;CHECK-LABEL: test_v2f64:
-;CHECK: fmla.2d
-;CHECK-NOT: fmla.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
- %tmp4 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
- ret <2 x double> %tmp4
-}
-
-define <4 x double> @test_v4f64(<4 x double>* %A, <4 x double>* %B, <4 x double>* %C) nounwind {
-;CHECK-LABEL: test_v4f64:
-;CHECK: fmla.2d
-;CHECK: fmla.2d
-;CHECK-NOT: fmla.2d
- %tmp1 = load <4 x double>* %A
- %tmp2 = load <4 x double>* %B
- %tmp3 = load <4 x double>* %C
- %tmp4 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %tmp1, <4 x double> %tmp2, <4 x double> %tmp3)
- ret <4 x double> %tmp4
-}
-
-declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
-declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
-declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
diff --git a/test/CodeGen/ARM64/fold-lsl.ll b/test/CodeGen/ARM64/fold-lsl.ll
deleted file mode 100644
index a856c96..0000000
--- a/test/CodeGen/ARM64/fold-lsl.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-;
-; <rdar://problem/14486451>
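-;
-; The (lshr 9) + zext + (and 255) index computation in each function below
-; should fold into a single ubfm bitfield extract feeding a scaled
-; [base, index, lsl #N] addressing mode, as the checks verify.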
-
-%struct.a = type [256 x i16]
-%struct.b = type [256 x i32]
-%struct.c = type [256 x i64]
-
-define i16 @load_halfword(%struct.a* %ctx, i32 %xor72) nounwind {
-; CHECK-LABEL: load_halfword:
-; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
-; CHECK: ldrh w0, [x0, [[REG]], lsl #1]
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64 %idxprom83
- %result = load i16* %arrayidx86, align 2
- ret i16 %result
-}
-
-define i32 @load_word(%struct.b* %ctx, i32 %xor72) nounwind {
-; CHECK-LABEL: load_word:
-; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
-; CHECK: ldr w0, [x0, [[REG]], lsl #2]
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64 %idxprom83
- %result = load i32* %arrayidx86, align 4
- ret i32 %result
-}
-
-define i64 @load_doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
-; CHECK-LABEL: load_doubleword:
-; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
-; CHECK: ldr x0, [x0, [[REG]], lsl #3]
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64 %idxprom83
- %result = load i64* %arrayidx86, align 8
- ret i64 %result
-}
-
-define void @store_halfword(%struct.a* %ctx, i32 %xor72, i16 %val) nounwind {
-; CHECK-LABEL: store_halfword:
-; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
-; CHECK: strh w2, [x0, [[REG]], lsl #1]
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64 %idxprom83
- store i16 %val, i16* %arrayidx86, align 8
- ret void
-}
-
-define void @store_word(%struct.b* %ctx, i32 %xor72, i32 %val) nounwind {
-; CHECK-LABEL: store_word:
-; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
-; CHECK: str w2, [x0, [[REG]], lsl #2]
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64 %idxprom83
- store i32 %val, i32* %arrayidx86, align 8
- ret void
-}
-
-define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val) nounwind {
-; CHECK-LABEL: store_doubleword:
-; CHECK: ubfm [[REG:x[0-9]+]], x1, #9, #16
-; CHECK: str x2, [x0, [[REG]], lsl #3]
- %shr81 = lshr i32 %xor72, 9
- %conv82 = zext i32 %shr81 to i64
- %idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64 %idxprom83
- store i64 %val, i64* %arrayidx86, align 8
- ret void
-}
diff --git a/test/CodeGen/ARM64/fp128.ll b/test/CodeGen/ARM64/fp128.ll
deleted file mode 100644
index 21eb893..0000000
--- a/test/CodeGen/ARM64/fp128.ll
+++ /dev/null
@@ -1,274 +0,0 @@
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-
-@lhs = global fp128 zeroinitializer, align 16
-@rhs = global fp128 zeroinitializer, align 16
-
-define fp128 @test_add() {
-; CHECK-LABEL: test_add:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
- %val = fadd fp128 %lhs, %rhs
-; CHECK: bl __addtf3
- ret fp128 %val
-}
-
-define fp128 @test_sub() {
-; CHECK-LABEL: test_sub:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
- %val = fsub fp128 %lhs, %rhs
-; CHECK: bl __subtf3
- ret fp128 %val
-}
-
-define fp128 @test_mul() {
-; CHECK-LABEL: test_mul:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
- %val = fmul fp128 %lhs, %rhs
-; CHECK: bl __multf3
- ret fp128 %val
-}
-
-define fp128 @test_div() {
-; CHECK-LABEL: test_div:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
- %val = fdiv fp128 %lhs, %rhs
-; CHECK: bl __divtf3
- ret fp128 %val
-}
-
-@var32 = global i32 0
-@var64 = global i64 0
-
-define void @test_fptosi() {
-; CHECK-LABEL: test_fptosi:
- %val = load fp128* @lhs, align 16
-
- %val32 = fptosi fp128 %val to i32
- store i32 %val32, i32* @var32
-; CHECK: bl __fixtfsi
-
- %val64 = fptosi fp128 %val to i64
- store i64 %val64, i64* @var64
-; CHECK: bl __fixtfdi
-
- ret void
-}
-
-define void @test_fptoui() {
-; CHECK-LABEL: test_fptoui:
- %val = load fp128* @lhs, align 16
-
- %val32 = fptoui fp128 %val to i32
- store i32 %val32, i32* @var32
-; CHECK: bl __fixunstfsi
-
- %val64 = fptoui fp128 %val to i64
- store i64 %val64, i64* @var64
-; CHECK: bl __fixunstfdi
-
- ret void
-}
-
-define void @test_sitofp() {
-; CHECK-LABEL: test_sitofp:
-
- %src32 = load i32* @var32
- %val32 = sitofp i32 %src32 to fp128
- store volatile fp128 %val32, fp128* @lhs
-; CHECK: bl __floatsitf
-
- %src64 = load i64* @var64
- %val64 = sitofp i64 %src64 to fp128
- store volatile fp128 %val64, fp128* @lhs
-; CHECK: bl __floatditf
-
- ret void
-}
-
-define void @test_uitofp() {
-; CHECK-LABEL: test_uitofp:
-
- %src32 = load i32* @var32
- %val32 = uitofp i32 %src32 to fp128
- store volatile fp128 %val32, fp128* @lhs
-; CHECK: bl __floatunsitf
-
- %src64 = load i64* @var64
- %val64 = uitofp i64 %src64 to fp128
- store volatile fp128 %val64, fp128* @lhs
-; CHECK: bl __floatunditf
-
- ret void
-}
-
-define i1 @test_setcc1() {
-; CHECK-LABEL: test_setcc1:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
-; Technically, everything after the call to __letf2 is redundant, but we'll let
-; LLVM have its fun for now.
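-; (__letf2 returns an i32 that is <= 0 iff lhs <= rhs, hence the compare
-; against zero and the csinc on 'gt' below.)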
- %val = fcmp ole fp128 %lhs, %rhs
-; CHECK: bl __letf2
-; CHECK: cmp w0, #0
-; CHECK: csinc w0, wzr, wzr, gt
-
- ret i1 %val
-; CHECK: ret
-}
-
-define i1 @test_setcc2() {
-; CHECK-LABEL: test_setcc2:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
- %val = fcmp ugt fp128 %lhs, %rhs
-; CHECK: bl __gttf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le
-
-; CHECK: bl __unordtf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
-; CHECK: orr w0, [[UNORDERED]], [[GT]]
-
- ret i1 %val
-; CHECK: ret
-}
-
-define i32 @test_br_cc() {
-; CHECK-LABEL: test_br_cc:
-
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
-; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
-
- ; olt == !uge, which is what LLVM unfortunately "optimizes" this to.
- %cond = fcmp olt fp128 %lhs, %rhs
-; CHECK: bl __getf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt
-
-; CHECK: bl __unordtf2
-; CHECK: cmp w0, #0
-; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
-
-; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
-; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
- br i1 %cond, label %iftrue, label %iffalse
-
-iftrue:
- ret i32 42
-; CHECK-NEXT: BB#
-; CHECK-NEXT: movz w0, #42
-; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
-
-iffalse:
- ret i32 29
-; CHECK: [[RET29]]:
-; CHECK-NEXT: movz w0, #29
-; CHECK-NEXT: [[REALRET]]:
-; CHECK: ret
-}
-
-define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
-; CHECK-LABEL: test_select:
-
- %val = select i1 %cond, fp128 %lhs, fp128 %rhs
- store fp128 %val, fp128* @lhs, align 16
-; CHECK: and [[BIT:w[0-9]+]], w0, #0x1
-; CHECK: cmp [[BIT]], #0
-; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: BB#
-; CHECK-NEXT: orr v[[VAL:[0-9]+]].16b, v0.16b, v0.16b
-; CHECK-NEXT: [[IFFALSE]]:
-; CHECK: str q[[VAL]], [{{x[0-9]+}}, :lo12:lhs]
- ret void
-; CHECK: ret
-}
-
-@varfloat = global float 0.0, align 4
-@vardouble = global double 0.0, align 8
-
-define void @test_round() {
-; CHECK-LABEL: test_round:
-
- %val = load fp128* @lhs, align 16
-
- %float = fptrunc fp128 %val to float
- store float %float, float* @varfloat, align 4
-; CHECK: bl __trunctfsf2
-; CHECK: str s0, [{{x[0-9]+}}, :lo12:varfloat]
-
- %double = fptrunc fp128 %val to double
- store double %double, double* @vardouble, align 8
-; CHECK: bl __trunctfdf2
-; CHECK: str d0, [{{x[0-9]+}}, :lo12:vardouble]
-
- ret void
-}
-
-define void @test_extend() {
-; CHECK-LABEL: test_extend:
-
- %val = load fp128* @lhs, align 16
-
- %float = load float* @varfloat
- %fromfloat = fpext float %float to fp128
- store volatile fp128 %fromfloat, fp128* @lhs, align 16
-; CHECK: bl __extendsftf2
-; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
-
- %double = load double* @vardouble
- %fromdouble = fpext double %double to fp128
- store volatile fp128 %fromdouble, fp128* @lhs, align 16
-; CHECK: bl __extenddftf2
-; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
-
- ret void
-; CHECK: ret
-}
-
-define fp128 @test_neg(fp128 %in) {
-; CHECK: [[MINUS0:.LCPI[0-9]+_0]]:
-; Make sure the weird hex constant below *is* -0.0
-; CHECK-NEXT: fp128 -0
-
-; CHECK-LABEL: test_neg:
-
- ; Could in principle be optimized to fneg, which we can't select; this
- ; makes sure that doesn't happen.
- %ret = fsub fp128 0xL00000000000000008000000000000000, %in
-; CHECK: orr v1.16b, v0.16b, v0.16b
-; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[MINUS0]]]
-; CHECK: bl __subtf3
-
- ret fp128 %ret
-; CHECK: ret
-}
diff --git a/test/CodeGen/ARM64/frameaddr.ll b/test/CodeGen/ARM64/frameaddr.ll
deleted file mode 100644
index d0635ad..0000000
--- a/test/CodeGen/ARM64/frameaddr.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-define i8* @t() nounwind {
-entry:
-; CHECK-LABEL: t:
-; CHECK: stp fp, lr, [sp, #-16]!
-; CHECK: mov fp, sp
-; CHECK: mov x0, fp
-; CHECK: ldp fp, lr, [sp], #16
-; CHECK: ret
- %0 = call i8* @llvm.frameaddress(i32 0)
- ret i8* %0
-}
-
-declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM64/hello.ll b/test/CodeGen/ARM64/hello.ll
deleted file mode 100644
index f870fff..0000000
--- a/test/CodeGen/ARM64/hello.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX
-
-; CHECK-LABEL: main:
-; CHECK: stp fp, lr, [sp, #-16]!
-; CHECK-NEXT: mov fp, sp
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: stur wzr, [fp, #-4]
-; CHECK: adrp x0, L_.str@PAGE
-; CHECK: add x0, x0, L_.str@PAGEOFF
-; CHECK-NEXT: bl _puts
-; CHECK-NEXT: mov sp, fp
-; CHECK-NEXT: ldp fp, lr, [sp], #16
-; CHECK-NEXT: ret
-
-; CHECK-LINUX-LABEL: main:
-; CHECK-LINUX: stp fp, lr, [sp, #-16]!
-; CHECK-LINUX-NEXT: mov fp, sp
-; CHECK-LINUX-NEXT: sub sp, sp, #16
-; CHECK-LINUX-NEXT: stur wzr, [fp, #-4]
-; CHECK-LINUX: adrp x0, .L.str
-; CHECK-LINUX: add x0, x0, :lo12:.L.str
-; CHECK-LINUX-NEXT: bl puts
-; CHECK-LINUX-NEXT: mov sp, fp
-; CHECK-LINUX-NEXT: ldp fp, lr, [sp], #16
-; CHECK-LINUX-NEXT: ret
-
-@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
-
-define i32 @main() nounwind ssp {
-entry:
- %retval = alloca i32, align 4
- store i32 0, i32* %retval
- %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
- ret i32 %call
-}
-
-declare i32 @puts(i8*)
diff --git a/test/CodeGen/ARM64/i16-subreg-extract.ll b/test/CodeGen/ARM64/i16-subreg-extract.ll
deleted file mode 100644
index fc2e8b5..0000000
--- a/test/CodeGen/ARM64/i16-subreg-extract.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define i32 @foo(<4 x i16>* %__a) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: umov.h w{{[0-9]+}}, v{{[0-9]+}}[0]
- %tmp18 = load <4 x i16>* %__a, align 8
- %vget_lane = extractelement <4 x i16> %tmp18, i32 0
- %conv = zext i16 %vget_lane to i32
- %mul = mul nsw i32 3, %conv
- ret i32 %mul
-}
-
diff --git a/test/CodeGen/ARM64/icmp-opt.ll b/test/CodeGen/ARM64/icmp-opt.ll
deleted file mode 100644
index f88399b..0000000
--- a/test/CodeGen/ARM64/icmp-opt.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-; Optimize (x > -1) to (x >= 0) etc.
-; Optimize (cmp (add / sub), 0): eliminate the subs used to update flag
-; for comparison only
-; rdar://10233472
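-;
-; (icmp sgt x, -1) is rewritten as (icmp sge x, 0): no movn is needed to
-; materialize -1, just cmp x0, #0 with the csinc taking the inverted 'lt'.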
-
-define i32 @t1(i64 %a) nounwind ssp {
-entry:
-; CHECK-LABEL: t1:
-; CHECK-NOT: movn
-; CHECK: cmp x0, #0
-; CHECK: csinc w0, wzr, wzr, lt
- %cmp = icmp sgt i64 %a, -1
- %conv = zext i1 %cmp to i32
- ret i32 %conv
-}
diff --git a/test/CodeGen/ARM64/indexed-memory.ll b/test/CodeGen/ARM64/indexed-memory.ll
deleted file mode 100644
index e390ed7..0000000
--- a/test/CodeGen/ARM64/indexed-memory.ll
+++ /dev/null
@@ -1,351 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s
-
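-; The [xN], #imm forms below are post-index stores: the value is stored at
-; the current base and the base register is then advanced by #imm, matching
-; the incremented pointer that is stored back through %out.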
-define void @store64(i64** nocapture %out, i64 %index, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: store64:
-; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8
-; CHECK: ret
- %tmp = load i64** %out, align 8
- %incdec.ptr = getelementptr inbounds i64* %tmp, i64 1
- store i64 %spacing, i64* %tmp, align 4
- store i64* %incdec.ptr, i64** %out, align 8
- ret void
-}
-
-define void @store32(i32** nocapture %out, i32 %index, i32 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: store32:
-; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #4
-; CHECK: ret
- %tmp = load i32** %out, align 8
- %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
- store i32 %spacing, i32* %tmp, align 4
- store i32* %incdec.ptr, i32** %out, align 8
- ret void
-}
-
-define void @store16(i16** nocapture %out, i16 %index, i16 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: store16:
-; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #2
-; CHECK: ret
- %tmp = load i16** %out, align 8
- %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
- store i16 %spacing, i16* %tmp, align 4
- store i16* %incdec.ptr, i16** %out, align 8
- ret void
-}
-
-define void @store8(i8** nocapture %out, i8 %index, i8 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: store8:
-; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #1
-; CHECK: ret
- %tmp = load i8** %out, align 8
- %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
- store i8 %spacing, i8* %tmp, align 4
- store i8* %incdec.ptr, i8** %out, align 8
- ret void
-}
-
-define void @truncst64to32(i32** nocapture %out, i32 %index, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: truncst64to32:
-; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #4
-; CHECK: ret
- %tmp = load i32** %out, align 8
- %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
- %trunc = trunc i64 %spacing to i32
- store i32 %trunc, i32* %tmp, align 4
- store i32* %incdec.ptr, i32** %out, align 8
- ret void
-}
-
-define void @truncst64to16(i16** nocapture %out, i16 %index, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: truncst64to16:
-; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #2
-; CHECK: ret
- %tmp = load i16** %out, align 8
- %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
- %trunc = trunc i64 %spacing to i16
- store i16 %trunc, i16* %tmp, align 4
- store i16* %incdec.ptr, i16** %out, align 8
- ret void
-}
-
-define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: truncst64to8:
-; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #1
-; CHECK: ret
- %tmp = load i8** %out, align 8
- %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
- %trunc = trunc i64 %spacing to i8
- store i8 %trunc, i8* %tmp, align 4
- store i8* %incdec.ptr, i8** %out, align 8
- ret void
-}
-
-
-define void @storef32(float** nocapture %out, float %index, float %spacing) nounwind noinline ssp {
-; CHECK-LABEL: storef32:
-; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #4
-; CHECK: ret
- %tmp = load float** %out, align 8
- %incdec.ptr = getelementptr inbounds float* %tmp, i64 1
- store float %spacing, float* %tmp, align 4
- store float* %incdec.ptr, float** %out, align 8
- ret void
-}
-
-define void @storef64(double** nocapture %out, double %index, double %spacing) nounwind noinline ssp {
-; CHECK-LABEL: storef64:
-; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #8
-; CHECK: ret
- %tmp = load double** %out, align 8
- %incdec.ptr = getelementptr inbounds double* %tmp, i64 1
- store double %spacing, double* %tmp, align 4
- store double* %incdec.ptr, double** %out, align 8
- ret void
-}
-
-define double * @pref64(double** nocapture %out, double %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pref64:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: str d0, [x0, #32]!
-; CHECK-NEXT: ret
- %tmp = load double** %out, align 8
- %ptr = getelementptr inbounds double* %tmp, i64 4
- store double %spacing, double* %ptr, align 4
- ret double *%ptr
-}
-
-define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pref32:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: str s0, [x0, #12]!
-; CHECK-NEXT: ret
- %tmp = load float** %out, align 8
- %ptr = getelementptr inbounds float* %tmp, i64 3
- store float %spacing, float* %ptr, align 4
- ret float *%ptr
-}
-
-define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pre64:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: str x1, [x0, #16]!
-; CHECK-NEXT: ret
- %tmp = load i64** %out, align 8
- %ptr = getelementptr inbounds i64* %tmp, i64 2
- store i64 %spacing, i64* %ptr, align 4
- ret i64 *%ptr
-}
-
-define i32 * @pre32(i32** nocapture %out, i32 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pre32:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: str w1, [x0, #8]!
-; CHECK-NEXT: ret
- %tmp = load i32** %out, align 8
- %ptr = getelementptr inbounds i32* %tmp, i64 2
- store i32 %spacing, i32* %ptr, align 4
- ret i32 *%ptr
-}
-
-define i16 * @pre16(i16** nocapture %out, i16 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pre16:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: strh w1, [x0, #4]!
-; CHECK-NEXT: ret
- %tmp = load i16** %out, align 8
- %ptr = getelementptr inbounds i16* %tmp, i64 2
- store i16 %spacing, i16* %ptr, align 4
- ret i16 *%ptr
-}
-
-define i8 * @pre8(i8** nocapture %out, i8 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pre8:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: strb w1, [x0, #2]!
-; CHECK-NEXT: ret
- %tmp = load i8** %out, align 8
- %ptr = getelementptr inbounds i8* %tmp, i64 2
- store i8 %spacing, i8* %ptr, align 4
- ret i8 *%ptr
-}
-
-define i32 * @pretrunc64to32(i32** nocapture %out, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pretrunc64to32:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: str w1, [x0, #8]!
-; CHECK-NEXT: ret
- %tmp = load i32** %out, align 8
- %ptr = getelementptr inbounds i32* %tmp, i64 2
- %trunc = trunc i64 %spacing to i32
- store i32 %trunc, i32* %ptr, align 4
- ret i32 *%ptr
-}
-
-define i16 * @pretrunc64to16(i16** nocapture %out, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pretrunc64to16:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: strh w1, [x0, #4]!
-; CHECK-NEXT: ret
- %tmp = load i16** %out, align 8
- %ptr = getelementptr inbounds i16* %tmp, i64 2
- %trunc = trunc i64 %spacing to i16
- store i16 %trunc, i16* %ptr, align 4
- ret i16 *%ptr
-}
-
-define i8 * @pretrunc64to8(i8** nocapture %out, i64 %spacing) nounwind noinline ssp {
-; CHECK-LABEL: pretrunc64to8:
-; CHECK: ldr x0, [x0]
-; CHECK-NEXT: strb w1, [x0, #2]!
-; CHECK-NEXT: ret
- %tmp = load i8** %out, align 8
- %ptr = getelementptr inbounds i8* %tmp, i64 2
- %trunc = trunc i64 %spacing to i8
- store i8 %trunc, i8* %ptr, align 4
- ret i8 *%ptr
-}
-
-;-----
-; Pre-indexed loads
-;-----
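-; The '!' suffix on addressing modes like [x0, #8]! requests pre-index
-; writeback: the base register is updated by the offset before the access,
-; so the advanced pointer can be returned without a separate add.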
-define double* @preidxf64(double* %src, double* %out) {
-; CHECK-LABEL: preidxf64:
-; CHECK: ldr d0, [x0, #8]!
-; CHECK: str d0, [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds double* %src, i64 1
- %tmp = load double* %ptr, align 4
- store double %tmp, double* %out, align 4
- ret double* %ptr
-}
-
-define float* @preidxf32(float* %src, float* %out) {
-; CHECK-LABEL: preidxf32:
-; CHECK: ldr s0, [x0, #4]!
-; CHECK: str s0, [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds float* %src, i64 1
- %tmp = load float* %ptr, align 4
- store float %tmp, float* %out, align 4
- ret float* %ptr
-}
-
-define i64* @preidx64(i64* %src, i64* %out) {
-; CHECK-LABEL: preidx64:
-; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]!
-; CHECK: str x[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i64* %src, i64 1
- %tmp = load i64* %ptr, align 4
- store i64 %tmp, i64* %out, align 4
- ret i64* %ptr
-}
-
-define i32* @preidx32(i32* %src, i32* %out) {
-; CHECK: ldr w[[REG:[0-9]+]], [x0, #4]!
-; CHECK: str w[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i32* %src, i64 1
- %tmp = load i32* %ptr, align 4
- store i32 %tmp, i32* %out, align 4
- ret i32* %ptr
-}
-
-define i16* @preidx16zext32(i16* %src, i32* %out) {
-; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str w[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
- %ext = zext i16 %tmp to i32
- store i32 %ext, i32* %out, align 4
- ret i16* %ptr
-}
-
-define i16* @preidx16zext64(i16* %src, i64* %out) {
-; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str x[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
- %ext = zext i16 %tmp to i64
- store i64 %ext, i64* %out, align 4
- ret i16* %ptr
-}
-
-define i8* @preidx8zext32(i8* %src, i32* %out) {
-; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str w[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
- %ext = zext i8 %tmp to i32
- store i32 %ext, i32* %out, align 4
- ret i8* %ptr
-}
-
-define i8* @preidx8zext64(i8* %src, i64* %out) {
-; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str x[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
- %ext = zext i8 %tmp to i64
- store i64 %ext, i64* %out, align 4
- ret i8* %ptr
-}
-
-define i32* @preidx32sext64(i32* %src, i64* %out) {
-; CHECK: ldrsw x[[REG:[0-9]+]], [x0, #4]!
-; CHECK: str x[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i32* %src, i64 1
- %tmp = load i32* %ptr, align 4
- %ext = sext i32 %tmp to i64
- store i64 %ext, i64* %out, align 8
- ret i32* %ptr
-}
-
-define i16* @preidx16sext32(i16* %src, i32* %out) {
-; CHECK: ldrsh w[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str w[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
- %ext = sext i16 %tmp to i32
- store i32 %ext, i32* %out, align 4
- ret i16* %ptr
-}
-
-define i16* @preidx16sext64(i16* %src, i64* %out) {
-; CHECK: ldrsh x[[REG:[0-9]+]], [x0, #2]!
-; CHECK: str x[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
- %ext = sext i16 %tmp to i64
- store i64 %ext, i64* %out, align 4
- ret i16* %ptr
-}
-
-define i8* @preidx8sext32(i8* %src, i32* %out) {
-; CHECK: ldrsb w[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str w[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
- %ext = sext i8 %tmp to i32
- store i32 %ext, i32* %out, align 4
- ret i8* %ptr
-}
-
-define i8* @preidx8sext64(i8* %src, i64* %out) {
-; CHECK: ldrsb x[[REG:[0-9]+]], [x0, #1]!
-; CHECK: str x[[REG]], [x1]
-; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
- %ext = sext i8 %tmp to i64
- store i64 %ext, i64* %out, align 4
- ret i8* %ptr
-}
diff --git a/test/CodeGen/ARM64/inline-asm.ll b/test/CodeGen/ARM64/inline-asm.ll
deleted file mode 100644
index e645078..0000000
--- a/test/CodeGen/ARM64/inline-asm.ll
+++ /dev/null
@@ -1,230 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -no-integrated-as | FileCheck %s
-
-; rdar://9167275
-
-define i32 @t1() nounwind ssp {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: mov {{w[0-9]+}}, 7
- %0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind
- ret i32 %0
-}
-
-define i64 @t2() nounwind ssp {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: mov {{x[0-9]+}}, 7
- %0 = tail call i64 asm "mov $0, 7", "=r"() nounwind
- ret i64 %0
-}
-
-define i64 @t3() nounwind ssp {
-entry:
-; CHECK-LABEL: t3:
-; CHECK: mov {{w[0-9]+}}, 7
- %0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind
- ret i64 %0
-}
-
-; rdar://9281206
-
-define void @t4(i64 %op) nounwind {
-entry:
-; CHECK-LABEL: t4:
-; CHECK: mov x0, {{x[0-9]+}}; svc #0
- %0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;", "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind
- ret void
-}
-
-; rdar://9394290
-
-define float @t5(float %x) nounwind {
-entry:
-; CHECK-LABEL: t5:
-; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
- %0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) nounwind
- ret float %0
-}
-
-; rdar://9553599
-
-define zeroext i8 @t6(i8* %src) nounwind {
-entry:
-; CHECK-LABEL: t6:
-; CHECK: ldtrb {{w[0-9]+}}, [{{x[0-9]+}}]
- %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind
- ret i8 %0
-}
-
-define void @t7(i8* %f, i32 %g) nounwind {
-entry:
- %f.addr = alloca i8*, align 8
- store i8* %f, i8** %f.addr, align 8
- ; CHECK-LABEL: t7:
- ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}]
- call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
- ret void
-}
-
-; rdar://10258229
-; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize 'v'
-; registers.
-define void @t8() nounwind ssp {
-entry:
-; CHECK-LABEL: t8:
-; CHECK: stp {{d[0-9]+}}, {{d[0-9]+}}, [sp, #-16]
- tail call void asm sideeffect "nop", "~{v8}"() nounwind
- ret void
-}
-
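-; The constraint tests below exercise the AArch64 immediate constraint
-; letters: I/J accept (positive/negative) add/sub immediates, K/L accept 32-
-; and 64-bit logical immediates, and M/N accept 32- and 64-bit mov immediates.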
-define i32 @constraint_I(i32 %i, i32 %j) nounwind {
-entry:
- ; CHECK-LABEL: constraint_I:
- %0 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 16773120) nounwind
- ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #16773120
- %1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}, $2", "=r,r,I"(i32 %i, i32 4096) nounwind
- ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4096
- ret i32 %1
-}
-
-define i32 @constraint_J(i32 %i, i32 %j) nounwind {
-entry:
- ; CHECK-LABEL: constraint_J:
- %0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -16773120) nounwind
- ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4278194176
- %1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -1) nounwind
- ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4294967295
- ret i32 %1
-}
-
-define i32 @constraint_KL(i32 %i, i32 %j) nounwind {
-entry:
- ; CHECK-LABEL: constraint_KL:
- %0 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,K"(i32 %i, i32 255) nounwind
- ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #255
- %1 = tail call i32 asm sideeffect "eor ${0:w}, ${1:w}, $2", "=r,r,L"(i32 %i, i64 16711680) nounwind
- ; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #16711680
- ret i32 %1
-}
-
-define i32 @constraint_MN(i32 %i, i32 %j) nounwind {
-entry:
- ; CHECK-LABEL: constraint_MN:
- %0 = tail call i32 asm sideeffect "movk ${0:w}, $1", "=r,M"(i32 65535) nounwind
- ; CHECK: movk {{w[0-9]+}}, #65535
- %1 = tail call i32 asm sideeffect "movz ${0:w}, $1", "=r,N"(i64 0) nounwind
- ; CHECK: movz {{w[0-9]+}}, #0
- ret i32 %1
-}
-
-define void @t9() nounwind {
-entry:
- ; CHECK-LABEL: t9:
- %data = alloca <2 x double>, align 16
- %0 = load <2 x double>* %data, align 16
- call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind
- ; CHECK: mov.2d v4, {{v[0-9]+}}
- ret void
-}
-
-define void @t10() nounwind {
-entry:
- ; CHECK-LABEL: t10:
- %data = alloca <2 x float>, align 8
- %a = alloca [2 x float], align 4
- %arraydecay = getelementptr inbounds [2 x float]* %a, i32 0, i32 0
- %0 = load <2 x float>* %data, align 8
- call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
- ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
- call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
- ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
- call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
- ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}]
- call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
- ; CHECK: ldr {{h[0-9]+}}, [{{x[0-9]+}}]
- call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
- ; CHECK: ldr {{b[0-9]+}}, [{{x[0-9]+}}]
- ret void
-}
-
-define void @t11() nounwind {
-entry:
- ; CHECK-LABEL: t11:
- %a = alloca i32, align 4
- %0 = load i32* %a, align 4
- call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind
- ; CHECK: mov xzr, {{x[0-9]+}}
- %1 = load i32* %a, align 4
- call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind
- ; CHECK: mov wzr, {{w[0-9]+}}
- ret void
-}
-
-define void @t12() nounwind {
-entry:
- ; CHECK-LABEL: t12:
- %data = alloca <4 x float>, align 16
- %0 = load <4 x float>* %data, align 16
- call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind
 - ; CHECK: mov.2d v4, {{v([0-9]|1[0-5])}}
- ret void
-}
-
-define void @t13() nounwind {
-entry:
- ; CHECK-LABEL: t13:
- tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 1311673391471656960) nounwind
- ; CHECK: mov x4, #1311673391471656960
- tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 -4662) nounwind
- ; CHECK: mov x4, #-4662
- tail call void asm sideeffect "mov x4, $0\0A", "N"(i64 4660) nounwind
- ; CHECK: mov x4, #4660
- call void asm sideeffect "mov x4, $0\0A", "N"(i64 -71777214294589696) nounwind
- ; CHECK: mov x4, #-71777214294589696
- ret void
-}
-
-define void @t14() nounwind {
-entry:
- ; CHECK-LABEL: t14:
- tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 305397760) nounwind
- ; CHECK: mov w4, #305397760
- tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 -4662) nounwind
- ; CHECK: mov w4, #4294962634
- tail call void asm sideeffect "mov w4, $0\0A", "M"(i32 4660) nounwind
- ; CHECK: mov w4, #4660
- call void asm sideeffect "mov w4, $0\0A", "M"(i32 -16711936) nounwind
- ; CHECK: mov w4, #4278255360
- ret void
-}
-
-define void @t15() nounwind {
-entry:
- %0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind
- ; CHECK: fmov {{x[0-9]+}}, d8
- ret void
-}
-
-; rdar://problem/14285178
-
-define void @test_zero_reg(i32* %addr) {
-; CHECK-LABEL: test_zero_reg:
-
- tail call void asm sideeffect "USE($0)", "z"(i32 0) nounwind
-; CHECK: USE(xzr)
-
- tail call void asm sideeffect "USE(${0:w})", "zr"(i32 0)
-; CHECK: USE(wzr)
-
- tail call void asm sideeffect "USE(${0:w})", "zr"(i32 1)
-; CHECK: orr [[VAL1:w[0-9]+]], wzr, #0x1
-; CHECK: USE([[VAL1]])
-
- tail call void asm sideeffect "USE($0), USE($1)", "z,z"(i32 0, i32 0) nounwind
-; CHECK: USE(xzr), USE(xzr)
-
- tail call void asm sideeffect "USE($0), USE(${1:w})", "z,z"(i32 0, i32 0) nounwind
-; CHECK: USE(xzr), USE(wzr)
-
- ret void
-}
diff --git a/test/CodeGen/ARM64/ld1.ll b/test/CodeGen/ARM64/ld1.ll
deleted file mode 100644
index 61836a1..0000000
--- a/test/CodeGen/ARM64/ld1.ll
+++ /dev/null
@@ -1,1345 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
-
-%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
-%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
-%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
-
-define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
-; CHECK-LABEL: ld2_8b
-; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
-; and from the argument of the function, also defined by the ABI (i.e., x0)
-; CHECK: ld2.8b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A)
- ret %struct.__neon_int8x8x2_t %tmp2
-}
-
-define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
-; CHECK-LABEL: ld3_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A)
- ret %struct.__neon_int8x8x3_t %tmp2
-}
-
-define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
-; CHECK-LABEL: ld4_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A)
- ret %struct.__neon_int8x8x4_t %tmp2
-}
-
-declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly
-
-%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
-%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
-%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
-
-define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
-; CHECK-LABEL: ld2_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.16b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A)
- ret %struct.__neon_int8x16x2_t %tmp2
-}
-
-define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
-; CHECK-LABEL: ld3_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A)
- ret %struct.__neon_int8x16x3_t %tmp2
-}
-
-define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
-; CHECK-LABEL: ld4_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A)
- ret %struct.__neon_int8x16x4_t %tmp2
-}
-
-declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly
-
-%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
-%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
-%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
-
-define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
-; CHECK-LABEL: ld2_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.4h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A)
- ret %struct.__neon_int16x4x2_t %tmp2
-}
-
-define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
-; CHECK-LABEL: ld3_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A)
- ret %struct.__neon_int16x4x3_t %tmp2
-}
-
-define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
-; CHECK-LABEL: ld4_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A)
- ret %struct.__neon_int16x4x4_t %tmp2
-}
-
-declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly
-
-%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
-%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
-%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
-
-define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
-; CHECK-LABEL: ld2_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.8h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A)
- ret %struct.__neon_int16x8x2_t %tmp2
-}
-
-define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
-; CHECK-LABEL: ld3_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A)
- ret %struct.__neon_int16x8x3_t %tmp2
-}
-
-define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
-; CHECK-LABEL: ld4_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A)
- ret %struct.__neon_int16x8x4_t %tmp2
-}
-
-declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly
-
-%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
-%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
-%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
-
-define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
-; CHECK-LABEL: ld2_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.2s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A)
- ret %struct.__neon_int32x2x2_t %tmp2
-}
-
-define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
-; CHECK-LABEL: ld3_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A)
- ret %struct.__neon_int32x2x3_t %tmp2
-}
-
-define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
-; CHECK-LABEL: ld4_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A)
- ret %struct.__neon_int32x2x4_t %tmp2
-}
-
-declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly
-
-%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
-%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
-%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
-
-define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
-; CHECK-LABEL: ld2_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.4s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A)
- ret %struct.__neon_int32x4x2_t %tmp2
-}
-
-define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
-; CHECK-LABEL: ld3_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A)
- ret %struct.__neon_int32x4x3_t %tmp2
-}
-
-define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
-; CHECK-LABEL: ld4_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A)
- ret %struct.__neon_int32x4x4_t %tmp2
-}
-
-declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly
-
-%struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
-%struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
-%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
-
-define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
-; CHECK-LABEL: ld2_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2.2d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A)
- ret %struct.__neon_int64x2x2_t %tmp2
-}
-
-define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
-; CHECK-LABEL: ld3_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A)
- ret %struct.__neon_int64x2x3_t %tmp2
-}
-
-define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
-; CHECK-LABEL: ld4_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A)
- ret %struct.__neon_int64x2x4_t %tmp2
-}
-
-declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly
-
-%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
-%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
-%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
-
-
-define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
-; CHECK-LABEL: ld2_1di64
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A)
- ret %struct.__neon_int64x1x2_t %tmp2
-}
-
-define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
-; CHECK-LABEL: ld3_1di64
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A)
- ret %struct.__neon_int64x1x3_t %tmp2
-}
-
-define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
-; CHECK-LABEL: ld4_1di64
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A)
- ret %struct.__neon_int64x1x4_t %tmp2
-}
-
-
-declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly
-
-%struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
-%struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
-%struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }
-
-
-define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
-; CHECK-LABEL: ld2_1df64
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A)
- ret %struct.__neon_float64x1x2_t %tmp2
-}
-
-define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
-; CHECK-LABEL: ld3_1df64
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A)
- ret %struct.__neon_float64x1x3_t %tmp2
-}
-
-define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
-; CHECK-LABEL: ld4_1df64
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A)
- ret %struct.__neon_float64x1x4_t %tmp2
-}
-
-declare %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
-declare %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
-declare %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld4.v1f64.p0f64(double*) nounwind readonly
-
-
-define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_16b
-; CHECK: ld2.b { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
- ret %struct.__neon_int8x16x2_t %tmp2
-}
-
-define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_16b
-; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
- ret %struct.__neon_int8x16x3_t %tmp2
-}
-
-define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_16b
-; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
- ret %struct.__neon_int8x16x4_t %tmp2
-}
-
-declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
-declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
-declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
-
-define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_8h
-; CHECK: ld2.h { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
- ret %struct.__neon_int16x8x2_t %tmp2
-}
-
-define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_8h
-; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
- ret %struct.__neon_int16x8x3_t %tmp2
-}
-
-define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_8h
-; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
- ret %struct.__neon_int16x8x4_t %tmp2
-}
-
-declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
-
-define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_4s
-; CHECK: ld2.s { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
- ret %struct.__neon_int32x4x2_t %tmp2
-}
-
-define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_4s
-; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
- ret %struct.__neon_int32x4x3_t %tmp2
-}
-
-define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_4s
-; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
- ret %struct.__neon_int32x4x4_t %tmp2
-}
-
-declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
-
-define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_2d
-; CHECK: ld2.d { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
- ret %struct.__neon_int64x2x2_t %tmp2
-}
-
-define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_2d
-; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
- ret %struct.__neon_int64x2x3_t %tmp2
-}
-
-define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_2d
-; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
- ret %struct.__neon_int64x2x4_t %tmp2
-}
-
-declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
-declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
-declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
-
-define <8 x i8> @ld1r_8b(i8* %bar) {
-; CHECK: ld1r_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.8b { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i8* %bar
- %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
- %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
- %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
- %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
- %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
- %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
- %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
- %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
- ret <8 x i8> %tmp9
-}
-
-define <16 x i8> @ld1r_16b(i8* %bar) {
-; CHECK: ld1r_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.16b { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i8* %bar
- %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
- %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
- %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
- %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
- %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
- %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
- %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
- %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
- %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
- %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
- %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
- %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
- %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
- %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
- %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
- %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
- ret <16 x i8> %tmp17
-}
-
-define <4 x i16> @ld1r_4h(i16* %bar) {
-; CHECK: ld1r_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4h { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i16* %bar
- %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
- %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
- %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
- %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
- ret <4 x i16> %tmp5
-}
-
-define <8 x i16> @ld1r_8h(i16* %bar) {
-; CHECK: ld1r_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.8h { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i16* %bar
- %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
- %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
- %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
- %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
- %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
- %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
- %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
- %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
- ret <8 x i16> %tmp9
-}
-
-define <2 x i32> @ld1r_2s(i32* %bar) {
-; CHECK: ld1r_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i32* %bar
- %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
- %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @ld1r_4s(i32* %bar) {
-; CHECK: ld1r_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i32* %bar
- %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
- %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
- %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
- %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @ld1r_2d(i64* %bar) {
-; CHECK: ld1r_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i64* %bar
- %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
- %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
- ret <2 x i64> %tmp3
-}
-
-define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
-; CHECK: ld2r_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.8b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A)
- ret %struct.__neon_int8x8x2_t %tmp2
-}
-
-define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
-; CHECK: ld3r_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A)
- ret %struct.__neon_int8x8x3_t %tmp2
-}
-
-define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
-; CHECK: ld4r_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A)
- ret %struct.__neon_int8x8x4_t %tmp2
-}
-
-declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
-
-define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
-; CHECK: ld2r_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.16b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A)
- ret %struct.__neon_int8x16x2_t %tmp2
-}
-
-define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
-; CHECK: ld3r_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A)
- ret %struct.__neon_int8x16x3_t %tmp2
-}
-
-define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
-; CHECK: ld4r_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A)
- ret %struct.__neon_int8x16x4_t %tmp2
-}
-
-declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
-
-define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
-; CHECK: ld2r_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.4h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A)
- ret %struct.__neon_int16x4x2_t %tmp2
-}
-
-define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
-; CHECK: ld3r_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A)
- ret %struct.__neon_int16x4x3_t %tmp2
-}
-
-define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
-; CHECK: ld4r_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A)
- ret %struct.__neon_int16x4x4_t %tmp2
-}
-
-declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
-
-define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
-; CHECK: ld2r_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.8h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A)
- ret %struct.__neon_int16x8x2_t %tmp2
-}
-
-define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
-; CHECK: ld3r_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A)
- ret %struct.__neon_int16x8x3_t %tmp2
-}
-
-define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
-; CHECK: ld4r_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A)
- ret %struct.__neon_int16x8x4_t %tmp2
-}
-
-declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
-
-define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
-; CHECK: ld2r_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.2s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A)
- ret %struct.__neon_int32x2x2_t %tmp2
-}
-
-define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
-; CHECK: ld3r_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A)
- ret %struct.__neon_int32x2x3_t %tmp2
-}
-
-define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
-; CHECK: ld4r_2s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A)
- ret %struct.__neon_int32x2x4_t %tmp2
-}
-
-declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
-
-define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
-; CHECK: ld2r_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.4s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A)
- ret %struct.__neon_int32x4x2_t %tmp2
-}
-
-define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
-; CHECK: ld3r_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A)
- ret %struct.__neon_int32x4x3_t %tmp2
-}
-
-define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
-; CHECK: ld4r_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A)
- ret %struct.__neon_int32x4x4_t %tmp2
-}
-
-declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
-
-define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
-; CHECK: ld2r_1d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A)
- ret %struct.__neon_int64x1x2_t %tmp2
-}
-
-define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
-; CHECK: ld3r_1d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A)
- ret %struct.__neon_int64x1x3_t %tmp2
-}
-
-define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
-; CHECK: ld4r_1d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A)
- ret %struct.__neon_int64x1x4_t %tmp2
-}
-
-declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
-
-define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
-; CHECK: ld2r_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.2d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A)
- ret %struct.__neon_int64x2x2_t %tmp2
-}
-
-define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
-; CHECK: ld3r_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A)
- ret %struct.__neon_int64x2x3_t %tmp2
-}
-
-define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
-; CHECK: ld4r_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
- %tmp2 = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A)
- ret %struct.__neon_int64x2x4_t %tmp2
-}
-
-declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
-
-define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
-; CHECK-LABEL: ld1_16b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i8* %bar
- %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
-; CHECK-LABEL: ld1_8h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i16* %bar
- %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
-; CHECK-LABEL: ld1_4s
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i32* %bar
- %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
- ret <4 x i32> %tmp2
-}
-
-define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
-; CHECK-LABEL: ld1_4s_float:
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load float* %bar
- %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
- ret <4 x float> %tmp2
-}
-
-define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
-; CHECK-LABEL: ld1_2d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i64* %bar
- %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
- ret <2 x i64> %tmp2
-}
-
-define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
-; CHECK-LABEL: ld1_2d_double:
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load double* %bar
- %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
- ret <2 x double> %tmp2
-}
-
-define <1 x i64> @ld1_1d(<1 x i64>* %p) {
-; CHECK-LABEL: ld1_1d
-; Make sure we are using the operands defined by the ABI
-; CHECK: ldr [[REG:d[0-9]+]], [x0]
-; CHECK-NEXT: ret
- %tmp = load <1 x i64>* %p, align 8
- ret <1 x i64> %tmp
-}
-
-define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
-; CHECK-LABEL: ld1_8b
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i8* %bar
- %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
-; CHECK-LABEL: ld1_4h
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i16* %bar
- %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
-; CHECK-LABEL: ld1_2s:
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load i32* %bar
- %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
-; CHECK-LABEL: ld1_2s_float:
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
- %tmp1 = load float* %bar
- %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
- ret <2 x float> %tmp2
-}
-
-
-; Test case for rdar://13098923: vld1_dup_u32 doesn't generate ld1r.2s
-define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
-entry:
-; CHECK: ld1r_2s_from_dup
-; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
-; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
-; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
-; CHECK-NEXT: str d[[RESREGNUM]], [x2]
-; CHECK-NEXT: ret
- %tmp = bitcast i8* %a to i32*
- %tmp1 = load i32* %tmp, align 4
- %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
- %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
- %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
- %tmp4 = bitcast i8* %b to i32*
- %tmp5 = load i32* %tmp4, align 4
- %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
- %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
- %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
- %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
- %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
- %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
- %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
- %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
- %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
- %tmp10 = bitcast i16* %diff to <4 x i16>*
- store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
- ret void
-}
-
-; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
-define <4 x float> @ld1r_4s_float(float* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_4s_float
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp = load float* %x, align 4
- %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
- %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
- %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
- %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
- ret <4 x float> %tmp4
-}
-
-define <2 x float> @ld1r_2s_float(float* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_2s_float
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp = load float* %x, align 4
- %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
- %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
- ret <2 x float> %tmp2
-}
-
-define <2 x double> @ld1r_2d_double(double* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_2d_double
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp = load double* %x, align 4
- %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
- %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
- ret <2 x double> %tmp2
-}
-
-define <1 x double> @ld1r_1d_double(double* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_1d_double
-; Make sure we are using the operands defined by the ABI
-; CHECK: ldr d0, [x0]
-; CHECK-NEXT: ret
- %tmp = load double* %x, align 4
- %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
- ret <1 x double> %tmp1
-}
-
-define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_4s_float_shuff
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp = load float* %x, align 4
- %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
- %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
- ret <4 x float> %lane
-}
-
-define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_2s_float_shuff
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp = load float* %x, align 4
- %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
- %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
- ret <2 x float> %lane
-}
-
-define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_2d_double_shuff
-; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
- %tmp = load double* %x, align 4
- %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
- %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
- ret <2 x double> %lane
-}
-
-define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
-entry:
-; CHECK-LABEL: ld1r_1d_double_shuff
-; Make sure we are using the operands defined by the ABI
-; CHECK: ldr d0, [x0]
-; CHECK-NEXT: ret
- %tmp = load double* %x, align 4
- %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
- %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
- ret <1 x double> %lane
-}
-
-%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
-%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
-%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
-
-declare %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_float32x2x2_t @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
-declare %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly
-
-define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
-; CHECK-LABEL: ld1_x2_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int8x8x2_t @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %addr)
- ret %struct.__neon_int8x8x2_t %val
-}
-
-define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
-; CHECK-LABEL: ld1_x2_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int16x4x2_t @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %addr)
- ret %struct.__neon_int16x4x2_t %val
-}
-
-define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
-; CHECK-LABEL: ld1_x2_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int32x2x2_t @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %addr)
- ret %struct.__neon_int32x2x2_t %val
-}
-
-define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
-; CHECK-LABEL: ld1_x2_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float32x2x2_t @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %addr)
- ret %struct.__neon_float32x2x2_t %val
-}
-
-define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
-; CHECK-LABEL: ld1_x2_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int64x1x2_t @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %addr)
- ret %struct.__neon_int64x1x2_t %val
-}
-
-define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
-; CHECK-LABEL: ld1_x2_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float64x1x2_t @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %addr)
- ret %struct.__neon_float64x1x2_t %val
-}
-
-
-%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
-%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
-%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
-
-%struct.__neon_float64x2x2_t = type { <2 x double>, <2 x double> }
-%struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> }
-%struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> }
-
-declare %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_float32x4x2_t @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
-declare %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_float64x2x2_t @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly
-
-define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
-; CHECK-LABEL: ld1_x2_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int8x16x2_t @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %addr)
- ret %struct.__neon_int8x16x2_t %val
-}
-
-define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
-; CHECK-LABEL: ld1_x2_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int16x8x2_t @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %addr)
- ret %struct.__neon_int16x8x2_t %val
-}
-
-define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
-; CHECK-LABEL: ld1_x2_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int32x4x2_t @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %addr)
- ret %struct.__neon_int32x4x2_t %val
-}
-
-define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
-; CHECK-LABEL: ld1_x2_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float32x4x2_t @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %addr)
- ret %struct.__neon_float32x4x2_t %val
-}
-
-define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
-; CHECK-LABEL: ld1_x2_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int64x2x2_t @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %addr)
- ret %struct.__neon_int64x2x2_t %val
-}
-
-define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
-; CHECK-LABEL: ld1_x2_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float64x2x2_t @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %addr)
- ret %struct.__neon_float64x2x2_t %val
-}
-
-declare %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_float32x2x3_t @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
-declare %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly
-
-define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
-; CHECK-LABEL: ld1_x3_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int8x8x3_t @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %addr)
- ret %struct.__neon_int8x8x3_t %val
-}
-
-define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
-; CHECK-LABEL: ld1_x3_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int16x4x3_t @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %addr)
- ret %struct.__neon_int16x4x3_t %val
-}
-
-define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
-; CHECK-LABEL: ld1_x3_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int32x2x3_t @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %addr)
- ret %struct.__neon_int32x2x3_t %val
-}
-
-define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
-; CHECK-LABEL: ld1_x3_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float32x2x3_t @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %addr)
- ret %struct.__neon_float32x2x3_t %val
-}
-
-define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
-; CHECK-LABEL: ld1_x3_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int64x1x3_t @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %addr)
- ret %struct.__neon_int64x1x3_t %val
-}
-
-define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
-; CHECK-LABEL: ld1_x3_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float64x1x3_t @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %addr)
- ret %struct.__neon_float64x1x3_t %val
-}
-
-declare %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_float32x4x3_t @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
-declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_float64x2x3_t @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly
-
-define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
-; CHECK-LABEL: ld1_x3_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int8x16x3_t @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %addr)
- ret %struct.__neon_int8x16x3_t %val
-}
-
-define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
-; CHECK-LABEL: ld1_x3_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int16x8x3_t @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %addr)
- ret %struct.__neon_int16x8x3_t %val
-}
-
-define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
-; CHECK-LABEL: ld1_x3_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int32x4x3_t @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %addr)
- ret %struct.__neon_int32x4x3_t %val
-}
-
-define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
-; CHECK-LABEL: ld1_x3_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float32x4x3_t @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %addr)
- ret %struct.__neon_float32x4x3_t %val
-}
-
-define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
-; CHECK-LABEL: ld1_x3_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %addr)
- ret %struct.__neon_int64x2x3_t %val
-}
-
-define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
-; CHECK-LABEL: ld1_x3_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float64x2x3_t @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %addr)
- ret %struct.__neon_float64x2x3_t %val
-}
-
-declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_float32x2x4_t @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
-declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly
-
-define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
-; CHECK-LABEL: ld1_x4_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %addr)
- ret %struct.__neon_int8x8x4_t %val
-}
-
-define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
-; CHECK-LABEL: ld1_x4_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %addr)
- ret %struct.__neon_int16x4x4_t %val
-}
-
-define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
-; CHECK-LABEL: ld1_x4_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %addr)
- ret %struct.__neon_int32x2x4_t %val
-}
-
-define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
-; CHECK-LABEL: ld1_x4_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float32x2x4_t @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %addr)
- ret %struct.__neon_float32x2x4_t %val
-}
-
-define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
-; CHECK-LABEL: ld1_x4_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %addr)
- ret %struct.__neon_int64x1x4_t %val
-}
-
-define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
-; CHECK-LABEL: ld1_x4_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float64x1x4_t @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %addr)
- ret %struct.__neon_float64x1x4_t %val
-}
-
-declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
-declare %struct.__neon_float32x4x4_t @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
-declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
-declare %struct.__neon_float64x2x4_t @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly
-
-define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
-; CHECK-LABEL: ld1_x4_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %addr)
- ret %struct.__neon_int8x16x4_t %val
-}
-
-define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
-; CHECK-LABEL: ld1_x4_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %addr)
- ret %struct.__neon_int16x8x4_t %val
-}
-
-define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
-; CHECK-LABEL: ld1_x4_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %addr)
- ret %struct.__neon_int32x4x4_t %val
-}
-
-define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
-; CHECK-LABEL: ld1_x4_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float32x4x4_t @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %addr)
- ret %struct.__neon_float32x4x4_t %val
-}
-
-define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
-; CHECK-LABEL: ld1_x4_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %addr)
- ret %struct.__neon_int64x2x4_t %val
-}
-
-define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
-; CHECK-LABEL: ld1_x4_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- %val = call %struct.__neon_float64x2x4_t @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %addr)
- ret %struct.__neon_float64x2x4_t %val
-}
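
The ld1r tests in this deleted file exercise two equivalent splat idioms: an insertelement chain that fills every lane, and a lane-0 insertelement followed by a zero-mask shufflevector. Both are expected to select to a single ld1r. A minimal sketch of the shuffle form for 4h, which the deleted file only covers via the chain form (illustrative only, not from the original test):

define <4 x i16> @ld1r_4h_shuff(i16* %x) {
  ; load one i16 and splat it across all four lanes; the expected codegen
  ; is a single "ld1r.4h { v0 }, [x0]"
  %tmp = load i16* %x, align 2
  %tmp1 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  %lane = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %lane
}
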
diff --git a/test/CodeGen/ARM64/ldp.ll b/test/CodeGen/ARM64/ldp.ll
deleted file mode 100644
index 9444385..0000000
--- a/test/CodeGen/ARM64/ldp.ll
+++ /dev/null
@@ -1,149 +0,0 @@
-; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
-; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
-
-; CHECK: ldp_int
-; CHECK: ldp
-define i32 @ldp_int(i32* %p) nounwind {
- %tmp = load i32* %p, align 4
- %add.ptr = getelementptr inbounds i32* %p, i64 1
- %tmp1 = load i32* %add.ptr, align 4
- %add = add nsw i32 %tmp1, %tmp
- ret i32 %add
-}
-
-; CHECK: ldp_long
-; CHECK: ldp
-define i64 @ldp_long(i64* %p) nounwind {
- %tmp = load i64* %p, align 8
- %add.ptr = getelementptr inbounds i64* %p, i64 1
- %tmp1 = load i64* %add.ptr, align 8
- %add = add nsw i64 %tmp1, %tmp
- ret i64 %add
-}
-
-; CHECK: ldp_float
-; CHECK: ldp
-define float @ldp_float(float* %p) nounwind {
- %tmp = load float* %p, align 4
- %add.ptr = getelementptr inbounds float* %p, i64 1
- %tmp1 = load float* %add.ptr, align 4
- %add = fadd float %tmp, %tmp1
- ret float %add
-}
-
-; CHECK: ldp_double
-; CHECK: ldp
-define double @ldp_double(double* %p) nounwind {
- %tmp = load double* %p, align 8
- %add.ptr = getelementptr inbounds double* %p, i64 1
- %tmp1 = load double* %add.ptr, align 8
- %add = fadd double %tmp, %tmp1
- ret double %add
-}
-
-; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
-define i32 @ldur_int(i32* %a) nounwind {
-; LDUR_CHK: ldur_int
-; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
-; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i32* %a, i32 -1
- %tmp1 = load i32* %p1, align 2
- %p2 = getelementptr inbounds i32* %a, i32 -2
- %tmp2 = load i32* %p2, align 2
- %tmp3 = add i32 %tmp1, %tmp2
- ret i32 %tmp3
-}
-
-define i64 @ldur_long(i64* %a) nounwind ssp {
-; LDUR_CHK: ldur_long
-; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -1
- %tmp1 = load i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %a, i64 -2
- %tmp2 = load i64* %p2, align 2
- %tmp3 = add i64 %tmp1, %tmp2
- ret i64 %tmp3
-}
-
-define float @ldur_float(float* %a) {
-; LDUR_CHK: ldur_float
-; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
-; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds float* %a, i64 -1
- %tmp1 = load float* %p1, align 2
- %p2 = getelementptr inbounds float* %a, i64 -2
- %tmp2 = load float* %p2, align 2
- %tmp3 = fadd float %tmp1, %tmp2
- ret float %tmp3
-}
-
-define double @ldur_double(double* %a) {
-; LDUR_CHK: ldur_double
-; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
-; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds double* %a, i64 -1
- %tmp1 = load double* %p1, align 2
- %p2 = getelementptr inbounds double* %a, i64 -2
- %tmp2 = load double* %p2, align 2
- %tmp3 = fadd double %tmp1, %tmp2
- ret double %tmp3
-}
-
-; Now check some boundary conditions
-define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyIn
-; LDUR_CHK-NOT: ldur
-; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -31
- %tmp1 = load i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %a, i64 -32
- %tmp2 = load i64* %p2, align 2
- %tmp3 = add i64 %tmp1, %tmp2
- ret i64 %tmp3
-}
-
-define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyOut
-; LDUR_CHK-NOT: ldp
-; Don't be fragile about which loads or manipulations of the base register
-; are used---just check that there isn't an ldp before the add
-; LDUR_CHK: add
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -32
- %tmp1 = load i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %a, i64 -33
- %tmp2 = load i64* %p2, align 2
- %tmp3 = add i64 %tmp1, %tmp2
- ret i64 %tmp3
-}
-
-define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
-; LDUR_CHK: pairUpNotAligned
-; LDUR_CHK-NOT: ldp
-; LDUR_CHK: ldur
-; LDUR_CHK-NEXT: ldur
-; LDUR_CHK-NEXT: add
-; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -18
- %bp1 = bitcast i64* %p1 to i8*
- %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
- %dp1 = bitcast i8* %bp1p1 to i64*
- %tmp1 = load i64* %dp1, align 1
-
- %p2 = getelementptr inbounds i64* %a, i64 -17
- %bp2 = bitcast i64* %p2 to i8*
- %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
- %dp2 = bitcast i8* %bp2p1 to i64*
- %tmp2 = load i64* %dp2, align 1
-
- %tmp3 = add i64 %tmp1, %tmp2
- ret i64 %tmp3
-}
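
The pairUpBarelyIn/pairUpBarelyOut boundary in the deleted ldp.ll appears to follow from ldur's signed 9-bit unscaled byte offset, which reaches [-256, 255] from the base register: in pairUpBarelyIn both i64 loads (byte offsets -248 and -256) are representable and merge into one ldp, while in pairUpBarelyOut the lower load sits at -264, past the edge, so the base register must be adjusted first and no pair forms. A sketch of the expected rewrite, with assumed register numbers:

;   ldur x8, [x0, #-248]
;   ldur x9, [x0, #-256]
; becomes
;   ldp x9, x8, [x0, #-256]
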
diff --git a/test/CodeGen/ARM64/ldxr-stxr.ll b/test/CodeGen/ARM64/ldxr-stxr.ll
deleted file mode 100644
index d50ba94..0000000
--- a/test/CodeGen/ARM64/ldxr-stxr.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
-
-%0 = type { i64, i64 }
-
-define i128 @f0(i8* %p) nounwind readonly {
-; CHECK-LABEL: f0:
-; CHECK: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
-entry:
- %ldrexd = tail call %0 @llvm.arm64.ldxp(i8* %p)
- %0 = extractvalue %0 %ldrexd, 1
- %1 = extractvalue %0 %ldrexd, 0
- %2 = zext i64 %0 to i128
- %3 = zext i64 %1 to i128
- %shl = shl nuw i128 %2, 64
- %4 = or i128 %shl, %3
- ret i128 %4
-}
-
-define i32 @f1(i8* %ptr, i128 %val) nounwind {
-; CHECK-LABEL: f1:
-; CHECK: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
-entry:
- %tmp4 = trunc i128 %val to i64
- %tmp6 = lshr i128 %val, 64
- %tmp7 = trunc i128 %tmp6 to i64
- %strexd = tail call i32 @llvm.arm64.stxp(i64 %tmp4, i64 %tmp7, i8* %ptr)
- ret i32 %strexd
-}
-
-declare %0 @llvm.arm64.ldxp(i8*) nounwind
-declare i32 @llvm.arm64.stxp(i64, i64, i8*) nounwind
-
-@var = global i64 0, align 8
-
-define void @test_load_i8(i8* %addr) {
-; CHECK-LABEL: test_load_i8:
-; CHECK: ldxrb w[[LOADVAL:[0-9]+]], [x0]
-; CHECK-NOT: uxtb
-; CHECK-NOT: and
-; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
-
- %val = call i64 @llvm.arm64.ldxr.p0i8(i8* %addr)
- %shortval = trunc i64 %val to i8
- %extval = zext i8 %shortval to i64
- store i64 %extval, i64* @var, align 8
- ret void
-}
-
-define void @test_load_i16(i16* %addr) {
-; CHECK-LABEL: test_load_i16:
-; CHECK: ldxrh w[[LOADVAL:[0-9]+]], [x0]
-; CHECK-NOT: uxth
-; CHECK-NOT: and
-; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
-
- %val = call i64 @llvm.arm64.ldxr.p0i16(i16* %addr)
- %shortval = trunc i64 %val to i16
- %extval = zext i16 %shortval to i64
- store i64 %extval, i64* @var, align 8
- ret void
-}
-
-define void @test_load_i32(i32* %addr) {
-; CHECK-LABEL: test_load_i32:
-; CHECK: ldxr w[[LOADVAL:[0-9]+]], [x0]
-; CHECK-NOT: uxtw
-; CHECK-NOT: and
-; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
-
- %val = call i64 @llvm.arm64.ldxr.p0i32(i32* %addr)
- %shortval = trunc i64 %val to i32
- %extval = zext i32 %shortval to i64
- store i64 %extval, i64* @var, align 8
- ret void
-}
-
-define void @test_load_i64(i64* %addr) {
-; CHECK-LABEL: test_load_i64:
-; CHECK: ldxr x[[LOADVAL:[0-9]+]], [x0]
-; CHECK: str x[[LOADVAL]], [{{x[0-9]+}}, :lo12:var]
-
- %val = call i64 @llvm.arm64.ldxr.p0i64(i64* %addr)
- store i64 %val, i64* @var, align 8
- ret void
-}
-
-
-declare i64 @llvm.arm64.ldxr.p0i8(i8*) nounwind
-declare i64 @llvm.arm64.ldxr.p0i16(i16*) nounwind
-declare i64 @llvm.arm64.ldxr.p0i32(i32*) nounwind
-declare i64 @llvm.arm64.ldxr.p0i64(i64*) nounwind
-
-define i32 @test_store_i8(i32, i8 %val, i8* %addr) {
-; CHECK-LABEL: test_store_i8:
-; CHECK-NOT: uxtb
-; CHECK-NOT: and
-; CHECK: stxrb w0, w1, [x2]
- %extval = zext i8 %val to i64
- %res = call i32 @llvm.arm64.stxr.p0i8(i64 %extval, i8* %addr)
- ret i32 %res
-}
-
-define i32 @test_store_i16(i32, i16 %val, i16* %addr) {
-; CHECK-LABEL: test_store_i16:
-; CHECK-NOT: uxth
-; CHECK-NOT: and
-; CHECK: stxrh w0, w1, [x2]
- %extval = zext i16 %val to i64
- %res = call i32 @llvm.arm64.stxr.p0i16(i64 %extval, i16* %addr)
- ret i32 %res
-}
-
-define i32 @test_store_i32(i32, i32 %val, i32* %addr) {
-; CHECK-LABEL: test_store_i32:
-; CHECK-NOT: uxtw
-; CHECK-NOT: and
-; CHECK: stxr w0, w1, [x2]
- %extval = zext i32 %val to i64
- %res = call i32 @llvm.arm64.stxr.p0i32(i64 %extval, i32* %addr)
- ret i32 %res
-}
-
-define i32 @test_store_i64(i32, i64 %val, i64* %addr) {
-; CHECK-LABEL: test_store_i64:
-; CHECK: stxr w0, x1, [x2]
- %res = call i32 @llvm.arm64.stxr.p0i64(i64 %val, i64* %addr)
- ret i32 %res
-}
-
-declare i32 @llvm.arm64.stxr.p0i8(i64, i8*) nounwind
-declare i32 @llvm.arm64.stxr.p0i16(i64, i16*) nounwind
-declare i32 @llvm.arm64.stxr.p0i32(i64, i32*) nounwind
-declare i32 @llvm.arm64.stxr.p0i64(i64, i64*) nounwind
-
-; CHECK: test_clear:
-; CHECK: clrex
-define void @test_clear() {
- call void @llvm.arm64.clrex()
- ret void
-}
-
-declare void @llvm.arm64.clrex() nounwind
-
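
The ldxr/stxr intrinsics in the deleted ldxr-stxr.ll exist to build exclusive-access retry loops. A minimal sketch of the classic read-modify-write loop, reusing the same intrinsic declarations (illustrative only; the function name is made up):

define void @atomic_inc_i32(i32* %p) {
entry:
  br label %retry

retry:
  ; ldxr loads the value and arms the exclusive monitor for %p
  %old = call i64 @llvm.arm64.ldxr.p0i32(i32* %p)
  %val = trunc i64 %old to i32
  %inc = add i32 %val, 1
  %ext = zext i32 %inc to i64
  ; stxr returns 0 on success, non-zero if exclusivity was lost in between
  %status = call i32 @llvm.arm64.stxr.p0i32(i64 %ext, i32* %p)
  %failed = icmp ne i32 %status, 0
  br i1 %failed, label %retry, label %done

done:
  ret void
}

declare i64 @llvm.arm64.ldxr.p0i32(i32*) nounwind
declare i32 @llvm.arm64.stxr.p0i32(i64, i32*) nounwind
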
diff --git a/test/CodeGen/ARM64/leaf-compact-unwind.ll b/test/CodeGen/ARM64/leaf-compact-unwind.ll
deleted file mode 100644
index 0a58717..0000000
--- a/test/CodeGen/ARM64/leaf-compact-unwind.ll
+++ /dev/null
@@ -1,161 +0,0 @@
-; Use the -disable-cfi flag so that we get the compact unwind info in the
-; emitted assembly. Compact unwind info is omitted when CFI directives
-; are emitted.
-;
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios -disable-cfi < %s | FileCheck %s
-;
-; rdar://13070556
-
-@bar = common global i32 0, align 4
-
-; Leaf function with no stack allocation and no saving/restoring
-; of non-volatile registers.
-define i32 @foo1(i32 %a) #0 {
-entry:
- %add = add nsw i32 %a, 42
- ret i32 %add
-}
-
-; Leaf function with stack allocation but no saving/restoring
-; of non-volatile registers.
-define i32 @foo2(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) #0 {
-entry:
- %stack = alloca [36 x i32], align 4
- br label %for.body
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.body ]
- %arrayidx = getelementptr inbounds [36 x i32]* %stack, i64 0, i64 %indvars.iv19
- %0 = trunc i64 %indvars.iv19 to i32
- store i32 %0, i32* %arrayidx, align 4, !tbaa !0
- %indvars.iv.next20 = add i64 %indvars.iv19, 1
- %lftr.wideiv21 = trunc i64 %indvars.iv.next20 to i32
- %exitcond22 = icmp eq i32 %lftr.wideiv21, 36
- br i1 %exitcond22, label %for.body4, label %for.body
-
-for.body4: ; preds = %for.body, %for.body4
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 0, %for.body ]
- %z1.016 = phi i32 [ %add, %for.body4 ], [ 0, %for.body ]
- %arrayidx6 = getelementptr inbounds [36 x i32]* %stack, i64 0, i64 %indvars.iv
- %1 = load i32* %arrayidx6, align 4, !tbaa !0
- %add = add nsw i32 %1, %z1.016
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 36
- br i1 %exitcond, label %for.end9, label %for.body4
-
-for.end9: ; preds = %for.body4
- ret i32 %add
-}
-
-; Leaf function with no stack allocation but with saving/restoring of
-; non-volatile registers.
-define i32 @foo3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) #1 {
-entry:
- %0 = load volatile i32* @bar, align 4, !tbaa !0
- %1 = load volatile i32* @bar, align 4, !tbaa !0
- %2 = load volatile i32* @bar, align 4, !tbaa !0
- %3 = load volatile i32* @bar, align 4, !tbaa !0
- %4 = load volatile i32* @bar, align 4, !tbaa !0
- %5 = load volatile i32* @bar, align 4, !tbaa !0
- %6 = load volatile i32* @bar, align 4, !tbaa !0
- %7 = load volatile i32* @bar, align 4, !tbaa !0
- %8 = load volatile i32* @bar, align 4, !tbaa !0
- %9 = load volatile i32* @bar, align 4, !tbaa !0
- %10 = load volatile i32* @bar, align 4, !tbaa !0
- %11 = load volatile i32* @bar, align 4, !tbaa !0
- %12 = load volatile i32* @bar, align 4, !tbaa !0
- %13 = load volatile i32* @bar, align 4, !tbaa !0
- %14 = load volatile i32* @bar, align 4, !tbaa !0
- %15 = load volatile i32* @bar, align 4, !tbaa !0
- %16 = load volatile i32* @bar, align 4, !tbaa !0
- %17 = load volatile i32* @bar, align 4, !tbaa !0
- %factor = mul i32 %h, -2
- %factor56 = mul i32 %g, -2
- %factor57 = mul i32 %f, -2
- %factor58 = mul i32 %e, -2
- %factor59 = mul i32 %d, -2
- %factor60 = mul i32 %c, -2
- %factor61 = mul i32 %b, -2
- %sum = add i32 %1, %0
- %sum62 = add i32 %sum, %2
- %sum63 = add i32 %sum62, %3
- %sum64 = add i32 %sum63, %4
- %sum65 = add i32 %sum64, %5
- %sum66 = add i32 %sum65, %6
- %sum67 = add i32 %sum66, %7
- %sum68 = add i32 %sum67, %8
- %sum69 = add i32 %sum68, %9
- %sum70 = add i32 %sum69, %10
- %sum71 = add i32 %sum70, %11
- %sum72 = add i32 %sum71, %12
- %sum73 = add i32 %sum72, %13
- %sum74 = add i32 %sum73, %14
- %sum75 = add i32 %sum74, %15
- %sum76 = add i32 %sum75, %16
- %sub10 = sub i32 %17, %sum76
- %sub11 = add i32 %sub10, %factor
- %sub12 = add i32 %sub11, %factor56
- %sub13 = add i32 %sub12, %factor57
- %sub14 = add i32 %sub13, %factor58
- %sub15 = add i32 %sub14, %factor59
- %sub16 = add i32 %sub15, %factor60
- %add = add i32 %sub16, %factor61
- ret i32 %add
-}
-
-; Leaf function with stack allocation and saving/restoring of non-volatile
-; registers.
-define i32 @foo4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) #0 {
-entry:
- %stack = alloca [128 x i32], align 4
- %0 = zext i32 %a to i64
- br label %for.body
-
-for.cond2.preheader: ; preds = %for.body
- %1 = sext i32 %f to i64
- br label %for.body4
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv22 = phi i64 [ 0, %entry ], [ %indvars.iv.next23, %for.body ]
- %2 = add nsw i64 %indvars.iv22, %0
- %arrayidx = getelementptr inbounds [128 x i32]* %stack, i64 0, i64 %indvars.iv22
- %3 = trunc i64 %2 to i32
- store i32 %3, i32* %arrayidx, align 4, !tbaa !0
- %indvars.iv.next23 = add i64 %indvars.iv22, 1
- %lftr.wideiv25 = trunc i64 %indvars.iv.next23 to i32
- %exitcond26 = icmp eq i32 %lftr.wideiv25, 128
- br i1 %exitcond26, label %for.cond2.preheader, label %for.body
-
-for.body4: ; preds = %for.body4, %for.cond2.preheader
- %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next, %for.body4 ]
- %z1.018 = phi i32 [ 0, %for.cond2.preheader ], [ %add8, %for.body4 ]
- %4 = add nsw i64 %indvars.iv, %1
- %arrayidx7 = getelementptr inbounds [128 x i32]* %stack, i64 0, i64 %4
- %5 = load i32* %arrayidx7, align 4, !tbaa !0
- %add8 = add nsw i32 %5, %z1.018
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp eq i32 %lftr.wideiv, 128
- br i1 %exitcond, label %for.end11, label %for.body4
-
-for.end11: ; preds = %for.body4
- ret i32 %add8
-}
-
-attributes #0 = { readnone "target-cpu"="cyclone" }
-attributes #1 = { "target-cpu"="cyclone" }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-
-; CHECK: .section __LD,__compact_unwind,regular,debug
-; CHECK: .quad _foo1 ; Range Start
-; CHECK: .long 33554432 ; Compact Unwind Encoding: 0x2000000
-; CHECK: .quad _foo2 ; Range Start
-; CHECK: .long 33591296 ; Compact Unwind Encoding: 0x2009000
-; CHECK: .quad _foo3 ; Range Start
-; CHECK: .long 33570831 ; Compact Unwind Encoding: 0x200400f
-; CHECK: .quad _foo4 ; Range Start
-; CHECK: .long 33689616 ; Compact Unwind Encoding: 0x2021010
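-
-; A rough decoding of the encodings above, assuming the Mach-O arm64
-; compact-unwind format (mode in bits 24-27, frameless stack size in bits
-; 12-23 in units of 16 bytes, callee-saved pair flags in the low bits):
-;   0x2000000 = frameless, no stack, no saved pairs          (foo1)
-;   0x2009000 = frameless, 0x009 * 16 = 144 bytes of stack   (foo2)
-;   0x200400f = frameless, 64 bytes, x19-x26 pairs saved     (foo3)
-;   0x2021010 = frameless, 0x021 * 16 = 528 bytes, x27/x28   (foo4)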
diff --git a/test/CodeGen/ARM64/lit.local.cfg b/test/CodeGen/ARM64/lit.local.cfg
deleted file mode 100644
index 48af100..0000000
--- a/test/CodeGen/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,11 +0,0 @@
-import re
-
-config.suffixes = ['.ll', '.c', '.cpp']
-
-targets = set(config.root.targets_to_build.split())
-if 'ARM64' not in targets:
- config.unsupported = True
-
-# For now we don't test arm64-win32.
-if re.search(r'cygwin|mingw32|win32', config.target_triple):
- config.unsupported = True
diff --git a/test/CodeGen/ARM64/long-shift.ll b/test/CodeGen/ARM64/long-shift.ll
deleted file mode 100644
index 6f37044..0000000
--- a/test/CodeGen/ARM64/long-shift.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
-
-define i128 @shl(i128 %r, i128 %s) nounwind readnone {
-; CHECK-LABEL: shl:
-; CHECK: lslv [[XREG_0:x[0-9]+]], x1, x2
-; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2
-; CHECK-NEXT: lsrv [[XREG_3:x[0-9]+]], x0, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]]
-; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64
-; CHECK-NEXT: lslv [[XREG_5:x[0-9]+]], x0, [[XREG_4]]
-; CHECK-NEXT: cmp [[XREG_4]], #0
-; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge
-; CHECK-NEXT: lslv [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
-; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge
-; CHECK-NEXT: ret
-
- %shl = shl i128 %r, %s
- ret i128 %shl
-}
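-
-; A sketch of the lowering the CHECK lines above pin down (our reading of
-; the emitted sequence, assuming shift amount s and the i128 split lo = x0,
-; hi = x1):
-;   hi' = (s < 64) ? (hi << s) | (lo >> (64 - s)) : lo << (s - 64)
-;   lo' = (s < 64) ? (lo << s)                    : 0
-; The cmp on s - 64 feeds both csels, selecting the big-shift results once
-; s - 64 is non-negative; ashr and lshr below follow the same shape.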
-
-define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
-; CHECK-LABEL: ashr:
-; CHECK: lsrv [[XREG_0:x[0-9]+]], x0, x2
-; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2
-; CHECK-NEXT: lslv [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
-; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
-; CHECK-NEXT: asrv [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
-; CHECK-NEXT: cmp [[XREG_5]], #0
-; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
-; CHECK-NEXT: asrv [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
-; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
-; CHECK-NEXT: ret
-
- %shr = ashr i128 %r, %s
- ret i128 %shr
-}
-
-define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
-; CHECK-LABEL: lshr:
-; CHECK: lsrv [[XREG_0:x[0-9]+]], x0, x2
-; CHECK-NEXT: orr [[XREG_1:x[0-9]+]], xzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], [[XREG_1]], x2
-; CHECK-NEXT: lslv [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
-; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
-; CHECK-NEXT: lsrv [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
-; CHECK-NEXT: cmp [[XREG_5]], #0
-; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
-; CHECK-NEXT: lsrv [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge
-; CHECK-NEXT: ret
-
- %shr = lshr i128 %r, %s
- ret i128 %shr
-}
diff --git a/test/CodeGen/ARM64/memcpy-inline.ll b/test/CodeGen/ARM64/memcpy-inline.ll
deleted file mode 100644
index 26f5166..0000000
--- a/test/CodeGen/ARM64/memcpy-inline.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; RUN: llc < %s -march=arm64 -mcpu=cyclone | FileCheck %s
-
-%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
-
-@src = external global %struct.x
-@dst = external global %struct.x
-
-@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 1
-@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM, SOME STRING BLAH\00", align 1
-@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM, SOME\00", align 1
-@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR \00", align 1
-@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
-@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1
-@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
-
-define i32 @t0() {
-entry:
-; CHECK-LABEL: t0:
-; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10]
-; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10]
-; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8]
-; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
-; CHECK: ldr [[REG2:x[0-9]+]],
-; CHECK: str [[REG2]],
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
- ret i32 0
-}
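-
-; Our reading of the expansion checked above (not extra test input): the
-; 11-byte copy is split into naturally sized chunks, 8 + 2 + 1:
-;   ldr/str   8 bytes at offset 0
-;   ldrh/strh 2 bytes at offset 8
-;   ldrb/strb 1 byte  at offset 10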
-
-define void @t1(i8* nocapture %C) nounwind {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15]
-; CHECK: stur [[DEST]], [x0, #15]
-; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
-; CHECK: str [[DEST]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
- ret void
-}
-
-define void @t2(i8* nocapture %C) nounwind {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: movz [[REG3:w[0-9]+]]
-; CHECK: movk [[REG3]],
-; CHECK: str [[REG3]], [x0, #32]
-; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
-; CHECK: stp [[DEST1]], [[DEST2]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
- ret void
-}
-
-define void @t3(i8* nocapture %C) nounwind {
-entry:
-; CHECK-LABEL: t3:
-; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16]
-; CHECK: str [[REG4]], [x0, #16]
-; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
-; CHECK: str [[DEST]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
- ret void
-}
-
-define void @t4(i8* nocapture %C) nounwind {
-entry:
-; CHECK-LABEL: t4:
-; CHECK: orr [[REG5:w[0-9]+]], wzr, #0x20
-; CHECK: strh [[REG5]], [x0, #16]
-; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
-; CHECK: str [[REG6]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
- ret void
-}
-
-define void @t5(i8* nocapture %C) nounwind {
-entry:
-; CHECK-LABEL: t5:
-; CHECK: strb wzr, [x0, #6]
-; CHECK: movz [[REG7:w[0-9]+]], #21587
-; CHECK: strh [[REG7]], [x0, #4]
-; CHECK: movz [[REG8:w[0-9]+]],
-; CHECK: movk [[REG8]],
-; CHECK: str [[REG8]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
- ret void
-}
-
-define void @t6() nounwind {
-entry:
-; CHECK-LABEL: t6:
-; CHECK: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
-; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
-; CHECK: ldr
-; CHECK: str
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
- ret void
-}
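-
-; Our reading of t6 (the offsets of the second ldr/str pair are not pinned
-; by the CHECK lines): the 14-byte copy is done as two 8-byte transfers at
-; offsets 6 and 0, overlapping on bytes 6-7; the overlap needs fewer
-; instructions than an 8 + 4 + 2 split would.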
-
-%struct.Foo = type { i32, i32, i32, i32 }
-
-define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
-entry:
-; CHECK-LABEL: t7:
-; CHECK: ldr [[REG10:q[0-9]+]], [x1]
-; CHECK: str [[REG10]], [x0]
- %0 = bitcast %struct.Foo* %a to i8*
- %1 = bitcast %struct.Foo* %b to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM64/memset-to-bzero.ll b/test/CodeGen/ARM64/memset-to-bzero.ll
deleted file mode 100644
index b28122c..0000000
--- a/test/CodeGen/ARM64/memset-to-bzero.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llc %s -march arm64 -o - | FileCheck %s
-; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()
-
-; CHECK: @fct1
-; For small size (<= 256), we do not change memset to bzero.
-; CHECK: memset
-define void @fct1(i8* nocapture %ptr) {
-entry:
- tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false)
- ret void
-}
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-
-; CHECK: @fct2
-; When the size is bigger than 256, we change memset into bzero.
-; CHECK: bzero
-define void @fct2(i8* nocapture %ptr) {
-entry:
- tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false)
- ret void
-}
-
-; CHECK: @fct3
-; For unknown size, change to bzero.
-; CHECK: bzero
-define void @fct3(i8* nocapture %ptr, i32 %unknown) {
-entry:
- %conv = sext i32 %unknown to i64
- tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false)
- ret void
-}
-
-; CHECK: @fct4
-; Size <= 256, no change.
-; CHECK: memset
-define void @fct4(i8* %ptr) {
-entry:
- %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
- %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
- ret void
-}
-
-declare i8* @__memset_chk(i8*, i32, i64, i64)
-
-declare i64 @llvm.objectsize.i64(i8*, i1)
-
-; CHECK: @fct5
-; Size > 256, change.
-; CHECK: bzero
-define void @fct5(i8* %ptr) {
-entry:
- %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
- %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
- ret void
-}
-
-; CHECK: @fct6
-; Size = unknown, change.
-; CHECK: bzero
-define void @fct6(i8* %ptr, i32 %unknown) {
-entry:
- %conv = sext i32 %unknown to i64
- %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
- %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
- ret void
-}
-
-; The next functions check that memset is not turned into bzero
-; when the constant being set is non-zero, regardless of the given size.
-
-; CHECK: @fct7
-; memset with a non-zero value, no change.
-; CHECK: memset
-define void @fct7(i8* %ptr) {
-entry:
- %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
- %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
- ret void
-}
-
-; CHECK: @fct8
-; memset with a non-zero value, no change.
-; CHECK: memset
-define void @fct8(i8* %ptr) {
-entry:
- %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
- %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
- ret void
-}
-
-; CHECK: @fct9
-; memset with a non-zero value, no change.
-; CHECK: memset
-define void @fct9(i8* %ptr, i32 %unknown) {
-entry:
- %conv = sext i32 %unknown to i64
- %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
- %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)
- ret void
-}
diff --git a/test/CodeGen/ARM64/movi.ll b/test/CodeGen/ARM64/movi.ll
deleted file mode 100644
index 8fceccc..0000000
--- a/test/CodeGen/ARM64/movi.ll
+++ /dev/null
@@ -1,202 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-;==--------------------------------------------------------------------------==
-; Tests for MOV-immediate implemented with ORR-immediate.
-;==--------------------------------------------------------------------------==
-
-; 64-bit immed with 32-bit pattern size, rotated by 0.
-define i64 @test64_32_rot0() nounwind {
-; CHECK: test64_32_rot0
-; CHECK: orr x0, xzr, #0x700000007
- ret i64 30064771079
-}
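-
-; A worked decoding of the constant above (not part of the original test):
-; 30064771079 = 0x0000000700000007, i.e. the 32-bit element 0x00000007
-; (three consecutive ones, rotated by 0) replicated across the register,
-; which is exactly the shape a single ORR with a logical immediate can
-; materialize.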
-
-; 64-bit immed with 32-bit pattern size, rotated by 2.
-define i64 @test64_32_rot2() nounwind {
-; CHECK: test64_32_rot2
-; CHECK: orr x0, xzr, #0xc0000003c0000003
- ret i64 13835058071388291075
-}
-
-; 64-bit immed with 4-bit pattern size, rotated by 3.
-define i64 @test64_4_rot3() nounwind {
-; CHECK: test64_4_rot3
-; CHECK: orr x0, xzr, #0xeeeeeeeeeeeeeeee
- ret i64 17216961135462248174
-}
-
-; 32-bit immed with 32-bit pattern size, rotated by 16.
-define i32 @test32_32_rot16() nounwind {
-; CHECK: test32_32_rot16
-; CHECK: orr w0, wzr, #0xff0000
- ret i32 16711680
-}
-
-; 32-bit immed with 2-bit pattern size, rotated by 1.
-define i32 @test32_2_rot1() nounwind {
-; CHECK: test32_2_rot1
-; CHECK: orr w0, wzr, #0xaaaaaaaa
- ret i32 2863311530
-}
-
-;==--------------------------------------------------------------------------==
-; Tests for MOVZ with MOVK.
-;==--------------------------------------------------------------------------==
-
-define i32 @movz() nounwind {
-; CHECK: movz
-; CHECK: movz w0, #5
- ret i32 5
-}
-
-define i64 @movz_3movk() nounwind {
-; CHECK: movz_3movk
-; CHECK: movz x0, #5, lsl #48
-; CHECK-NEXT: movk x0, #4660, lsl #32
-; CHECK-NEXT: movk x0, #43981, lsl #16
-; CHECK-NEXT: movk x0, #22136
- ret i64 1427392313513592
-}
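-
-; A worked decoding of the constant above (not part of the original test):
-;   1427392313513592 = 0x0005_1234_ABCD_5678
-; movz seeds the top halfword (#5, lsl #48) and each movk patches one
-; 16-bit chunk: #4660 = 0x1234, #43981 = 0xABCD, #22136 = 0x5678.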
-
-define i64 @movz_movk_skip1() nounwind {
-; CHECK: movz_movk_skip1
-; CHECK: movz x0, #5, lsl #32
-; CHECK-NEXT: movk x0, #17185, lsl #16
- ret i64 22601072640
-}
-
-define i64 @movz_skip1_movk() nounwind {
-; CHECK: movz_skip1_movk
-; CHECK: movz x0, #34388, lsl #32
-; CHECK-NEXT: movk x0, #4660
- ret i64 147695335379508
-}
-
-;==--------------------------------------------------------------------------==
-; Tests for MOVN with MOVK.
-;==--------------------------------------------------------------------------==
-
-define i64 @movn() nounwind {
-; CHECK: movn
-; CHECK: movn x0, #41
- ret i64 -42
-}
-
-define i64 @movn_skip1_movk() nounwind {
-; CHECK: movn_skip1_movk
-; CHECK: movn x0, #41, lsl #32
-; CHECK-NEXT: movk x0, #4660
- ret i64 -176093720012
-}
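-
-; A worked decoding, assuming the usual MOVN semantics (write the bitwise
-; NOT of the shifted 16-bit immediate):
-;   movn x0, #41, lsl #32  ->  ~(41 << 32) = 0xFFFFFFD6FFFFFFFF
-;   movk x0, #4660         ->  patch bits 0-15 with 0x1234
-; giving 0xFFFFFFD6FFFF1234 = -176093720012.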
-
-;==--------------------------------------------------------------------------==
-; Tests for ORR with MOVK.
-;==--------------------------------------------------------------------------==
-; rdar://14987673
-
-define i64 @orr_movk1() nounwind {
-; CHECK: orr_movk1
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #57005, lsl #16
- ret i64 72056498262245120
-}
-
-define i64 @orr_movk2() nounwind {
-; CHECK: orr_movk2
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #57005, lsl #48
- ret i64 -2400982650836746496
-}
-
-define i64 @orr_movk3() nounwind {
-; CHECK: orr_movk3
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #57005, lsl #32
- ret i64 72020953688702720
-}
-
-define i64 @orr_movk4() nounwind {
-; CHECK: orr_movk4
-; CHECK: orr x0, xzr, #0xffff0000ffff0
-; CHECK: movk x0, #57005
- ret i64 72056494543068845
-}
-
-; rdar://14987618
-define i64 @orr_movk5() nounwind {
-; CHECK: orr_movk5
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #57005, lsl #16
- ret i64 -71777214836900096
-}
-
-define i64 @orr_movk6() nounwind {
-; CHECK: orr_movk6
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #57005, lsl #16
-; CHECK: movk x0, #57005, lsl #48
- ret i64 -2400982647117578496
-}
-
-define i64 @orr_movk7() nounwind {
-; CHECK: orr_movk7
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #57005, lsl #48
- ret i64 -2400982646575268096
-}
-
-define i64 @orr_movk8() nounwind {
-; CHECK: orr_movk8
-; CHECK: orr x0, xzr, #0xff00ff00ff00ff00
-; CHECK: movk x0, #57005
-; CHECK: movk x0, #57005, lsl #48
- ret i64 -2400982646575276371
-}
-
-; rdar://14987715
-define i64 @orr_movk9() nounwind {
-; CHECK: orr_movk9
-; CHECK: orr x0, xzr, #0xffffff000000000
-; CHECK: movk x0, #65280
-; CHECK: movk x0, #57005, lsl #16
- ret i64 1152921439623315200
-}
-
-define i64 @orr_movk10() nounwind {
-; CHECK: orr_movk10
-; CHECK: orr x0, xzr, #0xfffffffffffff00
-; CHECK: movk x0, #57005, lsl #16
- ret i64 1152921504047824640
-}
-
-define i64 @orr_movk11() nounwind {
-; CHECK: orr_movk11
-; CHECK: orr x0, xzr, #0xfff00000000000ff
-; CHECK: movk x0, #57005, lsl #16
-; CHECK: movk x0, #65535, lsl #32
- ret i64 -4222125209747201
-}
-
-define i64 @orr_movk12() nounwind {
-; CHECK: orr_movk12
-; CHECK: orr x0, xzr, #0xfff00000000000ff
-; CHECK: movk x0, #57005, lsl #32
- ret i64 -4258765016661761
-}
-
-define i64 @orr_movk13() nounwind {
-; CHECK: orr_movk13
-; CHECK: orr x0, xzr, #0xfffff000000
-; CHECK: movk x0, #57005
-; CHECK: movk x0, #57005, lsl #48
- ret i64 -2401245434149282131
-}
-
-; rdar://13944082
-define i64 @g() nounwind {
-; CHECK: g
-; CHECK: movz x0, #65535, lsl #48
-; CHECK: movk x0, #2
-entry:
- ret i64 -281474976710654
-}
diff --git a/test/CodeGen/ARM64/neon-compare-instructions.ll b/test/CodeGen/ARM64/neon-compare-instructions.ll
deleted file mode 100644
index 55f7b99..0000000
--- a/test/CodeGen/ARM64/neon-compare-instructions.ll
+++ /dev/null
@@ -1,1191 +0,0 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s
-
-define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp eq <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmeq16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp eq <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmeq4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = icmp eq <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmeq8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = icmp eq <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmeq2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = icmp eq <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmeq4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = icmp eq <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmeq2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = icmp eq <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmne8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ne <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmne16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmne4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ne <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmne8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmne2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ne <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmne4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmne2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmgt8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp sgt <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmgt16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp sgt <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmgt4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = icmp sgt <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmgt8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = icmp sgt <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmgt2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = icmp sgt <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmgt4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = icmp sgt <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmgt2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = icmp sgt <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmlt8xi8(<8 x i8> %A, <8 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
- %tmp3 = icmp slt <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
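-
-; The swap is sound because "icmp slt %A, %B" is the same predicate as
-; "icmp sgt %B, %A"; there is no two-register cmlt, so the backend emits
-; cmgt with the operands reversed. The remaining cmlt tests rely on the
-; same identity.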
-
-define <16 x i8> @cmlt16xi8(<16 x i8> %A, <16 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
- %tmp3 = icmp slt <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmlt4xi16(<4 x i16> %A, <4 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
- %tmp3 = icmp slt <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmlt8xi16(<8 x i16> %A, <8 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
- %tmp3 = icmp slt <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmlt2xi32(<2 x i32> %A, <2 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
- %tmp3 = icmp slt <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmlt4xi32(<4 x i32> %A, <4 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
- %tmp3 = icmp slt <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmlt2xi64(<2 x i64> %A, <2 x i64> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LT implemented as GT, so check reversed operands.
-;CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
- %tmp3 = icmp slt <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmge8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp sge <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmge16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp sge <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmge4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = icmp sge <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmge8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = icmp sge <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmge2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = icmp sge <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmge4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = icmp sge <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmge2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = icmp sge <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmle8xi8(<8 x i8> %A, <8 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
- %tmp3 = icmp sle <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmle16xi8(<16 x i8> %A, <16 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
- %tmp3 = icmp sle <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmle4xi16(<4 x i16> %A, <4 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
- %tmp3 = icmp sle <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmle8xi16(<8 x i16> %A, <8 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
- %tmp3 = icmp sle <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmle2xi32(<2 x i32> %A, <2 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
- %tmp3 = icmp sle <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmle4xi32(<4 x i32> %A, <4 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
- %tmp3 = icmp sle <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmle2xi64(<2 x i64> %A, <2 x i64> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LE implemented as GE, so check reversed operands.
-;CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
- %tmp3 = icmp sle <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmhi8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ugt <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmhi16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ugt <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmhi4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = icmp ugt <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmhi8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = icmp ugt <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmhi2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = icmp ugt <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmhi4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = icmp ugt <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmhi2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = icmp ugt <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmlo8xi8(<8 x i8> %A, <8 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
- %tmp3 = icmp ult <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmlo16xi8(<16 x i8> %A, <16 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
- %tmp3 = icmp ult <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmlo4xi16(<4 x i16> %A, <4 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
- %tmp3 = icmp ult <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmlo8xi16(<8 x i16> %A, <8 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
- %tmp3 = icmp ult <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmlo2xi32(<2 x i32> %A, <2 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
- %tmp3 = icmp ult <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmlo4xi32(<4 x i32> %A, <4 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
- %tmp3 = icmp ult <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmlo2xi64(<2 x i64> %A, <2 x i64> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
- %tmp3 = icmp ult <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmhs8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp uge <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmhs16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp uge <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmhs4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp3 = icmp uge <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmhs8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
- %tmp3 = icmp uge <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmhs2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp3 = icmp uge <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmhs4xi32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
- %tmp3 = icmp uge <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmhs2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
- %tmp3 = icmp uge <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmls8xi8(<8 x i8> %A, <8 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
- %tmp3 = icmp ule <8 x i8> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmls16xi8(<16 x i8> %A, <16 x i8> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
- %tmp3 = icmp ule <16 x i8> %A, %B;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmls4xi16(<4 x i16> %A, <4 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
- %tmp3 = icmp ule <4 x i16> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmls8xi16(<8 x i16> %A, <8 x i16> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
- %tmp3 = icmp ule <8 x i16> %A, %B;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmls2xi32(<2 x i32> %A, <2 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
- %tmp3 = icmp ule <2 x i32> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmls4xi32(<4 x i32> %A, <4 x i32> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
- %tmp3 = icmp ule <4 x i32> %A, %B;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
- %tmp3 = icmp ule <2 x i64> %A, %B;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-
-define <8 x i8> @cmeqz8xi8(<8 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
- %tmp3 = icmp eq <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmeqz16xi8(<16 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
- %tmp3 = icmp eq <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmeqz4xi16(<4 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
- %tmp3 = icmp eq <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmeqz8xi16(<8 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
- %tmp3 = icmp eq <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmeqz2xi32(<2 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
- %tmp3 = icmp eq <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmeqz4xi32(<4 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
- %tmp3 = icmp eq <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmeqz2xi64(<2 x i64> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
- %tmp3 = icmp eq <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-
-define <8 x i8> @cmgez8xi8(<8 x i8> %A) {
-;CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
- %tmp3 = icmp sge <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmgez16xi8(<16 x i8> %A) {
-;CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
- %tmp3 = icmp sge <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmgez4xi16(<4 x i16> %A) {
-;CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
- %tmp3 = icmp sge <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmgez8xi16(<8 x i16> %A) {
-;CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
- %tmp3 = icmp sge <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmgez2xi32(<2 x i32> %A) {
-;CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
- %tmp3 = icmp sge <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmgez4xi32(<4 x i32> %A) {
-;CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
- %tmp3 = icmp sge <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
-;CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
- %tmp3 = icmp sge <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-
-define <8 x i8> @cmgtz8xi8(<8 x i8> %A) {
-;CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
- %tmp3 = icmp sgt <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmgtz16xi8(<16 x i8> %A) {
-;CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
- %tmp3 = icmp sgt <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmgtz4xi16(<4 x i16> %A) {
-;CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
- %tmp3 = icmp sgt <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmgtz8xi16(<8 x i16> %A) {
-;CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
- %tmp3 = icmp sgt <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmgtz2xi32(<2 x i32> %A) {
-;CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
- %tmp3 = icmp sgt <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmgtz4xi32(<4 x i32> %A) {
-;CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
- %tmp3 = icmp sgt <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmgtz2xi64(<2 x i64> %A) {
-;CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
- %tmp3 = icmp sgt <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmlez8xi8(<8 x i8> %A) {
-;CHECK: cmle {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
- %tmp3 = icmp sle <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmlez16xi8(<16 x i8> %A) {
-;CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
- %tmp3 = icmp sle <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmlez4xi16(<4 x i16> %A) {
-;CHECK: cmle {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
- %tmp3 = icmp sle <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmlez8xi16(<8 x i16> %A) {
-;CHECK: cmle {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
- %tmp3 = icmp sle <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmlez2xi32(<2 x i32> %A) {
-;CHECK: cmle {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
- %tmp3 = icmp sle <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmlez4xi32(<4 x i32> %A) {
-;CHECK: cmle {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
- %tmp3 = icmp sle <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmlez2xi64(<2 x i64> %A) {
-;CHECK: cmle {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
- %tmp3 = icmp sle <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmltz8xi8(<8 x i8> %A) {
-;CHECK: cmlt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
- %tmp3 = icmp slt <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmltz16xi8(<16 x i8> %A) {
-;CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
- %tmp3 = icmp slt <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmltz4xi16(<4 x i16> %A) {
-;CHECK: cmlt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
- %tmp3 = icmp slt <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmltz8xi16(<8 x i16> %A) {
-;CHECK: cmlt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
- %tmp3 = icmp slt <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmltz2xi32(<2 x i32> %A) {
-;CHECK: cmlt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
- %tmp3 = icmp slt <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmltz4xi32(<4 x i32> %A) {
-;CHECK: cmlt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
- %tmp3 = icmp slt <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
-;CHECK: cmlt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
- %tmp3 = icmp slt <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ne <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
-;CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ne <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
-;CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0
-;CHECK-NEXT: not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ne <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
-;CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
-;CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0
-;CHECK-NEXT: not {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
- %tmp3 = icmp ne <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
- %tmp3 = icmp uge <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
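-
-; Why the movi: unlike cmeq/cmge/cmgt/cmle/cmlt, the unsigned compares
-; (cmhs/cmhi) have no "#0" immediate form, so the zero vector has to be
-; materialized first in the cmhs/cmhi tests that follow (our reading of
-; the ISA, not extra test input). An unsigned >= 0 compare is trivially
-; all-ones; the tests only pin the selected instructions.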
-
-define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
- %tmp3 = icmp uge <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
- %tmp3 = icmp uge <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
- %tmp3 = icmp uge <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
- %tmp3 = icmp uge <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
- %tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
- %tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-
-define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
- %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
- %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
- %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
- %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
- %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
- %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
- %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v[[ZERO]].8b, v0.8b
- %tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
- %tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
- %tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
- %tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
- %tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
- %tmp3 = icmp ule <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LS implemented as HS, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
- %tmp3 = icmp ule <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v[[ZERO]].8b, {{v[0-9]+}}.8b
- %tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
- ret <8 x i8> %tmp4
-}
-
-define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
-; Using registers other than v0, v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
- %tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
- %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
- ret <16 x i8> %tmp4
-}
-
-define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
- %tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
- %tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
- %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
- %tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
- %tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
- %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
-; Using registers other than v0 and v1 is possible, but would be odd.
-; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
-;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
- %tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
- %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <1 x i64> @cmeqz_v1i64(<1 x i64> %A) {
-; CHECK-LABEL: cmeqz_v1i64:
-; CHECK: cmeq d0, d0, #0
- %tst = icmp eq <1 x i64> %A, <i64 0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @cmgez_v1i64(<1 x i64> %A) {
-; CHECK-LABEL: cmgez_v1i64:
-; CHECK: cmge d0, d0, #0
- %tst = icmp sge <1 x i64> %A, <i64 0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @cmgtz_v1i64(<1 x i64> %A) {
-; CHECK-LABEL: cmgtz_v1i64:
-; CHECK: cmgt d0, d0, #0
- %tst = icmp sgt <1 x i64> %A, <i64 0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @cmlez_v1i64(<1 x i64> %A) {
-; CHECK-LABEL: cmlez_v1i64:
-; CHECK: cmle d0, d0, #0
- %tst = icmp sle <1 x i64> %A, <i64 0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @cmltz_v1i64(<1 x i64> %A) {
-; CHECK-LABEL: cmltz_v1i64:
-; CHECK: cmlt d0, d0, #0
- %tst = icmp slt <1 x i64> %A, <i64 0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmeqz_v1f64(<1 x double> %A) {
-; CHECK-LABEL: fcmeqz_v1f64:
-; CHECK: fcmeq d0, d0, #0
- %tst = fcmp oeq <1 x double> %A, <double 0.0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmgez_v1f64(<1 x double> %A) {
-; CHECK-LABEL: fcmgez_v1f64:
-; CHECK: fcmge d0, d0, #0
- %tst = fcmp oge <1 x double> %A, <double 0.0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmgtz_v1f64(<1 x double> %A) {
-; CHECK-LABEL: fcmgtz_v1f64:
-; CHECK: fcmgt d0, d0, #0
- %tst = fcmp ogt <1 x double> %A, <double 0.0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmlez_v1f64(<1 x double> %A) {
-; CHECK-LABEL: fcmlez_v1f64:
-; CHECK: fcmle d0, d0, #0
- %tst = fcmp ole <1 x double> %A, <double 0.0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmltz_v1f64(<1 x double> %A) {
-; CHECK-LABEL: fcmltz_v1f64:
-; CHECK: fcmlt d0, d0, #0
- %tst = fcmp olt <1 x double> %A, <double 0.0>
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
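
The cmls/cmlo tests above all exercise one lowering rule: AArch64 has no cmls or cmlo instruction, so an unsigned <=/< vector compare is emitted as cmhs/cmhi with the operands swapped. A minimal sketch of that rule for a register (non-zero) RHS, assuming the deleted file's RUN line; the function name is hypothetical and the v0/v1 assignment follows the ABI:

define <8 x i8> @cmls_sketch(<8 x i8> %a, <8 x i8> %b) {
; CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
  %cmp = icmp ule <8 x i8> %a, %b
  %ext = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %ext
}
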
diff --git a/test/CodeGen/ARM64/patchpoint.ll b/test/CodeGen/ARM64/patchpoint.ll
deleted file mode 100644
index 993e3eb..0000000
--- a/test/CodeGen/ARM64/patchpoint.ll
+++ /dev/null
@@ -1,163 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=0 | FileCheck %s
-
-; Trivial patchpoint codegen
-;
-define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-; CHECK-LABEL: trivial_patchpoint_codegen:
-; CHECK: movz x16, #57005, lsl #32
-; CHECK-NEXT: movk x16, #48879, lsl #16
-; CHECK-NEXT: movk x16, #51966
-; CHECK-NEXT: blr x16
-; CHECK: movz x16, #57005, lsl #32
-; CHECK-NEXT: movk x16, #48879, lsl #16
-; CHECK-NEXT: movk x16, #51967
-; CHECK-NEXT: blr x16
-; CHECK: ret
- %resolveCall2 = inttoptr i64 244837814094590 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
- %resolveCall3 = inttoptr i64 244837814094591 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
- ret i64 %result
-}
-
-; Caller frame metadata with stackmaps. This should not be optimized
-; as a leaf function.
-;
-; CHECK-LABEL: caller_meta_leaf
-; CHECK: mov fp, sp
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK: Ltmp
-; CHECK: mov sp, fp
-; CHECK: ret
-
-define void @caller_meta_leaf() {
-entry:
- %metadata = alloca i64, i32 3, align 8
- store i64 11, i64* %metadata
- store i64 12, i64* %metadata
- store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
- ret void
-}
-
-; Test the webkit_jscc calling convention.
-; One argument will be passed in a register, the other will be pushed on the stack.
-; Return value in x0.
-define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen:
-; CHECK: Ltmp
-; CHECK: str x{{.+}}, [sp]
-; CHECK-NEXT: mov x0, x{{.+}}
-; CHECK: Ltmp
-; CHECK-NEXT: movz x16, #65535, lsl #32
-; CHECK-NEXT: movk x16, #57005, lsl #16
-; CHECK-NEXT: movk x16, #48879
-; CHECK-NEXT: blr x16
- %resolveCall2 = inttoptr i64 281474417671919 to i8*
- %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
- %resolveCall3 = inttoptr i64 244837814038255 to i8*
- tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
- ret void
-}
-
-; Test that the arguments are properly aligned and that we don't store undef arguments.
-define i64 @jscall_patchpoint_codegen2(i64 %callee) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen2:
-; CHECK: Ltmp
-; CHECK: orr x{{.+}}, xzr, #0x6
-; CHECK-NEXT: str x{{.+}}, [sp, #24]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
-; CHECK-NEXT: str w{{.+}}, [sp, #16]
-; CHECK-NEXT: orr x{{.+}}, xzr, #0x2
-; CHECK-NEXT: str x{{.+}}, [sp]
-; CHECK: Ltmp
-; CHECK-NEXT: movz x16, #65535, lsl #32
-; CHECK-NEXT: movk x16, #57005, lsl #16
-; CHECK-NEXT: movk x16, #48879
-; CHECK-NEXT: blr x16
- %call = inttoptr i64 281474417671919 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
- ret i64 %result
-}
-
-; Test that the arguments are properly aligned and that we don't store undef arguments.
-define i64 @jscall_patchpoint_codegen3(i64 %callee) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen3:
-; CHECK: Ltmp
-; CHECK: movz x{{.+}}, #10
-; CHECK-NEXT: str x{{.+}}, [sp, #48]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x8
-; CHECK-NEXT: str w{{.+}}, [sp, #36]
-; CHECK-NEXT: orr x{{.+}}, xzr, #0x6
-; CHECK-NEXT: str x{{.+}}, [sp, #24]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
-; CHECK-NEXT: str w{{.+}}, [sp, #16]
-; CHECK-NEXT: orr x{{.+}}, xzr, #0x2
-; CHECK-NEXT: str x{{.+}}, [sp]
-; CHECK: Ltmp
-; CHECK-NEXT: movz x16, #65535, lsl #32
-; CHECK-NEXT: movk x16, #57005, lsl #16
-; CHECK-NEXT: movk x16, #48879
-; CHECK-NEXT: blr x16
- %call = inttoptr i64 281474417671919 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
- ret i64 %result
-}
-
-; Test patchpoints reusing the same TargetConstant.
-; <rdar://15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
-; There is no way to verify this, since it depends on memory allocation.
-; But I think it's useful to include as a working example.
-define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
-entry:
- %tmp80 = add i64 %tmp79, -16
- %tmp81 = inttoptr i64 %tmp80 to i64*
- %tmp82 = load i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
- %tmp83 = load i64* %tmp33, align 8
- %tmp84 = add i64 %tmp83, -24
- %tmp85 = inttoptr i64 %tmp84 to i64*
- %tmp86 = load i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
- ret i64 10
-}
-
-; Test small patchpoints that don't emit calls.
-define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-; CHECK-LABEL: small_patchpoint_codegen:
-; CHECK: Ltmp
-; CHECK: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: nop
-; CHECK-NEXT: ldp
-; CHECK-NEXT: ret
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
- ret void
-}
-
-; Test that scratch registers are spilled around patchpoints
-; CHECK: InlineAsm End
-; CHECK-NEXT: mov x{{[0-9]+}}, x16
-; CHECK-NEXT: mov x{{[0-9]+}}, x17
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT: nop
-define void @clobberScratch(i32* %p) {
- %v = load i32* %p
- tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
- store i32 %v, i32* %p
- ret void
-}
-
-declare void @llvm.experimental.stackmap(i64, i32, ...)
-declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
-declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
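
For reference, the intrinsics declared above take (id, num-bytes, target, num-call-args, call args..., live values...): the id tags the entry in the emitted stack map, and num-bytes reserves encoding space that is padded with nops when the call sequence is shorter. A minimal call-site sketch under those declarations; the id and target address here are arbitrary, not taken from the tests:

define i64 @patchpoint_sketch(i64 %arg) {
entry:
  %target = inttoptr i64 244837814094590 to i8*
  %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 99, i32 20, i8* %target, i32 1, i64 %arg)
  ret i64 %result
}
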
diff --git a/test/CodeGen/ARM64/popcnt.ll b/test/CodeGen/ARM64/popcnt.ll
deleted file mode 100644
index 9bbba09c..0000000
--- a/test/CodeGen/ARM64/popcnt.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
- %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
- ret i32 %cnt
-; CHECK: fmov s0, w0
-; CHECK: cnt.8b v0, v0
-; CHECK: uaddlv.8b h0, v0
-; CHECK: fmov w0, s0
-; CHECK: ret
-}
-
-define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
- %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
- ret i64 %cnt
-; CHECK: fmov d0, x0
-; CHECK: cnt.8b v0, v0
-; CHECK: uaddlv.8b h0, v0
-; CHECK: fmov w0, s0
-; CHECK: ret
-}
-
-; Do not use AdvSIMD when -mno-implicit-float is specified.
-; rdar://9473858
-
-define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
- %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
- ret i32 %cnt
-; CHECK-LABEL: cnt32:
-; CHECK-NOT: 16b
-; CHECK: ret
-}
-
-define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
- %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
- ret i64 %cnt
-; CHECK-LABEL: cnt64:
-; CHECK-NOT: 16b
-; CHECK: ret
-}
-
-declare i32 @llvm.ctpop.i32(i32) nounwind readnone
-declare i64 @llvm.ctpop.i64(i64) nounwind readnone
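
The scalar checks above show the AdvSIMD round trip: fmov moves the GPR value into the vector unit, cnt.8b counts bits per byte, and uaddlv sums the byte counts. The ctpop intrinsic also comes in vector flavors that map onto cnt directly; a hypothetical sketch, assuming the same RUN line:

define <8 x i8> @cnt_v8i8_sketch(<8 x i8> %x) nounwind readnone {
; CHECK: cnt.8b
  %cnt = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %x)
  ret <8 x i8> %cnt
}

declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
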
diff --git a/test/CodeGen/ARM64/promote-const.ll b/test/CodeGen/ARM64/promote-const.ll
deleted file mode 100644
index 4a336db..0000000
--- a/test/CodeGen/ARM64/promote-const.ll
+++ /dev/null
@@ -1,255 +0,0 @@
-; Disable machine CSE to stress the different paths of the algorithm.
-; Otherwise, we always fall into the simple case, i.e., only one definition.
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -arm64-stress-promote-const | FileCheck -check-prefix=PROMOTED %s
-; The REGULAR run just checks that the inputs passed to promote const expose
-; the appropriate patterns.
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-machine-cse -arm64-promote-const=false | FileCheck -check-prefix=REGULAR %s
-
-%struct.uint8x16x4_t = type { [4 x <16 x i8>] }
-
-; Constant is a structure
-define %struct.uint8x16x4_t @test1() {
-; PROMOTED-LABEL: test1:
-; Promote constant has created a big constant for the whole structure
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], __PromotedConst@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], __PromotedConst@PAGEOFF
-; Destination registers are defined by the ABI
-; PROMOTED-NEXT: ldp q0, q1, {{\[}}[[BASEADDR]]]
-; PROMOTED-NEXT: ldp q2, q3, {{\[}}[[BASEADDR]], #32]
-; PROMOTED-NEXT: ret
-
-; REGULAR-LABEL: test1:
-; Regular access is quite bad: it performs 4 loads, one for each chunk of
-; the structure.
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; Destination registers are defined by the ABI
-; REGULAR: ldr q0, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR: ldr q1, {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; REGULAR: adrp [[PAGEADDR2:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
-; REGULAR: ldr q2, {{\[}}[[PAGEADDR2]], [[CSTLABEL2]]@PAGEOFF]
-; REGULAR: adrp [[PAGEADDR3:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
-; REGULAR: ldr q3, {{\[}}[[PAGEADDR3]], [[CSTLABEL3]]@PAGEOFF]
-; REGULAR-NEXT: ret
-entry:
- ret %struct.uint8x16x4_t { [4 x <16 x i8>] [<16 x i8> <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>, <16 x i8> <i8 32, i8 124, i8 121, i8 120, i8 8, i8 117, i8 -56, i8 113, i8 -76, i8 110, i8 -53, i8 107, i8 7, i8 105, i8 103, i8 102>, <16 x i8> <i8 -24, i8 99, i8 -121, i8 97, i8 66, i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8 85>, <16 x i8> <i8 -104, i8 83, i8 -20, i8 81, i8 81, i8 80, i8 -59, i8 78, i8 73, i8 77, i8 -37, i8 75, i8 122, i8 74, i8 37, i8 73>] }
-}
-
-; Two different uses of the same constant in the same basic block
-define <16 x i8> @test2(<16 x i8> %arg) {
-entry:
-; PROMOTED-LABEL: test2:
-; In stress mode, constant vectors are promoted
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: mla.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: ret
-
-; REGULAR-LABEL: test2:
-; Regular access is strictly the same as promoted access.
-; The difference is that the address (and thus the space in memory) is not
-; shared between constants
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: mla.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: ret
- %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- %mul.i = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- %add.i9 = add <16 x i8> %add.i, %mul.i
- ret <16 x i8> %add.i9
-}
-
-; Two different uses of the same constant in two different basic blocks,
-; one dominates the other
-define <16 x i8> @test3(<16 x i8> %arg, i32 %path) {
-; PROMOTED-LABEL: test3:
-; In stress mode, constant vectors are promoted
-; Since the constant is the same as in the previous function,
-; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV2:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV2]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[BASEADDR]]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; PROMOTED-NEXT: add.16b v0, v0, [[DESTV]]
-; PROMOTED-NEXT: ret
-
-; REGULAR-LABEL: test3:
-; Regular mode does not eliminate common subexpressions on its own.
-; In other words, the same load appears several times.
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: cbz w0, [[LABELelse:LBB.*]]
-; Next BB
-; Redundant load
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
-; REGULAR-NEXT: b [[LABELend:LBB.*]]
-; Next BB
-; REGULAR-NEXT: [[LABELelse]]
-; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL2]]@PAGEOFF]
-; Next BB
-; REGULAR-NEXT: [[LABELend]]:
-; REGULAR-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; REGULAR-NEXT: add.16b v0, v0, [[DESTV]]
-; REGULAR-NEXT: ret
-entry:
- %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- %tobool = icmp eq i32 %path, 0
- br i1 %tobool, label %if.else, label %if.then
-
-if.then: ; preds = %entry
- %mul.i13 = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- br label %if.end
-
-if.else: ; preds = %entry
- %mul.i = mul <16 x i8> %add.i, <i8 -24, i8 99, i8 -121, i8 97, i8 66, i8 95, i8 24, i8 93, i8 6, i8 91, i8 12, i8 89, i8 39, i8 87, i8 86, i8 85>
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- %ret2.0 = phi <16 x i8> [ %mul.i13, %if.then ], [ %mul.i, %if.else ]
- %add.i12 = add <16 x i8> %add.i, %ret2.0
- ret <16 x i8> %add.i12
-}
-
-; Two different uses of the same constant in two different basic blocks,
-; neither dominates the other
-define <16 x i8> @test4(<16 x i8> %arg, i32 %path) {
-; PROMOTED-LABEL: test4:
-; In stress mode, constant vectors are promoted
-; Since the constant is the same as in the previous function,
-; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: mul.16b v0, v0, v[[REGNUM]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: ret
-
-
-; REGULAR-LABEL: test4:
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; Redundant expression
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: mul.16b v0, v0, v[[REGNUM]]
-; Next BB
-; REGULAR-NEXT: [[LABEL]]:
-; REGULAR-NEXT: ret
-entry:
- %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- %tobool = icmp eq i32 %path, 0
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %mul.i = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- br label %if.end
-
-if.end: ; preds = %entry, %if.then
- %ret.0 = phi <16 x i8> [ %mul.i, %if.then ], [ %add.i, %entry ]
- ret <16 x i8> %ret.0
-}
-
-; Two different uses of the same constant in two different basic blocks,
-; one is in a phi.
-define <16 x i8> @test5(<16 x i8> %arg, i32 %path) {
-; PROMOTED-LABEL: test5:
-; In stress mode, constant vectors are promoted
-; Since the constant is the same as in the previous function,
-; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
-; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; PROMOTED-NEXT: mul.16b v[[REGNUM]], [[DESTV]], v[[REGNUM]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[REGNUM]], v[[REGNUM]]
-; PROMOTED-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
-; PROMOTED-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
-; PROMOTED-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
-; PROMOTED-NEXT: ret
-
-; REGULAR-LABEL: test5:
-; REGULAR: cbz w0, [[LABELelse:LBB.*]]
-; Next BB
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; REGULAR-NEXT: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; REGULAR-NEXT: mul.16b v[[DESTREGNUM:[0-9]+]], [[DESTV]], v[[REGNUM]]
-; REGULAR-NEXT: b [[LABELend:LBB.*]]
-; Next BB
-; REGULAR-NEXT: [[LABELelse]]
-; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[DESTREGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; Next BB
-; REGULAR-NEXT: [[LABELend]]:
-; REGULAR-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[DESTREGNUM]], v[[DESTREGNUM]]
-; REGULAR-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
-; REGULAR-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
-; REGULAR-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
-; REGULAR-NEXT: ret
-entry:
- %tobool = icmp eq i32 %path, 0
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- %mul.i26 = mul <16 x i8> %add.i, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
- br label %if.end
-
-if.end: ; preds = %entry, %if.then
- %ret.0 = phi <16 x i8> [ %mul.i26, %if.then ], [ <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>, %entry ]
- %mul.i25 = mul <16 x i8> %ret.0, %ret.0
- %mul.i24 = mul <16 x i8> %mul.i25, %mul.i25
- %mul.i23 = mul <16 x i8> %mul.i24, %mul.i24
- %mul.i = mul <16 x i8> %mul.i23, %mul.i23
- ret <16 x i8> %mul.i
-}
-
-define void @accessBig(i64* %storage) {
-; PROMOTED-LABEL: accessBig:
-; PROMOTED: adrp
-; PROMOTED: ret
- %addr = bitcast i64* %storage to <1 x i80>*
- store <1 x i80> <i80 483673642326615442599424>, <1 x i80>* %addr
- ret void
-}
-
-define void @asmStatement() {
-; PROMOTED-LABEL: asmStatement:
-; PROMOTED-NOT: adrp
-; PROMOTED: ret
- call void asm sideeffect "bfxil w0, w0, $0, $1", "i,i"(i32 28, i32 4)
- ret void
-}
-
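Every test above reduces to the same input shape: one wide vector constant with more than one user. Under -arm64-stress-promote-const that constant is hoisted into a __PromotedConst global and materialized once with an adrp/add/ldr sequence instead of one constant-pool load per use. A stripped-down sketch of that shape; the function name and constant values are hypothetical:

define <16 x i8> @promote_sketch(<16 x i8> %v) {
entry:
  %a = add <16 x i8> %v, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>
  %m = mul <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>
  ret <16 x i8> %m
}
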
diff --git a/test/CodeGen/ARM64/redzone.ll b/test/CodeGen/ARM64/redzone.ll
deleted file mode 100644
index b89d7b1..0000000
--- a/test/CodeGen/ARM64/redzone.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-redzone | FileCheck %s
-
-define i32 @foo(i32 %a, i32 %b) nounwind ssp {
-; CHECK-LABEL: foo:
-; CHECK-NOT: sub sp, sp
-; CHECK: ret
- %a.addr = alloca i32, align 4
- %b.addr = alloca i32, align 4
- %x = alloca i32, align 4
- store i32 %a, i32* %a.addr, align 4
- store i32 %b, i32* %b.addr, align 4
- %tmp = load i32* %a.addr, align 4
- %tmp1 = load i32* %b.addr, align 4
- %add = add nsw i32 %tmp, %tmp1
- store i32 %add, i32* %x, align 4
- %tmp2 = load i32* %x, align 4
- ret i32 %tmp2
-}
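
The point of the CHECK-NOT above: with -arm64-redzone, a leaf function may keep small locals in the 128-byte red zone below sp instead of adjusting the stack pointer, so no "sub sp, sp" should appear. A smaller hypothetical variant of the same check:

define i32 @redzone_sketch() nounwind ssp {
; CHECK-LABEL: redzone_sketch:
; CHECK-NOT: sub sp, sp
; CHECK: ret
  %x = alloca i32, align 4
  store i32 7, i32* %x, align 4
  %tmp = load i32* %x, align 4
  ret i32 %tmp
}
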
diff --git a/test/CodeGen/ARM64/register-offset-addressing.ll b/test/CodeGen/ARM64/register-offset-addressing.ll
deleted file mode 100644
index c273602..0000000
--- a/test/CodeGen/ARM64/register-offset-addressing.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-
-define i8 @t1(i16* %a, i64 %b) {
-; CHECK: t1
-; CHECK: lsl [[REG:x[0-9]+]], x1, #1
-; CHECK: ldrb w0, [x0, [[REG]]]
-; CHECK: ret
- %tmp1 = getelementptr inbounds i16* %a, i64 %b
- %tmp2 = load i16* %tmp1
- %tmp3 = trunc i16 %tmp2 to i8
- ret i8 %tmp3
-}
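
The separate lsl above exists because ldrb cannot fold a shifted register offset, so the i16 scaling from the getelementptr must be materialized first. When the load width matches the scale, the shift folds straight into the addressing mode; a hypothetical sketch, where the exact destination registers are an assumption:

define i32 @t2_sketch(i32* %a, i64 %b) {
; CHECK: ldr w0, [x0, x1, lsl #2]
; CHECK: ret
  %tmp1 = getelementptr inbounds i32* %a, i64 %b
  %tmp2 = load i32* %tmp1
  ret i32 %tmp2
}
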
diff --git a/test/CodeGen/ARM64/register-pairing.ll b/test/CodeGen/ARM64/register-pairing.ll
deleted file mode 100644
index 4de80d2..0000000
--- a/test/CodeGen/ARM64/register-pairing.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
-;
-; rdar://14075006
-
-define void @odd() nounwind {
-; CHECK-LABEL: odd:
-; CHECK: stp d15, d14, [sp, #-144]!
-; CHECK: stp d13, d12, [sp, #16]
-; CHECK: stp d11, d10, [sp, #32]
-; CHECK: stp d9, d8, [sp, #48]
-; CHECK: stp x28, x27, [sp, #64]
-; CHECK: stp x26, x25, [sp, #80]
-; CHECK: stp x24, x23, [sp, #96]
-; CHECK: stp x22, x21, [sp, #112]
-; CHECK: stp x20, x19, [sp, #128]
-; CHECK: movz x0, #42
-; CHECK: ldp x20, x19, [sp, #128]
-; CHECK: ldp x22, x21, [sp, #112]
-; CHECK: ldp x24, x23, [sp, #96]
-; CHECK: ldp x26, x25, [sp, #80]
-; CHECK: ldp x28, x27, [sp, #64]
-; CHECK: ldp d9, d8, [sp, #48]
-; CHECK: ldp d11, d10, [sp, #32]
-; CHECK: ldp d13, d12, [sp, #16]
-; CHECK: ldp d15, d14, [sp], #144
- call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
- ret void
-}
-
-define void @even() nounwind {
-; CHECK-LABEL: even:
-; CHECK: stp d15, d14, [sp, #-144]!
-; CHECK: stp d13, d12, [sp, #16]
-; CHECK: stp d11, d10, [sp, #32]
-; CHECK: stp d9, d8, [sp, #48]
-; CHECK: stp x28, x27, [sp, #64]
-; CHECK: stp x26, x25, [sp, #80]
-; CHECK: stp x24, x23, [sp, #96]
-; CHECK: stp x22, x21, [sp, #112]
-; CHECK: stp x20, x19, [sp, #128]
-; CHECK: movz x0, #42
-; CHECK: ldp x20, x19, [sp, #128]
-; CHECK: ldp x22, x21, [sp, #112]
-; CHECK: ldp x24, x23, [sp, #96]
-; CHECK: ldp x26, x25, [sp, #80]
-; CHECK: ldp x28, x27, [sp, #64]
-; CHECK: ldp d9, d8, [sp, #48]
-; CHECK: ldp d11, d10, [sp, #32]
-; CHECK: ldp d13, d12, [sp, #16]
-; CHECK: ldp d15, d14, [sp], #144
- call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
- ret void
-}
diff --git a/test/CodeGen/ARM64/regress-interphase-shift.ll b/test/CodeGen/ARM64/regress-interphase-shift.ll
deleted file mode 100644
index fddf591..0000000
--- a/test/CodeGen/ARM64/regress-interphase-shift.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc -march=arm64 -o - %s | FileCheck %s
-
-; This is mostly a "don't assert" test. The type of the RHS of a shift depended
-; on the phase of legalization, which led to the creation of an unexpected and
-; unselectable "rotr" node: (i32 (rotr i32, i64)).
-
-define void @foo(i64* nocapture %d) {
-; CHECK-LABEL: foo:
-; CHECK: rorv
- %tmp = load i64* undef, align 8
- %sub397 = sub i64 0, %tmp
- %and398 = and i64 %sub397, 4294967295
- %shr404 = lshr i64 %and398, 0
- %or405 = or i64 0, %shr404
- %xor406 = xor i64 %or405, 0
- %xor417 = xor i64 0, %xor406
- %xor428 = xor i64 0, %xor417
- %sub430 = sub i64 %xor417, 0
- %and431 = and i64 %sub430, 4294967295
- %and432 = and i64 %xor428, 31
- %sub433 = sub i64 32, %and432
- %shl434 = shl i64 %and431, %sub433
- %shr437 = lshr i64 %and431, %and432
- %or438 = or i64 %shl434, %shr437
- %xor439 = xor i64 %or438, %xor428
- %sub441 = sub i64 %xor439, 0
- store i64 %sub441, i64* %d, align 8
- ret void
-}
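
For contrast, the well-typed version of the idiom: when both shift amounts have the same width as the value, the shl/lshr/or triple folds into a single rotate. A minimal sketch; that it selects a "ror" here is an expectation, not taken from the deleted test:

define i64 @rotr_sketch(i64 %x, i64 %n) {
; CHECK: ror
  %amt = and i64 %n, 63
  %neg = sub i64 0, %n
  %inv = and i64 %neg, 63
  %lo = lshr i64 %x, %amt
  %hi = shl i64 %x, %inv
  %r = or i64 %lo, %hi
  ret i64 %r
}
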
diff --git a/test/CodeGen/ARM64/returnaddr.ll b/test/CodeGen/ARM64/returnaddr.ll
deleted file mode 100644
index e06ce90..0000000
--- a/test/CodeGen/ARM64/returnaddr.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-define i8* @rt0(i32 %x) nounwind readnone {
-entry:
-; CHECK-LABEL: rt0:
-; CHECK: mov x0, lr
-; CHECK: ret
- %0 = tail call i8* @llvm.returnaddress(i32 0)
- ret i8* %0
-}
-
-define i8* @rt2() nounwind readnone {
-entry:
-; CHECK-LABEL: rt2:
-; CHECK: stp fp, lr, [sp, #-16]!
-; CHECK: mov fp, sp
-; CHECK: ldr x[[REG:[0-9]+]], [fp]
-; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]]]
-; CHECK: ldr x0, [x[[REG2]], #8]
-; CHECK: ldp fp, lr, [sp], #16
-; CHECK: ret
- %0 = tail call i8* @llvm.returnaddress(i32 2)
- ret i8* %0
-}
-
-declare i8* @llvm.returnaddress(i32) nounwind readnone
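
Depth 0 is just a register copy of lr; any deeper request walks the saved-fp chain and loads the saved lr at offset 8 from the target frame, which is what rt2 checks for depth 2. The intermediate depth, as a hypothetical sketch with no CHECK lines assumed:

define i8* @rt1_sketch() nounwind readnone {
entry:
  %0 = tail call i8* @llvm.returnaddress(i32 1)
  ret i8* %0
}
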
diff --git a/test/CodeGen/ARM64/rev.ll b/test/CodeGen/ARM64/rev.ll
deleted file mode 100644
index 867d5b3..0000000
--- a/test/CodeGen/ARM64/rev.ll
+++ /dev/null
@@ -1,221 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define i32 @test_rev_w(i32 %a) nounwind {
-entry:
-; CHECK-LABEL: test_rev_w:
-; CHECK: rev w0, w0
- %0 = tail call i32 @llvm.bswap.i32(i32 %a)
- ret i32 %0
-}
-
-define i64 @test_rev_x(i64 %a) nounwind {
-entry:
-; CHECK-LABEL: test_rev_x:
-; CHECK: rev x0, x0
- %0 = tail call i64 @llvm.bswap.i64(i64 %a)
- ret i64 %0
-}
-
-declare i32 @llvm.bswap.i32(i32) nounwind readnone
-declare i64 @llvm.bswap.i64(i64) nounwind readnone
-
-define i32 @test_rev16_w(i32 %X) nounwind {
-entry:
-; CHECK-LABEL: test_rev16_w:
-; CHECK: rev16 w0, w0
- %tmp1 = lshr i32 %X, 8
- %X15 = bitcast i32 %X to i32
- %tmp4 = shl i32 %X15, 8
- %tmp2 = and i32 %tmp1, 16711680
- %tmp5 = and i32 %tmp4, -16777216
- %tmp9 = and i32 %tmp1, 255
- %tmp13 = and i32 %tmp4, 65280
- %tmp6 = or i32 %tmp5, %tmp2
- %tmp10 = or i32 %tmp6, %tmp13
- %tmp14 = or i32 %tmp10, %tmp9
- ret i32 %tmp14
-}
-
-define i64 @test_rev16_x(i64 %a) nounwind {
-entry:
-; CHECK-LABEL: test_rev16_x:
-; CHECK: rev16 x0, x0
- %0 = tail call i64 @llvm.bswap.i64(i64 %a)
- %1 = lshr i64 %0, 16
- %2 = shl i64 %0, 48
- %3 = or i64 %1, %2
- ret i64 %3
-}
-
-define i64 @test_rev32_x(i64 %a) nounwind {
-entry:
-; CHECK-LABEL: test_rev32_x:
-; CHECK: rev32 x0, x0
- %0 = tail call i64 @llvm.bswap.i64(i64 %a)
- %1 = lshr i64 %0, 32
- %2 = shl i64 %0, 32
- %3 = or i64 %1, %2
- ret i64 %3
-}
-
-define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev64D8:
-;CHECK: rev64.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: test_vrev64D16:
-;CHECK: rev64.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- ret <4 x i16> %tmp2
-}
-
-define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: test_vrev64D32:
-;CHECK: rev64.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
- ret <2 x i32> %tmp2
-}
-
-define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
-;CHECK-LABEL: test_vrev64Df:
-;CHECK: rev64.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
- ret <2 x float> %tmp2
-}
-
-define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev64Q8:
-;CHECK: rev64.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: test_vrev64Q16:
-;CHECK: rev64.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: test_vrev64Q32:
-;CHECK: rev64.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- ret <4 x i32> %tmp2
-}
-
-define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
-;CHECK-LABEL: test_vrev64Qf:
-;CHECK: rev64.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- ret <4 x float> %tmp2
-}
-
-define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev32D8:
-;CHECK: rev32.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- ret <8 x i8> %tmp2
-}
-
-define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: test_vrev32D16:
-;CHECK: rev32.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- ret <4 x i16> %tmp2
-}
-
-define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev32Q8:
-;CHECK: rev32.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
- ret <16 x i8> %tmp2
-}
-
-define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: test_vrev32Q16:
-;CHECK: rev32.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- ret <8 x i16> %tmp2
-}
-
-define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev16D8:
-;CHECK: rev16.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- ret <8 x i8> %tmp2
-}
-
-define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev16Q8:
-;CHECK: rev16.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- ret <16 x i8> %tmp2
-}
-
-; Undef shuffle indices should not prevent matching to VREV:
-
-define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: test_vrev64D8_undef:
-;CHECK: rev64.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
- ret <8 x i8> %tmp2
-}
-
-define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: test_vrev32Q16_undef:
-;CHECK: rev32.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
- ret <8 x i16> %tmp2
-}
-
-; vrev <4 x i16> should use REV32 and not REV64
-define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
-; CHECK-LABEL: test_vrev64:
-; CHECK: ldr [[DEST:q[0-9]+]],
-; CHECK: st1.h
-; CHECK: st1.h
-entry:
- %0 = bitcast <4 x i16>* %source to <8 x i16>*
- %tmp2 = load <8 x i16>* %0, align 4
- %tmp3 = extractelement <8 x i16> %tmp2, i32 6
- %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
- %tmp9 = extractelement <8 x i16> %tmp2, i32 5
- %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1
- store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
- ret void
-}
-
-; Test vrev of float4
-define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp {
-; CHECK: float_vrev64
-; CHECK: ldr [[DEST:q[0-9]+]],
-; CHECK: rev64.4s
-entry:
- %0 = bitcast float* %source to <4 x float>*
- %tmp2 = load <4 x float>* %0, align 4
- %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
- %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
- store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
- ret void
-}
-
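The rule tying all of the shuffle masks above to an instruction: rev<W> reverses the order of the elements inside each W-bit container, so a mask that swaps lanes within the 64-bit halves of a <4 x i32> selects rev64.4s. The same mask without the feeding load, as a hypothetical sketch:

define <4 x i32> @rev64_mask_sketch(<4 x i32> %v) nounwind {
; CHECK: rev64.4s
  %r = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i32> %r
}
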
diff --git a/test/CodeGen/ARM64/rounding.ll b/test/CodeGen/ARM64/rounding.ll
deleted file mode 100644
index 7ff65c3..0000000
--- a/test/CodeGen/ARM64/rounding.ll
+++ /dev/null
@@ -1,208 +0,0 @@
-; RUN: llc -O3 < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
-target triple = "arm64-apple-ios6.0.0"
-
-; CHECK: test1
-; CHECK: frintx
-; CHECK: frintm
-define float @test1(float %a) #0 {
-entry:
- %call = tail call float @floorf(float %a) nounwind readnone
- ret float %call
-}
-
-declare float @floorf(float) nounwind readnone
-
-; CHECK: test2
-; CHECK: frintx
-; CHECK: frintm
-define double @test2(double %a) #0 {
-entry:
- %call = tail call double @floor(double %a) nounwind readnone
- ret double %call
-}
-
-declare double @floor(double) nounwind readnone
-
-; CHECK: test3
-; CHECK: frinti
-define float @test3(float %a) #0 {
-entry:
- %call = tail call float @nearbyintf(float %a) nounwind readnone
- ret float %call
-}
-
-declare float @nearbyintf(float) nounwind readnone
-
-; CHECK: test4
-; CHECK: frinti
-define double @test4(double %a) #0 {
-entry:
- %call = tail call double @nearbyint(double %a) nounwind readnone
- ret double %call
-}
-
-declare double @nearbyint(double) nounwind readnone
-
-; CHECK: test5
-; CHECK: frintx
-; CHECK: frintp
-define float @test5(float %a) #0 {
-entry:
- %call = tail call float @ceilf(float %a) nounwind readnone
- ret float %call
-}
-
-declare float @ceilf(float) nounwind readnone
-
-; CHECK: test6
-; CHECK: frintx
-; CHECK: frintp
-define double @test6(double %a) #0 {
-entry:
- %call = tail call double @ceil(double %a) nounwind readnone
- ret double %call
-}
-
-declare double @ceil(double) nounwind readnone
-
-; CHECK: test7
-; CHECK: frintx
-define float @test7(float %a) #0 {
-entry:
- %call = tail call float @rintf(float %a) nounwind readnone
- ret float %call
-}
-
-declare float @rintf(float) nounwind readnone
-
-; CHECK: test8
-; CHECK: frintx
-define double @test8(double %a) #0 {
-entry:
- %call = tail call double @rint(double %a) nounwind readnone
- ret double %call
-}
-
-declare double @rint(double) nounwind readnone
-
-; CHECK: test9
-; CHECK: frintx
-; CHECK: frintz
-define float @test9(float %a) #0 {
-entry:
- %call = tail call float @truncf(float %a) nounwind readnone
- ret float %call
-}
-
-declare float @truncf(float) nounwind readnone
-
-; CHECK: test10
-; CHECK: frintx
-; CHECK: frintz
-define double @test10(double %a) #0 {
-entry:
- %call = tail call double @trunc(double %a) nounwind readnone
- ret double %call
-}
-
-declare double @trunc(double) nounwind readnone
-
-; CHECK: test11
-; CHECK: frintx
-; CHECK: frinta
-define float @test11(float %a) #0 {
-entry:
- %call = tail call float @roundf(float %a) nounwind readnone
- ret float %call
-}
-
-declare float @roundf(float %a) nounwind readnone
-
-; CHECK: test12
-; CHECK: frintx
-; CHECK: frinta
-define double @test12(double %a) #0 {
-entry:
- %call = tail call double @round(double %a) nounwind readnone
- ret double %call
-}
-
-declare double @round(double %a) nounwind readnone
-
-; CHECK: test13
-; CHECK-NOT: frintx
-; CHECK: frintm
-define float @test13(float %a) #1 {
-entry:
- %call = tail call float @floorf(float %a) nounwind readnone
- ret float %call
-}
-
-; CHECK: test14
-; CHECK-NOT: frintx
-; CHECK: frintm
-define double @test14(double %a) #1 {
-entry:
- %call = tail call double @floor(double %a) nounwind readnone
- ret double %call
-}
-
-; CHECK: test15
-; CHECK-NOT: frintx
-; CHECK: frintp
-define float @test15(float %a) #1 {
-entry:
- %call = tail call float @ceilf(float %a) nounwind readnone
- ret float %call
-}
-
-; CHECK: test16
-; CHECK-NOT: frintx
-; CHECK: frintp
-define double @test16(double %a) #1 {
-entry:
- %call = tail call double @ceil(double %a) nounwind readnone
- ret double %call
-}
-
-; CHECK: test17
-; CHECK-NOT: frintx
-; CHECK: frintz
-define float @test17(float %a) #1 {
-entry:
- %call = tail call float @truncf(float %a) nounwind readnone
- ret float %call
-}
-
-; CHECK: test18
-; CHECK-NOT: frintx
-; CHECK: frintz
-define double @test18(double %a) #1 {
-entry:
- %call = tail call double @trunc(double %a) nounwind readnone
- ret double %call
-}
-
-; CHECK: test19
-; CHECK-NOT: frintx
-; CHECK: frinta
-define float @test19(float %a) #1 {
-entry:
- %call = tail call float @roundf(float %a) nounwind readnone
- ret float %call
-}
-
-; CHECK: test20
-; CHECK-NOT: frintx
-; CHECK: frinta
-define double @test20(double %a) #1 {
-entry:
- %call = tail call double @round(double %a) nounwind readnone
- ret double %call
-}
-
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
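
The structure of the checks above: by default each libm rounding call lowers to an frintx paired with the dedicated rounding instruction, and tests 13-20 verify that "unsafe-fp-math"="true" drops the frintx. The llvm rounding intrinsics model no FP-environment side effects; a hypothetical sketch, assuming llvm.floor lowers to the bare frintm:

define double @floor_intrinsic_sketch(double %a) nounwind {
; CHECK-NOT: frintx
; CHECK: frintm
  %tmp = tail call double @llvm.floor.f64(double %a)
  ret double %tmp
}

declare double @llvm.floor.f64(double) nounwind readnone
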
diff --git a/test/CodeGen/ARM64/scvt.ll b/test/CodeGen/ARM64/scvt.ll
deleted file mode 100644
index b4d4add..0000000
--- a/test/CodeGen/ARM64/scvt.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-; rdar://13082402
-
-define float @t1(i32* nocapture %src) nounwind ssp {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: ldr s0, [x0]
-; CHECK: scvtf s0, s0
- %tmp1 = load i32* %src, align 4
- %tmp2 = sitofp i32 %tmp1 to float
- ret float %tmp2
-}
-
-define float @t2(i32* nocapture %src) nounwind ssp {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: ldr s0, [x0]
-; CHECK: ucvtf s0, s0
- %tmp1 = load i32* %src, align 4
- %tmp2 = uitofp i32 %tmp1 to float
- ret float %tmp2
-}
-
-define double @t3(i64* nocapture %src) nounwind ssp {
-entry:
-; CHECK-LABEL: t3:
-; CHECK: ldr d0, [x0]
-; CHECK: scvtf d0, d0
- %tmp1 = load i64* %src, align 4
- %tmp2 = sitofp i64 %tmp1 to double
- ret double %tmp2
-}
-
-define double @t4(i64* nocapture %src) nounwind ssp {
-entry:
-; CHECK-LABEL: t4:
-; CHECK: ldr d0, [x0]
-; CHECK: ucvtf d0, d0
- %tmp1 = load i64* %src, align 4
- %tmp2 = uitofp i64 %tmp1 to double
- ret double %tmp2
-}
-
-; rdar://13136456
-define double @t5(i32* nocapture %src) nounwind ssp optsize {
-entry:
-; CHECK-LABEL: t5:
-; CHECK: ldr [[REG:w[0-9]+]], [x0]
-; CHECK: scvtf d0, [[REG]]
- %tmp1 = load i32* %src, align 4
- %tmp2 = sitofp i32 %tmp1 to double
- ret double %tmp2
-}
-
-; Check that we load into an FP register when we want to convert to a
-; floating point value.
-; This is much faster than loading into a GPR and then converting
-; GPR -> FPR.
-; <rdar://problem/14599607>
-;
-; Check the following patterns for signed/unsigned:
-; 1. load with scaled imm to float.
-; 2. load with scaled register to float.
-; 3. load with scaled imm to double.
-; 4. load with scaled register to double.
-; 5. load with unscaled imm to float.
-; 6. load with unscaled imm to double.
-; With loading size: 8, 16, 32, and 64-bits.
-
-; ********* 1. load with scaled imm to float. *********
-define float @fct1(i8* nocapture %sp0) {
-; CHECK-LABEL: fct1:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = uitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @fct2(i16* nocapture %sp0) {
-; CHECK-LABEL: fct2:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = uitofp i16 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @fct3(i32* nocapture %sp0) {
-; CHECK-LABEL: fct3:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = uitofp i32 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; i64 -> f32 is not supported on the floating point unit.
-define float @fct4(i64* nocapture %sp0) {
-; CHECK-LABEL: fct4:
-; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = uitofp i64 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; ********* 2. load with scaled register to float. *********
-define float @fct5(i8* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct5:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = uitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @fct6(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct6:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = uitofp i16 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @fct7(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct7:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = uitofp i32 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; i64 -> f32 is not supported on the floating point unit.
-define float @fct8(i64* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct8:
-; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = uitofp i64 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-
-; ********* 3. load with scaled imm to double. *********
-define double @fct9(i8* nocapture %sp0) {
-; CHECK-LABEL: fct9:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = uitofp i8 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct10(i16* nocapture %sp0) {
-; CHECK-LABEL: fct10:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = uitofp i16 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct11(i32* nocapture %sp0) {
-; CHECK-LABEL: fct11:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = uitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct12(i64* nocapture %sp0) {
-; CHECK-LABEL: fct12:
-; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = uitofp i64 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-; ********* 4. load with scaled register to double. *********
-define double @fct13(i8* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct13:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = uitofp i8 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct14(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct14:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = uitofp i16 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct15(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct15:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = uitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct16(i64* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct16:
-; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = uitofp i64 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-; ********* 5. load with unscaled imm to float. *********
-define float @fct17(i8* nocapture %sp0) {
-entry:
-; CHECK-LABEL: fct17:
-; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i8* %sp0 to i64
- %add = add i64 %bitcast, -1
- %addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = uitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @fct18(i16* nocapture %sp0) {
-; CHECK-LABEL: fct18:
-; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i16* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = uitofp i16 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @fct19(i32* nocapture %sp0) {
-; CHECK-LABEL: fct19:
-; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i32* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = uitofp i32 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; i64 -> f32 is not supported on the floating point unit.
-define float @fct20(i64* nocapture %sp0) {
-; CHECK-LABEL: fct20:
-; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i64* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = uitofp i64 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-
-}
-
-; ********* 6. load with unscaled imm to double. *********
-define double @fct21(i8* nocapture %sp0) {
-entry:
-; CHECK-LABEL: fct21:
-; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i8* %sp0 to i64
- %add = add i64 %bitcast, -1
- %addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = uitofp i8 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct22(i16* nocapture %sp0) {
-; CHECK-LABEL: fct22:
-; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i16* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = uitofp i16 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct23(i32* nocapture %sp0) {
-; CHECK-LABEL: fct23:
-; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i32* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = uitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @fct24(i64* nocapture %sp0) {
-; CHECK-LABEL: fct24:
-; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i64* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = uitofp i64 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-
-}
-
-; ********* 1s. load with scaled imm to float. *********
-define float @sfct1(i8* nocapture %sp0) {
-; CHECK-LABEL: sfct1:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
-; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
-; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @sfct2(i16* nocapture %sp0) {
-; CHECK-LABEL: sfct2:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
-; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
-; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = sitofp i16 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @sfct3(i32* nocapture %sp0) {
-; CHECK-LABEL: sfct3:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; i64 -> f32 is not supported on the floating point unit.
-define float @sfct4(i64* nocapture %sp0) {
-; CHECK-LABEL: sfct4:
-; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = sitofp i64 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; ********* 2s. load with scaled register to float. *********
-define float @sfct5(i8* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct5:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
-; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
-; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
-; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @sfct6(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct6:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
-; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
-; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = sitofp i16 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @sfct7(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct7:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; i64 -> f32 is not supported on the floating-point unit; the conversion goes through a GPR.
-define float @sfct8(i64* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct8:
-; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = sitofp i64 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
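-
-; (In the register-offset forms above, the lsl amount tracks the element
-; size: #1 for i16, #2 for i32, #3 for i64, so the getelementptr scaling
-; folds directly into the addressing mode.)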
-
-; ********* 3s. load with scaled imm to double. *********
-define double @sfct9(i8* nocapture %sp0) {
-; CHECK-LABEL: sfct9:
-; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct10(i16* nocapture %sp0) {
-; CHECK-LABEL: sfct10:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
-; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
-; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
-; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = sitofp i16 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct11(i32* nocapture %sp0) {
-; CHECK-LABEL: sfct11:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
-; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct12(i64* nocapture %sp0) {
-; CHECK-LABEL: sfct12:
-; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = sitofp i64 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-; ********* 4s. load with scaled register to double. *********
-define double @sfct13(i8* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct13:
-; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct14(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct14:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
-; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
-; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
-; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = sitofp i16 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct15(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct15:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
-; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
-; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct16(i64* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct16:
-; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = sitofp i64 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-; ********* 5s. load with unscaled imm to float. *********
-define float @sfct17(i8* nocapture %sp0) {
-entry:
-; CHECK-LABEL: sfct17:
-; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
-; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
-; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
-; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i8* %sp0 to i64
- %add = add i64 %bitcast, -1
- %addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @sfct18(i16* nocapture %sp0) {
-; CHECK-LABEL: sfct18:
-; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
-; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i16* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = sitofp i16 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define float @sfct19(i32* nocapture %sp0) {
-; CHECK-LABEL: sfct19:
-; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i32* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; i64 -> f32 is not supported on the floating-point unit; the conversion goes through a GPR.
-define float @sfct20(i64* nocapture %sp0) {
-; CHECK-LABEL: sfct20:
-; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i64* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = sitofp i64 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-; ********* 6s. load with unscaled imm to double. *********
-define double @sfct21(i8* nocapture %sp0) {
-entry:
-; CHECK-LABEL: sfct21:
-; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i8* %sp0 to i64
- %add = add i64 %bitcast, -1
- %addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct22(i16* nocapture %sp0) {
-; CHECK-LABEL: sfct22:
-; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
-; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
-; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i16* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %val = sitofp i16 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct23(i32* nocapture %sp0) {
-; CHECK-LABEL: sfct23:
-; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
-; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i32* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-define double @sfct24(i64* nocapture %sp0) {
-; CHECK-LABEL: sfct24:
-; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
- %bitcast = ptrtoint i64* %sp0 to i64
- %add = add i64 %bitcast, 1
- %addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %val = sitofp i64 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
-
-; Check that we do not use SSHLL code sequence when code size is a concern.
-define float @codesize_sfct17(i8* nocapture %sp0) optsize {
-entry:
-; CHECK-LABEL: codesize_sfct17:
-; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
-; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
-; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
- %bitcast = ptrtoint i8* %sp0 to i64
- %add = add i64 %bitcast, -1
- %addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %val = sitofp i8 %pix_sp0.0.copyload to float
- %vmull.i = fmul float %val, %val
- ret float %vmull.i
-}
-
-define double @codesize_sfct11(i32* nocapture %sp0) minsize {
-; CHECK-LABEL: codesize_sfct11:
-; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
-; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %val = sitofp i32 %pix_sp0.0.copyload to double
- %vmull.i = fmul double %val, %val
- ret double %vmull.i
-}
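-
-; (Both codesize tests expect the compact GPR form, a single load into a w
-; register followed by scvtf from it, on the assumption that under
-; optsize/minsize the multi-instruction sshll widening sequence is not worth
-; its extra bytes.)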
-
-; Adding fp128 custom lowering makes these a little fragile since we have to
-; return the correct mix of Legal/Expand from the custom method.
-;
-; rdar://problem/14991489
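-;
-; (For reference, the libcalls checked below follow the usual compiler-rt
-; naming scheme: __float[un]ti{sf,df,tf} for i128-to-FP and
-; __fix[uns]{sf,df,tf}ti for FP-to-i128, where the "un"/"uns" infix marks
-; the unsigned variants.)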
-
-define float @float_from_i128(i128 %in) {
-; CHECK-LABEL: float_from_i128:
-; CHECK: bl {{_?__floatuntisf}}
- %conv = uitofp i128 %in to float
- ret float %conv
-}
-
-define double @double_from_i128(i128 %in) {
-; CHECK-LABEL: double_from_i128:
-; CHECK: bl {{_?__floattidf}}
- %conv = sitofp i128 %in to double
- ret double %conv
-}
-
-define fp128 @fp128_from_i128(i128 %in) {
-; CHECK-LABEL: fp128_from_i128:
-; CHECK: bl {{_?__floatuntitf}}
- %conv = uitofp i128 %in to fp128
- ret fp128 %conv
-}
-
-define i128 @i128_from_float(float %in) {
-; CHECK-LABEL: i128_from_float:
-; CHECK: bl {{_?__fixsfti}}
- %conv = fptosi float %in to i128
- ret i128 %conv
-}
-
-define i128 @i128_from_double(double %in) {
-; CHECK-LABEL: i128_from_double:
-; CHECK: bl {{_?__fixunsdfti}}
- %conv = fptoui double %in to i128
- ret i128 %conv
-}
-
-define i128 @i128_from_fp128(fp128 %in) {
-; CHECK-LABEL: i128_from_fp128:
-; CHECK: bl {{_?__fixtfti}}
- %conv = fptosi fp128 %in to i128
- ret i128 %conv
-}
-
diff --git a/test/CodeGen/ARM64/shifted-sext.ll b/test/CodeGen/ARM64/shifted-sext.ll
deleted file mode 100644
index e553be5..0000000
--- a/test/CodeGen/ARM64/shifted-sext.ll
+++ /dev/null
@@ -1,277 +0,0 @@
-; RUN: llc -march=arm64 -mtriple=arm64-apple-ios < %s | FileCheck %s
-;
-; <rdar://problem/13820218>
-
-define signext i16 @extendedLeftShiftcharToshortBy4(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftcharToshortBy4:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #28, #7
- %inc = add i8 %a, 1
- %conv1 = sext i8 %inc to i32
- %shl = shl nsw i32 %conv1, 4
- %conv2 = trunc i32 %shl to i16
- ret i16 %conv2
-}
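-
-; (Decoding the sbfm above, assuming the ARMv8 alias rules: with immr=28 and
-; imms=7 on a 32-bit register, imms < immr, so it is the alias
-; "sbfiz w0, [[REG]], #4, #8", i.e. sign-extend an 8-bit field and shift it
-; left by 32 - 28 = 4, exactly the sext-then-shl in the function above.)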
-
-define signext i16 @extendedRightShiftcharToshortBy4(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftcharToshortBy4:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #4, #7
- %inc = add i8 %a, 1
- %conv1 = sext i8 %inc to i32
- %shr4 = lshr i32 %conv1, 4
- %conv2 = trunc i32 %shr4 to i16
- ret i16 %conv2
-}
-
-define signext i16 @extendedLeftShiftcharToshortBy8(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftcharToshortBy8:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #24, #7
- %inc = add i8 %a, 1
- %conv1 = sext i8 %inc to i32
- %shl = shl nsw i32 %conv1, 8
- %conv2 = trunc i32 %shl to i16
- ret i16 %conv2
-}
-
-define signext i16 @extendedRightShiftcharToshortBy8(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftcharToshortBy8:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sxtb [[REG]], [[REG]]
-; CHECK: asr w0, [[REG]], #8
- %inc = add i8 %a, 1
- %conv1 = sext i8 %inc to i32
- %shr4 = lshr i32 %conv1, 8
- %conv2 = trunc i32 %shr4 to i16
- ret i16 %conv2
-}
-
-define i32 @extendedLeftShiftcharTointBy4(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftcharTointBy4:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #28, #7
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i32
- %shl = shl nsw i32 %conv, 4
- ret i32 %shl
-}
-
-define i32 @extendedRightShiftcharTointBy4(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftcharTointBy4:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #4, #7
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i32
- %shr = ashr i32 %conv, 4
- ret i32 %shr
-}
-
-define i32 @extendedLeftShiftcharTointBy8(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftcharTointBy8:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #24, #7
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i32
- %shl = shl nsw i32 %conv, 8
- ret i32 %shl
-}
-
-define i32 @extendedRightShiftcharTointBy8(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftcharTointBy8:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sxtb [[REG]], [[REG]]
-; CHECK: asr w0, [[REG]], #8
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i32
- %shr = ashr i32 %conv, 8
- ret i32 %shr
-}
-
-define i64 @extendedLeftShiftcharToint64By4(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftcharToint64By4:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #60, #7
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i64
- %shl = shl nsw i64 %conv, 4
- ret i64 %shl
-}
-
-define i64 @extendedRightShiftcharToint64By4(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftcharToint64By4:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #4, #7
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i64
- %shr = ashr i64 %conv, 4
- ret i64 %shr
-}
-
-define i64 @extendedLeftShiftcharToint64By8(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftcharToint64By8:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #56, #7
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i64
- %shl = shl nsw i64 %conv, 8
- ret i64 %shl
-}
-
-define i64 @extendedRightShiftcharToint64By8(i8 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftcharToint64By8:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sxtb x[[REG]], x[[REG]]
-; CHECK: asr x0, x[[REG]], #8
- %inc = add i8 %a, 1
- %conv = sext i8 %inc to i64
- %shr = ashr i64 %conv, 8
- ret i64 %shr
-}
-
-define i32 @extendedLeftShiftshortTointBy4(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftshortTointBy4:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #28, #15
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i32
- %shl = shl nsw i32 %conv, 4
- ret i32 %shl
-}
-
-define i32 @extendedRightShiftshortTointBy4(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftshortTointBy4:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sbfm w0, [[REG]], #4, #15
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i32
- %shr = ashr i32 %conv, 4
- ret i32 %shr
-}
-
-define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftshortTointBy16:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: lsl w0, [[REG]], #16
- %inc = add i16 %a, 1
- %conv2 = zext i16 %inc to i32
- %shl = shl nuw i32 %conv2, 16
- ret i32 %shl
-}
-
-define i32 @extendedRightShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftshortTointBy16:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: sxth [[REG]], [[REG]]
-; CHECK: asr w0, [[REG]], #16
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i32
- %shr = ashr i32 %conv, 16
- ret i32 %shr
-}
-
-define i64 @extendedLeftShiftshortToint64By4(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftshortToint64By4:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #60, #15
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i64
- %shl = shl nsw i64 %conv, 4
- ret i64 %shl
-}
-
-define i64 @extendedRightShiftshortToint64By4(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftshortToint64By4:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #4, #15
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i64
- %shr = ashr i64 %conv, 4
- ret i64 %shr
-}
-
-define i64 @extendedLeftShiftshortToint64By16(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftshortToint64By16:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #48, #15
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i64
- %shl = shl nsw i64 %conv, 16
- ret i64 %shl
-}
-
-define i64 @extendedRightShiftshortToint64By16(i16 signext %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftshortToint64By16:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sxth x[[REG]], x[[REG]]
-; CHECK: asr x0, x[[REG]], #16
- %inc = add i16 %a, 1
- %conv = sext i16 %inc to i64
- %shr = ashr i64 %conv, 16
- ret i64 %shr
-}
-
-define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftintToint64By4:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #60, #31
- %inc = add nsw i32 %a, 1
- %conv = sext i32 %inc to i64
- %shl = shl nsw i64 %conv, 4
- ret i64 %shl
-}
-
-define i64 @extendedRightShiftintToint64By4(i32 %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftintToint64By4:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sbfm x0, x[[REG]], #4, #31
- %inc = add nsw i32 %a, 1
- %conv = sext i32 %inc to i64
- %shr = ashr i64 %conv, 4
- ret i64 %shr
-}
-
-define i64 @extendedLeftShiftintToint64By32(i32 %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedLeftShiftintToint64By32:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: lsl x0, x[[REG]], #32
- %inc = add nsw i32 %a, 1
- %conv2 = zext i32 %inc to i64
- %shl = shl nuw i64 %conv2, 32
- ret i64 %shl
-}
-
-define i64 @extendedRightShiftintToint64By32(i32 %a) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: extendedRightShiftintToint64By32:
-; CHECK: add w[[REG:[0-9]+]], w0, #1
-; CHECK: sxtw x[[REG]], x[[REG]]
-; CHECK: asr x0, x[[REG]], #32
- %inc = add nsw i32 %a, 1
- %conv = sext i32 %inc to i64
- %shr = ashr i64 %conv, 32
- ret i64 %shr
-}
diff --git a/test/CodeGen/ARM64/simd-scalar-to-vector.ll b/test/CodeGen/ARM64/simd-scalar-to-vector.ll
deleted file mode 100644
index 6c0b840..0000000
--- a/test/CodeGen/ARM64/simd-scalar-to-vector.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -O0 | FileCheck %s --check-prefix=CHECK-FAST
-
-define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp {
-; CHECK: uaddlv.16b h0, v0
-; CHECK: rshrn.8b v0, v0, #4
-; CHECK: dup.16b v0, v0[0]
-; CHECK: ret
-
-; CHECK-FAST: uaddlv.16b
-; CHECK-FAST: rshrn.8b
-; CHECK-FAST: dup.16b
- %tmp = tail call i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8> %a) nounwind
- %tmp1 = trunc i32 %tmp to i16
- %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0
- %tmp3 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp2, i32 4)
- %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <16 x i32> zeroinitializer
- ret <16 x i8> %tmp4
-}
-
-declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare i32 @llvm.arm64.neon.uaddlv.i32.v16i8(<16 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM64/sli-sri-opt.ll b/test/CodeGen/ARM64/sli-sri-opt.ll
deleted file mode 100644
index 725dcd5..0000000
--- a/test/CodeGen/ARM64/sli-sri-opt.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc -arm64-shift-insert-generation=true -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
-; CHECK-LABEL: testLeftGood:
-; CHECK: sli.16b v0, v1, #3
- %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
- %vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- %result = or <16 x i8> %and.i, %vshl_n
- store <16 x i8> %result, <16 x i8>* %dest, align 16
- ret void
-}
-
-define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
-; CHECK-LABEL: testLeftBad:
-; CHECK-NOT: sli
- %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
- %vshl_n = shl <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %result = or <16 x i8> %and.i, %vshl_n
- store <16 x i8> %result, <16 x i8>* %dest, align 16
- ret void
-}
-
-define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
-; CHECK-LABEL: testRightGood:
-; CHECK: sri.16b v0, v1, #3
- %and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
- %vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
- %result = or <16 x i8> %and.i, %vshl_n
- store <16 x i8> %result, <16 x i8>* %dest, align 16
- ret void
-}
-
-define void @testRightBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
-; CHECK-LABEL: testRightBad:
-; CHECK-NOT: sri
- %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
- %vshl_n = lshr <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %result = or <16 x i8> %and.i, %vshl_n
- store <16 x i8> %result, <16 x i8>* %dest, align 16
- ret void
-}
diff --git a/test/CodeGen/ARM64/smaxv.ll b/test/CodeGen/ARM64/smaxv.ll
deleted file mode 100644
index 4f6e01b..0000000
--- a/test/CodeGen/ARM64/smaxv.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
-; CHECK: test_vmaxv_s8
-; CHECK: smaxv.8b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %a1)
- %0 = trunc i32 %vmaxv.i to i8
- ret i8 %0
-}
-
-define signext i16 @test_vmaxv_s16(<4 x i16> %a1) {
-; CHECK: test_vmaxv_s16
-; CHECK: smaxv.4h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16> %a1)
- %0 = trunc i32 %vmaxv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vmaxv_s32(<2 x i32> %a1) {
-; CHECK: test_vmaxv_s32
-; 2 x i32 is not supported by the ISA, thus, this is a special case
-; CHECK: smaxp.2s v[[REGNUM:[0-9]+]], v0, v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32> %a1)
- ret i32 %vmaxv.i
-}
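-
-; (The smaxp above is a pairwise max of the two lanes with themselves; for a
-; 2-lane vector that single pairwise step already completes the reduction,
-; standing in for the smaxv.2s form the ISA does not provide.)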
-
-define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) {
-; CHECK: test_vmaxvq_s8
-; CHECK: smaxv.16b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8> %a1)
- %0 = trunc i32 %vmaxv.i to i8
- ret i8 %0
-}
-
-define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) {
-; CHECK: test_vmaxvq_s16
-; CHECK: smaxv.8h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16> %a1)
- %0 = trunc i32 %vmaxv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vmaxvq_s32(<4 x i32> %a1) {
-; CHECK: test_vmaxvq_s32
-; CHECK: smaxv.4s [[REGNUM:s[0-9]+]], v0
-; CHECK-NEXT: fmov w0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32> %a1)
- ret i32 %vmaxv.i
-}
-
-declare i32 @llvm.arm64.neon.smaxv.i32.v4i32(<4 x i32>)
-declare i32 @llvm.arm64.neon.smaxv.i32.v8i16(<8 x i16>)
-declare i32 @llvm.arm64.neon.smaxv.i32.v16i8(<16 x i8>)
-declare i32 @llvm.arm64.neon.smaxv.i32.v2i32(<2 x i32>)
-declare i32 @llvm.arm64.neon.smaxv.i32.v4i16(<4 x i16>)
-declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>)
-
diff --git a/test/CodeGen/ARM64/sminv.ll b/test/CodeGen/ARM64/sminv.ll
deleted file mode 100644
index a246868..0000000
--- a/test/CodeGen/ARM64/sminv.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define signext i8 @test_vminv_s8(<8 x i8> %a1) {
-; CHECK: test_vminv_s8
-; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8> %a1)
- %0 = trunc i32 %vminv.i to i8
- ret i8 %0
-}
-
-define signext i16 @test_vminv_s16(<4 x i16> %a1) {
-; CHECK: test_vminv_s16
-; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16> %a1)
- %0 = trunc i32 %vminv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vminv_s32(<2 x i32> %a1) {
-; CHECK: test_vminv_s32
-; 2 x i32 is not supported by the ISA, thus, this is a special case
-; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v0, v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32> %a1)
- ret i32 %vminv.i
-}
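-
-; (As in the smaxv tests, a single sminp pairwise step reduces a 2-lane
-; vector, standing in for the missing sminv.2s form.)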
-
-define signext i8 @test_vminvq_s8(<16 x i8> %a1) {
-; CHECK: test_vminvq_s8
-; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8> %a1)
- %0 = trunc i32 %vminv.i to i8
- ret i8 %0
-}
-
-define signext i16 @test_vminvq_s16(<8 x i16> %a1) {
-; CHECK: test_vminvq_s16
-; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16> %a1)
- %0 = trunc i32 %vminv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vminvq_s32(<4 x i32> %a1) {
-; CHECK: test_vminvq_s32
-; CHECK: sminv.4s [[REGNUM:s[0-9]+]], v0
-; CHECK-NEXT: fmov w0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32> %a1)
- ret i32 %vminv.i
-}
-
-declare i32 @llvm.arm64.neon.sminv.i32.v4i32(<4 x i32>)
-declare i32 @llvm.arm64.neon.sminv.i32.v8i16(<8 x i16>)
-declare i32 @llvm.arm64.neon.sminv.i32.v16i8(<16 x i8>)
-declare i32 @llvm.arm64.neon.sminv.i32.v2i32(<2 x i32>)
-declare i32 @llvm.arm64.neon.sminv.i32.v4i16(<4 x i16>)
-declare i32 @llvm.arm64.neon.sminv.i32.v8i8(<8 x i8>)
-
diff --git a/test/CodeGen/ARM64/spill.ll b/test/CodeGen/ARM64/spill.ll
deleted file mode 100644
index 9173c87..0000000
--- a/test/CodeGen/ARM64/spill.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
-
-; CHECK: fpr128
-; CHECK: ld1.2d
-; CHECK: str q
-; CHECK: inlineasm
-; CHECK: ldr q
-; CHECK: st1.2d
-define void @fpr128(<4 x float>* %p) nounwind ssp {
-entry:
- %x = load <4 x float>* %p, align 16
- call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
- store <4 x float> %x, <4 x float>* %p, align 16
- ret void
-}
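-
-; (The inline asm clobbers every v/q register and nearly every GPR, so %x
-; cannot be kept in a register across it; the checks above pin down the
-; expected q-register spill, str q, and reload, ldr q, around the asm.)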
diff --git a/test/CodeGen/ARM64/st1.ll b/test/CodeGen/ARM64/st1.ll
deleted file mode 100644
index b9aafc6..0000000
--- a/test/CodeGen/ARM64/st1.ll
+++ /dev/null
@@ -1,676 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
-
-define void @st1lane_16b(<16 x i8> %A, i8* %D) {
-; CHECK-LABEL: st1lane_16b
-; CHECK: st1.b
- %tmp = extractelement <16 x i8> %A, i32 1
- store i8 %tmp, i8* %D
- ret void
-}
-
-define void @st1lane_8h(<8 x i16> %A, i16* %D) {
-; CHECK-LABEL: st1lane_8h
-; CHECK: st1.h
- %tmp = extractelement <8 x i16> %A, i32 1
- store i16 %tmp, i16* %D
- ret void
-}
-
-define void @st1lane_4s(<4 x i32> %A, i32* %D) {
-; CHECK-LABEL: st1lane_4s
-; CHECK: st1.s
- %tmp = extractelement <4 x i32> %A, i32 1
- store i32 %tmp, i32* %D
- ret void
-}
-
-define void @st1lane_4s_float(<4 x float> %A, float* %D) {
-; CHECK-LABEL: st1lane_4s_float
-; CHECK: st1.s
- %tmp = extractelement <4 x float> %A, i32 1
- store float %tmp, float* %D
- ret void
-}
-
-define void @st1lane_2d(<2 x i64> %A, i64* %D) {
-; CHECK-LABEL: st1lane_2d
-; CHECK: st1.d
- %tmp = extractelement <2 x i64> %A, i32 1
- store i64 %tmp, i64* %D
- ret void
-}
-
-define void @st1lane_2d_double(<2 x double> %A, double* %D) {
-; CHECK-LABEL: st1lane_2d_double
-; CHECK: st1.d
- %tmp = extractelement <2 x double> %A, i32 1
- store double %tmp, double* %D
- ret void
-}
-
-define void @st1lane_8b(<8 x i8> %A, i8* %D) {
-; CHECK-LABEL: st1lane_8b
-; CHECK: st1.b
- %tmp = extractelement <8 x i8> %A, i32 1
- store i8 %tmp, i8* %D
- ret void
-}
-
-define void @st1lane_4h(<4 x i16> %A, i16* %D) {
-; CHECK-LABEL: st1lane_4h
-; CHECK: st1.h
- %tmp = extractelement <4 x i16> %A, i32 1
- store i16 %tmp, i16* %D
- ret void
-}
-
-define void @st1lane_2s(<2 x i32> %A, i32* %D) {
-; CHECK-LABEL: st1lane_2s
-; CHECK: st1.s
- %tmp = extractelement <2 x i32> %A, i32 1
- store i32 %tmp, i32* %D
- ret void
-}
-
-define void @st1lane_2s_float(<2 x float> %A, float* %D) {
-; CHECK-LABEL: st1lane_2s_float
-; CHECK: st1.s
- %tmp = extractelement <2 x float> %A, i32 1
- store float %tmp, float* %D
- ret void
-}
-
-define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
-; CHECK-LABEL: st2lane_16b
-; CHECK: st2.b
- call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
- ret void
-}
-
-define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
-; CHECK-LABEL: st2lane_8h
-; CHECK: st2.h
- call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
- ret void
-}
-
-define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
-; CHECK-LABEL: st2lane_4s
-; CHECK: st2.s
- call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
- ret void
-}
-
-define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
-; CHECK-LABEL: st2lane_2d
-; CHECK: st2.d
- call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
-declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
-declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
-declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
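-
-; (Each st2lane intrinsic above stores lane 1 of its two source vectors as
-; one contiguous pair, which is what the single-lane st2.b/h/s/d form does.)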
-
-define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
-; CHECK-LABEL: st3lane_16b
-; CHECK: st3.b
- call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
- ret void
-}
-
-define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
-; CHECK-LABEL: st3lane_8h
-; CHECK: st3.h
- call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
- ret void
-}
-
-define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
-; CHECK-LABEL: st3lane_4s
-; CHECK: st3.s
- call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
- ret void
-}
-
-define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
-; CHECK-LABEL: st3lane_2d
-; CHECK: st3.d
- call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
- ret void
-}
-
-declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
-declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
-declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
-declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
-
-define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
-; CHECK-LABEL: st4lane_16b
-; CHECK: st4.b
- call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
- ret void
-}
-
-define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
-; CHECK-LABEL: st4lane_8h
-; CHECK: st4.h
- call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
- ret void
-}
-
-define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
-; CHECK-LABEL: st4lane_4s
-; CHECK: st4.s
- call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
- ret void
-}
-
-define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
-; CHECK-LABEL: st4lane_2d
-; CHECK: st4.d
- call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
- ret void
-}
-
-declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
-declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
-declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
-declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
-
-
-define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
-; CHECK-LABEL: st2_8b
-; CHECK: st2.8b
- call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
- ret void
-}
-
-define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
-; CHECK-LABEL: st3_8b
-; CHECK: st3.8b
- call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
- ret void
-}
-
-define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
-; CHECK-LABEL: st4_8b
-; CHECK: st4.8b
- call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
-
-define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
-; CHECK-LABEL: st2_16b
-; CHECK: st2.16b
- call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
- ret void
-}
-
-define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
-; CHECK-LABEL: st3_16b
-; CHECK: st3.16b
- call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
- ret void
-}
-
-define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
-; CHECK-LABEL: st4_16b
-; CHECK: st4.16b
- call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
-
-define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
-; CHECK-LABEL: st2_4h
-; CHECK: st2.4h
- call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
- ret void
-}
-
-define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
-; CHECK-LABEL: st3_4h
-; CHECK: st3.4h
- call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
- ret void
-}
-
-define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
-; CHECK-LABEL: st4_4h
-; CHECK: st4.4h
- call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
-
-define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
-; CHECK-LABEL: st2_8h
-; CHECK: st2.8h
- call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
- ret void
-}
-
-define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
-; CHECK-LABEL: st3_8h
-; CHECK: st3.8h
- call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
- ret void
-}
-
-define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
-; CHECK-LABEL: st4_8h
-; CHECK: st4.8h
- call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
-
-define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
-; CHECK-LABEL: st2_2s
-; CHECK: st2.2s
- call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
- ret void
-}
-
-define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
-; CHECK-LABEL: st3_2s
-; CHECK: st3.2s
- call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
- ret void
-}
-
-define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
-; CHECK-LABEL: st4_2s
-; CHECK: st4.2s
- call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
-
-define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
-; CHECK-LABEL: st2_4s
-; CHECK: st2.4s
- call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
- ret void
-}
-
-define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
-; CHECK-LABEL: st3_4s
-; CHECK: st3.4s
- call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
- ret void
-}
-
-define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
-; CHECK-LABEL: st4_4s
-; CHECK: st4.4s
- call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
-
-define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
-; CHECK-LABEL: st2_1d
-; CHECK: st1.1d
- call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
- ret void
-}
-
-define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
-; CHECK-LABEL: st3_1d
-; CHECK: st1.1d
- call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
- ret void
-}
-
-define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
-; CHECK-LABEL: st4_1d
-; CHECK: st1.1d
- call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
-
-define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
-; CHECK-LABEL: st2_2d
-; CHECK: st2.2d
- call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
- ret void
-}
-
-define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
-; CHECK-LABEL: st3_2d
-; CHECK: st3.2d
- call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
- ret void
-}
-
-define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
-; CHECK-LABEL: st4_2d
-; CHECK: st4.2d
- call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
- ret void
-}
-
-declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
-
-declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly
-
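-; (Unlike st2, the st1x2 family stores its two whole vectors back to back
-; without interleaving; it selects the multi-register st1 forms checked
-; below.)
-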
-define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
-; CHECK-LABEL: st1_x2_v8i8:
-; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
- ret void
-}
-
-define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
-; CHECK-LABEL: st1_x2_v4i16:
-; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
- ret void
-}
-
-define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
-; CHECK-LABEL: st1_x2_v2i32:
-; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
- ret void
-}
-
-define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
-; CHECK-LABEL: st1_x2_v2f32:
-; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
- ret void
-}
-
-define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
-; CHECK-LABEL: st1_x2_v1i64:
-; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
- ret void
-}
-
-define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
-; CHECK-LABEL: st1_x2_v1f64:
-; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
- ret void
-}
-
-declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly
-
-define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
-; CHECK-LABEL: st1_x2_v16i8:
-; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
- ret void
-}
-
-define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
-; CHECK-LABEL: st1_x2_v8i16:
-; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
- ret void
-}
-
-define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
-; CHECK-LABEL: st1_x2_v4i32:
-; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
- ret void
-}
-
-define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
-; CHECK-LABEL: st1_x2_v4f32:
-; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
- ret void
-}
-
-define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
-; CHECK-LABEL: st1_x2_v2i64:
-; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
- ret void
-}
-
-define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
-; CHECK-LABEL: st1_x2_v2f64:
-; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
- ret void
-}
-
-declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly
-
-define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
-; CHECK-LABEL: st1_x3_v8i8:
-; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
- ret void
-}
-
-define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
-; CHECK-LABEL: st1_x3_v4i16:
-; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
- ret void
-}
-
-define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
-; CHECK-LABEL: st1_x3_v2i32:
-; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
- ret void
-}
-
-define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
-; CHECK-LABEL: st1_x3_v2f32:
-; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
- ret void
-}
-
-define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
-; CHECK-LABEL: st1_x3_v1i64:
-; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
- ret void
-}
-
-define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
-; CHECK-LABEL: st1_x3_v1f64:
-; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
- ret void
-}
-
-declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly
-
-define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
-; CHECK-LABEL: st1_x3_v16i8:
-; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
- ret void
-}
-
-define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
-; CHECK-LABEL: st1_x3_v8i16:
-; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
- ret void
-}
-
-define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
-; CHECK-LABEL: st1_x3_v4i32:
-; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
- ret void
-}
-
-define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
-; CHECK-LABEL: st1_x3_v4f32:
-; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
- ret void
-}
-
-define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
-; CHECK-LABEL: st1_x3_v2i64:
-; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
- ret void
-}
-
-define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
-; CHECK-LABEL: st1_x3_v2f64:
-; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
- ret void
-}
-
-
-declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly
-
-define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
-; CHECK-LABEL: st1_x4_v8i8:
-; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
- ret void
-}
-
-define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
-; CHECK-LABEL: st1_x4_v4i16:
-; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
- ret void
-}
-
-define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
-; CHECK-LABEL: st1_x4_v2i32:
-; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
- ret void
-}
-
-define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
-; CHECK-LABEL: st1_x4_v2f32:
-; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
- ret void
-}
-
-define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
-; CHECK-LABEL: st1_x4_v1i64:
-; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
- ret void
-}
-
-define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
-; CHECK-LABEL: st1_x4_v1f64:
-; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
- ret void
-}
-
-declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
-declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly
-
-define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
-; CHECK-LABEL: st1_x4_v16i8:
-; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
- ret void
-}
-
-define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) {
-; CHECK-LABEL: st1_x4_v8i16:
-; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
- ret void
-}
-
-define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) {
-; CHECK-LABEL: st1_x4_v4i32:
-; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
- ret void
-}
-
-define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) {
-; CHECK-LABEL: st1_x4_v4f32:
-; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
- ret void
-}
-
-define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) {
-; CHECK-LABEL: st1_x4_v2i64:
-; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
- ret void
-}
-
-define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) {
-; CHECK-LABEL: st1_x4_v2f64:
-; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
- call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr)
- ret void
-}
diff --git a/test/CodeGen/ARM64/stp.ll b/test/CodeGen/ARM64/stp.ll
deleted file mode 100644
index eacf093..0000000
--- a/test/CodeGen/ARM64/stp.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-stp-suppress=false -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=arm64 -arm64-unscaled-mem-op=true\
-; RUN: -verify-machineinstrs | FileCheck -check-prefix=STUR_CHK %s
-
-; CHECK: stp_int
-; CHECK: stp w0, w1, [x2]
-define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
- store i32 %a, i32* %p, align 4
- %add.ptr = getelementptr inbounds i32* %p, i64 1
- store i32 %b, i32* %add.ptr, align 4
- ret void
-}
-
-; CHECK: stp_long
-; CHECK: stp x0, x1, [x2]
-define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
- store i64 %a, i64* %p, align 8
- %add.ptr = getelementptr inbounds i64* %p, i64 1
- store i64 %b, i64* %add.ptr, align 8
- ret void
-}
-
-; CHECK: stp_float
-; CHECK: stp s0, s1, [x0]
-define void @stp_float(float %a, float %b, float* nocapture %p) nounwind {
- store float %a, float* %p, align 4
- %add.ptr = getelementptr inbounds float* %p, i64 1
- store float %b, float* %add.ptr, align 4
- ret void
-}
-
-; CHECK: stp_double
-; CHECK: stp d0, d1, [x0]
-define void @stp_double(double %a, double %b, double* nocapture %p) nounwind {
- store double %a, double* %p, align 8
- %add.ptr = getelementptr inbounds double* %p, i64 1
- store double %b, double* %add.ptr, align 8
- ret void
-}
-
-; Test the load/store optimizer: combine sturs into an stp, if appropriate
-define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
-; STUR_CHK: stur_int
-; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
-; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i32* %p, i32 -1
- store i32 %a, i32* %p1, align 2
- %p2 = getelementptr inbounds i32* %p, i32 -2
- store i32 %b, i32* %p2, align 2
- ret void
-}
-
-define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
-; STUR_CHK: stur_long
-; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
-; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %p, i32 -1
- store i64 %a, i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %p, i32 -2
- store i64 %b, i64* %p2, align 2
- ret void
-}
-
-define void @stur_float(float %a, float %b, float* nocapture %p) nounwind {
-; STUR_CHK: stur_float
-; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
-; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds float* %p, i32 -1
- store float %a, float* %p1, align 2
- %p2 = getelementptr inbounds float* %p, i32 -2
- store float %b, float* %p2, align 2
- ret void
-}
-
-define void @stur_double(double %a, double %b, double* nocapture %p) nounwind {
-; STUR_CHK: stur_double
-; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
-; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds double* %p, i32 -1
- store double %a, double* %p1, align 2
- %p2 = getelementptr inbounds double* %p, i32 -2
- store double %b, double* %p2, align 2
- ret void
-}
-
-define void @splat_v4i32(i32 %v, i32 *%p) {
-entry:
-
-; CHECK-LABEL: splat_v4i32
-; CHECK-DAG: stp w0, w0, [x1]
-; CHECK-DAG: stp w0, w0, [x1, #8]
-; CHECK: ret
-
- %p17 = insertelement <4 x i32> undef, i32 %v, i32 0
- %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
- %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
- %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
- %p21 = bitcast i32* %p to <4 x i32>*
- store <4 x i32> %p20, <4 x i32>* %p21, align 4
- ret void
-}
diff --git a/test/CodeGen/ARM64/strict-align.ll b/test/CodeGen/ARM64/strict-align.ll
deleted file mode 100644
index e392172..0000000
--- a/test/CodeGen/ARM64/strict-align.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -arm64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
-
-define i32 @f0(i32* nocapture %p) nounwind {
-; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
-; CHECK-STRICT: ldrh [[LOW:w[0-9]+]], [x0]
-; CHECK-STRICT: orr w0, [[LOW]], [[HIGH]], lsl #16
-; CHECK-STRICT: ret
-
-; CHECK: ldr w0, [x0]
-; CHECK: ret
- %tmp = load i32* %p, align 2
- ret i32 %tmp
-}
-
-define i64 @f1(i64* nocapture %p) nounwind {
-; CHECK-STRICT: ldp w[[LOW:[0-9]+]], w[[HIGH:[0-9]+]], [x0]
-; CHECK-STRICT: orr x0, x[[LOW]], x[[HIGH]], lsl #32
-; CHECK-STRICT: ret
-
-; CHECK: ldr x0, [x0]
-; CHECK: ret
- %tmp = load i64* %p, align 4
- ret i64 %tmp
-}
diff --git a/test/CodeGen/ARM64/stur.ll b/test/CodeGen/ARM64/stur.ll
deleted file mode 100644
index 8326bba..0000000
--- a/test/CodeGen/ARM64/stur.ll
+++ /dev/null
@@ -1,98 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-%struct.X = type <{ i32, i64, i64 }>
-
-define void @foo1(i32* %p, i64 %val) nounwind {
-; CHECK-LABEL: foo1:
-; CHECK: stur w1, [x0, #-4]
-; CHECK-NEXT: ret
- %tmp1 = trunc i64 %val to i32
- %ptr = getelementptr inbounds i32* %p, i64 -1
- store i32 %tmp1, i32* %ptr, align 4
- ret void
-}
-define void @foo2(i16* %p, i64 %val) nounwind {
-; CHECK-LABEL: foo2:
-; CHECK: sturh w1, [x0, #-2]
-; CHECK-NEXT: ret
- %tmp1 = trunc i64 %val to i16
- %ptr = getelementptr inbounds i16* %p, i64 -1
- store i16 %tmp1, i16* %ptr, align 2
- ret void
-}
-define void @foo3(i8* %p, i64 %val) nounwind {
-; CHECK-LABEL: foo3:
-; CHECK: sturb w1, [x0, #-1]
-; CHECK-NEXT: ret
- %tmp1 = trunc i64 %val to i8
- %ptr = getelementptr inbounds i8* %p, i64 -1
- store i8 %tmp1, i8* %ptr, align 1
- ret void
-}
-define void @foo4(i16* %p, i32 %val) nounwind {
-; CHECK-LABEL: foo4:
-; CHECK: sturh w1, [x0, #-2]
-; CHECK-NEXT: ret
- %tmp1 = trunc i32 %val to i16
- %ptr = getelementptr inbounds i16* %p, i32 -1
- store i16 %tmp1, i16* %ptr, align 2
- ret void
-}
-define void @foo5(i8* %p, i32 %val) nounwind {
-; CHECK-LABEL: foo5:
-; CHECK: sturb w1, [x0, #-1]
-; CHECK-NEXT: ret
- %tmp1 = trunc i32 %val to i8
- %ptr = getelementptr inbounds i8* %p, i32 -1
- store i8 %tmp1, i8* %ptr, align 1
- ret void
-}
-
-define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
-; CHECK-LABEL: foo:
-; CHECK-NOT: str
-; CHECK: stur xzr, [x0, #12]
-; CHECK-NEXT: stur xzr, [x0, #4]
-; CHECK-NEXT: ret
- %B = getelementptr inbounds %struct.X* %p, i64 0, i32 1
- %val = bitcast i64* %B to i8*
- call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false)
- ret void
-}
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
-
-; Unaligned 16b stores are split into 8b stores for performance.
-; radar://15424193
-
-; CHECK-LABEL: unaligned:
-; CHECK-NOT: str q0
-; CHECK: str d[[REG:[0-9]+]], [x0]
-; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
-; CHECK: str d[[REG2]], [x0, #8]
-define void @unaligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
- store <4 x i32> %v, <4 x i32>* %p, align 4
- ret void
-}
-
-; CHECK-LABEL: aligned:
-; CHECK: str q0
-define void @aligned(<4 x i32>* %p, <4 x i32> %v) nounwind {
- store <4 x i32> %v, <4 x i32>* %p
- ret void
-}
-
-; Don't split one and two byte aligned stores.
-; radar://16349308
-
-; CHECK-LABEL: twobytealign:
-; CHECK: str q0
-define void @twobytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
- store <4 x i32> %v, <4 x i32>* %p, align 2
- ret void
-}
-; CHECK-LABEL: onebytealign:
-; CHECK: str q0
-define void @onebytealign(<4 x i32>* %p, <4 x i32> %v) nounwind {
- store <4 x i32> %v, <4 x i32>* %p, align 1
- ret void
-}
diff --git a/test/CodeGen/ARM64/subvector-extend.ll b/test/CodeGen/ARM64/subvector-extend.ll
deleted file mode 100644
index ad2f06c..0000000
--- a/test/CodeGen/ARM64/subvector-extend.ll
+++ /dev/null
@@ -1,141 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
-
-; Test efficient codegen of vector extends from legal types up to 128-bit
-; and 256-bit vector types.
-
-;-----
-; Vectors of i16.
-;-----
-define <8 x i16> @func1(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: func1:
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <8 x i8> %v0 to <8 x i16>
- ret <8 x i16> %r
-}
-
-define <8 x i16> @func2(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: func2:
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <8 x i8> %v0 to <8 x i16>
- ret <8 x i16> %r
-}
-
-define <16 x i16> @func3(<16 x i8> %v0) nounwind {
-; CHECK-LABEL: func3:
-; CHECK-NEXT: ushll2.8h v1, v0, #0
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <16 x i8> %v0 to <16 x i16>
- ret <16 x i16> %r
-}
-
-define <16 x i16> @func4(<16 x i8> %v0) nounwind {
-; CHECK-LABEL: func4:
-; CHECK-NEXT: sshll2.8h v1, v0, #0
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <16 x i8> %v0 to <16 x i16>
- ret <16 x i16> %r
-}
-
-;-----
-; Vectors of i32.
-;-----
-
-define <4 x i32> @afunc1(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc1:
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <4 x i16> %v0 to <4 x i32>
- ret <4 x i32> %r
-}
-
-define <4 x i32> @afunc2(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc2:
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <4 x i16> %v0 to <4 x i32>
- ret <4 x i32> %r
-}
-
-define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc3:
-; CHECK-NEXT: ushll2.4s v1, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <8 x i16> %v0 to <8 x i32>
- ret <8 x i32> %r
-}
-
-define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
-; CHECK-LABEL: afunc4:
-; CHECK-NEXT: sshll2.4s v1, v0, #0
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <8 x i16> %v0 to <8 x i32>
- ret <8 x i32> %r
-}
-
-define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: bfunc1:
-; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ushll2.4s v1, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <8 x i8> %v0 to <8 x i32>
- ret <8 x i32> %r
-}
-
-define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
-; CHECK-LABEL: bfunc2:
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: sshll2.4s v1, v0, #0
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <8 x i8> %v0 to <8 x i32>
- ret <8 x i32> %r
-}
-
-;-----
-; Vectors of i64.
-;-----
-
-define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc1:
-; CHECK-NEXT: ushll2.2d v1, v0, #0
-; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <4 x i32> %v0 to <4 x i64>
- ret <4 x i64> %r
-}
-
-define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
-; CHECK-LABEL: zfunc2:
-; CHECK-NEXT: sshll2.2d v1, v0, #0
-; CHECK-NEXT: sshll.2d v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <4 x i32> %v0 to <4 x i64>
- ret <4 x i64> %r
-}
-
-define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: bfunc3:
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: ushll2.2d v1, v0, #0
-; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: ret
- %r = zext <4 x i16> %v0 to <4 x i64>
- ret <4 x i64> %r
-}
-
-define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
-; CHECK-LABEL: cfunc4:
-; CHECK-NEXT: sshll.4s v0, v0, #0
-; CHECK-NEXT: sshll2.2d v1, v0, #0
-; CHECK-NEXT: sshll.2d v0, v0, #0
-; CHECK-NEXT: ret
- %r = sext <4 x i16> %v0 to <4 x i64>
- ret <4 x i64> %r
-}
diff --git a/test/CodeGen/ARM64/tbl.ll b/test/CodeGen/ARM64/tbl.ll
deleted file mode 100644
index e1edd21..0000000
--- a/test/CodeGen/ARM64/tbl.ll
+++ /dev/null
@@ -1,132 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
-; CHECK: tbl1_8b
-; CHECK: tbl.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
-; CHECK: tbl1_16b
-; CHECK: tbl.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK: tbl2_8b
-; CHECK: tbl.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK: tbl2_16b
-; CHECK: tbl.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK: tbl3_8b
-; CHECK: tbl.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK: tbl3_16b
-; CHECK: tbl.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK: tbl4_8b
-; CHECK: tbl.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK: tbl4_16b
-; CHECK: tbl.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
- ret <16 x i8> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
-; CHECK: tbx1_8b
-; CHECK: tbx.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
-; CHECK: tbx1_16b
-; CHECK: tbx.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK: tbx2_8b
-; CHECK: tbx.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK: tbx2_16b
-; CHECK: tbx.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK: tbx3_8b
-; CHECK: tbx.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK: tbx3_16b
-; CHECK: tbx.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
-; CHECK: tbx4_8b
-; CHECK: tbx.8b
- %tmp3 = call <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
-; CHECK: tbx4_16b
-; CHECK: tbx.16b
- %tmp3 = call <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
- ret <16 x i8> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-
diff --git a/test/CodeGen/ARM64/trap.ll b/test/CodeGen/ARM64/trap.ll
deleted file mode 100644
index c9e0bea..0000000
--- a/test/CodeGen/ARM64/trap.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-define void @foo() nounwind {
-; CHECK: foo
-; CHECK: brk #1
- tail call void @llvm.trap()
- ret void
-}
-declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM64/trn.ll b/test/CodeGen/ARM64/trn.ll
deleted file mode 100644
index f467984..0000000
--- a/test/CodeGen/ARM64/trn.ll
+++ /dev/null
@@ -1,134 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vtrni8:
-;CHECK: trn1.8b
-;CHECK: trn2.8b
-;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vtrni16:
-;CHECK: trn1.4h
-;CHECK: trn2.4h
-;CHECK-NEXT: add.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-; 2xi32 TRN is redundant with ZIP
-define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: vtrni32:
-;CHECK: zip1.2s
-;CHECK: zip2.2s
-;CHECK-NEXT: add.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
- %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: vtrnf:
-;CHECK: zip1.2s
-;CHECK: zip2.2s
-;CHECK-NEXT: fadd.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
- %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
- %tmp5 = fadd <2 x float> %tmp3, %tmp4
- ret <2 x float> %tmp5
-}
-
-define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vtrnQi8:
-;CHECK: trn1.16b
-;CHECK: trn2.16b
-;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
- %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vtrnQi16:
-;CHECK: trn1.8h
-;CHECK: trn2.8h
-;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vtrnQi32:
-;CHECK: trn1.4s
-;CHECK: trn2.4s
-;CHECK-NEXT: add.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: vtrnQf:
-;CHECK: trn1.4s
-;CHECK: trn2.4s
-;CHECK-NEXT: fadd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %tmp5 = fadd <4 x float> %tmp3, %tmp4
- ret <4 x float> %tmp5
-}
-
-; Undef shuffle indices should not prevent matching to VTRN:
-
-define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vtrni8_undef:
-;CHECK: trn1.8b
-;CHECK: trn2.8b
-;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
- %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vtrnQi16_undef:
-;CHECK: trn1.8h
-;CHECK: trn2.8h
-;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
- %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
diff --git a/test/CodeGen/ARM64/trunc-store.ll b/test/CodeGen/ARM64/trunc-store.ll
deleted file mode 100644
index e65f5b5..0000000
--- a/test/CodeGen/ARM64/trunc-store.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
-
-define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind {
-; CHECK-LABEL: bar:
-; CHECK: xtn.8b v[[REG:[0-9]+]], v0
-; CHECK-NEXT: str d[[REG]], [x0]
-; CHECK-NEXT: ret
- %tmp = trunc <8 x i16> %arg to <8 x i8>
- store <8 x i8> %tmp, <8 x i8>* %p, align 8
- ret void
-}
-
-@zptr8 = common global i8* null, align 8
-@zptr16 = common global i16* null, align 8
-@zptr32 = common global i32* null, align 8
-
-define void @fct32(i32 %arg, i64 %var) {
-; CHECK: fct32
-; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr32@GOTPAGE
-; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr32@GOTPAGEOFF]
-; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
-; w0 is %arg
-; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
-; w1 is %var truncated
-; CHECK-NEXT: str w1, {{\[}}[[GLOBALADDR]], x[[OFFSETREGNUM]], sxtw #2]
-; CHECK-NEXT: ret
-bb:
- %.pre37 = load i32** @zptr32, align 8
- %dec = add nsw i32 %arg, -1
- %idxprom8 = sext i32 %dec to i64
- %arrayidx9 = getelementptr inbounds i32* %.pre37, i64 %idxprom8
- %tmp = trunc i64 %var to i32
- store i32 %tmp, i32* %arrayidx9, align 4
- ret void
-}
-
-define void @fct16(i32 %arg, i64 %var) {
-; CHECK: fct16
-; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr16@GOTPAGE
-; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr16@GOTPAGEOFF]
-; CHECK: ldr [[GLOBALADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
-; w0 is %arg
-; CHECK-NEXT: sub w[[OFFSETREGNUM:[0-9]+]], w0, #1
-; w1 is %var truncated
-; CHECK-NEXT: strh w1, {{\[}}[[GLOBALADDR]], x[[OFFSETREGNUM]], sxtw #1]
-; CHECK-NEXT: ret
-bb:
- %.pre37 = load i16** @zptr16, align 8
- %dec = add nsw i32 %arg, -1
- %idxprom8 = sext i32 %dec to i64
- %arrayidx9 = getelementptr inbounds i16* %.pre37, i64 %idxprom8
- %tmp = trunc i64 %var to i16
- store i16 %tmp, i16* %arrayidx9, align 4
- ret void
-}
-
-define void @fct8(i32 %arg, i64 %var) {
-; CHECK: fct8
-; CHECK: adrp [[GLOBALPAGE:x[0-9]+]], _zptr8@GOTPAGE
-; CHECK: ldr [[GLOBALOFF:x[0-9]+]], {{\[}}[[GLOBALPAGE]], _zptr8@GOTPAGEOFF]
-; CHECK: ldr [[BASEADDR:x[0-9]+]], {{\[}}[[GLOBALOFF]]]
-; w0 is %arg
-; CHECK-NEXT: add [[ADDR:x[0-9]+]], [[BASEADDR]], w0, sxtw
-; w1 is %var truncated
-; CHECK-NEXT: sturb w1, {{\[}}[[ADDR]], #-1]
-; CHECK-NEXT: ret
-bb:
- %.pre37 = load i8** @zptr8, align 8
- %dec = add nsw i32 %arg, -1
- %idxprom8 = sext i32 %dec to i64
- %arrayidx9 = getelementptr inbounds i8* %.pre37, i64 %idxprom8
- %tmp = trunc i64 %var to i8
- store i8 %tmp, i8* %arrayidx9, align 4
- ret void
-}
diff --git a/test/CodeGen/ARM64/umaxv.ll b/test/CodeGen/ARM64/umaxv.ll
deleted file mode 100644
index 15277d3..0000000
--- a/test/CodeGen/ARM64/umaxv.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp {
-; CHECK-LABEL: vmax_u8x8:
-; CHECK: umaxv.8b b[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %a) nounwind
- %tmp = trunc i32 %vmaxv.i to i8
- %tobool = icmp eq i8 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-declare i32 @bar(...)
-
-define i32 @vmax_u4x16(<4 x i16> %a) nounwind ssp {
-; CHECK-LABEL: vmax_u4x16:
-; CHECK: umaxv.4h h[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16> %a) nounwind
- %tmp = trunc i32 %vmaxv.i to i16
- %tobool = icmp eq i16 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @vmax_u8x16(<8 x i16> %a) nounwind ssp {
-; CHECK-LABEL: vmax_u8x16:
-; CHECK: umaxv.8h h[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16> %a) nounwind
- %tmp = trunc i32 %vmaxv.i to i16
- %tobool = icmp eq i16 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @vmax_u16x8(<16 x i8> %a) nounwind ssp {
-; CHECK-LABEL: vmax_u16x8:
-; CHECK: umaxv.16b b[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %a) nounwind
- %tmp = trunc i32 %vmaxv.i to i8
- %tobool = icmp eq i8 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone
-declare i32 @llvm.arm64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM64/uminv.ll b/test/CodeGen/ARM64/uminv.ll
deleted file mode 100644
index 440522f..0000000
--- a/test/CodeGen/ARM64/uminv.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp {
-; CHECK-LABEL: vmin_u8x8:
-; CHECK: uminv.8b b[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %a) nounwind
- %tmp = trunc i32 %vminv.i to i8
- %tobool = icmp eq i8 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-declare i32 @bar(...)
-
-define i32 @vmin_u4x16(<4 x i16> %a) nounwind ssp {
-; CHECK-LABEL: vmin_u4x16:
-; CHECK: uminv.4h h[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16> %a) nounwind
- %tmp = trunc i32 %vminv.i to i16
- %tobool = icmp eq i16 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @vmin_u8x16(<8 x i16> %a) nounwind ssp {
-; CHECK-LABEL: vmin_u8x16:
-; CHECK: uminv.8h h[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16> %a) nounwind
- %tmp = trunc i32 %vminv.i to i16
- %tobool = icmp eq i16 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @vmin_u16x8(<16 x i8> %a) nounwind ssp {
-; CHECK-LABEL: vmin_u16x8:
-; CHECK: uminv.16b b[[REG:[0-9]+]], v0
-; CHECK: fmov [[REG2:w[0-9]+]], s[[REG]]
-; CHECK-NOT: and
-; CHECK: cbz [[REG2]],
-entry:
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %a) nounwind
- %tmp = trunc i32 %vminv.i to i8
- %tobool = icmp eq i8 %tmp, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then:
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() nounwind
- br label %return
-
-return:
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone
-declare i32 @llvm.arm64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone
-declare i32 @llvm.arm64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone
-declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM64/umov.ll b/test/CodeGen/ARM64/umov.ll
deleted file mode 100644
index 7701874..0000000
--- a/test/CodeGen/ARM64/umov.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define zeroext i8 @f1(<16 x i8> %a) {
-; CHECK-LABEL: f1:
-; CHECK: umov.b w0, v0[3]
-; CHECK-NEXT: ret
- %vecext = extractelement <16 x i8> %a, i32 3
- ret i8 %vecext
-}
-
-define zeroext i16 @f2(<4 x i16> %a) {
-; CHECK-LABEL: f2:
-; CHECK: umov.h w0, v0[2]
-; CHECK-NEXT: ret
- %vecext = extractelement <4 x i16> %a, i32 2
- ret i16 %vecext
-}
-
-define i32 @f3(<2 x i32> %a) {
-; CHECK-LABEL: f3:
-; CHECK: umov.s w0, v0[1]
-; CHECK-NEXT: ret
- %vecext = extractelement <2 x i32> %a, i32 1
- ret i32 %vecext
-}
-
-define i64 @f4(<2 x i64> %a) {
-; CHECK-LABEL: f4:
-; CHECK: umov.d x0, v0[1]
-; CHECK-NEXT: ret
- %vecext = extractelement <2 x i64> %a, i32 1
- ret i64 %vecext
-}
diff --git a/test/CodeGen/ARM64/uzp.ll b/test/CodeGen/ARM64/uzp.ll
deleted file mode 100644
index 60e16d0..0000000
--- a/test/CodeGen/ARM64/uzp.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vuzpi8:
-;CHECK: uzp1.8b
-;CHECK: uzp2.8b
-;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
- %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vuzpi16:
-;CHECK: uzp1.4h
-;CHECK: uzp2.4h
-;CHECK-NEXT: add.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vuzpQi8:
-;CHECK: uzp1.16b
-;CHECK: uzp2.16b
-;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
- %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vuzpQi16:
-;CHECK: uzp1.8h
-;CHECK: uzp2.8h
-;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
- %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vuzpQi32:
-;CHECK: uzp1.4s
-;CHECK: uzp2.4s
-;CHECK-NEXT: add.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: vuzpQf:
-;CHECK: uzp1.4s
-;CHECK: uzp2.4s
-;CHECK-NEXT: fadd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
- %tmp5 = fadd <4 x float> %tmp3, %tmp4
- ret <4 x float> %tmp5
-}
-
-; Undef shuffle indices should not prevent matching to VUZP:
-
-define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vuzpi8_undef:
-;CHECK: uzp1.8b
-;CHECK: uzp2.8b
-;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
- %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vuzpQi16_undef:
-;CHECK: uzp1.8h
-;CHECK: uzp2.8h
-;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
- %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
diff --git a/test/CodeGen/ARM64/vabs.ll b/test/CodeGen/ARM64/vabs.ll
deleted file mode 100644
index 0d8aa24..0000000
--- a/test/CodeGen/ARM64/vabs.ll
+++ /dev/null
@@ -1,804 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-
-define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sabdl8h:
-;CHECK: sabdl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sabdl4s:
-;CHECK: sabdl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sabdl2d:
-;CHECK: sabdl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sabdl2_8h:
-;CHECK: sabdl2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
- %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sabdl2_4s:
-;CHECK: sabdl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sabdl2_2d:
-;CHECK: sabdl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uabdl8h:
-;CHECK: uabdl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uabdl4s:
-;CHECK: uabdl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uabdl2d:
-;CHECK: uabdl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uabdl2_8h:
-;CHECK: uabdl2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
- %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uabdl2_4s:
-;CHECK: uabdl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uabdl2_2d:
-;CHECK: uabdl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
- ret <2 x i64> %tmp4
-}
-
-define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fabd_2s:
-;CHECK: fabd.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fabd_4s:
-;CHECK: fabd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fabd_2d:
-;CHECK: fabd.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sabd_8b:
-;CHECK: sabd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sabd_16b:
-;CHECK: sabd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sabd_4h:
-;CHECK: sabd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sabd_8h:
-;CHECK: sabd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sabd_2s:
-;CHECK: sabd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sabd_4s:
-;CHECK: sabd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uabd_8b:
-;CHECK: uabd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uabd_16b:
-;CHECK: uabd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uabd_4h:
-;CHECK: uabd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uabd_8h:
-;CHECK: uabd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uabd_2s:
-;CHECK: uabd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uabd_4s:
-;CHECK: uabd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: sqabs_8b:
-;CHECK: sqabs.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: sqabs_16b:
-;CHECK: sqabs.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: sqabs_4h:
-;CHECK: sqabs.4h
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqabs_8h:
-;CHECK: sqabs.8h
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: sqabs_2s:
-;CHECK: sqabs.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqabs_4s:
-;CHECK: sqabs.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone
-
-define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: sqneg_8b:
-;CHECK: sqneg.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: sqneg_16b:
-;CHECK: sqneg.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: sqneg_4h:
-;CHECK: sqneg.4h
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqneg_8h:
-;CHECK: sqneg.8h
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: sqneg_2s:
-;CHECK: sqneg.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqneg_4s:
-;CHECK: sqneg.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone
-
-define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: abs_8b:
-;CHECK: abs.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: abs_16b:
-;CHECK: abs.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: abs_4h:
-;CHECK: abs.4h
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: abs_8h:
-;CHECK: abs.8h
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: abs_2s:
-;CHECK: abs.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: abs_4s:
-;CHECK: abs.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
-; CHECK-LABEL: abs_1d:
-; CHECK: abs d0, d0
- %abs = call <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64> %A)
- ret <1 x i64> %abs
-}
-
-define i64 @abs_1d_honestly(i64 %A) nounwind {
-; CHECK-LABEL: abs_1d_honestly:
-; CHECK: abs d0, d0
- %abs = call i64 @llvm.arm64.neon.abs.i64(i64 %A)
- ret i64 %abs
-}
-
-declare <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.abs.v1i64(<1 x i64>) nounwind readnone
-declare i64 @llvm.arm64.neon.abs.i64(i64) nounwind readnone
-
-define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: sabal8h:
-;CHECK: sabal.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i16>* %C
- %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
- %tmp5 = add <8 x i16> %tmp3, %tmp4.1
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sabal4s:
-;CHECK: sabal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
- %tmp5 = add <4 x i32> %tmp3, %tmp4.1
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sabal2d:
-;CHECK: sabal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
- %tmp5 = add <2 x i64> %tmp3, %tmp4.1
- ret <2 x i64> %tmp5
-}
-
-define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: sabal2_8h:
-;CHECK: sabal2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
- %tmp3 = load <8 x i16>* %C
- %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp4 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
- %tmp5 = add <8 x i16> %tmp3, %tmp4.1
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sabal2_4s:
-;CHECK: sabal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp4 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
- %tmp5 = add <4 x i32> %tmp3, %tmp4.1
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sabal2_2d:
-;CHECK: sabal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp4 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
- %tmp5 = add <2 x i64> %tmp3, %tmp4.1
- ret <2 x i64> %tmp5
-}
-
-define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: uabal8h:
-;CHECK: uabal.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i16>* %C
- %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
- %tmp5 = add <8 x i16> %tmp3, %tmp4.1
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: uabal4s:
-;CHECK: uabal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
- %tmp5 = add <4 x i32> %tmp3, %tmp4.1
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: uabal2d:
-;CHECK: uabal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
- %tmp5 = add <2 x i64> %tmp3, %tmp4.1
- ret <2 x i64> %tmp5
-}
-
-define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: uabal2_8h:
-;CHECK: uabal2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
- %tmp3 = load <8 x i16>* %C
- %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp4 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
- %tmp5 = add <8 x i16> %tmp3, %tmp4.1
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: uabal2_4s:
-;CHECK: uabal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp4 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
- %tmp5 = add <4 x i32> %tmp3, %tmp4.1
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: uabal2_2d:
-;CHECK: uabal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp4 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
- %tmp5 = add <2 x i64> %tmp3, %tmp4.1
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK-LABEL: saba_8b:
-;CHECK: saba.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = load <8 x i8>* %C
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK-LABEL: saba_16b:
-;CHECK: saba.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- %tmp4 = load <16 x i8>* %C
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK-LABEL: saba_4h:
-;CHECK: saba.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = load <4 x i16>* %C
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: saba_8h:
-;CHECK: saba.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- %tmp4 = load <8 x i16>* %C
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK-LABEL: saba_2s:
-;CHECK: saba.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = load <2 x i32>* %C
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: saba_4s:
-;CHECK: saba.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- %tmp4 = load <4 x i32>* %C
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK-LABEL: uaba_8b:
-;CHECK: uaba.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = load <8 x i8>* %C
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK-LABEL: uaba_16b:
-;CHECK: uaba.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- %tmp4 = load <16 x i8>* %C
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK-LABEL: uaba_4h:
-;CHECK: uaba.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = load <4 x i16>* %C
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: uaba_8h:
-;CHECK: uaba.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- %tmp4 = load <8 x i16>* %C
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK-LABEL: uaba_2s:
-;CHECK: uaba.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = load <2 x i32>* %C
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: uaba_4s:
-;CHECK: uaba.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- %tmp4 = load <4 x i32>* %C
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-; Scalar FABD
-define float @fabds(float %a, float %b) nounwind {
-; CHECK-LABEL: fabds:
-; CHECK: fabd s0, s0, s1
- %vabd.i = tail call float @llvm.arm64.sisd.fabd.f32(float %a, float %b) nounwind
- ret float %vabd.i
-}
-
-define double @fabdd(double %a, double %b) nounwind {
-; CHECK-LABEL: fabdd:
-; CHECK: fabd d0, d0, d1
- %vabd.i = tail call double @llvm.arm64.sisd.fabd.f64(double %a, double %b) nounwind
- ret double %vabd.i
-}
-
-declare double @llvm.arm64.sisd.fabd.f64(double, double) nounwind readnone
-declare float @llvm.arm64.sisd.fabd.f32(float, float) nounwind readnone
-
-define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: uabdl_from_extract_dup:
-; CHECK-NOT: ext.16b
-; CHECK: uabdl2.2d
- %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
- %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %res = tail call <2 x i32> @llvm.arm64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
- %res1 = zext <2 x i32> %res to <2 x i64>
- ret <2 x i64> %res1
-}
-
-define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: sabdl_from_extract_dup:
-; CHECK-NOT: ext.16b
-; CHECK: sabdl2.2d
- %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
- %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %res = tail call <2 x i32> @llvm.arm64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
- %res1 = zext <2 x i32> %res to <2 x i64>
- ret <2 x i64> %res1
-}
diff --git a/test/CodeGen/ARM64/vadd.ll b/test/CodeGen/ARM64/vadd.ll
deleted file mode 100644
index f674c6d..0000000
--- a/test/CodeGen/ARM64/vadd.ll
+++ /dev/null
@@ -1,955 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
-
-define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: addhn8b:
-;CHECK: addhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: addhn4h:
-;CHECK: addhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: addhn2s:
-;CHECK: addhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
-;CHECK-LABEL: addhn2_16b:
-;CHECK: addhn.8b
-;CHECK-NEXT: addhn2.16b
- %vaddhn2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %vaddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
-;CHECK-LABEL: addhn2_8h:
-;CHECK: addhn.4h
-;CHECK-NEXT: addhn2.8h
- %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %vaddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
-;CHECK-LABEL: addhn2_4s:
-;CHECK: addhn.2s
-;CHECK-NEXT: addhn2.4s
- %vaddhn2.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %vaddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <2 x i32> @llvm.arm64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-
-
-define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: raddhn8b:
-;CHECK: raddhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: raddhn4h:
-;CHECK: raddhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: raddhn2s:
-;CHECK: raddhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
-;CHECK-LABEL: raddhn2_16b:
-;CHECK: raddhn.8b
-;CHECK-NEXT: raddhn2.16b
- %vraddhn2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %vraddhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
-;CHECK-LABEL: raddhn2_8h:
-;CHECK: raddhn.4h
-;CHECK-NEXT: raddhn2.8h
- %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %vraddhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16> %vraddhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
-;CHECK-LABEL: raddhn2_4s:
-;CHECK: raddhn.2s
-;CHECK-NEXT: raddhn2.4s
- %vraddhn2.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %vraddhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32> %vraddhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <2 x i32> @llvm.arm64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-
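-; saddl/uaddl: extending both narrow operands before the add should select
-; the widening add-long instruction rather than separate extends and an add.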
-define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: saddl8h:
-;CHECK: saddl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
- %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: saddl4s:
-;CHECK: saddl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
- %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @saddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: saddl2d:
-;CHECK: saddl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
- %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
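-; The high halves below are extracted with a bitcast plus shufflevector; the
-; high-half saddl2/uaddl2 forms should still be selected directly.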
-define <8 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECK-LABEL: saddl2_8h:
-; CHECK-NEXT: saddl2.8h v0, v0, v1
-; CHECK-NEXT: ret
- %tmp = bitcast <16 x i8> %a to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
- %vmovl.i.i.i = sext <8 x i8> %tmp1 to <8 x i16>
- %tmp2 = bitcast <16 x i8> %b to <2 x i64>
- %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
- %vmovl.i.i5.i = sext <8 x i8> %tmp3 to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind {
-; CHECK-LABEL: saddl2_4s:
-; CHECK-NEXT: saddl2.4s v0, v0, v1
-; CHECK-NEXT: ret
- %tmp = bitcast <8 x i16> %a to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
- %vmovl.i.i.i = sext <4 x i16> %tmp1 to <4 x i32>
- %tmp2 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
- %vmovl.i.i5.i = sext <4 x i16> %tmp3 to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind {
-; CHECK-LABEL: saddl2_2d:
-; CHECK-NEXT: saddl2.2d v0, v0, v1
-; CHECK-NEXT: ret
- %tmp = bitcast <4 x i32> %a to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
- %vmovl.i.i.i = sext <2 x i32> %tmp1 to <2 x i64>
- %tmp2 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
- %vmovl.i.i5.i = sext <2 x i32> %tmp3 to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uaddl8h:
-;CHECK: uaddl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
- %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uaddl4s:
-;CHECK: uaddl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
- %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @uaddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uaddl2d:
-;CHECK: uaddl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
- %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-
-define <8 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECK-LABEL: uaddl2_8h:
-; CHECK-NEXT: uaddl2.8h v0, v0, v1
-; CHECK-NEXT: ret
- %tmp = bitcast <16 x i8> %a to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
- %vmovl.i.i.i = zext <8 x i8> %tmp1 to <8 x i16>
- %tmp2 = bitcast <16 x i8> %b to <2 x i64>
- %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8>
- %vmovl.i.i5.i = zext <8 x i8> %tmp3 to <8 x i16>
- %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) nounwind {
-; CHECK-LABEL: uaddl2_4s:
-; CHECK-NEXT: uaddl2.4s v0, v0, v1
-; CHECK-NEXT: ret
- %tmp = bitcast <8 x i16> %a to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
- %vmovl.i.i.i = zext <4 x i16> %tmp1 to <4 x i32>
- %tmp2 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16>
- %vmovl.i.i5.i = zext <4 x i16> %tmp3 to <4 x i32>
- %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind {
-; CHECK-LABEL: uaddl2_2d:
-; CHECK-NEXT: uaddl2.2d v0, v0, v1
-; CHECK-NEXT: ret
- %tmp = bitcast <4 x i32> %a to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
- %vmovl.i.i.i = zext <2 x i32> %tmp1 to <2 x i64>
- %tmp2 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i4.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32>
- %vmovl.i.i5.i = zext <2 x i32> %tmp3 to <2 x i64>
- %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uaddw8h:
-;CHECK: uaddw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
- %tmp4 = add <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uaddw4s:
-;CHECK: uaddw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
- %tmp4 = add <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uaddw2d:
-;CHECK: uaddw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
- %tmp4 = add <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
-
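-; uaddw2/saddw2: adding the widened high half of a narrow vector to a wide
-; accumulator should use the high-half add-wide form on the full q register.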
-define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uaddw2_8h:
-;CHECK: uaddw2.8h
- %tmp1 = load <8 x i16>* %A
-
- %tmp2 = load <16 x i8>* %B
- %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext2 = zext <8 x i8> %high2 to <8 x i16>
-
- %res = add <8 x i16> %tmp1, %ext2
- ret <8 x i16> %res
-}
-
-define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uaddw2_4s:
-;CHECK: uaddw2.4s
- %tmp1 = load <4 x i32>* %A
-
- %tmp2 = load <8 x i16>* %B
- %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext2 = zext <4 x i16> %high2 to <4 x i32>
-
- %res = add <4 x i32> %tmp1, %ext2
- ret <4 x i32> %res
-}
-
-define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uaddw2_2d:
-;CHECK: uaddw2.2d
- %tmp1 = load <2 x i64>* %A
-
- %tmp2 = load <4 x i32>* %B
- %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext2 = zext <2 x i32> %high2 to <2 x i64>
-
- %res = add <2 x i64> %tmp1, %ext2
- ret <2 x i64> %res
-}
-
-define <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: saddw8h:
-;CHECK: saddw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
- %tmp4 = add <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: saddw4s:
-;CHECK: saddw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
- %tmp4 = add <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: saddw2d:
-;CHECK: saddw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
- %tmp4 = add <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: saddw2_8h:
-;CHECK: saddw2.8h
- %tmp1 = load <8 x i16>* %A
-
- %tmp2 = load <16 x i8>* %B
- %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext2 = sext <8 x i8> %high2 to <8 x i16>
-
- %res = add <8 x i16> %tmp1, %ext2
- ret <8 x i16> %res
-}
-
-define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: saddw2_4s:
-;CHECK: saddw2.4s
- %tmp1 = load <4 x i32>* %A
-
- %tmp2 = load <8 x i16>* %B
- %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext2 = sext <4 x i16> %high2 to <4 x i32>
-
- %res = add <4 x i32> %tmp1, %ext2
- ret <4 x i32> %res
-}
-
-define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: saddw2_2d:
-;CHECK: saddw2.2d
- %tmp1 = load <2 x i64>* %A
-
- %tmp2 = load <4 x i32>* %B
- %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext2 = sext <2 x i32> %high2 to <2 x i64>
-
- %res = add <2 x i64> %tmp1, %ext2
- ret <2 x i64> %res
-}
-
-define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: saddlp4h:
-;CHECK: saddlp.4h
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: saddlp2s:
-;CHECK: saddlp.2s
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: saddlp1d:
-;CHECK: saddlp.1d
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1)
- ret <1 x i64> %tmp3
-}
-
-define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: saddlp8h:
-;CHECK: saddlp.8h
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: saddlp4s:
-;CHECK: saddlp.4s
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: saddlp2d:
-;CHECK: saddlp.2d
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
- ret <2 x i64> %tmp3
-}
-
-declare <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone
-
-define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: uaddlp4h:
-;CHECK: uaddlp.4h
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: uaddlp2s:
-;CHECK: uaddlp.2s
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: uaddlp1d:
-;CHECK: uaddlp.1d
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1)
- ret <1 x i64> %tmp3
-}
-
-define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: uaddlp8h:
-;CHECK: uaddlp.8h
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: uaddlp4s:
-;CHECK: uaddlp.4s
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: uaddlp2d:
-;CHECK: uaddlp.2d
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
- ret <2 x i64> %tmp3
-}
-
-declare <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone
-
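-; sadalp/uadalp: a pairwise add-long whose result is then accumulated should
-; fuse into the accumulating pairwise add instruction.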
-define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sadalp4h:
-;CHECK: sadalp.4h
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
- %tmp4 = load <4 x i16>* %B
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sadalp2s:
-;CHECK: sadalp.2s
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
- %tmp4 = load <2 x i32>* %B
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sadalp8h:
-;CHECK: sadalp.8h
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
- %tmp4 = load <8 x i16>* %B
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sadalp4s:
-;CHECK: sadalp.4s
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
- %tmp4 = load <4 x i32>* %B
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: sadalp2d:
-;CHECK: sadalp.2d
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
- %tmp4 = load <2 x i64>* %B
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uadalp4h:
-;CHECK: uadalp.4h
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
- %tmp4 = load <4 x i16>* %B
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uadalp2s:
-;CHECK: uadalp.2s
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
- %tmp4 = load <2 x i32>* %B
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uadalp8h:
-;CHECK: uadalp.8h
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
- %tmp4 = load <8 x i16>* %B
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uadalp4s:
-;CHECK: uadalp.4s
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
- %tmp4 = load <4 x i32>* %B
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: uadalp2d:
-;CHECK: uadalp.2d
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
- %tmp4 = load <2 x i64>* %B
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: addp_8b:
-;CHECK: addp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: addp_16b:
-;CHECK: addp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: addp_4h:
-;CHECK: addp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: addp_8h:
-;CHECK: addp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: addp_2s:
-;CHECK: addp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: addp_4s:
-;CHECK: addp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: addp_2d:
-;CHECK: addp.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: faddp_2s:
-;CHECK: faddp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: faddp_4s:
-;CHECK: faddp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: faddp_2d:
-;CHECK: faddp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
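-; For the dup'd scalar operands below no ext.16b is needed; the high-half
-; long ops can read the top half of the vector operand directly.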
-define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: uaddl2_duprhs:
-; CHECK-NOT: ext.16b
-; CHECK: uaddl2.2d
- %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
- %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
- %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
-
- %res = add <2 x i64> %lhs.ext, %rhs.ext
- ret <2 x i64> %res
-}
-
-define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: saddl2_duplhs:
-; CHECK-NOT: ext.16b
-; CHECK: saddl2.2d
- %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
- %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
-
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
- %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
-
- %res = add <2 x i64> %lhs.ext, %rhs.ext
- ret <2 x i64> %res
-}
-
-define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: usubl2_duprhs:
-; CHECK-NOT: ext.16b
-; CHECK: usubl2.2d
- %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
- %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
- %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
-
- %res = sub <2 x i64> %lhs.ext, %rhs.ext
- ret <2 x i64> %res
-}
-
-define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: ssubl2_duplhs:
-; CHECK-NOT: ext.16b
-; CHECK: ssubl2.2d
- %lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
- %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
-
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
- %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
-
- %res = sub <2 x i64> %lhs.ext, %rhs.ext
- ret <2 x i64> %res
-}
-
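-; "Natural" addhn: an add, a logical shift right by half the element width,
-; and a trunc should select addhn without going through the intrinsic.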
-define <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: addhn8b_natural:
-;CHECK: addhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %sum = add <8 x i16> %tmp1, %tmp2
- %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
- ret <8 x i8> %narrowed
-}
-
-define <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: addhn4h_natural:
-;CHECK: addhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %sum = add <4 x i32> %tmp1, %tmp2
- %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
- %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
- ret <4 x i16> %narrowed
-}
-
-define <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: addhn2s_natural:
-;CHECK: addhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %sum = add <2 x i64> %tmp1, %tmp2
- %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
- %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
- ret <2 x i32> %narrowed
-}
-
-define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: addhn2_16b_natural:
-;CHECK: addhn2.16b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %sum = add <8 x i16> %tmp1, %tmp2
- %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
- %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: addhn2_8h_natural:
-;CHECK: addhn2.8h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %sum = add <4 x i32> %tmp1, %tmp2
- %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
- %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
- %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: addhn2_4s_natural:
-;CHECK: addhn2.4s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %sum = add <2 x i64> %tmp1, %tmp2
- %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
- %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
- %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
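-; The same "natural" shift-and-truncate pattern, with sub in place of add,
-; should select subhn and subhn2 without going through an intrinsic.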
-define <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: subhn8b_natural:
-;CHECK: subhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %diff = sub <8 x i16> %tmp1, %tmp2
- %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
- ret <8 x i8> %narrowed
-}
-
-define <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: subhn4h_natural:
-;CHECK: subhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %diff = sub <4 x i32> %tmp1, %tmp2
- %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
- %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
- ret <4 x i16> %narrowed
-}
-
-define <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: subhn2s_natural:
-;CHECK: subhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %diff = sub <2 x i64> %tmp1, %tmp2
- %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
- %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
- ret <2 x i32> %narrowed
-}
-
-define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: subhn2_16b_natural:
-;CHECK: subhn2.16b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %diff = sub <8 x i16> %tmp1, %tmp2
- %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
- %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: subhn2_8h_natural:
-;CHECK: subhn2.8h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %diff = sub <4 x i32> %tmp1, %tmp2
- %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
- %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
- %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: subhn2_4s_natural:
-;CHECK: subhn2.4s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %diff = sub <2 x i64> %tmp1, %tmp2
- %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
- %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
- %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
diff --git a/test/CodeGen/ARM64/vaddlv.ll b/test/CodeGen/ARM64/vaddlv.ll
deleted file mode 100644
index d4d4608..0000000
--- a/test/CodeGen/ARM64/vaddlv.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone {
-; CHECK-LABEL: test_vaddlv_s32:
-; CHECK: saddlp.1d v[[REGNUM:[0-9]+]], v[[INREG:[0-9]+]]
-; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddlv.i = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind
- ret i64 %vaddlv.i
-}
-
-define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone {
-; CHECK-LABEL: test_vaddlv_u32:
-; CHECK: uaddlp.1d v[[REGNUM:[0-9]+]], v[[INREG:[0-9]+]]
-; CHECK-NEXT: fmov x[[OUTREG:[0-9]+]], d[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddlv.i = tail call i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind
- ret i64 %vaddlv.i
-}
-
-declare i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone
-
-declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone
-
diff --git a/test/CodeGen/ARM64/vaddv.ll b/test/CodeGen/ARM64/vaddv.ll
deleted file mode 100644
index 44bfa84..0000000
--- a/test/CodeGen/ARM64/vaddv.ll
+++ /dev/null
@@ -1,235 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
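-; addv reduces across all vector lanes; signed narrow results are read back
-; with a sign-extending smov, unsigned and wide ones with a plain fmov.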
-define signext i8 @test_vaddv_s8(<8 x i8> %a1) {
-; CHECK-LABEL: test_vaddv_s8:
-; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8> %a1)
- %0 = trunc i32 %vaddv.i to i8
- ret i8 %0
-}
-
-define signext i16 @test_vaddv_s16(<4 x i16> %a1) {
-; CHECK-LABEL: test_vaddv_s16:
-; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16> %a1)
- %0 = trunc i32 %vaddv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vaddv_s32(<2 x i32> %a1) {
-; CHECK-LABEL: test_vaddv_s32:
-; the ISA has no addv.2s, so this 2 x i32 case is lowered with addp instead
-; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32> %a1)
- ret i32 %vaddv.i
-}
-
-define i64 @test_vaddv_s64(<2 x i64> %a1) {
-; CHECK-LABEL: test_vaddv_s64:
-; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
-; CHECK-NEXT: fmov x0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64> %a1)
- ret i64 %vaddv.i
-}
-
-define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) {
-; CHECK-LABEL: test_vaddv_u8:
-; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
- %0 = trunc i32 %vaddv.i to i8
- ret i8 %0
-}
-
-define i32 @test_vaddv_u8_masked(<8 x i8> %a1) {
-; CHECK-LABEL: test_vaddv_u8_masked:
-; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8> %a1)
- %0 = and i32 %vaddv.i, 511 ; 0x1ff
- ret i32 %0
-}
-
-define zeroext i16 @test_vaddv_u16(<4 x i16> %a1) {
-; CHECK-LABEL: test_vaddv_u16:
-; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16> %a1)
- %0 = trunc i32 %vaddv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vaddv_u16_masked(<4 x i16> %a1) {
-; CHECK-LABEL: test_vaddv_u16_masked:
-; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16> %a1)
- %0 = and i32 %vaddv.i, 3276799 ; 0x31ffff
- ret i32 %0
-}
-
-define i32 @test_vaddv_u32(<2 x i32> %a1) {
-; CHECK-LABEL: test_vaddv_u32:
-; the ISA has no addv.2s, so this 2 x i32 case is lowered with addp instead
-; CHECK: addp.2s v[[REGNUM:[0-9]+]], v0, v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32> %a1)
- ret i32 %vaddv.i
-}
-
-define float @test_vaddv_f32(<2 x float> %a1) {
-; CHECK-LABEL: test_vaddv_f32:
-; CHECK: faddp.2s s0, v0
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1)
- ret float %vaddv.i
-}
-
-define float @test_vaddv_v4f32(<4 x float> %a1) {
-; CHECK-LABEL: test_vaddv_v4f32:
-; CHECK: faddp.4s [[REGNUM:v[0-9]+]], v0, v0
-; CHECK: faddp.2s s0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1)
- ret float %vaddv.i
-}
-
-define double @test_vaddv_f64(<2 x double> %a1) {
-; CHECK-LABEL: test_vaddv_f64:
-; CHECK: faddp.2d d0, v0
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1)
- ret double %vaddv.i
-}
-
-define i64 @test_vaddv_u64(<2 x i64> %a1) {
-; CHECK-LABEL: test_vaddv_u64:
-; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
-; CHECK-NEXT: fmov x0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
- ret i64 %vaddv.i
-}
-
-define signext i8 @test_vaddvq_s8(<16 x i8> %a1) {
-; CHECK-LABEL: test_vaddvq_s8:
-; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8> %a1)
- %0 = trunc i32 %vaddv.i to i8
- ret i8 %0
-}
-
-define signext i16 @test_vaddvq_s16(<8 x i16> %a1) {
-; CHECK-LABEL: test_vaddvq_s16:
-; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16> %a1)
- %0 = trunc i32 %vaddv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vaddvq_s32(<4 x i32> %a1) {
-; CHECK-LABEL: test_vaddvq_s32:
-; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
-; CHECK-NEXT: fmov w0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32> %a1)
- ret i32 %vaddv.i
-}
-
-define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) {
-; CHECK-LABEL: test_vaddvq_u8:
-; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8> %a1)
- %0 = trunc i32 %vaddv.i to i8
- ret i8 %0
-}
-
-define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) {
-; CHECK-LABEL: test_vaddvq_u16:
-; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
-; CHECK-NEXT: fmov w0, s[[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16> %a1)
- %0 = trunc i32 %vaddv.i to i16
- ret i16 %0
-}
-
-define i32 @test_vaddvq_u32(<4 x i32> %a1) {
-; CHECK-LABEL: test_vaddvq_u32:
-; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
-; CHECK-NEXT: fmov w0, [[REGNUM]]
-; CHECK-NEXT: ret
-entry:
- %vaddv.i = tail call i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32> %a1)
- ret i32 %vaddv.i
-}
-
-declare i32 @llvm.arm64.neon.uaddv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.uaddv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.uaddv.i32.v16i8(<16 x i8>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v4i32(<4 x i32>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v8i16(<8 x i16>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v16i8(<16 x i8>)
-
-declare i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64>)
-
-declare i32 @llvm.arm64.neon.uaddv.i32.v2i32(<2 x i32>)
-
-declare i32 @llvm.arm64.neon.uaddv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.uaddv.i32.v8i8(<8 x i8>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v2i32(<2 x i32>)
-
-declare i64 @llvm.arm64.neon.saddv.i64.v2i64(<2 x i64>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v4i16(<4 x i16>)
-
-declare i32 @llvm.arm64.neon.saddv.i32.v8i8(<8 x i8>)
-
-declare float @llvm.arm64.neon.faddv.f32.v2f32(<2 x float> %a1)
-declare float @llvm.arm64.neon.faddv.f32.v4f32(<4 x float> %a1)
-declare double @llvm.arm64.neon.faddv.f64.v2f64(<2 x double> %a1)
diff --git a/test/CodeGen/ARM64/variadic-aapcs.ll b/test/CodeGen/ARM64/variadic-aapcs.ll
deleted file mode 100644
index ac66902..0000000
--- a/test/CodeGen/ARM64/variadic-aapcs.ll
+++ /dev/null
@@ -1,145 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false < %s | FileCheck %s
-
-%va_list = type {i8*, i8*, i8*, i32, i32}
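-; AAPCS64 va_list layout: { i8* __stack, i8* __gr_top, i8* __vr_top,
-;                           i32 __gr_offs, i32 __vr_offs }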
-
-@var = global %va_list zeroinitializer, align 8
-
-declare void @llvm.va_start(i8*)
-
-define void @test_simple(i32 %n, ...) {
-; CHECK-LABEL: test_simple:
-; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
-
-; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
-
-; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
-; ... omit middle ones ...
-; CHECK: str x7, [sp, #
-
-; CHECK: stp q0, q1, [sp]
-; ... omit middle ones ...
-; CHECK: stp q6, q7, [sp, #
-
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
-
-; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
-; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
-
-; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
-; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-
-; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
-; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-
-; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-
- ret void
-}
-
-define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
-; CHECK-LABEL: test_fewargs:
-; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
-; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
-
-; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
-
-; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
-; ... omit middle ones ...
-; CHECK: str x7, [sp, #
-
-; CHECK: stp q1, q2, [sp]
-; ... omit middle ones ...
-; CHECK: str q7, [sp, #
-
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
-
-; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
-; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
-
-; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
-; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
-
-; CHECK: movn [[GR_OFFS:w[0-9]+]], #39
-; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-
-; CHECK: movn [[VR_OFFS:w[0-9]+]], #111
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-
- ret void
-}
-
-define void @test_nospare([8 x i64], [8 x float], ...) {
-; CHECK-LABEL: test_nospare:
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
-; CHECK-NOT: sub sp, sp
-; CHECK: mov [[STACK:x[0-9]+]], sp
-; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
-
- ret void
-}
-
-; If there are non-variadic arguments on the stack (here two i64s), then the
-; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
-; CHECK-LABEL: test_offsetstack:
-; CHECK: sub sp, sp, #80
-; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
-; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_start(i8* %addr)
- ret void
-}
-
-declare void @llvm.va_end(i8*)
-
-define void @test_va_end() nounwind {
-; CHECK-LABEL: test_va_end:
-; CHECK-NEXT: BB#0
-
- %addr = bitcast %va_list* @var to i8*
- call void @llvm.va_end(i8* %addr)
-
- ret void
-; CHECK-NEXT: ret
-}
-
-declare void @llvm.va_copy(i8* %dest, i8* %src)
-
-@second_list = global %va_list zeroinitializer
-
-define void @test_va_copy() {
-; CHECK-LABEL: test_va_copy:
- %srcaddr = bitcast %va_list* @var to i8*
- %dstaddr = bitcast %va_list* @second_list to i8*
- call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
-
-; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
-
-; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
-; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
-; CHECK: str [[BLOCK]], [x[[DST]]]
-
-; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]], #16]
-; CHECK: str [[BLOCK]], [x[[DST]], #16]
- ret void
-; CHECK: ret
-}
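The five str instructions checked in each variant above fill in the AAPCS64
va_list record. As a reading aid, here is a sketch of the layout the %va_list
type models, with the byte offsets the tests hard-code (field names per the
AArch64 Procedure Call Standard):

%va_list = type {
  i8*,  ; __stack,   offset 0:  next overflow argument on the stack
  i8*,  ; __gr_top,  offset 8:  one past the general-register save area
  i8*,  ; __vr_top,  offset 16: one past the FP/SIMD register save area
  i32,  ; __gr_offs, offset 24: negative byte offset back from __gr_top
  i32   ; __vr_offs, offset 28: negative byte offset back from __vr_top
}

The movn #55 and orr #0xffffff80 checked in test_simple are exactly -56 and
-128: seven x registers (7*8 bytes) and eight q registers (8*16 bytes) were
saved for later va_arg use.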
diff --git a/test/CodeGen/ARM64/vbitwise.ll b/test/CodeGen/ARM64/vbitwise.ll
deleted file mode 100644
index 7d8378d..0000000
--- a/test/CodeGen/ARM64/vbitwise.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: rbit_8b:
-;CHECK: rbit.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: rbit_16b:
-;CHECK: rbit.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
-
-define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: sxtl8h:
-;CHECK: sshll.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @uxtl8h(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: uxtl8h:
-;CHECK: ushll.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
- ret <8 x i16> %tmp2
-}
-
-define <4 x i32> @sxtl4s(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: sxtl4s:
-;CHECK: sshll.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @uxtl4s(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: uxtl4s:
-;CHECK: ushll.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
- ret <4 x i32> %tmp2
-}
-
-define <2 x i64> @sxtl2d(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: sxtl2d:
-;CHECK: sshll.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
- ret <2 x i64> %tmp2
-}
-
-define <2 x i64> @uxtl2d(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: uxtl2d:
-;CHECK: ushll.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
- ret <2 x i64> %tmp2
-}
-
-; Check for incorrect use of vector bic.
-; rdar://11553859
-define void @test_vsliq(i8* nocapture %src, i8* nocapture %dest) nounwind noinline ssp {
-entry:
-; CHECK-LABEL: test_vsliq:
-; CHECK-NOT: bic
-; CHECK: movi.2d [[REG1:v[0-9]+]], #0x0000ff000000ff
-; CHECK: and.16b v{{[0-9]+}}, v{{[0-9]+}}, [[REG1]]
- %0 = bitcast i8* %src to <16 x i8>*
- %1 = load <16 x i8>* %0, align 16
- %and.i = and <16 x i8> %1, <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>
- %2 = bitcast <16 x i8> %and.i to <8 x i16>
- %vshl_n = shl <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- %3 = or <8 x i16> %2, %vshl_n
- %4 = bitcast <8 x i16> %3 to <4 x i32>
- %vshl_n8 = shl <4 x i32> %4, <i32 16, i32 16, i32 16, i32 16>
- %5 = or <4 x i32> %4, %vshl_n8
- %6 = bitcast <4 x i32> %5 to <16 x i8>
- %7 = bitcast i8* %dest to <16 x i8>*
- store <16 x i8> %6, <16 x i8>* %7, align 16
- ret void
-}
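A note on the final test: the byte mask in test_vsliq selects the low byte of
every 32-bit lane, i.e. the repeating 64-bit pattern 0x000000ff000000ff, which
movi.2d can materialize whole. The CHECK lines therefore demand movi plus a
plain and, guarding against the mis-selected vector bic the rdar describes.
The masking step in isolation, as a sketch (hypothetical function name):

define <16 x i8> @mask_low_byte_per_word(<16 x i8> %v) {
; expected: one movi.2d constant and one and.16b, no bic
  %m = and <16 x i8> %v, <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>
  ret <16 x i8> %m
}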
diff --git a/test/CodeGen/ARM64/vclz.ll b/test/CodeGen/ARM64/vclz.ll
deleted file mode 100644
index ddc09ed..0000000
--- a/test/CodeGen/ARM64/vclz.ll
+++ /dev/null
@@ -1,109 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclz_u8:
- ; CHECK: clz.8b v0, v0
- ; CHECK-NEXT: ret
- %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
- ret <8 x i8> %vclz.i
-}
-
-define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclz_s8:
- ; CHECK: clz.8b v0, v0
- ; CHECK-NEXT: ret
- %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
- ret <8 x i8> %vclz.i
-}
-
-define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclz_u16:
- ; CHECK: clz.4h v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
- ret <4 x i16> %vclz1.i
-}
-
-define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclz_s16:
- ; CHECK: clz.4h v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
- ret <4 x i16> %vclz1.i
-}
-
-define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclz_u32:
- ; CHECK: clz.2s v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
- ret <2 x i32> %vclz1.i
-}
-
-define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclz_s32:
- ; CHECK: clz.2s v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
- ret <2 x i32> %vclz1.i
-}
-
-define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclzq_u8:
- ; CHECK: clz.16b v0, v0
- ; CHECK-NEXT: ret
- %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
- ret <16 x i8> %vclz.i
-}
-
-define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclzq_s8:
- ; CHECK: clz.16b v0, v0
- ; CHECK-NEXT: ret
- %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
- ret <16 x i8> %vclz.i
-}
-
-define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclzq_u16:
- ; CHECK: clz.8h v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
- ret <8 x i16> %vclz1.i
-}
-
-define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclzq_s16:
- ; CHECK: clz.8h v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
- ret <8 x i16> %vclz1.i
-}
-
-define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclzq_u32:
- ; CHECK: clz.4s v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
- ret <4 x i32> %vclz1.i
-}
-
-define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
- ; CHECK-LABEL: test_vclzq_s32:
- ; CHECK: clz.4s v0, v0
- ; CHECK-NEXT: ret
- %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
- ret <4 x i32> %vclz1.i
-}
-
-declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
-
-declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
-
-declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
-
-declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
-
-declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
-
-declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
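Every ctlz call above passes i1 false for the is_zero_undef flag, so ctlz(0)
is defined and must return the element width; AArch64 clz already has exactly
that semantics, which is why each test expects a bare clz with no zero check
around it. The same contract in scalar form, as a sketch (hypothetical
function name):

declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone

define i32 @clz32(i32 %x) {
; i1 false requires ctlz(0) = 32, which clz w0, w0 provides directly
  %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 %r
}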
diff --git a/test/CodeGen/ARM64/vcmp.ll b/test/CodeGen/ARM64/vcmp.ll
deleted file mode 100644
index f9275b8..0000000
--- a/test/CodeGen/ARM64/vcmp.ll
+++ /dev/null
@@ -1,227 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-
-define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
-;CHECK-LABEL: fcmltz_4s:
-;CHECK: fcmlt.4s [[REG:v[0-9]+]], v0, #0
-;CHECK-NEXT: xtn.4h v[[REG_1:[0-9]+]], [[REG]]
-;CHECK-NEXT: str d[[REG_1]], [x0]
-;CHECK-NEXT: ret
- %tmp = fcmp olt <4 x float> %a, zeroinitializer
- %tmp2 = sext <4 x i1> %tmp to <4 x i16>
- store <4 x i16> %tmp2, <4 x i16>* %p, align 8
- ret void
-}
-
-define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: facge_2s:
-;CHECK: facge.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: facge_4s:
-;CHECK: facge.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: facge_2d:
-;CHECK: facge.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.facge.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.facge.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: facgt_2s:
-;CHECK: facgt.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: facgt_4s:
-;CHECK: facgt.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: facgt_2d:
-;CHECK: facgt.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.facgt.v2i32.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.facgt.v4i32.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.facgt.v2i64.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define i32 @facge_s(float %A, float %B) nounwind {
-; CHECK-LABEL: facge_s:
-; CHECK: facge {{s[0-9]+}}, s0, s1
- %mask = call i32 @llvm.arm64.neon.facge.i32.f32(float %A, float %B)
- ret i32 %mask
-}
-
-define i64 @facge_d(double %A, double %B) nounwind {
-; CHECK-LABEL: facge_d:
-; CHECK: facge {{d[0-9]+}}, d0, d1
- %mask = call i64 @llvm.arm64.neon.facge.i64.f64(double %A, double %B)
- ret i64 %mask
-}
-
-declare i64 @llvm.arm64.neon.facge.i64.f64(double, double)
-declare i32 @llvm.arm64.neon.facge.i32.f32(float, float)
-
-define i32 @facgt_s(float %A, float %B) nounwind {
-; CHECK-LABEL: facgt_s:
-; CHECK: facgt {{s[0-9]+}}, s0, s1
- %mask = call i32 @llvm.arm64.neon.facgt.i32.f32(float %A, float %B)
- ret i32 %mask
-}
-
-define i64 @facgt_d(double %A, double %B) nounwind {
-; CHECK-LABEL: facgt_d:
-; CHECK: facgt {{d[0-9]+}}, d0, d1
- %mask = call i64 @llvm.arm64.neon.facgt.i64.f64(double %A, double %B)
- ret i64 %mask
-}
-
-declare i64 @llvm.arm64.neon.facgt.i64.f64(double, double)
-declare i32 @llvm.arm64.neon.facgt.i32.f32(float, float)
-
-define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: cmtst_8b:
-;CHECK: cmtst.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %commonbits = and <8 x i8> %tmp1, %tmp2
- %mask = icmp ne <8 x i8> %commonbits, zeroinitializer
- %res = sext <8 x i1> %mask to <8 x i8>
- ret <8 x i8> %res
-}
-
-define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: cmtst_16b:
-;CHECK: cmtst.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %commonbits = and <16 x i8> %tmp1, %tmp2
- %mask = icmp ne <16 x i8> %commonbits, zeroinitializer
- %res = sext <16 x i1> %mask to <16 x i8>
- ret <16 x i8> %res
-}
-
-define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: cmtst_4h:
-;CHECK: cmtst.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %commonbits = and <4 x i16> %tmp1, %tmp2
- %mask = icmp ne <4 x i16> %commonbits, zeroinitializer
- %res = sext <4 x i1> %mask to <4 x i16>
- ret <4 x i16> %res
-}
-
-define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: cmtst_8h:
-;CHECK: cmtst.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %commonbits = and <8 x i16> %tmp1, %tmp2
- %mask = icmp ne <8 x i16> %commonbits, zeroinitializer
- %res = sext <8 x i1> %mask to <8 x i16>
- ret <8 x i16> %res
-}
-
-define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: cmtst_2s:
-;CHECK: cmtst.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %commonbits = and <2 x i32> %tmp1, %tmp2
- %mask = icmp ne <2 x i32> %commonbits, zeroinitializer
- %res = sext <2 x i1> %mask to <2 x i32>
- ret <2 x i32> %res
-}
-
-define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: cmtst_4s:
-;CHECK: cmtst.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %commonbits = and <4 x i32> %tmp1, %tmp2
- %mask = icmp ne <4 x i32> %commonbits, zeroinitializer
- %res = sext <4 x i1> %mask to <4 x i32>
- ret <4 x i32> %res
-}
-
-define <2 x i64> @cmtst_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: cmtst_2d:
-;CHECK: cmtst.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %commonbits = and <2 x i64> %tmp1, %tmp2
- %mask = icmp ne <2 x i64> %commonbits, zeroinitializer
- %res = sext <2 x i1> %mask to <2 x i64>
- ret <2 x i64> %res
-}
-
-define <1 x i64> @fcmeq_d(<1 x double> %A, <1 x double> %B) nounwind {
-; CHECK-LABEL: fcmeq_d:
-; CHECK: fcmeq {{d[0-9]+}}, d0, d1
- %tst = fcmp oeq <1 x double> %A, %B
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmge_d(<1 x double> %A, <1 x double> %B) nounwind {
-; CHECK-LABEL: fcmge_d:
-; CHECK: fcmge {{d[0-9]+}}, d0, d1
- %tst = fcmp oge <1 x double> %A, %B
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmle_d(<1 x double> %A, <1 x double> %B) nounwind {
-; CHECK-LABEL: fcmle_d:
-; CHECK: fcmge {{d[0-9]+}}, d1, d0
- %tst = fcmp ole <1 x double> %A, %B
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmgt_d(<1 x double> %A, <1 x double> %B) nounwind {
-; CHECK-LABEL: fcmgt_d:
-; CHECK: fcmgt {{d[0-9]+}}, d0, d1
- %tst = fcmp ogt <1 x double> %A, %B
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
-
-define <1 x i64> @fcmlt_d(<1 x double> %A, <1 x double> %B) nounwind {
-; CHECK-LABEL: fcmlt_d:
-; CHECK: fcmgt {{d[0-9]+}}, d1, d0
- %tst = fcmp olt <1 x double> %A, %B
- %mask = sext <1 x i1> %tst to <1 x i64>
- ret <1 x i64> %mask
-}
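Note that fcmle_d and fcmlt_d above check an operand swap rather than new
instructions: AArch64 only has register-register fcmge/fcmgt (the le/lt forms
exist only as compares against zero), so a <= b is selected as fcmge with the
operands reversed (hence d1, d0 in those CHECK lines). The equivalence being
relied on, as a sketch (hypothetical function name):

define <1 x i64> @ole_as_swapped_oge(<1 x double> %a, <1 x double> %b) {
; fcmp ole %a, %b computes the same mask as fcmp oge %b, %a
  %tst = fcmp oge <1 x double> %b, %a
  %mask = sext <1 x i1> %tst to <1 x i64>
  ret <1 x i64> %mask
}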
diff --git a/test/CodeGen/ARM64/vcnt.ll b/test/CodeGen/ARM64/vcnt.ll
deleted file mode 100644
index e00658a..0000000
--- a/test/CodeGen/ARM64/vcnt.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: cls_8b:
-;CHECK: cls.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8> %tmp1)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: cls_16b:
-;CHECK: cls.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8> %tmp1)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: cls_4h:
-;CHECK: cls.4h
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16> %tmp1)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: cls_8h:
-;CHECK: cls.8h
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16> %tmp1)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: cls_2s:
-;CHECK: cls.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: cls_4s:
-;CHECK: cls.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vcombine.ll b/test/CodeGen/ARM64/vcombine.ll
deleted file mode 100644
index 16f591e..0000000
--- a/test/CodeGen/ARM64/vcombine.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-; LowerCONCAT_VECTORS() was reversing the order of the two parts.
-; rdar://11558157
-; rdar://11559553
-define <16 x i8> @test(<16 x i8> %q0, <16 x i8> %q1, i8* nocapture %dest) nounwind {
-entry:
-; CHECK-LABEL: test:
-; CHECK: ins.d v0[1], v1[0]
- %0 = bitcast <16 x i8> %q0 to <2 x i64>
- %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> zeroinitializer
- %1 = bitcast <16 x i8> %q1 to <2 x i64>
- %shuffle.i4 = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
- %shuffle.i3 = shufflevector <1 x i64> %shuffle.i, <1 x i64> %shuffle.i4, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i3 to <16 x i8>
- ret <16 x i8> %2
-}
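The bitcast and shufflevector sequence above is what clang emits for vcombine;
the regression was LowerCONCAT_VECTORS() swapping the halves, so the test
insists the low half stays put and the other operand's element 0 is inserted
as element 1. The protected pattern stripped to its core, as a sketch
(hypothetical function name):

define <2 x i64> @combine_halves(<1 x i64> %lo, <1 x i64> %hi) {
; %lo must land in lane 0 and %hi in lane 1, e.g. ins.d v0[1], v1[0]
  %r = shufflevector <1 x i64> %lo, <1 x i64> %hi, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}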
diff --git a/test/CodeGen/ARM64/vcvt.ll b/test/CodeGen/ARM64/vcvt.ll
deleted file mode 100644
index 19bb8cb..0000000
--- a/test/CodeGen/ARM64/vcvt.ll
+++ /dev/null
@@ -1,686 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtas_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtas.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtas_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtas.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtas_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtas.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtau_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtau.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtau_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtau_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtau.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtau_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtau.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtms_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtms.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtms_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtms_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtms.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtms_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtms.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtmu_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtmu.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtmu_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtmu_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtmu.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtmu_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtmu.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtps_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtps.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtps_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtps_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtps.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtps_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtps.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtpu_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtpu.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtpu_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtpu_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtpu.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtpu_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtpu.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtns_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtns.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtns_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtns_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtns.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtns_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtns.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtnu_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtnu.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float> %A)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtnu_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtnu_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtnu.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float> %A)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtnu_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtnu.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double> %A)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzs_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzs.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = fptosi <2 x float> %A to <2 x i32>
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtzs_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzs_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzs.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = fptosi <4 x float> %A to <4 x i32>
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtzs_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtzs.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = fptosi <2 x double> %A to <2 x i64>
- ret <2 x i64> %tmp3
-}
-
-
-define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzu_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzu.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = fptoui <2 x float> %A to <2 x i32>
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtzu_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzu_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzu.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = fptoui <4 x float> %A to <4 x i32>
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtzu_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtzu.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = fptoui <2 x double> %A to <2 x i64>
- ret <2 x i64> %tmp3
-}
-
-define <2 x float> @frinta_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frinta_2s:
-;CHECK-NOT: ld1
-;CHECK: frinta.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.round.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frinta_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frinta_4s:
-;CHECK-NOT: ld1
-;CHECK: frinta.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.round.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frinta_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frinta_2d:
-;CHECK-NOT: ld1
-;CHECK: frinta.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.round.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.round.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.round.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.round.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @frinti_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frinti_2s:
-;CHECK-NOT: ld1
-;CHECK: frinti.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frinti_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frinti_4s:
-;CHECK-NOT: ld1
-;CHECK: frinti.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frinti_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frinti_2d:
-;CHECK-NOT: ld1
-;CHECK: frinti.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @frintm_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frintm_2s:
-;CHECK-NOT: ld1
-;CHECK: frintm.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frintm_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frintm_4s:
-;CHECK-NOT: ld1
-;CHECK: frintm.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frintm_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frintm_2d:
-;CHECK-NOT: ld1
-;CHECK: frintm.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.floor.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @frintn_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frintn_2s:
-;CHECK-NOT: ld1
-;CHECK: frintn.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frintn_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frintn_4s:
-;CHECK-NOT: ld1
-;CHECK: frintn.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frintn_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frintn_2d:
-;CHECK-NOT: ld1
-;CHECK: frintn.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @frintp_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frintp_2s:
-;CHECK-NOT: ld1
-;CHECK: frintp.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.ceil.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frintp_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frintp_4s:
-;CHECK-NOT: ld1
-;CHECK: frintp.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frintp_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frintp_2d:
-;CHECK-NOT: ld1
-;CHECK: frintp.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @frintx_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frintx_2s:
-;CHECK-NOT: ld1
-;CHECK: frintx.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.rint.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frintx_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frintx_4s:
-;CHECK-NOT: ld1
-;CHECK: frintx.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.rint.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frintx_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frintx_2d:
-;CHECK-NOT: ld1
-;CHECK: frintx.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.rint.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.rint.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.rint.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @frintz_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: frintz_2s:
-;CHECK-NOT: ld1
-;CHECK: frintz.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frintz_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: frintz_4s:
-;CHECK-NOT: ld1
-;CHECK: frintz.4s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %A)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frintz_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: frintz_2d:
-;CHECK-NOT: ld1
-;CHECK: frintz.2d v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %A)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
-
-define <2 x float> @fcvtxn_2s(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtxn_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtxn v0.2s, v0.2d
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fcvtxn_4s(<2 x float> %ret, <2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtxn_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtxn2 v0.4s, v1.2d
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %A)
- %res = shufflevector <2 x float> %ret, <2 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x float> %res
-}
-
-declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone
-
-define <2 x i32> @fcvtzsc_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzsc_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzs.2s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %A, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtzsc_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzsc_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzs.4s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %A, i32 1)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtzsc_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtzsc_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtzs.2d v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %A, i32 1)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) nounwind readnone
-
-define <2 x i32> @fcvtzuc_2s(<2 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzuc_2s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzu.2s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %A, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @fcvtzuc_4s(<4 x float> %A) nounwind {
-;CHECK-LABEL: fcvtzuc_4s:
-;CHECK-NOT: ld1
-;CHECK: fcvtzu.4s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %A, i32 1)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @fcvtzuc_2d(<2 x double> %A) nounwind {
-;CHECK-LABEL: fcvtzuc_2d:
-;CHECK-NOT: ld1
-;CHECK: fcvtzu.2d v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %A, i32 1)
- ret <2 x i64> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) nounwind readnone
-
-define <2 x float> @scvtf_2sc(<2 x i32> %A) nounwind {
-;CHECK-LABEL: scvtf_2sc:
-;CHECK-NOT: ld1
-;CHECK: scvtf.2s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @scvtf_4sc(<4 x i32> %A) nounwind {
-;CHECK-LABEL: scvtf_4sc:
-;CHECK-NOT: ld1
-;CHECK: scvtf.4s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %A, i32 1)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @scvtf_2dc(<2 x i64> %A) nounwind {
-;CHECK-LABEL: scvtf_2dc:
-;CHECK-NOT: ld1
-;CHECK: scvtf.2d v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %A, i32 1)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
-
-define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind {
-;CHECK-LABEL: ucvtf_2sc:
-;CHECK-NOT: ld1
-;CHECK: ucvtf.2s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @ucvtf_4sc(<4 x i32> %A) nounwind {
-;CHECK-LABEL: ucvtf_4sc:
-;CHECK-NOT: ld1
-;CHECK: ucvtf.4s v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %A, i32 1)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @ucvtf_2dc(<2 x i64> %A) nounwind {
-;CHECK-LABEL: ucvtf_2dc:
-;CHECK-NOT: ld1
-;CHECK: ucvtf.2d v0, v0, #1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %A, i32 1)
- ret <2 x double> %tmp3
-}
-
-
-;CHECK-LABEL: autogen_SD28458:
-;CHECK: fcvt
-;CHECK: ret
-define void @autogen_SD28458() {
- %Tr53 = fptrunc <8 x double> undef to <8 x float>
- store <8 x float> %Tr53, <8 x float>* undef
- ret void
-}
-
-;CHECK-LABEL: autogen_SD19225:
-;CHECK: fcvt
-;CHECK: ret
-define void @autogen_SD19225() {
- %A = load <8 x float>* undef
- %Tr53 = fpext <8 x float> %A to <8 x double>
- store <8 x double> %Tr53, <8 x double>* undef
- ret void
-}
-
-declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
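Taken together, the frint blocks above pin down a one-to-one mapping from
rounding intrinsics to instructions: llvm.round selects frinta, llvm.nearbyint
frinti, llvm.floor frintm, llvm.arm64.neon.frintn frintn, llvm.ceil frintp,
llvm.rint frintx, and llvm.trunc frintz. The mapping is not specific to the
vector widths tested here; a scalar sketch for one of them (hypothetical
function name):

declare float @llvm.floor.f32(float) nounwind readnone

define float @floor_scalar(float %x) {
; expected to select a single frintm s0, s0 (round toward minus infinity)
  %r = call float @llvm.floor.f32(float %x)
  ret float %r
}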
diff --git a/test/CodeGen/ARM64/vcvt_f.ll b/test/CodeGen/ARM64/vcvt_f.ll
deleted file mode 100644
index d67aa3b..0000000
--- a/test/CodeGen/ARM64/vcvt_f.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
-; CHECK-LABEL: test_vcvt_f64_f32:
- %vcvt1.i = fpext <2 x float> %x to <2 x double>
-; CHECK: fcvtl v0.2d, v0.2s
- ret <2 x double> %vcvt1.i
-; CHECK: ret
-}
-
-define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
-; CHECK-LABEL: test_vcvt_high_f64_f32:
- %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
- %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
-; CHECK: fcvtl2 v0.2d, v0.4s
- ret <2 x double> %vcvt1.i
-; CHECK: ret
-}
-
-define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
-; CHECK-LABEL: test_vcvt_f32_f64:
- %vcvt1.i = fptrunc <2 x double> %v to <2 x float>
-; CHECK: fcvtn
- ret <2 x float> %vcvt1.i
-; CHECK: ret
-}
-
-define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
-; CHECK-LABEL: test_vcvt_high_f32_f64:
-
- %cvt = fptrunc <2 x double> %v to <2 x float>
- %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: fcvtn2
- ret <4 x float> %vcvt2.i
-; CHECK: ret
-}
-
-define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
-; CHECK-LABEL: test_vcvtx_f32_f64:
- %vcvtx1.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
-; CHECK: fcvtxn
- ret <2 x float> %vcvtx1.i
-; CHECK: ret
-}
-
-define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
-; CHECK-LABEL: test_vcvtx_high_f32_f64:
- %vcvtx2.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
- %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK: fcvtxn2
- ret <4 x float> %res
-; CHECK: ret
-}
-
-
-declare <2 x double> @llvm.arm64.neon.vcvthighfp2df(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.vcvtfp2df(<2 x float>) nounwind readnone
-
-declare <2 x float> @llvm.arm64.neon.vcvtdf2fp(<2 x double>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone
-
-declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone
-
-define i16 @to_half(float %in) {
-; CHECK-LABEL: to_half:
-; CHECK: fcvt h[[HALFVAL:[0-9]+]], s0
-; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
- %res = call i16 @llvm.convert.to.fp16(float %in)
- ret i16 %res
-}
-
-define float @from_half(i16 %in) {
-; CHECK-LABEL: from_half:
-; CHECK: fmov s[[HALFVAL:[0-9]+]], {{w[0-9]+}}
-; CHECK: fcvt s0, h[[HALFVAL]]
- %res = call float @llvm.convert.from.fp16(i16 %in)
- ret float %res
-}
-
-declare float @llvm.convert.from.fp16(i16) #1
-declare i16 @llvm.convert.to.fp16(float) #1
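In to_half and from_half above, the i16 is only a container for the raw
half-precision bits, and the checked fmov is the GPR/FPR move around the
actual fcvt. A round-trip sketch combining the two intrinsics (hypothetical
function name; the result is %x rounded to half precision and widened back):

declare i16 @llvm.convert.to.fp16(float)
declare float @llvm.convert.from.fp16(i16)

define float @half_roundtrip(float %x) {
; fcvt h,s then fcvt s,h, with fmov moves at each i16 boundary
  %h = call i16 @llvm.convert.to.fp16(float %x)
  %f = call float @llvm.convert.from.fp16(i16 %h)
  ret float %f
}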
diff --git a/test/CodeGen/ARM64/vcvt_f32_su32.ll b/test/CodeGen/ARM64/vcvt_f32_su32.ll
deleted file mode 100644
index 51e053d..0000000
--- a/test/CodeGen/ARM64/vcvt_f32_su32.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x float> @ucvt(<2 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: ucvt:
-; CHECK: ucvtf.2s v0, v0
-; CHECK: ret
-
- %vcvt.i = uitofp <2 x i32> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <2 x float> @scvt(<2 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: scvt:
-; CHECK: scvtf.2s v0, v0
-; CHECK: ret
- %vcvt.i = sitofp <2 x i32> %a to <2 x float>
- ret <2 x float> %vcvt.i
-}
-
-define <4 x float> @ucvtq(<4 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: ucvtq:
-; CHECK: ucvtf.4s v0, v0
-; CHECK: ret
- %vcvt.i = uitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @scvtq(<4 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: scvtq:
-; CHECK: scvtf.4s v0, v0
-; CHECK: ret
- %vcvt.i = sitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %vcvt.i
-}
-
-define <4 x float> @cvtf16(<4 x i16> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtf16:
-; CHECK: fcvtl v0.4s, v0.4h
-; CHECK-NEXT: ret
- %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %a) nounwind
- ret <4 x float> %vcvt1.i
-}
-
-define <4 x float> @cvtf16_high(<8 x i16> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtf16_high:
-; CHECK: fcvtl2 v0.4s, v0.8h
-; CHECK-NEXT: ret
- %in = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %in) nounwind
- ret <4 x float> %vcvt1.i
-}
-
-
-
-define <4 x i16> @cvtf16f32(<4 x float> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtf16f32:
-; CHECK: fcvtn v0.4h, v0.4s
-; CHECK-NEXT: ret
- %vcvt1.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %a) nounwind
- ret <4 x i16> %vcvt1.i
-}
-
-define <8 x i16> @cvtf16f32_high(<4 x i16> %low, <4 x float> %high_big) {
-; CHECK-LABEL: cvtf16f32_high:
-; CHECK: fcvtn2 v0.8h, v1.4s
-; CHECK-NEXT: ret
- %high = call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %high_big)
- %res = shufflevector <4 x i16> %low, <4 x i16> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-declare <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vcvt_n.ll b/test/CodeGen/ARM64/vcvt_n.ll
deleted file mode 100644
index 46de557..0000000
--- a/test/CodeGen/ARM64/vcvt_n.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x float> @cvtf32fxpu(<2 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtf32fxpu:
-; CHECK: ucvtf.2s v0, v0, #9
-; CHECK: ret
- %vcvt_n1 = tail call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 9)
- ret <2 x float> %vcvt_n1
-}
-
-define <2 x float> @cvtf32fxps(<2 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtf32fxps:
-; CHECK: scvtf.2s v0, v0, #12
-; CHECK: ret
- %vcvt_n1 = tail call <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 12)
- ret <2 x float> %vcvt_n1
-}
-
-define <4 x float> @cvtqf32fxpu(<4 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtqf32fxpu:
-; CHECK: ucvtf.4s v0, v0, #18
-; CHECK: ret
- %vcvt_n1 = tail call <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 18)
- ret <4 x float> %vcvt_n1
-}
-
-define <4 x float> @cvtqf32fxps(<4 x i32> %a) nounwind readnone ssp {
-; CHECK-LABEL: cvtqf32fxps:
-; CHECK: scvtf.4s v0, v0, #30
-; CHECK: ret
- %vcvt_n1 = tail call <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 30)
- ret <4 x float> %vcvt_n1
-}
-define <2 x double> @f1(<2 x i64> %a) nounwind readnone ssp {
- %vcvt_n1 = tail call <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 12)
- ret <2 x double> %vcvt_n1
-}
-
-define <2 x double> @f2(<2 x i64> %a) nounwind readnone ssp {
- %vcvt_n1 = tail call <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 9)
- ret <2 x double> %vcvt_n1
-}
-
-declare <4 x float> @llvm.arm64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
-declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
-declare <2 x float> @llvm.arm64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) nounwind readnone
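In every call above the trailing i32 is the fixed-point fraction-bit count; it
must be a constant because it becomes the #imm on the instruction (f1 and f2
exercise the f64 forms but check nothing beyond successful selection). A
sketch converting unsigned 24.8 fixed-point to float (hypothetical function
name, reusing a declaration from the file):

declare <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone

define <2 x float> @u24_8_to_float(<2 x i32> %a) {
; expected: ucvtf.2s v0, v0, #8, i.e. convert then scale by 2^-8
  %r = call <2 x float> @llvm.arm64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 8)
  ret <2 x float> %r
}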
diff --git a/test/CodeGen/ARM64/vcvt_su32_f32.ll b/test/CodeGen/ARM64/vcvt_su32_f32.ll
deleted file mode 100644
index 8c82fa0..0000000
--- a/test/CodeGen/ARM64/vcvt_su32_f32.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x i32> @c1(<2 x float> %a) nounwind readnone ssp {
-; CHECK-LABEL: c1:
-; CHECK: fcvtzs.2s v0, v0
-; CHECK: ret
- %vcvt.i = fptosi <2 x float> %a to <2 x i32>
- ret <2 x i32> %vcvt.i
-}
-
-define <2 x i32> @c2(<2 x float> %a) nounwind readnone ssp {
-; CHECK-LABEL: c2:
-; CHECK: fcvtzu.2s v0, v0
-; CHECK: ret
- %vcvt.i = fptoui <2 x float> %a to <2 x i32>
- ret <2 x i32> %vcvt.i
-}
-
-define <4 x i32> @c3(<4 x float> %a) nounwind readnone ssp {
-; CHECK-LABEL: c3:
-; CHECK: fcvtzs.4s v0, v0
-; CHECK: ret
- %vcvt.i = fptosi <4 x float> %a to <4 x i32>
- ret <4 x i32> %vcvt.i
-}
-
-define <4 x i32> @c4(<4 x float> %a) nounwind readnone ssp {
-; CHECK-LABEL: c4:
-; CHECK: fcvtzu.4s v0, v0
-; CHECK: ret
- %vcvt.i = fptoui <4 x float> %a to <4 x i32>
- ret <4 x i32> %vcvt.i
-}
-
diff --git a/test/CodeGen/ARM64/vcvtxd_f32_f64.ll b/test/CodeGen/ARM64/vcvtxd_f32_f64.ll
deleted file mode 100644
index bbe8f0b..0000000
--- a/test/CodeGen/ARM64/vcvtxd_f32_f64.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-define float @fcvtxn(double %a) {
-; CHECK-LABEL: fcvtxn:
-; CHECK: fcvtxn s0, d0
-; CHECK-NEXT: ret
- %vcvtxd.i = tail call float @llvm.arm64.sisd.fcvtxn(double %a) nounwind
- ret float %vcvtxd.i
-}
-
-declare float @llvm.arm64.sisd.fcvtxn(double) nounwind readnone
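fcvtxn narrows with round-to-odd, a mode that exists precisely so two-step
narrowing stays correctly rounded: rounding to odd first cannot introduce a
double-rounding error in a subsequent conversion. A sketch of the
double-to-half idiom this enables (hypothetical function name, combining the
intrinsic above with the fp16 helper used elsewhere in these tests):

declare float @llvm.arm64.sisd.fcvtxn(double) nounwind readnone
declare i16 @llvm.convert.to.fp16(float)

define i16 @double_to_half(double %d) {
; round-to-odd f64 to f32 first keeps the final f32 to f16 step correctly rounded
  %s = call float @llvm.arm64.sisd.fcvtxn(double %d)
  %h = call i16 @llvm.convert.to.fp16(float %s)
  ret i16 %h
}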
diff --git a/test/CodeGen/ARM64/vecCmpBr.ll b/test/CodeGen/ARM64/vecCmpBr.ll
deleted file mode 100644
index e23ef25..0000000
--- a/test/CodeGen/ARM64/vecCmpBr.ll
+++ /dev/null
@@ -1,207 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-; ModuleID = 'arm64_vecCmpBr.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
-target triple = "arm64-apple-ios3.0.0"
-
-
-define i32 @anyZero64(<4 x i16> %a) #0 {
-; CHECK: _anyZero64:
-; CHECK: uminv.8b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: b _bar
-entry:
- %0 = bitcast <4 x i16> %a to <8 x i8>
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0) #3
- %1 = trunc i32 %vminv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %if.then, label %return
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-declare i32 @bar(...) #1
-
-define i32 @anyZero128(<8 x i16> %a) #0 {
-; CHECK: _anyZero128:
-; CHECK: uminv.16b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: b _bar
-
-entry:
- %0 = bitcast <8 x i16> %a to <16 x i8>
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %0) #3
- %1 = trunc i32 %vminv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %if.then, label %return
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @anyNonZero64(<4 x i16> %a) #0 {
-; CHECK: _anyNonZero64:
-; CHECK: umaxv.8b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
-
-entry:
- %0 = bitcast <4 x i16> %a to <8 x i8>
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3
- %1 = trunc i32 %vmaxv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @anyNonZero128(<8 x i16> %a) #0 {
-; CHECK: _anyNonZero128:
-; CHECK: umaxv.16b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
-entry:
- %0 = bitcast <8 x i16> %a to <16 x i8>
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3
- %1 = trunc i32 %vmaxv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @allZero64(<4 x i16> %a) #0 {
-; CHECK: _allZero64:
-; CHECK: umaxv.8b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: b _bar
-entry:
- %0 = bitcast <4 x i16> %a to <8 x i8>
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %0) #3
- %1 = trunc i32 %vmaxv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %if.then, label %return
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @allZero128(<8 x i16> %a) #0 {
-; CHECK: _allZero128:
-; CHECK: umaxv.16b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: b _bar
-entry:
- %0 = bitcast <8 x i16> %a to <16 x i8>
- %vmaxv.i = tail call i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8> %0) #3
- %1 = trunc i32 %vmaxv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %if.then, label %return
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @allNonZero64(<4 x i16> %a) #0 {
-; CHECK: _allNonZero64:
-; CHECK: uminv.8b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
-entry:
- %0 = bitcast <4 x i16> %a to <8 x i8>
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8> %0) #3
- %1 = trunc i32 %vminv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-define i32 @allNonZero128(<8 x i16> %a) #0 {
-; CHECK: _allNonZero128:
-; CHECK: uminv.16b b[[REGNO1:[0-9]+]], v0
-; CHECK-NEXT: fmov w[[REGNO2:[0-9]+]], s[[REGNO1]]
-; CHECK-NEXT: cbz w[[REGNO2]], [[LABEL:[A-Z_0-9]+]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: movz w0, #0
-entry:
- %0 = bitcast <8 x i16> %a to <16 x i8>
- %vminv.i = tail call i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8> %0) #3
- %1 = trunc i32 %vminv.i to i8
- %tobool = icmp eq i8 %1, 0
- br i1 %tobool, label %return, label %if.then
-
-if.then: ; preds = %entry
- %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #4
- br label %return
-
-return: ; preds = %entry, %if.then
- %retval.0 = phi i32 [ %call1, %if.then ], [ 0, %entry ]
- ret i32 %retval.0
-}
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v16i8(<16 x i8>) #2
-
-declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) #2
-
-declare i32 @llvm.arm64.neon.uminv.i32.v16i8(<16 x i8>) #2
-
-declare i32 @llvm.arm64.neon.uminv.i32.v8i8(<8 x i8>) #2
-
-attributes #0 = { nounwind ssp "target-cpu"="cyclone" }
-attributes #1 = { "target-cpu"="cyclone" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { nobuiltin nounwind }
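All eight branch tests above compile to one three-instruction idiom: reduce
the vector with uminv or umaxv, fmov the scalar into a w register, and cbz on
it; min-reductions answer "is some lane zero" and "are all lanes non-zero",
max-reductions the complementary questions. The core reduction without the
branching, as a sketch (hypothetical function name, using an intrinsic
declared above):

declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone

define i1 @any_lane_nonzero(<8 x i8> %v) {
; umaxv.8b yields the maximum lane, non-zero iff any lane is non-zero
  %max = call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %v)
  %t = trunc i32 %max to i8
  %nz = icmp ne i8 %t, 0
  ret i1 %nz
}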
diff --git a/test/CodeGen/ARM64/vecFold.ll b/test/CodeGen/ARM64/vecFold.ll
deleted file mode 100644
index 6888932..0000000
--- a/test/CodeGen/ARM64/vecFold.ll
+++ /dev/null
@@ -1,145 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple -o - %s | FileCheck %s
-
-define <16 x i8> @foov16i8(<8 x i16> %a0, <8 x i16> %b0) nounwind readnone ssp {
-; CHECK-LABEL: foov16i8:
- %vshrn_low_shift = lshr <8 x i16> %a0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
- %vshrn_low = trunc <8 x i16> %vshrn_low_shift to <8 x i8>
- %vshrn_high_shift = lshr <8 x i16> %b0, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
- %vshrn_high = trunc <8 x i16> %vshrn_high_shift to <8 x i8>
-; CHECK: shrn.8b v0, v0, #5
-; CHECK-NEXT: shrn2.16b v0, v1, #5
-; CHECK-NEXT: ret
- %1 = bitcast <8 x i8> %vshrn_low to <1 x i64>
- %2 = bitcast <8 x i8> %vshrn_high to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
- ret <16 x i8> %3
-}
-
-define <8 x i16> @foov8i16(<4 x i32> %a0, <4 x i32> %b0) nounwind readnone ssp {
-; CHECK-LABEL: foov8i16:
- %vshrn_low_shift = lshr <4 x i32> %a0, <i32 5, i32 5, i32 5, i32 5>
- %vshrn_low = trunc <4 x i32> %vshrn_low_shift to <4 x i16>
- %vshrn_high_shift = lshr <4 x i32> %b0, <i32 5, i32 5, i32 5, i32 5>
- %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16>
-; CHECK: shrn.4h v0, v0, #5
-; CHECK-NEXT: shrn2.8h v0, v1, #5
-; CHECK-NEXT: ret
- %1 = bitcast <4 x i16> %vshrn_low to <1 x i64>
- %2 = bitcast <4 x i16> %vshrn_high to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @foov4i32(<2 x i64> %a0, <2 x i64> %b0) nounwind readnone ssp {
-; CHECK-LABEL: foov4i32:
- %vshrn_low_shift = lshr <2 x i64> %a0, <i64 5, i64 5>
- %vshrn_low = trunc <2 x i64> %vshrn_low_shift to <2 x i32>
- %vshrn_high_shift = lshr <2 x i64> %b0, <i64 5, i64 5>
- %vshrn_high = trunc <2 x i64> %vshrn_high_shift to <2 x i32>
-; CHECK: shrn.2s v0, v0, #5
-; CHECK-NEXT: shrn2.4s v0, v1, #5
-; CHECK-NEXT: ret
- %1 = bitcast <2 x i32> %vshrn_low to <1 x i64>
- %2 = bitcast <2 x i32> %vshrn_high to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
- ret <4 x i32> %3
-}
-
-define <8 x i16> @bar(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
-; CHECK-LABEL: bar:
- %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind
- %vaddhn2.i10 = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind
-; CHECK: addhn.4h v0, v0, v1
-; CHECK-NEXT: addhn2.8h v0, v2, v3
-; CHECK-NEXT: ret
- %1 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
- %2 = bitcast <4 x i16> %vaddhn2.i10 to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <8 x i16> @baz(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
-; CHECK-LABEL: baz:
- %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind
- %vshrn_high_shift = ashr <4 x i32> %b0, <i32 5, i32 5, i32 5, i32 5>
- %vshrn_high = trunc <4 x i32> %vshrn_high_shift to <4 x i16>
-; CHECK: addhn.4h v0, v0, v1
-; CHECK-NEXT: shrn2.8h v0, v2, #5
-; CHECK-NEXT: ret
- %1 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
- %2 = bitcast <4 x i16> %vshrn_high to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <8 x i16> @raddhn(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
-; CHECK-LABEL: raddhn:
-entry:
-; CHECK: raddhn.4h v0, v0, v1
-; CHECK-NEXT: raddhn2.8h v0, v2, v3
-; CHECK-NEXT: ret
- %vraddhn2.i = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %a0, <4 x i32> %a1) nounwind
- %vraddhn2.i10 = tail call <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind
- %0 = bitcast <4 x i16> %vraddhn2.i to <1 x i64>
- %1 = bitcast <4 x i16> %vraddhn2.i10 to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1>
- %2 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %2
-}
-
-define <8 x i16> @vrshrn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> %b1) nounwind readnone ssp {
-; CHECK-LABEL: vrshrn:
-; CHECK: rshrn.8b v0, v0, #5
-; CHECK-NEXT: rshrn2.16b v0, v2, #6
-; CHECK-NEXT: ret
- %vrshrn_n1 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %a0, i32 5)
- %vrshrn_n4 = tail call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %b0, i32 6)
- %1 = bitcast <8 x i8> %vrshrn_n1 to <1 x i64>
- %2 = bitcast <8 x i8> %vrshrn_n4 to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <8 x i16> @vrsubhn(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %b0, <8 x i16> %b1) nounwind readnone ssp {
-; CHECK-LABEL: vrsubhn:
-; CHECK: rsubhn.8b v0, v0, v1
-; CHECK: rsubhn2.16b v0, v2, v3
-; CHECK-NEXT: ret
- %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a0, <8 x i16> %a1) nounwind
- %vrsubhn2.i10 = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %b0, <8 x i16> %b1) nounwind
- %1 = bitcast <8 x i8> %vrsubhn2.i to <1 x i64>
- %2 = bitcast <8 x i8> %vrsubhn2.i10 to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-define <8 x i16> @noOpt1(<2 x i32> %a0, <2 x i32> %a1, <4 x i32> %b0, <4 x i32> %b1) nounwind readnone ssp {
-; CHECK-LABEL: noOpt1:
- %vqsub2.i = tail call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32> %a0, <2 x i32> %a1) nounwind
- %vaddhn2.i = tail call <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32> %b0, <4 x i32> %b1) nounwind
-; CHECK: sqsub.2s v0, v0, v1
-; CHECK-NEXT: addhn2.8h v0, v2, v3
- %1 = bitcast <2 x i32> %vqsub2.i to <1 x i64>
- %2 = bitcast <4 x i16> %vaddhn2.i to <1 x i64>
- %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
- %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
- ret <8 x i16> %3
-}
-
-declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-
diff --git a/test/CodeGen/ARM64/vector-ext.ll b/test/CodeGen/ARM64/vector-ext.ll
deleted file mode 100644
index 88889fd..0000000
--- a/test/CodeGen/ARM64/vector-ext.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-;CHECK: @func30
-;CHECK: ushll.4s v0, v0, #0
-;CHECK: movi.4s v1, #1
-;CHECK: and.16b v0, v0, v1
-;CHECK: str q0, [x0]
-;CHECK: ret
-
-%T0_30 = type <4 x i1>
-%T1_30 = type <4 x i32>
-define void @func30(%T0_30 %v0, %T1_30* %p1) {
- %r = zext %T0_30 %v0 to %T1_30
- store %T1_30 %r, %T1_30* %p1
- ret void
-}
diff --git a/test/CodeGen/ARM64/vector-imm.ll b/test/CodeGen/ARM64/vector-imm.ll
deleted file mode 100644
index f1fc3cc..0000000
--- a/test/CodeGen/ARM64/vector-imm.ll
+++ /dev/null
@@ -1,134 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
-; CHECK-LABEL: v_orrimm:
-; CHECK-NOT: mov
-; CHECK-NOT: mvn
-; CHECK: orr
- %tmp1 = load <8 x i8>* %A
- %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
-; CHECK-LABEL: v_orrimmQ:
-; CHECK-NOT: mov
-; CHECK-NOT: mvn
-; CHECK: orr
- %tmp1 = load <16 x i8>* %A
- %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
- ret <16 x i8> %tmp3
-}
-
-define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
-; CHECK-LABEL: v_bicimm:
-; CHECK-NOT: mov
-; CHECK-NOT: mvn
-; CHECK: bic
- %tmp1 = load <8 x i8>* %A
- %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
-; CHECK-LABEL: v_bicimmQ:
-; CHECK-NOT: mov
-; CHECK-NOT: mvn
-; CHECK: bic
- %tmp1 = load <16 x i8>* %A
- %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
- ret <16 x i8> %tmp3
-}
-
-define <2 x double> @foo(<2 x double> %bar) nounwind {
-; CHECK-LABEL: foo:
-; CHECK: fmov.2d v1, #1.000000e+00
- %add = fadd <2 x double> %bar, <double 1.0, double 1.0>
- ret <2 x double> %add
-}
-
-define <4 x i32> @movi_4s_imm_t1() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t1:
-; CHECK: movi.4s v0, #75
- ret <4 x i32> <i32 75, i32 75, i32 75, i32 75>
-}
-
-define <4 x i32> @movi_4s_imm_t2() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t2:
-; CHECK: movi.4s v0, #75, lsl #8
- ret <4 x i32> <i32 19200, i32 19200, i32 19200, i32 19200>
-}
-
-define <4 x i32> @movi_4s_imm_t3() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t3:
-; CHECK: movi.4s v0, #75, lsl #16
- ret <4 x i32> <i32 4915200, i32 4915200, i32 4915200, i32 4915200>
-}
-
-define <4 x i32> @movi_4s_imm_t4() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t4:
-; CHECK: movi.4s v0, #75, lsl #24
- ret <4 x i32> <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200>
-}
-
-define <8 x i16> @movi_8h_imm_t5() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_8h_imm_t5:
-; CHECK: movi.8h v0, #75
- ret <8 x i16> <i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16 75, i16 75>
-}
-
-; rdar://11989841
-define <8 x i16> @movi_8h_imm_t6() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_8h_imm_t6:
-; CHECK: movi.8h v0, #75, lsl #8
- ret <8 x i16> <i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200, i16 19200>
-}
-
-define <4 x i32> @movi_4s_imm_t7() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t7:
-; CHECK: movi.4s v0, #75, msl #8
- ret <4 x i32> <i32 19455, i32 19455, i32 19455, i32 19455>
-}
-
-define <4 x i32> @movi_4s_imm_t8() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t8:
-; CHECK: movi.4s v0, #75, msl #16
- ret <4 x i32> <i32 4980735, i32 4980735, i32 4980735, i32 4980735>
-}
-
-define <16 x i8> @movi_16b_imm_t9() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_16b_imm_t9:
-; CHECK: movi.16b v0, #75
- ret <16 x i8> <i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75,
- i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75, i8 75>
-}
-
-define <2 x i64> @movi_2d_imm_t10() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_2d_imm_t10:
-; CHECK: movi.2d v0, #0xff00ff00ff00ff
- ret <2 x i64> <i64 71777214294589695, i64 71777214294589695>
-}
-
-define <4 x i32> @movi_4s_imm_t11() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_4s_imm_t11:
-; CHECK: fmov.4s v0, #-3.281250e-01
- ret <4 x i32> <i32 3198681088, i32 3198681088, i32 3198681088, i32 3198681088>
-}
-
-define <2 x i64> @movi_2d_imm_t12() nounwind readnone ssp {
-entry:
-; CHECK-LABEL: movi_2d_imm_t12:
-; CHECK: fmov.2d v0, #-1.718750e-01
- ret <2 x i64> <i64 13818732506632945664, i64 13818732506632945664>
-}
diff --git a/test/CodeGen/ARM64/vector-ldst.ll b/test/CodeGen/ARM64/vector-ldst.ll
deleted file mode 100644
index 154160e..0000000
--- a/test/CodeGen/ARM64/vector-ldst.ll
+++ /dev/null
@@ -1,601 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
-
-; rdar://9428579
-
-%type1 = type { <16 x i8> }
-%type2 = type { <8 x i8> }
-%type3 = type { <4 x i16> }
-
-
-define hidden fastcc void @t1(%type1** %argtable) nounwind {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: ldr x[[REG:[0-9]+]], [x0]
-; CHECK: str q0, [x[[REG]]]
- %tmp1 = load %type1** %argtable, align 8
- %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
- store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
- ret void
-}
-
-define hidden fastcc void @t2(%type2** %argtable) nounwind {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: ldr x[[REG:[0-9]+]], [x0]
-; CHECK: str d0, [x[[REG]]]
- %tmp1 = load %type2** %argtable, align 8
- %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
- store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
- ret void
-}
-
-; Add a bunch of tests for rdar://11246289
-
-@globalArray64x2 = common global <2 x i64>* null, align 8
-@globalArray32x4 = common global <4 x i32>* null, align 8
-@globalArray16x8 = common global <8 x i16>* null, align 8
-@globalArray8x16 = common global <16 x i8>* null, align 8
-@globalArray64x1 = common global <1 x i64>* null, align 8
-@globalArray32x2 = common global <2 x i32>* null, align 8
-@globalArray16x4 = common global <4 x i16>* null, align 8
-@globalArray8x8 = common global <8 x i8>* null, align 8
-@floatglobalArray64x2 = common global <2 x double>* null, align 8
-@floatglobalArray32x4 = common global <4 x float>* null, align 8
-@floatglobalArray64x1 = common global <1 x double>* null, align 8
-@floatglobalArray32x2 = common global <2 x float>* null, align 8
-
-define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_64x2:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
- %tmp = load <2 x i64>* %arrayidx, align 16
- %tmp1 = load <2 x i64>** @globalArray64x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
- store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_64x2:
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
- %tmp = load <2 x i64>* %arrayidx, align 16
- %tmp1 = load <2 x i64>** @globalArray64x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
- store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_32x4:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
- %tmp = load <4 x i32>* %arrayidx, align 16
- %tmp1 = load <4 x i32>** @globalArray32x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
- store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_32x4:
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
- %tmp = load <4 x i32>* %arrayidx, align 16
- %tmp1 = load <4 x i32>** @globalArray32x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
- store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_16x8:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
- %tmp = load <8 x i16>* %arrayidx, align 16
- %tmp1 = load <8 x i16>** @globalArray16x8, align 8
- %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
- store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_16x8:
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
- %tmp = load <8 x i16>* %arrayidx, align 16
- %tmp1 = load <8 x i16>** @globalArray16x8, align 8
- %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
- store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_8x16:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
- %tmp = load <16 x i8>* %arrayidx, align 16
- %tmp1 = load <16 x i8>** @globalArray8x16, align 8
- %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
- store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_8x16:
-; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
- %tmp = load <16 x i8>* %arrayidx, align 16
- %tmp1 = load <16 x i8>** @globalArray8x16, align 8
- %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
- store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
- ret void
-}
-
-define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_64x1:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
- %tmp = load <1 x i64>* %arrayidx, align 8
- %tmp1 = load <1 x i64>** @globalArray64x1, align 8
- %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
- store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
- ret void
-}
-
-define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_64x1:
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
- %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
- %tmp = load <1 x i64>* %arrayidx, align 8
- %tmp1 = load <1 x i64>** @globalArray64x1, align 8
- %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
- store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
- ret void
-}
-
-define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_32x2:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
- %tmp = load <2 x i32>* %arrayidx, align 8
- %tmp1 = load <2 x i32>** @globalArray32x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
- store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
- ret void
-}
-
-define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_32x2:
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
- %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
- %tmp = load <2 x i32>* %arrayidx, align 8
- %tmp1 = load <2 x i32>** @globalArray32x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
- store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
- ret void
-}
-
-define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_16x4:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
- %tmp = load <4 x i16>* %arrayidx, align 8
- %tmp1 = load <4 x i16>** @globalArray16x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
- store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
- ret void
-}
-
-define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
-entry:
-; CHECK-LABEL: fct2_16x4:
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
- %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
- %tmp = load <4 x i16>* %arrayidx, align 8
- %tmp1 = load <4 x i16>** @globalArray16x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
- store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
- ret void
-}
-
-define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
-entry:
-; CHECK-LABEL: fct1_8x8:
-; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
-; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
-; CHECK: ldr [[BASE:x[0-9]+]],
-; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
- %tmp = load <8 x i8>* %arrayidx, align 8
- %tmp1 = load <8 x i8>** @globalArray8x8, align 8
- %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
- store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
- ret void
-}
-
-; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
-; registers for unscaled vector accesses
-@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
-
-define <1 x i64> @fct0() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct0:
-; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
- ret <1 x i64> %0
-}
-
-define <2 x i32> @fct1() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct1:
-; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
- ret <2 x i32> %0
-}
-
-define <4 x i16> @fct2() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct2:
-; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
- ret <4 x i16> %0
-}
-
-define <8 x i8> @fct3() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct3:
-; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
- ret <8 x i8> %0
-}
-
-define <2 x i64> @fct4() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct4:
-; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
- ret <2 x i64> %0
-}
-
-define <4 x i32> @fct5() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct5:
-; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
- ret <4 x i32> %0
-}
-
-define <8 x i16> @fct6() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct6:
-; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
- ret <8 x i16> %0
-}
-
-define <16 x i8> @fct7() nounwind readonly ssp {
-entry:
-; CHECK-LABEL: fct7:
-; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
- ret <16 x i8> %0
-}
-
-define void @fct8() nounwind ssp {
-entry:
-; CHECK-LABEL: fct8:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
- store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
- ret void
-}
-
-define void @fct9() nounwind ssp {
-entry:
-; CHECK-LABEL: fct9:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
- store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
- ret void
-}
-
-define void @fct10() nounwind ssp {
-entry:
-; CHECK-LABEL: fct10:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
- store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
- ret void
-}
-
-define void @fct11() nounwind ssp {
-entry:
-; CHECK-LABEL: fct11:
-; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
- store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
- ret void
-}
-
-define void @fct12() nounwind ssp {
-entry:
-; CHECK-LABEL: fct12:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
- store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
- ret void
-}
-
-define void @fct13() nounwind ssp {
-entry:
-; CHECK-LABEL: fct13:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
- store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
- ret void
-}
-
-define void @fct14() nounwind ssp {
-entry:
-; CHECK-LABEL: fct14:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
- store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
- ret void
-}
-
-define void @fct15() nounwind ssp {
-entry:
-; CHECK-LABEL: fct15:
-; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
-; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
- store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
- ret void
-}
-
-; Check the building of a vector from a single loaded value.
-; Part of <rdar://problem/14170854>
-;
-; Single loads with immediate offset.
-define <8 x i8> @fct16(i8* nocapture %sp0) {
-; CHECK-LABEL: fct16:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <8 x i8> %vec, %vec
- ret <8 x i8> %vmull.i
-}
-
-define <16 x i8> @fct17(i8* nocapture %sp0) {
-; CHECK-LABEL: fct17:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
-; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <16 x i8> %vec, %vec
- ret <16 x i8> %vmull.i
-}
-
-define <4 x i16> @fct18(i16* nocapture %sp0) {
-; CHECK-LABEL: fct18:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
-; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <4 x i16> %vec, %vec
- ret <4 x i16> %vmull.i
-}
-
-define <8 x i16> @fct19(i16* nocapture %sp0) {
-; CHECK-LABEL: fct19:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
-; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <8 x i16> %vec, %vec
- ret <8 x i16> %vmull.i
-}
-
-define <2 x i32> @fct20(i32* nocapture %sp0) {
-; CHECK-LABEL: fct20:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <2 x i32> %vec, %vec
- ret <2 x i32> %vmull.i
-}
-
-define <4 x i32> @fct21(i32* nocapture %sp0) {
-; CHECK-LABEL: fct21:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
-; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <4 x i32> %vec, %vec
- ret <4 x i32> %vmull.i
-}
-
-define <1 x i64> @fct22(i64* nocapture %sp0) {
-; CHECK-LABEL: fct22:
-; CHECK: ldr d0, [x0, #8]
-entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
- ret <1 x i64> %vec
-}
-
-define <2 x i64> @fct23(i64* nocapture %sp0) {
-; CHECK-LABEL: fct23:
-; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
-entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
- ret <2 x i64> %vec
-}
-
-;
-; Single loads with register offset.
-define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct24:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
-; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <8 x i8> %vec, %vec
- ret <8 x i8> %vmull.i
-}
-
-define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct25:
-; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
-; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
- %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <16 x i8> %vec, %vec
- ret <16 x i8> %vmull.i
-}
-
-define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct26:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
-; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <4 x i16> %vec, %vec
- ret <4 x i16> %vmull.i
-}
-
-define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct27:
-; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
-; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
- %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <8 x i16> %vec, %vec
- ret <8 x i16> %vmull.i
-}
-
-define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct28:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
-; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <2 x i32> %vec, %vec
- ret <2 x i32> %vmull.i
-}
-
-define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct29:
-; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
-; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
-entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
- %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
- %vmull.i = mul <4 x i32> %vec, %vec
- ret <4 x i32> %vmull.i
-}
-
-define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct30:
-; CHECK: ldr d0, [x0, x1, lsl #3]
-entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
- ret <1 x i64> %vec
-}
-
-define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: fct31:
-; CHECK: ldr d0, [x0, x1, lsl #3]
-entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
- %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
- ret <2 x i64> %vec
-}
diff --git a/test/CodeGen/ARM64/vext.ll b/test/CodeGen/ARM64/vext.ll
deleted file mode 100644
index c820439..0000000
--- a/test/CodeGen/ARM64/vext.ll
+++ /dev/null
@@ -1,464 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define void @test_vext_s8() nounwind ssp {
- ; CHECK-LABEL: test_vext_s8:
- ; CHECK: {{ext.8.*#1}}
- %xS8x8 = alloca <8 x i8>, align 8
- %__a = alloca <8 x i8>, align 8
- %__b = alloca <8 x i8>, align 8
- %tmp = load <8 x i8>* %xS8x8, align 8
- store <8 x i8> %tmp, <8 x i8>* %__a, align 8
- %tmp1 = load <8 x i8>* %xS8x8, align 8
- store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
- %tmp2 = load <8 x i8>* %__a, align 8
- %tmp3 = load <8 x i8>* %__b, align 8
- %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
- store <8 x i8> %vext, <8 x i8>* %xS8x8, align 8
- ret void
-}
-
-define void @test_vext_u8() nounwind ssp {
- ; CHECK-LABEL: test_vext_u8:
- ; CHECK: {{ext.8.*#2}}
- %xU8x8 = alloca <8 x i8>, align 8
- %__a = alloca <8 x i8>, align 8
- %__b = alloca <8 x i8>, align 8
- %tmp = load <8 x i8>* %xU8x8, align 8
- store <8 x i8> %tmp, <8 x i8>* %__a, align 8
- %tmp1 = load <8 x i8>* %xU8x8, align 8
- store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
- %tmp2 = load <8 x i8>* %__a, align 8
- %tmp3 = load <8 x i8>* %__b, align 8
- %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
- store <8 x i8> %vext, <8 x i8>* %xU8x8, align 8
- ret void
-}
-
-define void @test_vext_p8() nounwind ssp {
- ; CHECK-LABEL: test_vext_p8:
- ; CHECK: {{ext.8.*#3}}
- %xP8x8 = alloca <8 x i8>, align 8
- %__a = alloca <8 x i8>, align 8
- %__b = alloca <8 x i8>, align 8
- %tmp = load <8 x i8>* %xP8x8, align 8
- store <8 x i8> %tmp, <8 x i8>* %__a, align 8
- %tmp1 = load <8 x i8>* %xP8x8, align 8
- store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
- %tmp2 = load <8 x i8>* %__a, align 8
- %tmp3 = load <8 x i8>* %__b, align 8
- %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
- store <8 x i8> %vext, <8 x i8>* %xP8x8, align 8
- ret void
-}
-
-define void @test_vext_s16() nounwind ssp {
- ; CHECK-LABEL: test_vext_s16:
- ; CHECK: {{ext.8.*#2}}
- %xS16x4 = alloca <4 x i16>, align 8
- %__a = alloca <4 x i16>, align 8
- %__b = alloca <4 x i16>, align 8
- %tmp = load <4 x i16>* %xS16x4, align 8
- store <4 x i16> %tmp, <4 x i16>* %__a, align 8
- %tmp1 = load <4 x i16>* %xS16x4, align 8
- store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
- %tmp2 = load <4 x i16>* %__a, align 8
- %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
- %tmp4 = load <4 x i16>* %__b, align 8
- %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
- %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
- %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
- store <4 x i16> %vext, <4 x i16>* %xS16x4, align 8
- ret void
-}
-
-define void @test_vext_u16() nounwind ssp {
- ; CHECK-LABEL: test_vext_u16:
- ; CHECK: {{ext.8.*#4}}
- %xU16x4 = alloca <4 x i16>, align 8
- %__a = alloca <4 x i16>, align 8
- %__b = alloca <4 x i16>, align 8
- %tmp = load <4 x i16>* %xU16x4, align 8
- store <4 x i16> %tmp, <4 x i16>* %__a, align 8
- %tmp1 = load <4 x i16>* %xU16x4, align 8
- store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
- %tmp2 = load <4 x i16>* %__a, align 8
- %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
- %tmp4 = load <4 x i16>* %__b, align 8
- %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
- %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
- %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- store <4 x i16> %vext, <4 x i16>* %xU16x4, align 8
- ret void
-}
-
-define void @test_vext_p16() nounwind ssp {
- ; CHECK-LABEL: test_vext_p16:
- ; CHECK: {{ext.8.*#6}}
- %xP16x4 = alloca <4 x i16>, align 8
- %__a = alloca <4 x i16>, align 8
- %__b = alloca <4 x i16>, align 8
- %tmp = load <4 x i16>* %xP16x4, align 8
- store <4 x i16> %tmp, <4 x i16>* %__a, align 8
- %tmp1 = load <4 x i16>* %xP16x4, align 8
- store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
- %tmp2 = load <4 x i16>* %__a, align 8
- %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
- %tmp4 = load <4 x i16>* %__b, align 8
- %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
- %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
- %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
- store <4 x i16> %vext, <4 x i16>* %xP16x4, align 8
- ret void
-}
-
-define void @test_vext_s32() nounwind ssp {
- ; CHECK-LABEL: test_vext_s32:
- ; CHECK: {{ext.8.*#4}}
- %xS32x2 = alloca <2 x i32>, align 8
- %__a = alloca <2 x i32>, align 8
- %__b = alloca <2 x i32>, align 8
- %tmp = load <2 x i32>* %xS32x2, align 8
- store <2 x i32> %tmp, <2 x i32>* %__a, align 8
- %tmp1 = load <2 x i32>* %xS32x2, align 8
- store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
- %tmp2 = load <2 x i32>* %__a, align 8
- %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
- %tmp4 = load <2 x i32>* %__b, align 8
- %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
- %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
- %vext = shufflevector <2 x i32> %tmp6, <2 x i32> %tmp7, <2 x i32> <i32 1, i32 2>
- store <2 x i32> %vext, <2 x i32>* %xS32x2, align 8
- ret void
-}
-
-define void @test_vext_u32() nounwind ssp {
- ; CHECK-LABEL: test_vext_u32:
- ; CHECK: {{ext.8.*#4}}
- %xU32x2 = alloca <2 x i32>, align 8
- %__a = alloca <2 x i32>, align 8
- %__b = alloca <2 x i32>, align 8
- %tmp = load <2 x i32>* %xU32x2, align 8
- store <2 x i32> %tmp, <2 x i32>* %__a, align 8
- %tmp1 = load <2 x i32>* %xU32x2, align 8
- store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
- %tmp2 = load <2 x i32>* %__a, align 8
- %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
- %tmp4 = load <2 x i32>* %__b, align 8
- %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
- %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
- %vext = shufflevector <2 x i32> %tmp6, <2 x i32> %tmp7, <2 x i32> <i32 1, i32 2>
- store <2 x i32> %vext, <2 x i32>* %xU32x2, align 8
- ret void
-}
-
-define void @test_vext_f32() nounwind ssp {
- ; CHECK-LABEL: test_vext_f32:
- ; CHECK: {{ext.8.*#4}}
- %xF32x2 = alloca <2 x float>, align 8
- %__a = alloca <2 x float>, align 8
- %__b = alloca <2 x float>, align 8
- %tmp = load <2 x float>* %xF32x2, align 8
- store <2 x float> %tmp, <2 x float>* %__a, align 8
- %tmp1 = load <2 x float>* %xF32x2, align 8
- store <2 x float> %tmp1, <2 x float>* %__b, align 8
- %tmp2 = load <2 x float>* %__a, align 8
- %tmp3 = bitcast <2 x float> %tmp2 to <8 x i8>
- %tmp4 = load <2 x float>* %__b, align 8
- %tmp5 = bitcast <2 x float> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <2 x float>
- %tmp7 = bitcast <8 x i8> %tmp5 to <2 x float>
- %vext = shufflevector <2 x float> %tmp6, <2 x float> %tmp7, <2 x i32> <i32 1, i32 2>
- store <2 x float> %vext, <2 x float>* %xF32x2, align 8
- ret void
-}
-
-define void @test_vext_s64() nounwind ssp {
- ; CHECK-LABEL: test_vext_s64:
- ; CHECK_FIXME: {{ext.8.*#1}}
- ; this just turns into a load of the second element
- %xS64x1 = alloca <1 x i64>, align 8
- %__a = alloca <1 x i64>, align 8
- %__b = alloca <1 x i64>, align 8
- %tmp = load <1 x i64>* %xS64x1, align 8
- store <1 x i64> %tmp, <1 x i64>* %__a, align 8
- %tmp1 = load <1 x i64>* %xS64x1, align 8
- store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
- %tmp2 = load <1 x i64>* %__a, align 8
- %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
- %tmp4 = load <1 x i64>* %__b, align 8
- %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
- %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
- %vext = shufflevector <1 x i64> %tmp6, <1 x i64> %tmp7, <1 x i32> <i32 1>
- store <1 x i64> %vext, <1 x i64>* %xS64x1, align 8
- ret void
-}
-
-define void @test_vext_u64() nounwind ssp {
- ; CHECK-LABEL: test_vext_u64:
- ; CHECK_FIXME: {{ext.8.*#1}}
- ; this is turned into a simple load of the second element
- %xU64x1 = alloca <1 x i64>, align 8
- %__a = alloca <1 x i64>, align 8
- %__b = alloca <1 x i64>, align 8
- %tmp = load <1 x i64>* %xU64x1, align 8
- store <1 x i64> %tmp, <1 x i64>* %__a, align 8
- %tmp1 = load <1 x i64>* %xU64x1, align 8
- store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
- %tmp2 = load <1 x i64>* %__a, align 8
- %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
- %tmp4 = load <1 x i64>* %__b, align 8
- %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
- %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
- %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
- %vext = shufflevector <1 x i64> %tmp6, <1 x i64> %tmp7, <1 x i32> <i32 1>
- store <1 x i64> %vext, <1 x i64>* %xU64x1, align 8
- ret void
-}
-
-define void @test_vextq_s8() nounwind ssp {
- ; CHECK-LABEL: test_vextq_s8:
- ; CHECK: {{ext.16.*#4}}
- %xS8x16 = alloca <16 x i8>, align 16
- %__a = alloca <16 x i8>, align 16
- %__b = alloca <16 x i8>, align 16
- %tmp = load <16 x i8>* %xS8x16, align 16
- store <16 x i8> %tmp, <16 x i8>* %__a, align 16
- %tmp1 = load <16 x i8>* %xS8x16, align 16
- store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
- %tmp2 = load <16 x i8>* %__a, align 16
- %tmp3 = load <16 x i8>* %__b, align 16
- %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
- store <16 x i8> %vext, <16 x i8>* %xS8x16, align 16
- ret void
-}
-
-define void @test_vextq_u8() nounwind ssp {
- ; CHECK-LABEL: test_vextq_u8:
- ; CHECK: {{ext.16.*#5}}
- %xU8x16 = alloca <16 x i8>, align 16
- %__a = alloca <16 x i8>, align 16
- %__b = alloca <16 x i8>, align 16
- %tmp = load <16 x i8>* %xU8x16, align 16
- store <16 x i8> %tmp, <16 x i8>* %__a, align 16
- %tmp1 = load <16 x i8>* %xU8x16, align 16
- store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
- %tmp2 = load <16 x i8>* %__a, align 16
- %tmp3 = load <16 x i8>* %__b, align 16
- %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
- store <16 x i8> %vext, <16 x i8>* %xU8x16, align 16
- ret void
-}
-
-define void @test_vextq_p8() nounwind ssp {
- ; CHECK-LABEL: test_vextq_p8:
- ; CHECK: {{ext.16.*#6}}
- %xP8x16 = alloca <16 x i8>, align 16
- %__a = alloca <16 x i8>, align 16
- %__b = alloca <16 x i8>, align 16
- %tmp = load <16 x i8>* %xP8x16, align 16
- store <16 x i8> %tmp, <16 x i8>* %__a, align 16
- %tmp1 = load <16 x i8>* %xP8x16, align 16
- store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
- %tmp2 = load <16 x i8>* %__a, align 16
- %tmp3 = load <16 x i8>* %__b, align 16
- %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
- store <16 x i8> %vext, <16 x i8>* %xP8x16, align 16
- ret void
-}
-
-define void @test_vextq_s16() nounwind ssp {
- ; CHECK-LABEL: test_vextq_s16:
- ; CHECK: {{ext.16.*#14}}
- %xS16x8 = alloca <8 x i16>, align 16
- %__a = alloca <8 x i16>, align 16
- %__b = alloca <8 x i16>, align 16
- %tmp = load <8 x i16>* %xS16x8, align 16
- store <8 x i16> %tmp, <8 x i16>* %__a, align 16
- %tmp1 = load <8 x i16>* %xS16x8, align 16
- store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
- %tmp2 = load <8 x i16>* %__a, align 16
- %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
- %tmp4 = load <8 x i16>* %__b, align 16
- %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
- %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
- %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
- store <8 x i16> %vext, <8 x i16>* %xS16x8, align 16
- ret void
-}
-
-define void @test_vextq_u16() nounwind ssp {
- ; CHECK-LABEL: test_vextq_u16:
- ; CHECK: {{ext.16.*#8}}
- %xU16x8 = alloca <8 x i16>, align 16
- %__a = alloca <8 x i16>, align 16
- %__b = alloca <8 x i16>, align 16
- %tmp = load <8 x i16>* %xU16x8, align 16
- store <8 x i16> %tmp, <8 x i16>* %__a, align 16
- %tmp1 = load <8 x i16>* %xU16x8, align 16
- store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
- %tmp2 = load <8 x i16>* %__a, align 16
- %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
- %tmp4 = load <8 x i16>* %__b, align 16
- %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
- %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
- %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- store <8 x i16> %vext, <8 x i16>* %xU16x8, align 16
- ret void
-}
-
-define void @test_vextq_p16() nounwind ssp {
- ; CHECK-LABEL: test_vextq_p16:
- ; CHECK: {{ext.16.*#10}}
- %xP16x8 = alloca <8 x i16>, align 16
- %__a = alloca <8 x i16>, align 16
- %__b = alloca <8 x i16>, align 16
- %tmp = load <8 x i16>* %xP16x8, align 16
- store <8 x i16> %tmp, <8 x i16>* %__a, align 16
- %tmp1 = load <8 x i16>* %xP16x8, align 16
- store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
- %tmp2 = load <8 x i16>* %__a, align 16
- %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
- %tmp4 = load <8 x i16>* %__b, align 16
- %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
- %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
- %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
- store <8 x i16> %vext, <8 x i16>* %xP16x8, align 16
- ret void
-}
-
-define void @test_vextq_s32() nounwind ssp {
- ; CHECK-LABEL: test_vextq_s32:
- ; CHECK: {{ext.16.*#4}}
- %xS32x4 = alloca <4 x i32>, align 16
- %__a = alloca <4 x i32>, align 16
- %__b = alloca <4 x i32>, align 16
- %tmp = load <4 x i32>* %xS32x4, align 16
- store <4 x i32> %tmp, <4 x i32>* %__a, align 16
- %tmp1 = load <4 x i32>* %xS32x4, align 16
- store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
- %tmp2 = load <4 x i32>* %__a, align 16
- %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
- %tmp4 = load <4 x i32>* %__b, align 16
- %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
- %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
- %vext = shufflevector <4 x i32> %tmp6, <4 x i32> %tmp7, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
- store <4 x i32> %vext, <4 x i32>* %xS32x4, align 16
- ret void
-}
-
-define void @test_vextq_u32() nounwind ssp {
- ; CHECK-LABEL: test_vextq_u32:
- ; CHECK: {{ext.16.*#8}}
- %xU32x4 = alloca <4 x i32>, align 16
- %__a = alloca <4 x i32>, align 16
- %__b = alloca <4 x i32>, align 16
- %tmp = load <4 x i32>* %xU32x4, align 16
- store <4 x i32> %tmp, <4 x i32>* %__a, align 16
- %tmp1 = load <4 x i32>* %xU32x4, align 16
- store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
- %tmp2 = load <4 x i32>* %__a, align 16
- %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
- %tmp4 = load <4 x i32>* %__b, align 16
- %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
- %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
- %vext = shufflevector <4 x i32> %tmp6, <4 x i32> %tmp7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- store <4 x i32> %vext, <4 x i32>* %xU32x4, align 16
- ret void
-}
-
-define void @test_vextq_f32() nounwind ssp {
- ; CHECK-LABEL: test_vextq_f32:
- ; CHECK: {{ext.16.*#12}}
- %xF32x4 = alloca <4 x float>, align 16
- %__a = alloca <4 x float>, align 16
- %__b = alloca <4 x float>, align 16
- %tmp = load <4 x float>* %xF32x4, align 16
- store <4 x float> %tmp, <4 x float>* %__a, align 16
- %tmp1 = load <4 x float>* %xF32x4, align 16
- store <4 x float> %tmp1, <4 x float>* %__b, align 16
- %tmp2 = load <4 x float>* %__a, align 16
- %tmp3 = bitcast <4 x float> %tmp2 to <16 x i8>
- %tmp4 = load <4 x float>* %__b, align 16
- %tmp5 = bitcast <4 x float> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <4 x float>
- %tmp7 = bitcast <16 x i8> %tmp5 to <4 x float>
- %vext = shufflevector <4 x float> %tmp6, <4 x float> %tmp7, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
- store <4 x float> %vext, <4 x float>* %xF32x4, align 16
- ret void
-}
-
-define void @test_vextq_s64() nounwind ssp {
- ; CHECK-LABEL: test_vextq_s64:
- ; CHECK: {{ext.16.*#8}}
- %xS64x2 = alloca <2 x i64>, align 16
- %__a = alloca <2 x i64>, align 16
- %__b = alloca <2 x i64>, align 16
- %tmp = load <2 x i64>* %xS64x2, align 16
- store <2 x i64> %tmp, <2 x i64>* %__a, align 16
- %tmp1 = load <2 x i64>* %xS64x2, align 16
- store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
- %tmp2 = load <2 x i64>* %__a, align 16
- %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
- %tmp4 = load <2 x i64>* %__b, align 16
- %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
- %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
- %vext = shufflevector <2 x i64> %tmp6, <2 x i64> %tmp7, <2 x i32> <i32 1, i32 2>
- store <2 x i64> %vext, <2 x i64>* %xS64x2, align 16
- ret void
-}
-
-define void @test_vextq_u64() nounwind ssp {
- ; CHECK-LABEL: test_vextq_u64:
- ; CHECK: {{ext.16.*#8}}
- %xU64x2 = alloca <2 x i64>, align 16
- %__a = alloca <2 x i64>, align 16
- %__b = alloca <2 x i64>, align 16
- %tmp = load <2 x i64>* %xU64x2, align 16
- store <2 x i64> %tmp, <2 x i64>* %__a, align 16
- %tmp1 = load <2 x i64>* %xU64x2, align 16
- store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
- %tmp2 = load <2 x i64>* %__a, align 16
- %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
- %tmp4 = load <2 x i64>* %__b, align 16
- %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
- %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
- %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
- %vext = shufflevector <2 x i64> %tmp6, <2 x i64> %tmp7, <2 x i32> <i32 1, i32 2>
- store <2 x i64> %vext, <2 x i64>* %xU64x2, align 16
- ret void
-}
-
-; Shuffles with an undef second operand can also use an EXT, as long as
-; the indices wrap and stay sequential.
-; rdar://12051674
-define <16 x i8> @vext1(<16 x i8> %_a) nounwind {
-; CHECK-LABEL: vext1:
-; CHECK: ext.16b v0, v0, v0, #8
- %vext = shufflevector <16 x i8> %_a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <16 x i8> %vext
-}
-
-; <rdar://problem/12212062>
-define <2 x i64> @vext2(<2 x i64> %p0, <2 x i64> %p1) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vext2:
-; CHECK: ext.16b v1, v1, v1, #8
-; CHECK: ext.16b v0, v0, v0, #8
-; CHECK: add.2d v0, v0, v1
- %t0 = shufflevector <2 x i64> %p1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
- %t1 = shufflevector <2 x i64> %p0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
- %t2 = add <2 x i64> %t1, %t0
- ret <2 x i64> %t2
-}
diff --git a/test/CodeGen/ARM64/vfloatintrinsics.ll b/test/CodeGen/ARM64/vfloatintrinsics.ll
deleted file mode 100644
index a8c882b..0000000
--- a/test/CodeGen/ARM64/vfloatintrinsics.ll
+++ /dev/null
@@ -1,375 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-;;; Float vectors
-
-%v2f32 = type <2 x float>
-; CHECK: test_v2f32.sqrt:
-define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
- ; CHECK: fsqrt.2s
- %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.powi:
-define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
- ; CHECK: pow
- %1 = call %v2f32 @llvm.powi.v2f32(%v2f32 %a, i32 %b)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.sin:
-define %v2f32 @test_v2f32.sin(%v2f32 %a) {
- ; CHECK: sin
- %1 = call %v2f32 @llvm.sin.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.cos:
-define %v2f32 @test_v2f32.cos(%v2f32 %a) {
- ; CHECK: cos
- %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.pow:
-define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
- ; CHECK: pow
- %1 = call %v2f32 @llvm.pow.v2f32(%v2f32 %a, %v2f32 %b)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.exp:
-define %v2f32 @test_v2f32.exp(%v2f32 %a) {
- ; CHECK: exp
- %1 = call %v2f32 @llvm.exp.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.exp2:
-define %v2f32 @test_v2f32.exp2(%v2f32 %a) {
- ; CHECK: exp
- %1 = call %v2f32 @llvm.exp2.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.log:
-define %v2f32 @test_v2f32.log(%v2f32 %a) {
- ; CHECK: log
- %1 = call %v2f32 @llvm.log.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.log10:
-define %v2f32 @test_v2f32.log10(%v2f32 %a) {
- ; CHECK: log
- %1 = call %v2f32 @llvm.log10.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.log2:
-define %v2f32 @test_v2f32.log2(%v2f32 %a) {
- ; CHECK: log
- %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.fma:
-define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
- ; CHECK: fma
- %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.fabs:
-define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
- ; CHECK: fabs
- %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.floor:
-define %v2f32 @test_v2f32.floor(%v2f32 %a) {
- ; CHECK: frintm.2s
- %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.ceil:
-define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
- ; CHECK: frintp.2s
- %1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.trunc:
-define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
- ; CHECK: frintz.2s
- %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.rint:
-define %v2f32 @test_v2f32.rint(%v2f32 %a) {
- ; CHECK: frintx.2s
- %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-; CHECK: test_v2f32.nearbyint:
-define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
- ; CHECK: frinti.2s
- %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
-
-declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
-declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0
-declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
-declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
-declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
-declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
-declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
-declare %v2f32 @llvm.log.v2f32(%v2f32) #0
-declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
-declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
-declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
-declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
-declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
-declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
-declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
-declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
-declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
-
-;;;
-
-%v4f32 = type <4 x float>
-; CHECK: test_v4f32.sqrt:
-define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
- ; CHECK: fsqrt.4s
- %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.powi:
-define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) {
- ; CHECK: pow
- %1 = call %v4f32 @llvm.powi.v4f32(%v4f32 %a, i32 %b)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.sin:
-define %v4f32 @test_v4f32.sin(%v4f32 %a) {
- ; CHECK: sin
- %1 = call %v4f32 @llvm.sin.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.cos:
-define %v4f32 @test_v4f32.cos(%v4f32 %a) {
- ; CHECK: cos
- %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.pow:
-define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
- ; CHECK: pow
- %1 = call %v4f32 @llvm.pow.v4f32(%v4f32 %a, %v4f32 %b)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.exp:
-define %v4f32 @test_v4f32.exp(%v4f32 %a) {
- ; CHECK: exp
- %1 = call %v4f32 @llvm.exp.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.exp2:
-define %v4f32 @test_v4f32.exp2(%v4f32 %a) {
- ; CHECK: exp
- %1 = call %v4f32 @llvm.exp2.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.log:
-define %v4f32 @test_v4f32.log(%v4f32 %a) {
- ; CHECK: log
- %1 = call %v4f32 @llvm.log.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.log10:
-define %v4f32 @test_v4f32.log10(%v4f32 %a) {
- ; CHECK: log
- %1 = call %v4f32 @llvm.log10.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.log2:
-define %v4f32 @test_v4f32.log2(%v4f32 %a) {
- ; CHECK: log
- %1 = call %v4f32 @llvm.log2.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.fma:
-define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
- ; CHECK: fma
- %1 = call %v4f32 @llvm.fma.v4f32(%v4f32 %a, %v4f32 %b, %v4f32 %c)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.fabs:
-define %v4f32 @test_v4f32.fabs(%v4f32 %a) {
- ; CHECK: fabs
- %1 = call %v4f32 @llvm.fabs.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.floor:
-define %v4f32 @test_v4f32.floor(%v4f32 %a) {
- ; CHECK: frintm.4s
- %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.ceil:
-define %v4f32 @test_v4f32.ceil(%v4f32 %a) {
- ; CHECK: frintp.4s
- %1 = call %v4f32 @llvm.ceil.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.trunc:
-define %v4f32 @test_v4f32.trunc(%v4f32 %a) {
- ; CHECK: frintz.4s
- %1 = call %v4f32 @llvm.trunc.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.rint:
-define %v4f32 @test_v4f32.rint(%v4f32 %a) {
- ; CHECK: frintx.4s
- %1 = call %v4f32 @llvm.rint.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-; CHECK: test_v4f32.nearbyint:
-define %v4f32 @test_v4f32.nearbyint(%v4f32 %a) {
- ; CHECK: frinti.4s
- %1 = call %v4f32 @llvm.nearbyint.v4f32(%v4f32 %a)
- ret %v4f32 %1
-}
-
-declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
-declare %v4f32 @llvm.powi.v4f32(%v4f32, i32) #0
-declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
-declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
-declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
-declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
-declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
-declare %v4f32 @llvm.log.v4f32(%v4f32) #0
-declare %v4f32 @llvm.log10.v4f32(%v4f32) #0
-declare %v4f32 @llvm.log2.v4f32(%v4f32) #0
-declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0
-declare %v4f32 @llvm.fabs.v4f32(%v4f32) #0
-declare %v4f32 @llvm.floor.v4f32(%v4f32) #0
-declare %v4f32 @llvm.ceil.v4f32(%v4f32) #0
-declare %v4f32 @llvm.trunc.v4f32(%v4f32) #0
-declare %v4f32 @llvm.rint.v4f32(%v4f32) #0
-declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0
-
-;;; Double vector
-
-%v2f64 = type <2 x double>
-; CHECK: test_v2f64.sqrt:
-define %v2f64 @test_v2f64.sqrt(%v2f64 %a) {
- ; CHECK: fsqrt.2d
- %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.powi:
-define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) {
- ; CHECK: pow
- %1 = call %v2f64 @llvm.powi.v2f64(%v2f64 %a, i32 %b)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.sin:
-define %v2f64 @test_v2f64.sin(%v2f64 %a) {
- ; CHECK: sin
- %1 = call %v2f64 @llvm.sin.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.cos:
-define %v2f64 @test_v2f64.cos(%v2f64 %a) {
- ; CHECK: cos
- %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.pow:
-define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
- ; CHECK: pow
- %1 = call %v2f64 @llvm.pow.v2f64(%v2f64 %a, %v2f64 %b)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.exp:
-define %v2f64 @test_v2f64.exp(%v2f64 %a) {
- ; CHECK: exp
- %1 = call %v2f64 @llvm.exp.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.exp2:
-define %v2f64 @test_v2f64.exp2(%v2f64 %a) {
- ; CHECK: exp
- %1 = call %v2f64 @llvm.exp2.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.log:
-define %v2f64 @test_v2f64.log(%v2f64 %a) {
- ; CHECK: log
- %1 = call %v2f64 @llvm.log.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.log10:
-define %v2f64 @test_v2f64.log10(%v2f64 %a) {
- ; CHECK: log
- %1 = call %v2f64 @llvm.log10.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.log2:
-define %v2f64 @test_v2f64.log2(%v2f64 %a) {
- ; CHECK: log
- %1 = call %v2f64 @llvm.log2.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.fma:
-define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
- ; CHECK: fma
- %1 = call %v2f64 @llvm.fma.v2f64(%v2f64 %a, %v2f64 %b, %v2f64 %c)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.fabs:
-define %v2f64 @test_v2f64.fabs(%v2f64 %a) {
- ; CHECK: fabs
- %1 = call %v2f64 @llvm.fabs.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.floor:
-define %v2f64 @test_v2f64.floor(%v2f64 %a) {
- ; CHECK: frintm.2d
- %1 = call %v2f64 @llvm.floor.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.ceil:
-define %v2f64 @test_v2f64.ceil(%v2f64 %a) {
- ; CHECK: frintp.2d
- %1 = call %v2f64 @llvm.ceil.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.trunc:
-define %v2f64 @test_v2f64.trunc(%v2f64 %a) {
- ; CHECK: frintz.2d
- %1 = call %v2f64 @llvm.trunc.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.rint:
-define %v2f64 @test_v2f64.rint(%v2f64 %a) {
- ; CHECK: frintx.2d
- %1 = call %v2f64 @llvm.rint.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-; CHECK: test_v2f64.nearbyint:
-define %v2f64 @test_v2f64.nearbyint(%v2f64 %a) {
- ; CHECK: frinti.2d
- %1 = call %v2f64 @llvm.nearbyint.v2f64(%v2f64 %a)
- ret %v2f64 %1
-}
-
-declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
-declare %v2f64 @llvm.powi.v2f64(%v2f64, i32) #0
-declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
-declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
-declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
-declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
-declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
-declare %v2f64 @llvm.log.v2f64(%v2f64) #0
-declare %v2f64 @llvm.log10.v2f64(%v2f64) #0
-declare %v2f64 @llvm.log2.v2f64(%v2f64) #0
-declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0
-declare %v2f64 @llvm.fabs.v2f64(%v2f64) #0
-declare %v2f64 @llvm.floor.v2f64(%v2f64) #0
-declare %v2f64 @llvm.ceil.v2f64(%v2f64) #0
-declare %v2f64 @llvm.trunc.v2f64(%v2f64) #0
-declare %v2f64 @llvm.rint.v2f64(%v2f64) #0
-declare %v2f64 @llvm.nearbyint.v2f64(%v2f64) #0
-
-attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/ARM64/vhadd.ll b/test/CodeGen/ARM64/vhadd.ll
deleted file mode 100644
index aed7681..0000000
--- a/test/CodeGen/ARM64/vhadd.ll
+++ /dev/null
@@ -1,249 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: shadd8b:
-;CHECK: shadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: shadd16b:
-;CHECK: shadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: shadd4h:
-;CHECK: shadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: shadd8h:
-;CHECK: shadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: shadd2s:
-;CHECK: shadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: shadd4s:
-;CHECK: shadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uhadd8b:
-;CHECK: uhadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uhadd16b:
-;CHECK: uhadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uhadd4h:
-;CHECK: uhadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uhadd8h:
-;CHECK: uhadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uhadd2s:
-;CHECK: uhadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uhadd4s:
-;CHECK: uhadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: srhadd8b:
-;CHECK: srhadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: srhadd16b:
-;CHECK: srhadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: srhadd4h:
-;CHECK: srhadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: srhadd8h:
-;CHECK: srhadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: srhadd2s:
-;CHECK: srhadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: srhadd4s:
-;CHECK: srhadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: urhadd8b:
-;CHECK: urhadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: urhadd16b:
-;CHECK: urhadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: urhadd4h:
-;CHECK: urhadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: urhadd8h:
-;CHECK: urhadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: urhadd2s:
-;CHECK: urhadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: urhadd4s:
-;CHECK: urhadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vhsub.ll b/test/CodeGen/ARM64/vhsub.ll
deleted file mode 100644
index 85df4d4..0000000
--- a/test/CodeGen/ARM64/vhsub.ll
+++ /dev/null
@@ -1,125 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: shsub8b:
-;CHECK: shsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: shsub16b:
-;CHECK: shsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: shsub4h:
-;CHECK: shsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: shsub8h:
-;CHECK: shsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: shsub2s:
-;CHECK: shsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: shsub4s:
-;CHECK: shsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uhsub8b:
-;CHECK: uhsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uhsub16b:
-;CHECK: uhsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uhsub4h:
-;CHECK: uhsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uhsub8h:
-;CHECK: uhsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uhsub2s:
-;CHECK: uhsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @uhsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uhsub4s:
-;CHECK: uhsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.shsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.shsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.shsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.uhsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uhsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uhsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.shsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.shsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.shsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uhsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uhsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uhsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vmax.ll b/test/CodeGen/ARM64/vmax.ll
deleted file mode 100644
index b2426f3..0000000
--- a/test/CodeGen/ARM64/vmax.ll
+++ /dev/null
@@ -1,679 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: smax_8b:
-;CHECK: smax.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: smax_16b:
-;CHECK: smax.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: smax_4h:
-;CHECK: smax.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: smax_8h:
-;CHECK: smax.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: smax_2s:
-;CHECK: smax.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: smax_4s:
-;CHECK: smax.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: umax_8b:
-;CHECK: umax.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: umax_16b:
-;CHECK: umax.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: umax_4h:
-;CHECK: umax.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: umax_8h:
-;CHECK: umax.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: umax_2s:
-;CHECK: umax.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: umax_4s:
-;CHECK: umax.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: smin_8b:
-;CHECK: smin.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: smin_16b:
-;CHECK: smin.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: smin_4h:
-;CHECK: smin.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: smin_8h:
-;CHECK: smin.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: smin_2s:
-;CHECK: smin.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: smin_4s:
-;CHECK: smin.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: umin_8b:
-;CHECK: umin.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: umin_16b:
-;CHECK: umin.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: umin_4h:
-;CHECK: umin.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: umin_8h:
-;CHECK: umin.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: umin_2s:
-;CHECK: umin.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: umin_4s:
-;CHECK: umin.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: smaxp_8b:
-;CHECK: smaxp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: smaxp_16b:
-;CHECK: smaxp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: smaxp_4h:
-;CHECK: smaxp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: smaxp_8h:
-;CHECK: smaxp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: smaxp_2s:
-;CHECK: smaxp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: smaxp_4s:
-;CHECK: smaxp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: umaxp_8b:
-;CHECK: umaxp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: umaxp_16b:
-;CHECK: umaxp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: umaxp_4h:
-;CHECK: umaxp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: umaxp_8h:
-;CHECK: umaxp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: umaxp_2s:
-;CHECK: umaxp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: umaxp_4s:
-;CHECK: umaxp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sminp_8b:
-;CHECK: sminp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sminp_16b:
-;CHECK: sminp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sminp_4h:
-;CHECK: sminp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sminp_8h:
-;CHECK: sminp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sminp_2s:
-;CHECK: sminp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sminp_4s:
-;CHECK: sminp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uminp_8b:
-;CHECK: uminp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uminp_16b:
-;CHECK: uminp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uminp_4h:
-;CHECK: uminp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uminp_8h:
-;CHECK: uminp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uminp_2s:
-;CHECK: uminp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uminp_4s:
-;CHECK: uminp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmax_2s:
-;CHECK: fmax.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmax_4s:
-;CHECK: fmax.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmax_2d:
-;CHECK: fmax.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmaxp_2s:
-;CHECK: fmaxp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmaxp_4s:
-;CHECK: fmaxp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmaxp_2d:
-;CHECK: fmaxp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmin_2s:
-;CHECK: fmin.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmin_4s:
-;CHECK: fmin.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmin_2d:
-;CHECK: fmin.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fminp_2s:
-;CHECK: fminp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fminp_4s:
-;CHECK: fminp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fminp_2d:
-;CHECK: fminp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fminnmp_2s:
-;CHECK: fminnmp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fminnmp_4s:
-;CHECK: fminnmp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fminnmp_2d:
-;CHECK: fminnmp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmaxnmp_2s:
-;CHECK: fmaxnmp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmaxnmp_4s:
-;CHECK: fmaxnmp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmaxnmp_2d:
-;CHECK: fmaxnmp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vminmaxnm.ll b/test/CodeGen/ARM64/vminmaxnm.ll
deleted file mode 100644
index 6286407..0000000
--- a/test/CodeGen/ARM64/vminmaxnm.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x float> @f1(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
-; CHECK: fmaxnm.2s v0, v0, v1
-; CHECK: ret
- %vmaxnm2.i = tail call <2 x float> @llvm.arm64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind
- ret <2 x float> %vmaxnm2.i
-}
-
-define <4 x float> @f2(<4 x float> %a, <4 x float> %b) nounwind readnone ssp {
-; CHECK: fmaxnm.4s v0, v0, v1
-; CHECK: ret
- %vmaxnm2.i = tail call <4 x float> @llvm.arm64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind
- ret <4 x float> %vmaxnm2.i
-}
-
-define <2 x double> @f3(<2 x double> %a, <2 x double> %b) nounwind readnone ssp {
-; CHECK: fmaxnm.2d v0, v0, v1
-; CHECK: ret
- %vmaxnm2.i = tail call <2 x double> @llvm.arm64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind
- ret <2 x double> %vmaxnm2.i
-}
-
-define <2 x float> @f4(<2 x float> %a, <2 x float> %b) nounwind readnone ssp {
-; CHECK: fminnm.2s v0, v0, v1
-; CHECK: ret
- %vminnm2.i = tail call <2 x float> @llvm.arm64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) nounwind
- ret <2 x float> %vminnm2.i
-}
-
-define <4 x float> @f5(<4 x float> %a, <4 x float> %b) nounwind readnone ssp {
-; CHECK: fminnm.4s v0, v0, v1
-; CHECK: ret
- %vminnm2.i = tail call <4 x float> @llvm.arm64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) nounwind
- ret <4 x float> %vminnm2.i
-}
-
-define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp {
-; CHECK: fminnm.2d v0, v0, v1
-; CHECK: ret
- %vminnm2.i = tail call <2 x double> @llvm.arm64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) nounwind
- ret <2 x double> %vminnm2.i
-}
-
-declare <2 x double> @llvm.arm64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x float> @llvm.arm64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x float> @llvm.arm64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
-
-
-define double @test_fmaxnmv(<2 x double> %in) {
-; CHECK-LABEL: test_fmaxnmv:
-; CHECK: fmaxnmp.2d d0, v0
- %max = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
- ret double %max
-}
-
-define double @test_fminnmv(<2 x double> %in) {
-; CHECK-LABEL: test_fminnmv:
-; CHECK: fminnmp.2d d0, v0
- %min = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in)
- ret double %min
-}
-
-declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>)
-declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>)
diff --git a/test/CodeGen/ARM64/vmovn.ll b/test/CodeGen/ARM64/vmovn.ll
deleted file mode 100644
index 675633b..0000000
--- a/test/CodeGen/ARM64/vmovn.ll
+++ /dev/null
@@ -1,242 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @xtn8b(<8 x i16> %A) nounwind {
-;CHECK-LABEL: xtn8b:
-;CHECK-NOT: ld1
-;CHECK: xtn.8b v0, v0
-;CHECK-NEXT: ret
- %tmp3 = trunc <8 x i16> %A to <8 x i8>
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @xtn4h(<4 x i32> %A) nounwind {
-;CHECK-LABEL: xtn4h:
-;CHECK-NOT: ld1
-;CHECK: xtn.4h v0, v0
-;CHECK-NEXT: ret
- %tmp3 = trunc <4 x i32> %A to <4 x i16>
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @xtn2s(<2 x i64> %A) nounwind {
-;CHECK-LABEL: xtn2s:
-;CHECK-NOT: ld1
-;CHECK: xtn.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = trunc <2 x i64> %A to <2 x i32>
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @xtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
-;CHECK-LABEL: xtn2_16b:
-;CHECK-NOT: ld1
-;CHECK: xtn2.16b v0, v1
-;CHECK-NEXT: ret
- %tmp3 = trunc <8 x i16> %A to <8 x i8>
- %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @xtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
-;CHECK-LABEL: xtn2_8h:
-;CHECK-NOT: ld1
-;CHECK: xtn2.8h v0, v1
-;CHECK-NEXT: ret
- %tmp3 = trunc <4 x i32> %A to <4 x i16>
- %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @xtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
-;CHECK-LABEL: xtn2_4s:
-;CHECK-NOT: ld1
-;CHECK: xtn2.4s v0, v1
-;CHECK-NEXT: ret
- %tmp3 = trunc <2 x i64> %A to <2 x i32>
- %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-define <8 x i8> @sqxtn8b(<8 x i16> %A) nounwind {
-;CHECK-LABEL: sqxtn8b:
-;CHECK-NOT: ld1
-;CHECK: sqxtn.8b v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %A)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqxtn4h(<4 x i32> %A) nounwind {
-;CHECK-LABEL: sqxtn4h:
-;CHECK-NOT: ld1
-;CHECK: sqxtn.4h v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %A)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqxtn2s(<2 x i64> %A) nounwind {
-;CHECK-LABEL: sqxtn2s:
-;CHECK-NOT: ld1
-;CHECK: sqxtn.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %A)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
-;CHECK-LABEL: sqxtn2_16b:
-;CHECK-NOT: ld1
-;CHECK: sqxtn2.16b v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %A)
- %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @sqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
-;CHECK-LABEL: sqxtn2_8h:
-;CHECK-NOT: ld1
-;CHECK: sqxtn2.8h v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %A)
- %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @sqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
-;CHECK-LABEL: sqxtn2_4s:
-;CHECK-NOT: ld1
-;CHECK: sqxtn2.4s v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %A)
- %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64>) nounwind readnone
-
-define <8 x i8> @uqxtn8b(<8 x i16> %A) nounwind {
-;CHECK-LABEL: uqxtn8b:
-;CHECK-NOT: ld1
-;CHECK: uqxtn.8b v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %A)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqxtn4h(<4 x i32> %A) nounwind {
-;CHECK-LABEL: uqxtn4h:
-;CHECK-NOT: ld1
-;CHECK: uqxtn.4h v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %A)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqxtn2s(<2 x i64> %A) nounwind {
-;CHECK-LABEL: uqxtn2s:
-;CHECK-NOT: ld1
-;CHECK: uqxtn.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %A)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @uqxtn2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
-;CHECK-LABEL: uqxtn2_16b:
-;CHECK-NOT: ld1
-;CHECK: uqxtn2.16b v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %A)
- %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @uqxtn2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
-;CHECK-LABEL: uqxtn2_8h:
-;CHECK-NOT: ld1
-;CHECK: uqxtn2.8h v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %A)
- %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @uqxtn2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
-;CHECK-LABEL: uqxtn2_4s:
-;CHECK-NOT: ld1
-;CHECK: uqxtn2.4s v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %A)
- %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64>) nounwind readnone
-
-define <8 x i8> @sqxtun8b(<8 x i16> %A) nounwind {
-;CHECK-LABEL: sqxtun8b:
-;CHECK-NOT: ld1
-;CHECK: sqxtun.8b v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %A)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqxtun4h(<4 x i32> %A) nounwind {
-;CHECK-LABEL: sqxtun4h:
-;CHECK-NOT: ld1
-;CHECK: sqxtun.4h v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %A)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqxtun2s(<2 x i64> %A) nounwind {
-;CHECK-LABEL: sqxtun2s:
-;CHECK-NOT: ld1
-;CHECK: sqxtun.2s v0, v0
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %A)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqxtun2_16b(<8 x i8> %ret, <8 x i16> %A) nounwind {
-;CHECK-LABEL: sqxtun2_16b:
-;CHECK-NOT: ld1
-;CHECK: sqxtun2.16b v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %A)
- %res = shufflevector <8 x i8> %ret, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @sqxtun2_8h(<4 x i16> %ret, <4 x i32> %A) nounwind {
-;CHECK-LABEL: sqxtun2_8h:
-;CHECK-NOT: ld1
-;CHECK: sqxtun2.8h v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %A)
- %res = shufflevector <4 x i16> %ret, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @sqxtun2_4s(<2 x i32> %ret, <2 x i64> %A) nounwind {
-;CHECK-LABEL: sqxtun2_4s:
-;CHECK-NOT: ld1
-;CHECK: sqxtun2.4s v0, v1
-;CHECK-NEXT: ret
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %A)
- %res = shufflevector <2 x i32> %ret, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64>) nounwind readnone
-
diff --git a/test/CodeGen/ARM64/vmul.ll b/test/CodeGen/ARM64/vmul.ll
deleted file mode 100644
index 3ef0a76..0000000
--- a/test/CodeGen/ARM64/vmul.ll
+++ /dev/null
@@ -1,2003 +0,0 @@
-; RUN: llc -asm-verbose=false < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-
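-; Signed widening multiply (smull): result elements are twice the input width.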
-define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: smull8h:
-;CHECK: smull.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: smull4s:
-;CHECK: smull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: smull2d:
-;CHECK: smull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
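-; Unsigned widening multiply (umull).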
-define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: umull8h:
-;CHECK: umull.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: umull4s:
-;CHECK: umull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: umull2d:
-;CHECK: umull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
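-; Signed saturating doubling multiply long (sqdmull).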
-define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmull4s:
-;CHECK: sqdmull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmull2d:
-;CHECK: sqdmull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
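-; Operands taken from the top lanes via shufflevector select the high-half sqdmull2 form.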
-define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmull2_4s:
-;CHECK: sqdmull2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmull2_2d:
-;CHECK: sqdmull2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-
-declare <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
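-; Polynomial (carry-less) multiply.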
-define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: pmull8h:
-;CHECK: pmull.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-declare <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-
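-; Signed saturating doubling multiply returning the high half (sqdmulh).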
-define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_4h:
-;CHECK: sqdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_8h:
-;CHECK: sqdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_2s:
-;CHECK: sqdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_4s:
-;CHECK: sqdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind {
-;CHECK-LABEL: sqdmulh_1s:
-;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
- %tmp1 = load i32* %A
- %tmp2 = load i32* %B
- %tmp3 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
- ret i32 %tmp3
-}
-
-declare <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare i32 @llvm.arm64.neon.sqdmulh.i32(i32, i32) nounwind readnone
-
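-; Rounding variant (sqrdmulh).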
-define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_4h:
-;CHECK: sqrdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_8h:
-;CHECK: sqrdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_2s:
-;CHECK: sqrdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_4s:
-;CHECK: sqrdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_1s:
-;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
- %tmp1 = load i32* %A
- %tmp2 = load i32* %B
- %tmp3 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
- ret i32 %tmp3
-}
-
-declare <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare i32 @llvm.arm64.neon.sqrdmulh.i32(i32, i32) nounwind readnone
-
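-; Floating-point multiply extended (fmulx).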
-define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmulx_2s:
-;CHECK: fmulx.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmulx_4s:
-;CHECK: fmulx.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmulx_2d:
-;CHECK: fmulx.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
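-; Multiply-accumulate: an smull intrinsic followed by add/sub is fused into smlal/smlsl.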
-define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: smlal4s:
-;CHECK: smlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: smlal2d:
-;CHECK: smlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: smlsl4s:
-;CHECK: smlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = sub <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: smlsl2d:
-;CHECK: smlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = sub <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
-
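-; sqdmull followed by a saturating add/sub intrinsic is fused into sqdmlal/sqdmlsl.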
-define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlal4s:
-;CHECK: sqdmlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlal2d:
-;CHECK: sqdmlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
- ret <2 x i64> %tmp5
-}
-
-define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlal2_4s:
-;CHECK: sqdmlal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlal2_2d:
-;CHECK: sqdmlal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
- ret <2 x i64> %tmp5
-}
-
-define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl4s:
-;CHECK: sqdmlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl2d:
-;CHECK: sqdmlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
- ret <2 x i64> %tmp5
-}
-
-define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl2_4s:
-;CHECK: sqdmlsl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl2_2d:
-;CHECK: sqdmlsl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
- ret <2 x i64> %tmp5
-}
-
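-; umull followed by add/sub is fused into umlal/umlsl.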
-define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: umlal4s:
-;CHECK: umlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: umlal2d:
-;CHECK: umlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: umlsl4s:
-;CHECK: umlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp5 = sub <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: umlsl2d:
-;CHECK: umlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp5 = sub <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
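-; Floating-point fused multiply-add: llvm.fma selects fmla.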
-define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
-;CHECK-LABEL: fmla_2s:
-;CHECK: fmla.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
- %tmp4 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
- ret <2 x float> %tmp4
-}
-
-define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-;CHECK-LABEL: fmla_4s:
-;CHECK: fmla.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
- %tmp4 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
- ret <4 x float> %tmp4
-}
-
-define <2 x double> @fmla_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
-;CHECK-LABEL: fmla_2d:
-;CHECK: fmla.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
- %tmp4 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
- ret <2 x double> %tmp4
-}
-
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
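-; Negating one multiplicand (fsub from -0.0) turns the fma into fmls, whichever operand carries the negation.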
-define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
-;CHECK-LABEL: fmls_2s:
-;CHECK: fmls.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
- %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
- %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp4, <2 x float> %tmp3)
- ret <2 x float> %tmp5
-}
-
-define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-;CHECK-LABEL: fmls_4s:
-;CHECK: fmls.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
- %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
- %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp4, <4 x float> %tmp3)
- ret <4 x float> %tmp5
-}
-
-define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
-;CHECK-LABEL: fmls_2d:
-;CHECK: fmls.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
- %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
- %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp4, <2 x double> %tmp3)
- ret <2 x double> %tmp5
-}
-
-define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
-;CHECK-LABEL: fmls_commuted_neg_2s:
-;CHECK: fmls.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
- %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
- %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp4, <2 x float> %tmp1, <2 x float> %tmp3)
- ret <2 x float> %tmp5
-}
-
-define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-;CHECK-LABEL: fmls_commuted_neg_4s:
-;CHECK: fmls.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
- %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
- %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp4, <4 x float> %tmp1, <4 x float> %tmp3)
- ret <4 x float> %tmp5
-}
-
-define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
-;CHECK-LABEL: fmls_commuted_neg_2d:
-;CHECK: fmls.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
- %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
- %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp4, <2 x double> %tmp1, <2 x double> %tmp3)
- ret <2 x double> %tmp5
-}
-
-define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
-;CHECK-LABEL: fmls_indexed_2s:
-;CHECK: fmls.2s
-entry:
- %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c
- %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer
- %fmls1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a)
- ret <2 x float> %fmls1
-}
-
-define <4 x float> @fmls_indexed_4s(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
-;CHECK-LABEL: fmls_indexed_4s:
-;CHECK: fmls.4s
-entry:
- %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
- %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
- %fmls1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a)
- ret <4 x float> %fmls1
-}
-
-define <2 x double> @fmls_indexed_2d(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp {
-;CHECK-LABEL: fmls_indexed_2d:
-;CHECK: fmls.2d
-entry:
- %0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
- %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
- %fmls1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a)
- ret <2 x double> %fmls1
-}
-
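-; A scalar splatted with insertelement folds into the by-element fmla; no dup is emitted.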
-define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fmla_indexed_scalar_2s:
-; CHECK-NEXT: fmla.2s
-; CHECK-NEXT: ret
- %v1 = insertelement <2 x float> undef, float %c, i32 0
- %v2 = insertelement <2 x float> %v1, float %c, i32 1
- %fmla1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a) nounwind
- ret <2 x float> %fmla1
-}
-
-define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: fmla_indexed_scalar_4s:
-; CHECK-NEXT: fmla.4s
-; CHECK-NEXT: ret
- %v1 = insertelement <4 x float> undef, float %c, i32 0
- %v2 = insertelement <4 x float> %v1, float %c, i32 1
- %v3 = insertelement <4 x float> %v2, float %c, i32 2
- %v4 = insertelement <4 x float> %v3, float %c, i32 3
- %fmla1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a) nounwind
- ret <4 x float> %fmla1
-}
-
-define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
-; CHECK-LABEL: fmla_indexed_scalar_2d:
-; CHECK-NEXT: fmla.2d
-; CHECK-NEXT: ret
-entry:
- %v1 = insertelement <2 x double> undef, double %c, i32 0
- %v2 = insertelement <2 x double> %v1, double %c, i32 1
- %fmla1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a) nounwind
- ret <2 x double> %fmla1
-}
-
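-; Indexed integer multiply: the lane splat folds into mul by element (CHECK-NOT: dup).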
-define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: mul_4h:
-;CHECK-NOT: dup
-;CHECK: mul.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = mul <4 x i16> %tmp1, %tmp3
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @mul_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: mul_8h:
-;CHECK-NOT: dup
-;CHECK: mul.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %tmp4 = mul <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @mul_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: mul_2s:
-;CHECK-NOT: dup
-;CHECK: mul.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = mul <2 x i32> %tmp1, %tmp3
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @mul_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: mul_4s:
-;CHECK-NOT: dup
-;CHECK: mul.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = mul <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
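-; There is no vector multiply for 64-bit elements; v2i64 mul is expanded to scalar muls.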
-define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
-; CHECK-LABEL: mul_2d:
-; CHECK: mul
-; CHECK: mul
- %tmp1 = mul <2 x i64> %A, %B
- ret <2 x i64> %tmp1
-}
-
-define <2 x float> @fmul_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmul_lane_2s:
-;CHECK-NOT: dup
-;CHECK: fmul.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = fmul <2 x float> %tmp1, %tmp3
- ret <2 x float> %tmp4
-}
-
-define <4 x float> @fmul_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmul_lane_4s:
-;CHECK-NOT: dup
-;CHECK: fmul.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = fmul <4 x float> %tmp1, %tmp3
- ret <4 x float> %tmp4
-}
-
-define <2 x double> @fmul_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmul_lane_2d:
-;CHECK-NOT: dup
-;CHECK: fmul.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = fmul <2 x double> %tmp1, %tmp3
- ret <2 x double> %tmp4
-}
-
-define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind {
-;CHECK-LABEL: fmul_lane_s:
-;CHECK-NOT: dup
-;CHECK: fmul.s s0, s0, v1[3]
- %B = extractelement <4 x float> %vec, i32 3
- %res = fmul float %A, %B
- ret float %res
-}
-
-define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
-;CHECK-LABEL: fmul_lane_d:
-;CHECK-NOT: dup
-;CHECK: fmul.d d0, d0, v1[1]
- %B = extractelement <2 x double> %vec, i32 1
- %res = fmul double %A, %B
- ret double %res
-}
-
-
-
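-; Indexed fmulx: the lane splat folds into fmulx by element.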
-define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: fmulx_lane_2s:
-;CHECK-NOT: dup
-;CHECK: fmulx.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x float> @llvm.arm64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
- ret <2 x float> %tmp4
-}
-
-define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: fmulx_lane_4s:
-;CHECK-NOT: dup
-;CHECK: fmulx.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x float> @llvm.arm64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
- ret <4 x float> %tmp4
-}
-
-define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: fmulx_lane_2d:
-;CHECK-NOT: dup
-;CHECK: fmulx.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x double> @llvm.arm64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
- ret <2 x double> %tmp4
-}
-
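-; Indexed sqdmulh/sqrdmulh, including the scalar forms that read a single lane.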
-define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_lane_4h:
-;CHECK-NOT: dup
-;CHECK: sqdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i16> @llvm.arm64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_lane_8h:
-;CHECK-NOT: dup
-;CHECK: sqdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <8 x i16> @llvm.arm64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_lane_2s:
-;CHECK-NOT: dup
-;CHECK: sqdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i32> @llvm.arm64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmulh_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
-;CHECK-LABEL: sqdmulh_lane_1s:
-;CHECK-NOT: dup
-;CHECK: sqdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
- %tmp1 = extractelement <4 x i32> %B, i32 1
- %tmp2 = call i32 @llvm.arm64.neon.sqdmulh.i32(i32 %A, i32 %tmp1)
- ret i32 %tmp2
-}
-
-define <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_lane_4h:
-;CHECK-NOT: dup
-;CHECK: sqrdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i16> @llvm.arm64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
- ret <4 x i16> %tmp4
-}
-
-define <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_lane_8h:
-;CHECK-NOT: dup
-;CHECK: sqrdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <8 x i16> @llvm.arm64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
- ret <8 x i16> %tmp4
-}
-
-define <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_lane_2s:
-;CHECK-NOT: dup
-;CHECK: sqrdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i32> @llvm.arm64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
- ret <2 x i32> %tmp4
-}
-
-define <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqrdmulh_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqrdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
-;CHECK-LABEL: sqrdmulh_lane_1s:
-;CHECK-NOT: dup
-;CHECK: sqrdmulh.s s0, {{s[0-9]+}}, {{v[0-9]+}}[1]
- %tmp1 = extractelement <4 x i32> %B, i32 1
- %tmp2 = call i32 @llvm.arm64.neon.sqrdmulh.i32(i32 %A, i32 %tmp1)
- ret i32 %tmp2
-}
-
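-; Indexed widening multiplies (sqdmull/umull/smull by element), plus the high-half "2" forms.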
-define <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmull_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmull_lane_2d:
-;CHECK-NOT: dup
-;CHECK: sqdmull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqdmull2_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmull2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqdmull2_lane_2d:
-;CHECK-NOT: dup
-;CHECK: sqdmull2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp4
-}
-
-define <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: umull_lane_4s:
-;CHECK-NOT: dup
-;CHECK: umull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: umull_lane_2d:
-;CHECK-NOT: dup
-;CHECK: umull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
-define <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: smull_lane_4s:
-;CHECK-NOT: dup
-;CHECK: smull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: smull_lane_2d:
-;CHECK-NOT: dup
-;CHECK: smull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp4 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
-}
-
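-; Indexed multiply-accumulate and its saturating variants.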
-define <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: smlal_lane_4s:
-;CHECK-NOT: dup
-;CHECK: smlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = add <4 x i32> %tmp3, %tmp5
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: smlal_lane_2d:
-;CHECK-NOT: dup
-;CHECK: smlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = add <2 x i64> %tmp3, %tmp5
- ret <2 x i64> %tmp6
-}
-
-define <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlal_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlal_lane_2d:
-;CHECK-NOT: dup
-;CHECK: sqdmlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
- ret <2 x i64> %tmp6
-}
-
-define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlal2_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmlal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp6 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlal2_lane_2d:
-;CHECK-NOT: dup
-;CHECK: sqdmlal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp6 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
- ret <2 x i64> %tmp6
-}
-
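-; Scalar saturating multiply-accumulate expressed through lane 0 of the vector sqdmull.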
-define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: sqdmlal_lane_1s:
-;CHECK: sqdmlal.4s
- %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
- %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
- %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
- %prod = extractelement <4 x i32> %prod.vec, i32 0
- %res = call i32 @llvm.arm64.neon.sqadd.i32(i32 %A, i32 %prod)
- ret i32 %res
-}
-declare i32 @llvm.arm64.neon.sqadd.i32(i32, i32)
-
-define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
-;CHECK-LABEL: sqdmlsl_lane_1s:
-;CHECK: sqdmlsl.4s
- %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
- %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
- %prod.vec = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
- %prod = extractelement <4 x i32> %prod.vec, i32 0
- %res = call i32 @llvm.arm64.neon.sqsub.i32(i32 %A, i32 %prod)
- ret i32 %res
-}
-declare i32 @llvm.arm64.neon.sqsub.i32(i32, i32)
-
-define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: sqdmlal_lane_1d:
-;CHECK: sqdmlal.s
- %rhs = extractelement <2 x i32> %C, i32 1
- %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
- %res = call i64 @llvm.arm64.neon.sqadd.i64(i64 %A, i64 %prod)
- ret i64 %res
-}
-declare i64 @llvm.arm64.neon.sqdmulls.scalar(i32, i32)
-declare i64 @llvm.arm64.neon.sqadd.i64(i64, i64)
-
-define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
-;CHECK-LABEL: sqdmlsl_lane_1d:
-;CHECK: sqdmlsl.s
- %rhs = extractelement <2 x i32> %C, i32 1
- %prod = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
- %res = call i64 @llvm.arm64.neon.sqsub.i64(i64 %A, i64 %prod)
- ret i64 %res
-}
-declare i64 @llvm.arm64.neon.sqsub.i64(i64, i64)
-
-
-define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: umlal_lane_4s:
-;CHECK-NOT: dup
-;CHECK: umlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = add <4 x i32> %tmp3, %tmp5
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: umlal_lane_2d:
-;CHECK-NOT: dup
-;CHECK: umlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = add <2 x i64> %tmp3, %tmp5
- ret <2 x i64> %tmp6
-}
-
-
-define <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: smlsl_lane_4s:
-;CHECK-NOT: dup
-;CHECK: smlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = sub <4 x i32> %tmp3, %tmp5
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: smlsl_lane_2d:
-;CHECK-NOT: dup
-;CHECK: smlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = sub <2 x i64> %tmp3, %tmp5
- ret <2 x i64> %tmp6
-}
-
-define <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl_lane_2d:
-;CHECK-NOT: dup
-;CHECK: sqdmlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
- ret <2 x i64> %tmp6
-}
-
-define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl2_lane_4s:
-;CHECK-NOT: dup
-;CHECK: sqdmlsl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp6 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: sqdmlsl2_lane_2d:
-;CHECK-NOT: dup
-;CHECK: sqdmlsl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp6 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
- ret <2 x i64> %tmp6
-}
-
-define <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: umlsl_lane_4s:
-;CHECK-NOT: dup
-;CHECK: umlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
- %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
- %tmp6 = sub <4 x i32> %tmp3, %tmp5
- ret <4 x i32> %tmp6
-}
-
-define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: umlsl_lane_2d:
-;CHECK-NOT: dup
-;CHECK: umlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
- %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
- %tmp5 = call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
- %tmp6 = sub <2 x i64> %tmp3, %tmp5
- ret <2 x i64> %tmp6
-}
-
-; Scalar FMULX
-define float @fmulxs(float %a, float %b) nounwind {
-; CHECK-LABEL: fmulxs:
-; CHECK-NEXT: fmulx s0, s0, s1
- %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind
-; CHECK-NEXT: ret
- ret float %fmulx.i
-}
-
-define double @fmulxd(double %a, double %b) nounwind {
-; CHECK-LABEL: fmulxd:
-; CHECK-NEXT: fmulx d0, d0, d1
- %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind
-; CHECK-NEXT: ret
- ret double %fmulx.i
-}
-
-define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind {
-; CHECK-LABEL: fmulxs_lane:
-; CHECK-NEXT: fmulx.s s0, s0, v1[3]
- %b = extractelement <4 x float> %vec, i32 3
- %fmulx.i = tail call float @llvm.arm64.neon.fmulx.f32(float %a, float %b) nounwind
-; CHECK-NEXT: ret
- ret float %fmulx.i
-}
-
-define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind {
-; CHECK-LABEL: fmulxd_lane:
-; CHECK-NEXT: fmulx.d d0, d0, v1[1]
- %b = extractelement <2 x double> %vec, i32 1
- %fmulx.i = tail call double @llvm.arm64.neon.fmulx.f64(double %a, double %b) nounwind
-; CHECK-NEXT: ret
- ret double %fmulx.i
-}
-
-declare double @llvm.arm64.neon.fmulx.f64(double, double) nounwind readnone
-declare float @llvm.arm64.neon.fmulx.f32(float, float) nounwind readnone
-
-
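-; Taking the high half through a bitcast of the <2 x i64> view is still matched as the second-half (smull2/umull2) instructions.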
-define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECK-LABEL: smull2_8h_simple:
-; CHECK-NEXT: smull2.8h v0, v0, v1
-; CHECK-NEXT: ret
- %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %3 = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2
- ret <8 x i16> %3
-}
-
-define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECK-LABEL: foo0:
-; CHECK: smull2.8h v0, v0, v1
- %tmp = bitcast <16 x i8> %a to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8>
- %tmp2 = bitcast <16 x i8> %b to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
- %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
- ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind {
-; CHECK-LABEL: foo1:
-; CHECK: smull2.4s v0, v0, v1
- %tmp = bitcast <8 x i16> %a to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
- %tmp2 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
- %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
- ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind {
-; CHECK-LABEL: foo2:
-; CHECK: smull2.2d v0, v0, v1
- %tmp = bitcast <4 x i32> %a to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
- %tmp2 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
- %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
- ret <2 x i64> %vmull2.i.i
-}
-
-define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECK-LABEL: foo3:
-; CHECK: umull2.8h v0, v0, v1
- %tmp = bitcast <16 x i8> %a to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8>
- %tmp2 = bitcast <16 x i8> %b to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8>
- %vmull.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
- ret <8 x i16> %vmull.i.i
-}
-
-define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind {
-; CHECK-LABEL: foo4:
-; CHECK: umull2.4s v0, v0, v1
- %tmp = bitcast <8 x i16> %a to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
- %tmp2 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
- %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
- ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind {
-; CHECK-LABEL: foo5:
-; CHECK: umull2.2d v0, v0, v1
- %tmp = bitcast <4 x i32> %a to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
- %tmp2 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
- %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
- ret <2 x i64> %vmull2.i.i
-}
-
-define <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: foo6:
-; CHECK-NEXT: smull2.4s v0, v1, v2[1]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
- %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: foo7:
-; CHECK-NEXT: smull2.2d v0, v1, v2[1]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
- %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: foo8:
-; CHECK-NEXT: umull2.4s v0, v1, v2[1]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
- %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
-; CHECK-LABEL: foo9:
-; CHECK-NEXT: umull2.2d v0, v1, v2[1]
-; CHECK-NEXT: ret
-entry:
- %0 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
- %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
- %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
- ret <2 x i64> %vmull2.i
-}
-
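-; Second-half multiply fused with the accumulating add: smlal2/umlal2.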
-define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
-; CHECK-LABEL: bar0:
-; CHECK: smlal2.8h v0, v1, v2
-; CHECK-NEXT: ret
-
- %tmp = bitcast <16 x i8> %b to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
- %tmp2 = bitcast <16 x i8> %c to <2 x i64>
- %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
- %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
- %add.i = add <8 x i16> %vmull.i.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind {
-; CHECK-LABEL: bar1:
-; CHECK: smlal2.4s v0, v1, v2
-; CHECK-NEXT: ret
-
- %tmp = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
- %tmp2 = bitcast <8 x i16> %c to <2 x i64>
- %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
- %add.i = add <4 x i32> %vmull2.i.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
-; CHECK-LABEL: bar2:
-; CHECK: smlal2.2d v0, v1, v2
-; CHECK-NEXT: ret
-
- %tmp = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
- %tmp2 = bitcast <4 x i32> %c to <2 x i64>
- %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
- %add.i = add <2 x i64> %vmull2.i.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
-; CHECK-LABEL: bar3:
-; CHECK: umlal2.8h v0, v1, v2
-; CHECK-NEXT: ret
-
- %tmp = bitcast <16 x i8> %b to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
- %tmp2 = bitcast <16 x i8> %c to <2 x i64>
- %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
- %vmull.i.i.i = tail call <8 x i16> @llvm.arm64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp3) nounwind
- %add.i = add <8 x i16> %vmull.i.i.i, %a
- ret <8 x i16> %add.i
-}
-
-define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind {
-; CHECK-LABEL: bar4:
-; CHECK: umlal2.4s v0, v1, v2
-; CHECK-NEXT: ret
-
- %tmp = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
- %tmp2 = bitcast <8 x i16> %c to <2 x i64>
- %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
- %vmull2.i.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
- %add.i = add <4 x i32> %vmull2.i.i.i, %a
- ret <4 x i32> %add.i
-}
-
-define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
-; CHECK-LABEL: bar5:
-; CHECK: umlal2.2d v0, v1, v2
-; CHECK-NEXT: ret
-
- %tmp = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
- %tmp2 = bitcast <4 x i32> %c to <2 x i64>
- %shuffle.i3.i.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
- %vmull2.i.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
- %add.i = add <2 x i64> %vmull2.i.i.i, %a
- ret <2 x i64> %add.i
-}
-
-define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
-; CHECK-LABEL: mlal2_1:
-; CHECK: smlal2.4s v0, v1, v2[3]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %tmp = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
- %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
- %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
- %add = add <4 x i32> %vmull2.i.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
-; CHECK-LABEL: mlal2_2:
-; CHECK: smlal2.2d v0, v1, v2[1]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
- %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
- %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
- %add = add <2 x i64> %vmull2.i.i, %a
- ret <2 x i64> %add
-}
-
-define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
-; CHECK-LABEL: mlal2_4:
-; CHECK: umlal2.4s v0, v1, v2[2]
-; CHECK-NEXT: ret
-
- %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- %tmp = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
- %tmp2 = bitcast <8 x i16> %shuffle to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16>
- %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3) nounwind
- %add = add <4 x i32> %vmull2.i.i, %a
- ret <4 x i32> %add
-}
-
-define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
-; CHECK-LABEL: mlal2_5:
-; CHECK: umlal2.2d v0, v1, v2[0]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer
- %tmp = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %tmp, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
- %tmp2 = bitcast <4 x i32> %shuffle to <2 x i64>
- %shuffle.i3.i = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <1 x i32> <i32 1>
- %tmp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32>
- %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3) nounwind
- %add = add <2 x i64> %vmull2.i.i, %a
- ret <2 x i64> %add
-}
-
-; rdar://12328502
-define <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmulq_n_f64:
-; CHECK-NOT: dup.2d
-; CHECK: fmul.2d v0, v0, v1[0]
- %vecinit.i = insertelement <2 x double> undef, double %y, i32 0
- %vecinit1.i = insertelement <2 x double> %vecinit.i, double %y, i32 1
- %mul.i = fmul <2 x double> %vecinit1.i, %x
- ret <2 x double> %mul.i
-}
-
-define <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmulq_n_f32:
-; CHECK-NOT: dup.4s
-; CHECK: fmul.4s v0, v0, v1[0]
- %vecinit.i = insertelement <4 x float> undef, float %y, i32 0
- %vecinit1.i = insertelement <4 x float> %vecinit.i, float %y, i32 1
- %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %y, i32 2
- %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %y, i32 3
- %mul.i = fmul <4 x float> %vecinit3.i, %x
- ret <4 x float> %mul.i
-}
-
-define <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmul_n_f32:
-; CHECK-NOT: dup.2s
-; CHECK: fmul.2s v0, v0, v1[0]
- %vecinit.i = insertelement <2 x float> undef, float %y, i32 0
- %vecinit1.i = insertelement <2 x float> %vecinit.i, float %y, i32 1
- %mul.i = fmul <2 x float> %vecinit1.i, %x
- ret <2 x float> %mul.i
-}
-
-define <4 x i16> @vmla_laneq_s16_test(<4 x i16> %a, <4 x i16> %b, <8 x i16> %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmla_laneq_s16_test:
-; CHECK-NOT: ext
-; CHECK: mla.4h v0, v1, v2[6]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- %mul = mul <4 x i16> %shuffle, %b
- %add = add <4 x i16> %mul, %a
- ret <4 x i16> %add
-}
-
-define <2 x i32> @vmla_laneq_s32_test(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmla_laneq_s32_test:
-; CHECK-NOT: ext
-; CHECK: mla.2s v0, v1, v2[3]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
- %mul = mul <2 x i32> %shuffle, %b
- %add = add <2 x i32> %mul, %a
- ret <2 x i32> %add
-}
-
-define <4 x i32> @vmull_laneq_s16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmull_laneq_s16_test:
-; CHECK-NOT: ext
-; CHECK: smull.4s v0, v0, v1[6]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @vmull_laneq_s32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmull_laneq_s32_test:
-; CHECK-NOT: ext
-; CHECK: smull.2d v0, v0, v1[2]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
- %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmull_laneq_u16_test:
-; CHECK-NOT: ext
-; CHECK: umull.4s v0, v0, v1[6]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- %vmull2.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2
- ret <4 x i32> %vmull2.i
-}
-
-define <2 x i64> @vmull_laneq_u32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
-entry:
-; CHECK-LABEL: vmull_laneq_u32_test:
-; CHECK-NOT: ext
-; CHECK: umull.2d v0, v0, v1[2]
-; CHECK-NEXT: ret
- %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2>
- %vmull2.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2
- ret <2 x i64> %vmull2.i
-}
-
-define <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: vmull_high_n_s16_test:
-; CHECK-NOT: ext
-; CHECK: smull2.4s
-; CHECK-NEXT: ret
- %conv = trunc i32 %d to i16
- %0 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
- %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
- %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
- ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @vmull_high_n_s32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: vmull_high_n_s32_test:
-; CHECK-NOT: ext
-; CHECK: smull2.2d
-; CHECK-NEXT: ret
- %0 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
- %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
- %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
- %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
- ret <2 x i64> %vmull2.i.i
-}
-
-define <4 x i32> @vmull_high_n_u16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: vmull_high_n_u16_test:
-; CHECK-NOT: ext
-; CHECK: umull2.4s
-; CHECK-NEXT: ret
- %conv = trunc i32 %d to i16
- %0 = bitcast <8 x i16> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
- %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
- %vmull2.i.i = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
- ret <4 x i32> %vmull2.i.i
-}
-
-define <2 x i64> @vmull_high_n_u32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp {
-entry:
-; CHECK-LABEL: vmull_high_n_u32_test:
-; CHECK-NOT: ext
-; CHECK: umull2.2d
-; CHECK-NEXT: ret
- %0 = bitcast <4 x i32> %b to <2 x i64>
- %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1>
- %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32>
- %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0
- %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1
- %vmull2.i.i = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind
- ret <2 x i64> %vmull2.i.i
-}
-
-define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: vmul_built_dup_test:
-; CHECK-NOT: ins
-; CHECK-NOT: dup
-; CHECK: mul.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[1]
- %vget_lane = extractelement <4 x i32> %b, i32 1
- %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
- %prod = mul <4 x i32> %a, %vecinit3.i
- ret <4 x i32> %prod
-}
-
-define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: vmul_built_dup_fromsmall_test:
-; CHECK-NOT: ins
-; CHECK-NOT: dup
-; CHECK: mul.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[3]
- %vget_lane = extractelement <4 x i16> %b, i32 3
- %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- %prod = mul <4 x i16> %a, %vecinit3.i
- ret <4 x i16> %prod
-}
-
-define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: vmulq_built_dup_fromsmall_test:
-; CHECK-NOT: ins
-; CHECK-NOT: dup
-; CHECK: mul.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
- %vget_lane = extractelement <4 x i16> %b, i32 0
- %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
- %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
- %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
- %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
- %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
- %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
- %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
- %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
- %prod = mul <8 x i16> %a, %vecinit7.i
- ret <8 x i16> %prod
-}
-
-define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: mull_from_two_extracts:
-; CHECK-NOT: ext
-; CHECK: sqdmull2.2d
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
- ret <2 x i64> %res
-}
-
-define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: mlal_from_two_extracts:
-; CHECK-NOT: ext
-; CHECK: sqdmlal2.2d
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
- %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
- ret <2 x i64> %sum
-}
-
-define <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
-; CHECK-LABEL: mull_from_extract_dup:
-; CHECK-NOT: ext
-; CHECK: sqdmull2.2d
- %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
- %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-
- %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
- ret <2 x i64> %res
-}
-
-define <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) {
-; CHECK-LABEL: pmull_from_extract_dup:
-; CHECK-NOT: ext
-; CHECK: pmull2.8h
- %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
- %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-
- %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-
- %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
- ret <8 x i16> %res
-}
-
-define <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK-LABEL: pmull_from_extract_duplane:
-; CHECK-NOT: ext
-; CHECK: pmull2.8h
-
- %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-
- %res = tail call <8 x i16> @llvm.arm64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
- ret <8 x i16> %res
-}
-
-define <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: sqdmull_from_extract_duplane:
-; CHECK-NOT: ext
-; CHECK: sqdmull2.2d
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
-
- %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
- ret <2 x i64> %res
-}
-
-define <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: sqdmlal_from_extract_duplane:
-; CHECK-NOT: ext
-; CHECK: sqdmlal2.2d
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
-
- %res = tail call <2 x i64> @llvm.arm64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
- %sum = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
- ret <2 x i64> %sum
-}
-
-define <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: umlal_from_extract_duplane:
-; CHECK-NOT: ext
-; CHECK: umlal2.2d
-
- %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
-
- %res = tail call <2 x i64> @llvm.arm64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
- %sum = add <2 x i64> %accum, %res
- ret <2 x i64> %sum
-}
-
-define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
-; CHECK-LABEL: scalar_fmla_from_extract_v4f32:
-; CHECK: fmla.s s0, s1, v2[3]
- %rhs = extractelement <4 x float> %rvec, i32 3
- %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
- ret float %res
-}
-
-define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
-; CHECK-LABEL: scalar_fmla_from_extract_v2f32:
-; CHECK: fmla.s s0, s1, v2[1]
- %rhs = extractelement <2 x float> %rvec, i32 1
- %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
- ret float %res
-}
-
-define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
-; CHECK-LABEL: scalar_fmls_from_extract_v4f32:
-; CHECK: fmls.s s0, s1, v2[3]
- %rhs.scal = extractelement <4 x float> %rvec, i32 3
- %rhs = fsub float -0.0, %rhs.scal
- %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
- ret float %res
-}
-
-define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
-; CHECK-LABEL: scalar_fmls_from_extract_v2f32:
-; CHECK: fmls.s s0, s1, v2[1]
- %rhs.scal = extractelement <2 x float> %rvec, i32 1
- %rhs = fsub float -0.0, %rhs.scal
- %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
- ret float %res
-}
-
-declare float @llvm.fma.f32(float, float, float)
-
-define double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
-; CHECK-LABEL: scalar_fmla_from_extract_v2f64:
-; CHECK: fmla.d d0, d1, v2[1]
- %rhs = extractelement <2 x double> %rvec, i32 1
- %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum)
- ret double %res
-}
-
-define double @scalar_fmls_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
-; CHECK-LABEL: scalar_fmls_from_extract_v2f64:
-; CHECK: fmls.d d0, d1, v2[1]
- %rhs.scal = extractelement <2 x double> %rvec, i32 1
- %rhs = fsub double -0.0, %rhs.scal
- %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum)
- ret double %res
-}
-
-declare double @llvm.fma.f64(double, double, double)
-
-define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x float> %lhs, <4 x float> %rhs) {
-; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32:
-; CHECK: fmls.2s v0, v1, v2[3]
- %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
- %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <2 x i32> <i32 3, i32 3>
- %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
- ret <2 x float> %res
-}
-
-define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
-; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1:
-; CHECK: fmls.2s v0, v1, v2[1]
- %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
- %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <2 x i32> <i32 1, i32 1>
- %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
- ret <2 x float> %res
-}
-
-define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) {
-; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32:
-; CHECK: fmls.4s v0, v1, v2[3]
- %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
- %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
- ret <4 x float> %res
-}
-
-define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
-; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1:
-; CHECK: fmls.4s v0, v1, v2[1]
- %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
- %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
- ret <4 x float> %res
-}
-
-define <2 x double> @fmls_with_fneg_before_extract_v2f64(<2 x double> %accum, <2 x double> %lhs, <2 x double> %rhs) {
-; CHECK-LABEL: fmls_with_fneg_before_extract_v2f64:
-; CHECK: fmls.2d v0, v1, v2[1]
- %rhs_neg = fsub <2 x double> <double -0.0, double -0.0>, %rhs
- %splat = shufflevector <2 x double> %rhs_neg, <2 x double> undef, <2 x i32> <i32 1, i32 1>
- %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %lhs, <2 x double> %splat, <2 x double> %accum)
- ret <2 x double> %res
-}
-
-define <1 x double> @test_fmul_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
-; CHECK-LABEL: test_fmul_v1f64:
-; CHECK: fmul
- %prod = fmul <1 x double> %L, %R
- ret <1 x double> %prod
-}
-
-define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind {
-; CHECK-LABEL: test_fdiv_v1f64:
-; CHECK: fdiv
- %quot = fdiv <1 x double> %L, %R
- ret <1 x double> %quot
-}
-
-define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
-;CHECK-LABEL: sqdmlal_d:
-;CHECK: sqdmlal
- %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B)
- %tmp5 = call i64 @llvm.arm64.neon.sqadd.i64(i64 %C, i64 %tmp4)
- ret i64 %tmp5
-}
-
-define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
-;CHECK-LABEL: sqdmlsl_d:
-;CHECK: sqdmlsl
- %tmp4 = call i64 @llvm.arm64.neon.sqdmulls.scalar(i32 %A, i32 %B)
- %tmp5 = call i64 @llvm.arm64.neon.sqsub.i64(i64 %C, i64 %tmp4)
- ret i64 %tmp5
-}
-
-define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: test_pmull_64:
-; CHECK: pmull.1q
- %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l, i64 %r)
- ret <16 x i8> %val
-}
-
-define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
-; CHECK-LABEL: test_pmull_high_64:
-; CHECK: pmull2.1q
- %l_hi = extractelement <2 x i64> %l, i32 1
- %r_hi = extractelement <2 x i64> %r, i32 1
- %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l_hi, i64 %r_hi)
- ret <16 x i8> %val
-}
-
-declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64)
diff --git a/test/CodeGen/ARM64/vqadd.ll b/test/CodeGen/ARM64/vqadd.ll
deleted file mode 100644
index 0b7f7e5..0000000
--- a/test/CodeGen/ARM64/vqadd.ll
+++ /dev/null
@@ -1,332 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sqadd8b:
-;CHECK: sqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqadd4h:
-;CHECK: sqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqadd2s:
-;CHECK: sqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uqadd8b:
-;CHECK: uqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uqadd4h:
-;CHECK: uqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uqadd2s:
-;CHECK: uqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sqadd16b:
-;CHECK: sqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqadd8h:
-;CHECK: sqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqadd4s:
-;CHECK: sqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: sqadd2d:
-;CHECK: sqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uqadd16b:
-;CHECK: uqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uqadd8h:
-;CHECK: uqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uqadd4s:
-;CHECK: uqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: uqadd2d:
-;CHECK: uqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: usqadd8b:
-;CHECK: usqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: usqadd4h:
-;CHECK: usqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: usqadd2s:
-;CHECK: usqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: usqadd16b:
-;CHECK: usqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: usqadd8h:
-;CHECK: usqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: usqadd4s:
-;CHECK: usqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: usqadd2d:
-;CHECK: usqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: usqadd_d:
-; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
- %sum = call i64 @llvm.arm64.neon.usqadd.i64(i64 %l, i64 %r)
- ret i64 %sum
-}
-
-define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
-; CHECK-LABEL: usqadd_s:
-; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
- %sum = call i32 @llvm.arm64.neon.usqadd.i32(i32 %l, i32 %r)
- ret i32 %sum
-}
-
-declare <8 x i8> @llvm.arm64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-declare i64 @llvm.arm64.neon.usqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.usqadd.i32(i32, i32) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: suqadd8b:
-;CHECK: suqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: suqadd4h:
-;CHECK: suqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: suqadd2s:
-;CHECK: suqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: suqadd16b:
-;CHECK: suqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: suqadd8h:
-;CHECK: suqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: suqadd4s:
-;CHECK: suqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: suqadd2d:
-;CHECK: suqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
-; CHECK-LABEL: suqadd_1d:
-; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
- %sum = call <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
- ret <1 x i64> %sum
-}
-
-define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: suqadd_d:
-; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
- %sum = call i64 @llvm.arm64.neon.suqadd.i64(i64 %l, i64 %r)
- ret i64 %sum
-}
-
-define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
-; CHECK-LABEL: suqadd_s:
-; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
- %sum = call i32 @llvm.arm64.neon.suqadd.i32(i32 %l, i32 %r)
- ret i32 %sum
-}
-
-declare <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-declare i64 @llvm.arm64.neon.suqadd.i64(i64, i64) nounwind readnone
-declare i32 @llvm.arm64.neon.suqadd.i32(i32, i32) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vqsub.ll b/test/CodeGen/ARM64/vqsub.ll
deleted file mode 100644
index 0afeb68..0000000
--- a/test/CodeGen/ARM64/vqsub.ll
+++ /dev/null
@@ -1,147 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sqsub8b:
-;CHECK: sqsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqsub4h:
-;CHECK: sqsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqsub2s:
-;CHECK: sqsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uqsub8b:
-;CHECK: uqsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uqsub4h:
-;CHECK: uqsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uqsub2s:
-;CHECK: uqsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sqsub16b:
-;CHECK: sqsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqsub8h:
-;CHECK: sqsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqsub4s:
-;CHECK: sqsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: sqsub2d:
-;CHECK: sqsub.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uqsub16b:
-;CHECK: uqsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uqsub8h:
-;CHECK: uqsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uqsub4s:
-;CHECK: uqsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: uqsub2d:
-;CHECK: uqsub.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.uqsub.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqsub.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqsub.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqsub.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqsub.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqsub.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqsub.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqsub.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/CodeGen/ARM64/vselect.ll b/test/CodeGen/ARM64/vselect.ll
deleted file mode 100644
index 07274a0..0000000
--- a/test/CodeGen/ARM64/vselect.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-;CHECK: @func63
-;CHECK: cmeq.4h v0, v0, v1
-;CHECK: sshll.4s v0, v0, #0
-;CHECK: bsl.16b v0, v2, v3
-;CHECK: str q0, [x0]
-;CHECK: ret
-
-%T0_63 = type <4 x i16>
-%T1_63 = type <4 x i32>
-%T2_63 = type <4 x i1>
-define void @func63(%T1_63* %out, %T0_63 %v0, %T0_63 %v1, %T1_63 %v2, %T1_63 %v3) {
- %cond = icmp eq %T0_63 %v0, %v1
- %r = select %T2_63 %cond, %T1_63 %v2, %T1_63 %v3
- store %T1_63 %r, %T1_63* %out
- ret void
-}
diff --git a/test/CodeGen/ARM64/vsetcc_fp.ll b/test/CodeGen/ARM64/vsetcc_fp.ll
deleted file mode 100644
index c93aad5..0000000
--- a/test/CodeGen/ARM64/vsetcc_fp.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -asm-verbose=false | FileCheck %s
-define <2 x i32> @fcmp_one(<2 x float> %x, <2 x float> %y) nounwind optsize readnone {
-; CHECK-LABEL: fcmp_one:
-; CHECK-NEXT: fcmgt.2s [[REG:v[0-9]+]], v0, v1
-; CHECK-NEXT: fcmgt.2s [[REG2:v[0-9]+]], v1, v0
-; CHECK-NEXT: orr.8b v0, [[REG2]], [[REG]]
-; CHECK-NEXT: ret
- %tmp = fcmp one <2 x float> %x, %y
- %or = sext <2 x i1> %tmp to <2 x i32>
- ret <2 x i32> %or
-}
diff --git a/test/CodeGen/ARM64/vshift.ll b/test/CodeGen/ARM64/vshift.ll
deleted file mode 100644
index ae5da38..0000000
--- a/test/CodeGen/ARM64/vshift.ll
+++ /dev/null
@@ -1,1909 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -enable-misched=false | FileCheck %s
-
-define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sqshl8b:
-;CHECK: sqshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqshl4h:
-;CHECK: sqshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqshl2s:
-;CHECK: sqshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uqshl8b:
-;CHECK: uqshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uqshl4h:
-;CHECK: uqshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uqshl2s:
-;CHECK: uqshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sqshl16b:
-;CHECK: sqshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqshl8h:
-;CHECK: sqshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqshl4s:
-;CHECK: sqshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: sqshl2d:
-;CHECK: sqshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uqshl16b:
-;CHECK: uqshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uqshl8h:
-;CHECK: uqshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uqshl4s:
-;CHECK: uqshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: uqshl2d:
-;CHECK: uqshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: srshl8b:
-;CHECK: srshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: srshl4h:
-;CHECK: srshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: srshl2s:
-;CHECK: srshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: urshl8b:
-;CHECK: urshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: urshl4h:
-;CHECK: urshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: urshl2s:
-;CHECK: urshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: srshl16b:
-;CHECK: srshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: srshl8h:
-;CHECK: srshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: srshl4s:
-;CHECK: srshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: srshl2d:
-;CHECK: srshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: urshl16b:
-;CHECK: urshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: urshl8h:
-;CHECK: urshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: urshl4s:
-;CHECK: urshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: urshl2d:
-;CHECK: urshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sqrshl8b:
-;CHECK: sqrshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sqrshl4h:
-;CHECK: sqrshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sqrshl2s:
-;CHECK: sqrshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: uqrshl8b:
-;CHECK: uqrshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: uqrshl4h:
-;CHECK: uqrshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: uqrshl2s:
-;CHECK: uqrshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sqrshl16b:
-;CHECK: sqrshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sqrshl8h:
-;CHECK: sqrshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sqrshl4s:
-;CHECK: sqrshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: sqrshl2d:
-;CHECK: sqrshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: uqrshl16b:
-;CHECK: uqrshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: uqrshl8h:
-;CHECK: uqrshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: uqrshl4s:
-;CHECK: uqrshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: uqrshl2d:
-;CHECK: uqrshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: urshr8b:
-;CHECK: urshr.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: urshr4h:
-;CHECK: urshr.4h
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: urshr2s:
-;CHECK: urshr.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: urshr16b:
-;CHECK: urshr.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: urshr8h:
-;CHECK: urshr.8h
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: urshr4s:
-;CHECK: urshr.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: urshr2d:
-;CHECK: urshr.2d
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: srshr8b:
-;CHECK: srshr.8b
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: srshr4h:
-;CHECK: srshr.4h
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: srshr2s:
-;CHECK: srshr.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: srshr16b:
-;CHECK: srshr.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: srshr8h:
-;CHECK: srshr.8h
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: srshr4s:
-;CHECK: srshr.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: srshr2d:
-;CHECK: srshr.2d
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: sqshlu8b:
-;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshlu4h:
-;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshlu2s:
-;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: sqshlu16b:
-;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshlu8h:
-;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshlu4s:
-;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqshlu2d:
-;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: rshrn8b:
-;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: rshrn4h:
-;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: rshrn2s:
-;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @rshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: rshrn16b:
-;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: rshrn8h:
-;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: rshrn4s:
-;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare <8 x i8> @llvm.arm64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
-
-define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: shrn8b:
-;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: shrn4h:
-;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: shrn2s:
-;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: shrn16b:
-;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: shrn8h:
-;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: shrn4s:
-;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare <8 x i8> @llvm.arm64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
-
-define i32 @sqshrn1s(i64 %A) nounwind {
-; CHECK-LABEL: sqshrn1s:
-; CHECK: sqshrn {{s[0-9]+}}, d0, #1
- %tmp = call i32 @llvm.arm64.neon.sqshrn.i32(i64 %A, i32 1)
- ret i32 %tmp
-}
-
-define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshrn8b:
-;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshrn4h:
-;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqshrn2s:
-;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshrn16b:
-;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshrn8h:
-;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqshrn4s:
-;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare i32 @llvm.arm64.neon.sqshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
-
-define i32 @sqshrun1s(i64 %A) nounwind {
-; CHECK-LABEL: sqshrun1s:
-; CHECK: sqshrun {{s[0-9]+}}, d0, #1
- %tmp = call i32 @llvm.arm64.neon.sqshrun.i32(i64 %A, i32 1)
- ret i32 %tmp
-}
-
-define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshrun8b:
-;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshrun4h:
-;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqshrun2s:
-;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshrun16b:
-;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshrun8h:
-;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqshrun4s:
-;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare i32 @llvm.arm64.neon.sqshrun.i32(i64, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
-
-define i32 @sqrshrn1s(i64 %A) nounwind {
-; CHECK-LABEL: sqrshrn1s:
-; CHECK: sqrshrn {{s[0-9]+}}, d0, #1
- %tmp = call i32 @llvm.arm64.neon.sqrshrn.i32(i64 %A, i32 1)
- ret i32 %tmp
-}
-
-define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqrshrn8b:
-;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqrshrn4h:
-;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqrshrn2s:
-;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqrshrn16b:
-;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqrshrn8h:
-;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqrshrn4s:
-;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare i32 @llvm.arm64.neon.sqrshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
-
-define i32 @sqrshrun1s(i64 %A) nounwind {
-; CHECK-LABEL: sqrshrun1s:
-; CHECK: sqrshrun {{s[0-9]+}}, d0, #1
- %tmp = call i32 @llvm.arm64.neon.sqrshrun.i32(i64 %A, i32 1)
- ret i32 %tmp
-}
-
-define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqrshrun8b:
-;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqrshrun4h:
-;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqrshrun2s:
-;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqrshrun16b:
-;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqrshrun8h:
-;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqrshrun4s:
-;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare i32 @llvm.arm64.neon.sqrshrun.i32(i64, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
-
-define i32 @uqrshrn1s(i64 %A) nounwind {
-; CHECK-LABEL: uqrshrn1s:
-; CHECK: uqrshrn {{s[0-9]+}}, d0, #1
- %tmp = call i32 @llvm.arm64.neon.uqrshrn.i32(i64 %A, i32 1)
- ret i32 %tmp
-}
-
-define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: uqrshrn8b:
-;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: uqrshrn4h:
-;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: uqrshrn2s:
-;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: uqrshrn16b:
-;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: uqrshrn8h:
-;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: uqrshrn4s:
-;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare i32 @llvm.arm64.neon.uqrshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
-
-define i32 @uqshrn1s(i64 %A) nounwind {
-; CHECK-LABEL: uqshrn1s:
-; CHECK: uqshrn {{s[0-9]+}}, d0, #1
- %tmp = call i32 @llvm.arm64.neon.uqshrn.i32(i64 %A, i32 1)
- ret i32 %tmp
-}
-
-define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: uqshrn8b:
-;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: uqshrn4h:
-;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: uqshrn2s:
-;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
-;CHECK-LABEL: uqshrn16b:
-;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
- %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
-;CHECK-LABEL: uqshrn8h:
-;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
- %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
-;CHECK-LABEL: uqshrn4s:
-;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
- %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %tmp4
-}
-
-declare i32 @llvm.arm64.neon.uqshrn.i32(i64, i32) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
-
-define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: ushll8h:
-;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
- %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: ushll4s:
-;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
- %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: ushll2d:
-;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
- %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: ushll2_8h:
-;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
- %load1 = load <16 x i8>* %A
- %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
- %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: ushll2_4s:
-;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
- %load1 = load <8 x i16>* %A
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
- %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: ushll2_2d:
-;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
- %load1 = load <4 x i32>* %A
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
- %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: sshll8h:
-;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
- %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: sshll4s:
-;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
- %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: sshll2d:
-;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
- %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
- ret <2 x i64> %tmp3
-}
-
-define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: sshll2_8h:
-;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
- %load1 = load <16 x i8>* %A
- %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
- %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sshll2_4s:
-;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
- %load1 = load <8 x i16>* %A
- %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
- %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sshll2_2d:
-;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
- %load1 = load <4 x i32>* %A
- %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
- %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: sqshli8b:
-;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshli4h:
-;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshli2s:
-;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: sqshli16b:
-;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: sqshli8h:
-;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: sqshli4s:
-;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: sqshli2d:
-;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
-;CHECK-LABEL: uqshli8b:
-;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
-;CHECK-LABEL: uqshli4h:
-;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: uqshli2s:
-;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
-;CHECK-LABEL: uqshli16b:
-;CHECK: uqshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
-;CHECK-LABEL: uqshli8h:
-;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: uqshli4s:
-;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
-;CHECK-LABEL: uqshli2d:
-;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
- ret <2 x i64> %tmp3
-}
-
-define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: ursra8b:
-;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <8 x i8>* %B
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: ursra4h:
-;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <4 x i16>* %B
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: ursra2s:
-;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
- %tmp4 = load <2 x i32>* %B
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: ursra16b:
-;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <16 x i8>* %B
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: ursra8h:
-;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <8 x i16>* %B
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: ursra4s:
-;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
- %tmp4 = load <4 x i32>* %B
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: ursra2d:
-;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
- %tmp4 = load <2 x i64>* %B
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: srsra8b:
-;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = call <8 x i8> @llvm.arm64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <8 x i8>* %B
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: srsra4h:
-;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = call <4 x i16> @llvm.arm64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <4 x i16>* %B
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: srsra2s:
-;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
- %tmp4 = load <2 x i32>* %B
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: srsra16b:
-;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp3 = call <16 x i8> @llvm.arm64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <16 x i8>* %B
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: srsra8h:
-;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = call <8 x i16> @llvm.arm64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <8 x i16>* %B
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: srsra4s:
-;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
- %tmp4 = load <4 x i32>* %B
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: srsra2d:
-;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = call <2 x i64> @llvm.arm64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
- %tmp4 = load <2 x i64>* %B
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: usra8b:
-;CHECK: usra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <8 x i8>* %B
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: usra4h:
-;CHECK: usra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <4 x i16>* %B
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: usra2s:
-;CHECK: usra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
- %tmp4 = load <2 x i32>* %B
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: usra16b:
-;CHECK: usra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <16 x i8>* %B
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: usra8h:
-;CHECK: usra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <8 x i16>* %B
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: usra4s:
-;CHECK: usra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = load <4 x i32>* %B
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: usra2d:
-;CHECK: usra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp4 = load <2 x i64>* %B
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: ssra8b:
-;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <8 x i8>* %B
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: ssra4h:
-;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <4 x i16>* %B
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: ssra2s:
-;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
- %tmp4 = load <2 x i32>* %B
- %tmp5 = add <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: ssra16b:
-;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <16 x i8>* %B
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: ssra8h:
-;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <8 x i16>* %B
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: ssra4s:
-;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = load <4 x i32>* %B
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: ssra2d:
-;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp4 = load <2 x i64>* %B
- %tmp5 = add <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: shr_orr8b:
-;CHECK: shr.8b v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.8b
-;CHECK-NEXT: ret
- %tmp1 = load <8 x i8>* %A
- %tmp4 = load <8 x i8>* %B
- %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp5 = or <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: shr_orr4h:
-;CHECK: shr.4h v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.8b
-;CHECK-NEXT: ret
- %tmp1 = load <4 x i16>* %A
- %tmp4 = load <4 x i16>* %B
- %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
- %tmp5 = or <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: shr_orr2s:
-;CHECK: shr.2s v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.8b
-;CHECK-NEXT: ret
- %tmp1 = load <2 x i32>* %A
- %tmp4 = load <2 x i32>* %B
- %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
- %tmp5 = or <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: shr_orr16b:
-;CHECK: shr.16b v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <16 x i8>* %A
- %tmp4 = load <16 x i8>* %B
- %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp5 = or <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: shr_orr8h:
-;CHECK: shr.8h v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <8 x i16>* %A
- %tmp4 = load <8 x i16>* %B
- %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp5 = or <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: shr_orr4s:
-;CHECK: shr.4s v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <4 x i32>* %A
- %tmp4 = load <4 x i32>* %B
- %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = or <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: shr_orr2d:
-;CHECK: shr.2d v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <2 x i64>* %A
- %tmp4 = load <2 x i64>* %B
- %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp5 = or <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: shl_orr8b:
-;CHECK: shl.8b v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.8b
-;CHECK-NEXT: ret
- %tmp1 = load <8 x i8>* %A
- %tmp4 = load <8 x i8>* %B
- %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp5 = or <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: shl_orr4h:
-;CHECK: shl.4h v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.8b
-;CHECK-NEXT: ret
- %tmp1 = load <4 x i16>* %A
- %tmp4 = load <4 x i16>* %B
- %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
- %tmp5 = or <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: shl_orr2s:
-;CHECK: shl.2s v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.8b
-;CHECK-NEXT: ret
- %tmp1 = load <2 x i32>* %A
- %tmp4 = load <2 x i32>* %B
- %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
- %tmp5 = or <2 x i32> %tmp3, %tmp4
- ret <2 x i32> %tmp5
-}
-
-define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: shl_orr16b:
-;CHECK: shl.16b v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <16 x i8>* %A
- %tmp4 = load <16 x i8>* %B
- %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp5 = or <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: shl_orr8h:
-;CHECK: shl.8h v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <8 x i16>* %A
- %tmp4 = load <8 x i16>* %B
- %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp5 = or <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: shl_orr4s:
-;CHECK: shl.4s v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <4 x i32>* %A
- %tmp4 = load <4 x i32>* %B
- %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp5 = or <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: shl_orr2d:
-;CHECK: shl.2d v0, {{v[0-9]+}}, #1
-;CHECK-NEXT: orr.16b
-;CHECK-NEXT: ret
- %tmp1 = load <2 x i64>* %A
- %tmp4 = load <2 x i64>* %B
- %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp5 = or <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i16> @shll(<8 x i8> %in) {
-; CHECK-LABEL: shll:
-; CHECK: shll.8h v0, {{v[0-9]+}}, #8
- %ext = zext <8 x i8> %in to <8 x i16>
- %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @shll_high(<8 x i16> %in) {
-; CHECK-LABEL: shll_high:
-; CHECK: shll2.4s v0, {{v[0-9]+}}, #16
- %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext = zext <4 x i16> %extract to <4 x i32>
- %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
- ret <4 x i32> %res
-}
-
-define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: sli8b:
-;CHECK: sli.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: sli4h:
-;CHECK: sli.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: sli2s:
-;CHECK: sli.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
- ret <2 x i32> %tmp3
-}
-
-define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-;CHECK-LABEL: sli1d:
-;CHECK: sli d0, {{d[0-9]+}}, #1
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = call <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
- ret <1 x i64> %tmp3
-}
-
-define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: sli16b:
-;CHECK: sli.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = call <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
- ret <16 x i8> %tmp3
-}
-
-define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: sli8h:
-;CHECK: sli.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
- ret <8 x i16> %tmp3
-}
-
-define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: sli4s:
-;CHECK: sli.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
- ret <4 x i32> %tmp3
-}
-
-define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: sli2d:
-;CHECK: sli.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
- ret <2 x i64> %tmp3
-}
-
-declare <8 x i8> @llvm.arm64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
-declare <2 x i32> @llvm.arm64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
-declare <1 x i64> @llvm.arm64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
-
-declare <16 x i8> @llvm.arm64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
-declare <8 x i16> @llvm.arm64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
-declare <2 x i64> @llvm.arm64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
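Editorial aside (not part of the patch): the vsli tests above exercise SLI (shift left and insert), which shifts each lane of the second operand left and inserts it into the destination while preserving the destination bits below the shift amount. Per lane, d = (n << s) | (d & ((1 << s) - 1)); e.g. with s = 1, n = 0b0110, d = 0b1001: (0b0110 << 1) | (0b1001 & 0b1) = 0b1101.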
diff --git a/test/CodeGen/ARM64/vshr.ll b/test/CodeGen/ARM64/vshr.ll
deleted file mode 100644
index 6adb81c..0000000
--- a/test/CodeGen/ARM64/vshr.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: llc -march=arm64 -arm64-neon-syntax=apple < %s | FileCheck %s
-
-define <8 x i16> @testShiftRightArith_v8i16(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-LABEL: testShiftRightArith_v8i16:
-; CHECK: neg.8h [[REG1:v[0-9]+]], [[REG1]]
-; CHECK-NEXT: sshl.8h [[REG2:v[0-9]+]], [[REG2]], [[REG1]]
-
-entry:
- %a.addr = alloca <8 x i16>, align 16
- %b.addr = alloca <8 x i16>, align 16
- store <8 x i16> %a, <8 x i16>* %a.addr, align 16
- store <8 x i16> %b, <8 x i16>* %b.addr, align 16
- %0 = load <8 x i16>* %a.addr, align 16
- %1 = load <8 x i16>* %b.addr, align 16
- %shr = ashr <8 x i16> %0, %1
- ret <8 x i16> %shr
-}
-
-define <4 x i32> @testShiftRightArith_v4i32(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK-LABEL: testShiftRightArith_v4i32:
-; CHECK: neg.4s [[REG3:v[0-9]+]], [[REG3]]
-; CHECK-NEXT: sshl.4s [[REG4:v[0-9]+]], [[REG4]], [[REG3]]
-entry:
- %a.addr = alloca <4 x i32>, align 32
- %b.addr = alloca <4 x i32>, align 32
- store <4 x i32> %a, <4 x i32>* %a.addr, align 32
- store <4 x i32> %b, <4 x i32>* %b.addr, align 32
- %0 = load <4 x i32>* %a.addr, align 32
- %1 = load <4 x i32>* %b.addr, align 32
- %shr = ashr <4 x i32> %0, %1
- ret <4 x i32> %shr
-}
-
-define <8 x i16> @testShiftRightLogical(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-LABEL: testShiftRightLogical:
-; CHECK: neg.8h [[REG5:v[0-9]+]], [[REG5]]
-; CHECK-NEXT: ushl.8h [[REG6:v[0-9]+]], [[REG6]], [[REG5]]
-entry:
- %a.addr = alloca <8 x i16>, align 16
- %b.addr = alloca <8 x i16>, align 16
- store <8 x i16> %a, <8 x i16>* %a.addr, align 16
- store <8 x i16> %b, <8 x i16>* %b.addr, align 16
- %0 = load <8 x i16>* %a.addr, align 16
- %1 = load <8 x i16>* %b.addr, align 16
- %shr = lshr <8 x i16> %0, %1
- ret <8 x i16> %shr
-}
-
-define <1 x i64> @sshr_v1i64(<1 x i64> %A) nounwind {
-; CHECK-LABEL: sshr_v1i64:
-; CHECK: sshr d0, d0, #63
- %tmp3 = ashr <1 x i64> %A, < i64 63 >
- ret <1 x i64> %tmp3
-}
-
-define <1 x i64> @ushr_v1i64(<1 x i64> %A) nounwind {
-; CHECK-LABEL: ushr_v1i64:
-; CHECK: ushr d0, d0, #63
- %tmp3 = lshr <1 x i64> %A, < i64 63 >
- ret <1 x i64> %tmp3
-}
-
-attributes #0 = { nounwind }
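Editorial aside (not part of the patch): the neg + sshl/ushl pairs in the CHECK lines above reflect that AArch64 has no vector shift-right-by-register instruction; SSHL and USHL shift left for positive per-lane amounts and right for negative ones, so a variable ashr of x by y is emitted as sshl of x by -y (and lshr as ushl by -y).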
diff --git a/test/CodeGen/ARM64/vshuffle.ll b/test/CodeGen/ARM64/vshuffle.ll
deleted file mode 100644
index f90200c..0000000
--- a/test/CodeGen/ARM64/vshuffle.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
-
-
-; The mask:
-; CHECK: lCPI0_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; The second vector is legalized to undef and the elements of the first vector
-; are used instead.
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 4 ; 0x4
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 0 ; 0x0
-; CHECK: test1
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
-; CHECK: movi.8h v[[REG1:[0-9]+]], #1, lsl #8
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
-define <8 x i1> @test1() {
-entry:
- %Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
- i1 7>,
- <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
- i1 7>,
- <8 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10,
- i32 12, i32 14, i32 0>
- ret <8 x i1> %Shuff
-}
-
-; CHECK: lCPI1_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: test2
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
-; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1@PAGE
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1@PAGEOFF]
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
-define <8 x i1>@test2() {
-bb:
- %Shuff = shufflevector <8 x i1> zeroinitializer,
- <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>,
- <8 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14,
- i32 0>
- ret <8 x i1> %Shuff
-}
-
-; CHECK: lCPI2_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: test3
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0@PAGE
-; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0@PAGEOFF]
-; CHECK: movi.2d v[[REG1:[0-9]+]], #0000000000000000
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
-define <16 x i1> @test3(i1* %ptr, i32 %v) {
-bb:
- %Shuff = shufflevector <16 x i1> zeroinitializer, <16 x i1> undef,
- <16 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12, i32 14,
- i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 10, i32 12,
- i32 14, i32 0>
- ret <16 x i1> %Shuff
-}
-; CHECK: lCPI3_1:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 1 ; 0x1
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 18 ; 0x12
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 31 ; 0x1f
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 30 ; 0x1e
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: _test4:
-; CHECK: ldr q[[REG1:[0-9]+]]
-; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1@PAGE
-; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1@PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
-define <16 x i1> @test4(i1* %ptr, i32 %v) {
-bb:
- %Shuff = shufflevector <16 x i1> zeroinitializer,
- <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1,
- i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>,
- <16 x i32> <i32 2, i32 1, i32 6, i32 18, i32 10, i32 12, i32 14, i32 0,
- i32 2, i32 31, i32 6, i32 30, i32 10, i32 12, i32 14, i32 0>
- ret <16 x i1> %Shuff
-}
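Editorial aside (not part of the patch; the function name below is hypothetical): the .byte 255 entries in the constant-pool masks above come from undef shuffle indices — TBL returns 0 for any control byte outside the table, so those lanes are don't-care. A minimal single-source shuffle of the kind that plausibly lowers through a literal-pool mask and TBL, assuming the same arm64-apple-ios7.0 triple as the deleted test:

define <8 x i8> @tbl_sketch(<8 x i8> %a) {
  ; irregular byte permutation; the two undef lanes may become 0xff mask bytes
  %s = shufflevector <8 x i8> %a, <8 x i8> undef,
                     <8 x i32> <i32 2, i32 undef, i32 6, i32 undef,
                                i32 2, i32 4, i32 6, i32 0>
  ret <8 x i8> %s
}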
diff --git a/test/CodeGen/ARM64/vsqrt.ll b/test/CodeGen/ARM64/vsqrt.ll
deleted file mode 100644
index 094d704..0000000
--- a/test/CodeGen/ARM64/vsqrt.ll
+++ /dev/null
@@ -1,232 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: frecps_2s:
-;CHECK: frecps.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: frecps_4s:
-;CHECK: frecps.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: frecps_2d:
-;CHECK: frecps.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-
-define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
-;CHECK-LABEL: frsqrts_2s:
-;CHECK: frsqrts.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: frsqrts_4s:
-;CHECK: frsqrts.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
-;CHECK-LABEL: frsqrts_2d:
-;CHECK: frsqrts.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = call <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
- ret <2 x double> %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
-;CHECK-LABEL: frecpe_2s:
-;CHECK: frecpe.2s
- %tmp1 = load <2 x float>* %A
- %tmp3 = call <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float> %tmp1)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
-;CHECK-LABEL: frecpe_4s:
-;CHECK: frecpe.4s
- %tmp1 = load <4 x float>* %A
- %tmp3 = call <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float> %tmp1)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
-;CHECK-LABEL: frecpe_2d:
-;CHECK: frecpe.2d
- %tmp1 = load <2 x double>* %A
- %tmp3 = call <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double> %tmp1)
- ret <2 x double> %tmp3
-}
-
-define float @frecpe_s(float* %A) nounwind {
-;CHECK-LABEL: frecpe_s:
-;CHECK: frecpe s0, {{s[0-9]+}}
- %tmp1 = load float* %A
- %tmp3 = call float @llvm.arm64.neon.frecpe.f32(float %tmp1)
- ret float %tmp3
-}
-
-define double @frecpe_d(double* %A) nounwind {
-;CHECK-LABEL: frecpe_d:
-;CHECK: frecpe d0, {{d[0-9]+}}
- %tmp1 = load double* %A
- %tmp3 = call double @llvm.arm64.neon.frecpe.f64(double %tmp1)
- ret double %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
-declare float @llvm.arm64.neon.frecpe.f32(float) nounwind readnone
-declare double @llvm.arm64.neon.frecpe.f64(double) nounwind readnone
-
-define float @frecpx_s(float* %A) nounwind {
-;CHECK-LABEL: frecpx_s:
-;CHECK: frecpx s0, {{s[0-9]+}}
- %tmp1 = load float* %A
- %tmp3 = call float @llvm.arm64.neon.frecpx.f32(float %tmp1)
- ret float %tmp3
-}
-
-define double @frecpx_d(double* %A) nounwind {
-;CHECK-LABEL: frecpx_d:
-;CHECK: frecpx d0, {{d[0-9]+}}
- %tmp1 = load double* %A
- %tmp3 = call double @llvm.arm64.neon.frecpx.f64(double %tmp1)
- ret double %tmp3
-}
-
-declare float @llvm.arm64.neon.frecpx.f32(float) nounwind readnone
-declare double @llvm.arm64.neon.frecpx.f64(double) nounwind readnone
-
-define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
-;CHECK-LABEL: frsqrte_2s:
-;CHECK: frsqrte.2s
- %tmp1 = load <2 x float>* %A
- %tmp3 = call <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float> %tmp1)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
-;CHECK-LABEL: frsqrte_4s:
-;CHECK: frsqrte.4s
- %tmp1 = load <4 x float>* %A
- %tmp3 = call <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float> %tmp1)
- ret <4 x float> %tmp3
-}
-
-define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
-;CHECK-LABEL: frsqrte_2d:
-;CHECK: frsqrte.2d
- %tmp1 = load <2 x double>* %A
- %tmp3 = call <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double> %tmp1)
- ret <2 x double> %tmp3
-}
-
-define float @frsqrte_s(float* %A) nounwind {
-;CHECK-LABEL: frsqrte_s:
-;CHECK: frsqrte s0, {{s[0-9]+}}
- %tmp1 = load float* %A
- %tmp3 = call float @llvm.arm64.neon.frsqrte.f32(float %tmp1)
- ret float %tmp3
-}
-
-define double @frsqrte_d(double* %A) nounwind {
-;CHECK-LABEL: frsqrte_d:
-;CHECK: frsqrte d0, {{d[0-9]+}}
- %tmp1 = load double* %A
- %tmp3 = call double @llvm.arm64.neon.frsqrte.f64(double %tmp1)
- ret double %tmp3
-}
-
-declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
-declare float @llvm.arm64.neon.frsqrte.f32(float) nounwind readnone
-declare double @llvm.arm64.neon.frsqrte.f64(double) nounwind readnone
-
-define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: urecpe_2s:
-;CHECK: urecpe.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: urecpe_4s:
-;CHECK: urecpe.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
-
-define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
-;CHECK-LABEL: ursqrte_2s:
-;CHECK: ursqrte.2s
- %tmp1 = load <2 x i32>* %A
- %tmp3 = call <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
- ret <2 x i32> %tmp3
-}
-
-define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind {
-;CHECK-LABEL: ursqrte_4s:
-;CHECK: ursqrte.4s
- %tmp1 = load <4 x i32>* %A
- %tmp3 = call <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
- ret <4 x i32> %tmp3
-}
-
-declare <2 x i32> @llvm.arm64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone
-
-define float @f1(float %a, float %b) nounwind readnone optsize ssp {
-; CHECK-LABEL: f1:
-; CHECK: frsqrts s0, s0, s1
-; CHECK-NEXT: ret
- %vrsqrtss.i = tail call float @llvm.arm64.neon.frsqrts.f32(float %a, float %b) nounwind
- ret float %vrsqrtss.i
-}
-
-define double @f2(double %a, double %b) nounwind readnone optsize ssp {
-; CHECK-LABEL: f2:
-; CHECK: frsqrts d0, d0, d1
-; CHECK-NEXT: ret
- %vrsqrtsd.i = tail call double @llvm.arm64.neon.frsqrts.f64(double %a, double %b) nounwind
- ret double %vrsqrtsd.i
-}
-
-declare double @llvm.arm64.neon.frsqrts.f64(double, double) nounwind readnone
-declare float @llvm.arm64.neon.frsqrts.f32(float, float) nounwind readnone
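Editorial aside (not part of the patch): frecpe/frsqrte produce initial estimates and frecps/frsqrts compute the matching Newton-Raphson step factors, frecps(a, x) = 2 - a*x and frsqrts(a, x) = (3 - a*x)/2, so a reciprocal is refined as x1 = x0 * frecps(a, x0) and a reciprocal square root as x1 = x0 * frsqrts(a, x0*x0); each iteration roughly doubles the number of correct bits.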
diff --git a/test/CodeGen/ARM64/vsra.ll b/test/CodeGen/ARM64/vsra.ll
deleted file mode 100644
index 3611eb3..0000000
--- a/test/CodeGen/ARM64/vsra.ll
+++ /dev/null
@@ -1,150 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vsras8:
-;CHECK: ssra.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
- %tmp4 = add <8 x i8> %tmp1, %tmp3
- ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vsras16:
-;CHECK: ssra.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
- %tmp4 = add <4 x i16> %tmp1, %tmp3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: vsras32:
-;CHECK: ssra.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
- %tmp4 = add <2 x i32> %tmp1, %tmp3
- ret <2 x i32> %tmp4
-}
-
-define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vsraQs8:
-;CHECK: ssra.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
- %tmp4 = add <16 x i8> %tmp1, %tmp3
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vsraQs16:
-;CHECK: ssra.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
- %tmp4 = add <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vsraQs32:
-;CHECK: ssra.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
- %tmp4 = add <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: vsraQs64:
-;CHECK: ssra.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
- %tmp4 = add <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
-
-define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vsrau8:
-;CHECK: usra.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
- %tmp4 = add <8 x i8> %tmp1, %tmp3
- ret <8 x i8> %tmp4
-}
-
-define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vsrau16:
-;CHECK: usra.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
- %tmp4 = add <4 x i16> %tmp1, %tmp3
- ret <4 x i16> %tmp4
-}
-
-define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: vsrau32:
-;CHECK: usra.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
- %tmp4 = add <2 x i32> %tmp1, %tmp3
- ret <2 x i32> %tmp4
-}
-
-
-define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vsraQu8:
-;CHECK: usra.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
- %tmp4 = add <16 x i8> %tmp1, %tmp3
- ret <16 x i8> %tmp4
-}
-
-define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vsraQu16:
-;CHECK: usra.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
- %tmp4 = add <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vsraQu32:
-;CHECK: usra.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
- %tmp4 = add <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: vsraQu64:
-;CHECK: usra.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
- %tmp4 = add <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
-
-define <1 x i64> @vsra_v1i64(<1 x i64> %A, <1 x i64> %B) nounwind {
-; CHECK-LABEL: vsra_v1i64:
-; CHECK: ssra d0, d1, #63
- %tmp3 = ashr <1 x i64> %B, < i64 63 >
- %tmp4 = add <1 x i64> %A, %tmp3
- ret <1 x i64> %tmp4
-}
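Editorial aside (not part of the patch): ssra/usra fuse the two IR operations above into one instruction; per lane the semantics are d += (n >> imm), with an arithmetic shift for ssra and a logical shift for usra, which is why every test pairs an ashr or lshr with an add.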
diff --git a/test/CodeGen/ARM64/vsub.ll b/test/CodeGen/ARM64/vsub.ll
deleted file mode 100644
index 5c7e84f..0000000
--- a/test/CodeGen/ARM64/vsub.ll
+++ /dev/null
@@ -1,417 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: subhn8b:
-;CHECK: subhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: subhn4h:
-;CHECK: subhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: subhn2s:
-;CHECK: subhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @subhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
-;CHECK-LABEL: subhn2_16b:
-;CHECK: subhn.8b
-;CHECK-NEXT: subhn2.16b
- %vsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %vsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %res = shufflevector <8 x i8> %vsubhn2.i, <8 x i8> %vsubhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @subhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
-;CHECK-LABEL: subhn2_8h:
-;CHECK: subhn.4h
-;CHECK-NEXT: subhn2.8h
- %vsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %vsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %res = shufflevector <4 x i16> %vsubhn2.i, <4 x i16> %vsubhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @subhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
-;CHECK-LABEL: subhn2_4s:
-;CHECK: subhn.2s
-;CHECK-NEXT: subhn2.4s
- %vsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %vsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %res = shufflevector <2 x i32> %vsubhn2.i, <2 x i32> %vsubhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <2 x i32> @llvm.arm64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: rsubhn8b:
-;CHECK: rsubhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
- ret <8 x i8> %tmp3
-}
-
-define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: rsubhn4h:
-;CHECK: rsubhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
- ret <4 x i16> %tmp3
-}
-
-define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
-;CHECK-LABEL: rsubhn2s:
-;CHECK: rsubhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
- ret <2 x i32> %tmp3
-}
-
-define <16 x i8> @rsubhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
-;CHECK-LABEL: rsubhn2_16b:
-;CHECK: rsubhn.8b
-;CHECK-NEXT: rsubhn2.16b
- %vrsubhn2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %vrsubhn_high2.i = tail call <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
- %res = shufflevector <8 x i8> %vrsubhn2.i, <8 x i8> %vrsubhn_high2.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i8> %res
-}
-
-define <8 x i16> @rsubhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
-;CHECK-LABEL: rsubhn2_8h:
-;CHECK: rsubhn.4h
-;CHECK-NEXT: rsubhn2.8h
- %vrsubhn2.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %vrsubhn_high3.i = tail call <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
- %res = shufflevector <4 x i16> %vrsubhn2.i, <4 x i16> %vrsubhn_high3.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-
-define <4 x i32> @rsubhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
-;CHECK-LABEL: rsubhn2_4s:
-;CHECK: rsubhn.2s
-;CHECK-NEXT: rsubhn2.4s
- %vrsubhn2.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %vrsubhn_high3.i = tail call <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
- %res = shufflevector <2 x i32> %vrsubhn2.i, <2 x i32> %vrsubhn_high3.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret <4 x i32> %res
-}
-
-declare <2 x i32> @llvm.arm64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-declare <4 x i16> @llvm.arm64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: ssubl8h:
-;CHECK: ssubl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
- %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
- %tmp5 = sub <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @ssubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: ssubl4s:
-;CHECK: ssubl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
- %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
- %tmp5 = sub <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: ssubl2d:
-;CHECK: ssubl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
- %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
- %tmp5 = sub <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: ssubl2_8h:
-;CHECK: ssubl2.8h
- %tmp1 = load <16 x i8>* %A
- %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext1 = sext <8 x i8> %high1 to <8 x i16>
-
- %tmp2 = load <16 x i8>* %B
- %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext2 = sext <8 x i8> %high2 to <8 x i16>
-
- %res = sub <8 x i16> %ext1, %ext2
- ret <8 x i16> %res
-}
-
-define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: ssubl2_4s:
-;CHECK: ssubl2.4s
- %tmp1 = load <8 x i16>* %A
- %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext1 = sext <4 x i16> %high1 to <4 x i32>
-
- %tmp2 = load <8 x i16>* %B
- %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext2 = sext <4 x i16> %high2 to <4 x i32>
-
- %res = sub <4 x i32> %ext1, %ext2
- ret <4 x i32> %res
-}
-
-define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: ssubl2_2d:
-;CHECK: ssubl2.2d
- %tmp1 = load <4 x i32>* %A
- %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext1 = sext <2 x i32> %high1 to <2 x i64>
-
- %tmp2 = load <4 x i32>* %B
- %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext2 = sext <2 x i32> %high2 to <2 x i64>
-
- %res = sub <2 x i64> %ext1, %ext2
- ret <2 x i64> %res
-}
-
-define <8 x i16> @usubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: usubl8h:
-;CHECK: usubl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
- %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
- %tmp5 = sub <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @usubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: usubl4s:
-;CHECK: usubl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
- %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
- %tmp5 = sub <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: usubl2d:
-;CHECK: usubl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
- %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
- %tmp5 = sub <2 x i64> %tmp3, %tmp4
- ret <2 x i64> %tmp5
-}
-
-define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: usubl2_8h:
-;CHECK: usubl2.8h
- %tmp1 = load <16 x i8>* %A
- %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext1 = zext <8 x i8> %high1 to <8 x i16>
-
- %tmp2 = load <16 x i8>* %B
- %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext2 = zext <8 x i8> %high2 to <8 x i16>
-
- %res = sub <8 x i16> %ext1, %ext2
- ret <8 x i16> %res
-}
-
-define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: usubl2_4s:
-;CHECK: usubl2.4s
- %tmp1 = load <8 x i16>* %A
- %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext1 = zext <4 x i16> %high1 to <4 x i32>
-
- %tmp2 = load <8 x i16>* %B
- %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext2 = zext <4 x i16> %high2 to <4 x i32>
-
- %res = sub <4 x i32> %ext1, %ext2
- ret <4 x i32> %res
-}
-
-define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: usubl2_2d:
-;CHECK: usubl2.2d
- %tmp1 = load <4 x i32>* %A
- %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext1 = zext <2 x i32> %high1 to <2 x i64>
-
- %tmp2 = load <4 x i32>* %B
- %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext2 = zext <2 x i32> %high2 to <2 x i64>
-
- %res = sub <2 x i64> %ext1, %ext2
- ret <2 x i64> %res
-}
-
-define <8 x i16> @ssubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: ssubw8h:
-;CHECK: ssubw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
- %tmp4 = sub <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @ssubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: ssubw4s:
-;CHECK: ssubw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
- %tmp4 = sub <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: ssubw2d:
-;CHECK: ssubw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
- %tmp4 = sub <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: ssubw2_8h:
-;CHECK: ssubw2.8h
- %tmp1 = load <8 x i16>* %A
-
- %tmp2 = load <16 x i8>* %B
- %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext2 = sext <8 x i8> %high2 to <8 x i16>
-
- %res = sub <8 x i16> %tmp1, %ext2
- ret <8 x i16> %res
-}
-
-define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: ssubw2_4s:
-;CHECK: ssubw2.4s
- %tmp1 = load <4 x i32>* %A
-
- %tmp2 = load <8 x i16>* %B
- %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext2 = sext <4 x i16> %high2 to <4 x i32>
-
- %res = sub <4 x i32> %tmp1, %ext2
- ret <4 x i32> %res
-}
-
-define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: ssubw2_2d:
-;CHECK: ssubw2.2d
- %tmp1 = load <2 x i64>* %A
-
- %tmp2 = load <4 x i32>* %B
- %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext2 = sext <2 x i32> %high2 to <2 x i64>
-
- %res = sub <2 x i64> %tmp1, %ext2
- ret <2 x i64> %res
-}
-
-define <8 x i16> @usubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: usubw8h:
-;CHECK: usubw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
- %tmp4 = sub <8 x i16> %tmp1, %tmp3
- ret <8 x i16> %tmp4
-}
-
-define <4 x i32> @usubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: usubw4s:
-;CHECK: usubw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
- %tmp4 = sub <4 x i32> %tmp1, %tmp3
- ret <4 x i32> %tmp4
-}
-
-define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
-;CHECK-LABEL: usubw2d:
-;CHECK: usubw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
- %tmp4 = sub <2 x i64> %tmp1, %tmp3
- ret <2 x i64> %tmp4
-}
-
-define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: usubw2_8h:
-;CHECK: usubw2.8h
- %tmp1 = load <8 x i16>* %A
-
- %tmp2 = load <16 x i8>* %B
- %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %ext2 = zext <8 x i8> %high2 to <8 x i16>
-
- %res = sub <8 x i16> %tmp1, %ext2
- ret <8 x i16> %res
-}
-
-define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: usubw2_4s:
-;CHECK: usubw2.4s
- %tmp1 = load <4 x i32>* %A
-
- %tmp2 = load <8 x i16>* %B
- %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %ext2 = zext <4 x i16> %high2 to <4 x i32>
-
- %res = sub <4 x i32> %tmp1, %ext2
- ret <4 x i32> %res
-}
-
-define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: usubw2_2d:
-;CHECK: usubw2.2d
- %tmp1 = load <2 x i64>* %A
-
- %tmp2 = load <4 x i32>* %B
- %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
- %ext2 = zext <2 x i32> %high2 to <2 x i64>
-
- %res = sub <2 x i64> %tmp1, %ext2
- ret <2 x i64> %res
-}
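Editorial aside (not part of the patch): subhn computes the difference at full width and keeps only the high half of each lane, res = (a - b) >> (w/2) truncated to w/2 bits, while rsubhn rounds first, res = (a - b + (1 << (w/2 - 1))) >> (w/2). The *2 variants write the same narrowed result into the upper half of a wider destination, which is what the subhn + subhn2 CHECK pairs verify.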
diff --git a/test/CodeGen/ARM64/xaluo.ll b/test/CodeGen/ARM64/xaluo.ll
deleted file mode 100644
index 6a8520d..0000000
--- a/test/CodeGen/ARM64/xaluo.ll
+++ /dev/null
@@ -1,524 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-;
-; Get the actual value of the overflow bit.
-;
-define i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
-entry:
-; CHECK-LABEL: saddo.i32
-; CHECK: adds w8, w0, w1
-; CHECK-NEXT: csinc w0, wzr, wzr, vc
- %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- store i32 %val, i32* %res
- ret i1 %obit
-}
-
-define i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
-entry:
-; CHECK-LABEL: saddo.i64
-; CHECK: adds x8, x0, x1
-; CHECK-NEXT: csinc w0, wzr, wzr, vc
- %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- store i64 %val, i64* %res
- ret i1 %obit
-}
-
-define i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
-entry:
-; CHECK-LABEL: uaddo.i32
-; CHECK: adds w8, w0, w1
-; CHECK-NEXT: csinc w0, wzr, wzr, cc
- %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- store i32 %val, i32* %res
- ret i1 %obit
-}
-
-define i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) {
-entry:
-; CHECK-LABEL: uaddo.i64
-; CHECK: adds x8, x0, x1
-; CHECK-NEXT: csinc w0, wzr, wzr, cc
- %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- store i64 %val, i64* %res
- ret i1 %obit
-}
-
-define i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
-entry:
-; CHECK-LABEL: ssubo.i32
-; CHECK: subs w8, w0, w1
-; CHECK-NEXT: csinc w0, wzr, wzr, vc
- %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- store i32 %val, i32* %res
- ret i1 %obit
-}
-
-define i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
-entry:
-; CHECK-LABEL: ssubo.i64
-; CHECK: subs x8, x0, x1
-; CHECK-NEXT: csinc w0, wzr, wzr, vc
- %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- store i64 %val, i64* %res
- ret i1 %obit
-}
-
-define i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
-entry:
-; CHECK-LABEL: usubo.i32
-; CHECK: subs w8, w0, w1
-; CHECK-NEXT: csinc w0, wzr, wzr, cs
- %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- store i32 %val, i32* %res
- ret i1 %obit
-}
-
-define i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) {
-entry:
-; CHECK-LABEL: usubo.i64
-; CHECK: subs x8, x0, x1
-; CHECK-NEXT: csinc w0, wzr, wzr, cs
- %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- store i64 %val, i64* %res
- ret i1 %obit
-}
-
-define i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
-entry:
-; CHECK-LABEL: smulo.i32
-; CHECK: smull x8, w0, w1
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: cmp w9, w8, asr #31
-; CHECK-NEXT: csinc w0, wzr, wzr, eq
- %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- store i32 %val, i32* %res
- ret i1 %obit
-}
-
-define i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
-entry:
-; CHECK-LABEL: smulo.i64
-; CHECK: mul x8, x0, x1
-; CHECK-NEXT: smulh x9, x0, x1
-; CHECK-NEXT: cmp x9, x8, asr #63
-; CHECK-NEXT: csinc w0, wzr, wzr, eq
- %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- store i64 %val, i64* %res
- ret i1 %obit
-}
-
-define i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
-entry:
-; CHECK-LABEL: umulo.i32
-; CHECK: umull x8, w0, w1
-; CHECK-NEXT: cmp xzr, x8, lsr #32
-; CHECK-NEXT: csinc w0, wzr, wzr, eq
- %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- store i32 %val, i32* %res
- ret i1 %obit
-}
-
-define i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
-entry:
-; CHECK-LABEL: umulo.i64
-; CHECK: umulh x8, x0, x1
-; CHECK-NEXT: cmp xzr, x8
-; CHECK-NEXT: csinc w8, wzr, wzr, eq
-; CHECK-NEXT: mul x9, x0, x1
- %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- store i64 %val, i64* %res
- ret i1 %obit
-}
-
-
-;
-; Check the use of the overflow bit in combination with a select instruction.
-;
-define i32 @saddo.select.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: saddo.select.i32
-; CHECK: cmn w0, w1
-; CHECK-NEXT: csel w0, w0, w1, vs
- %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
- %obit = extractvalue {i32, i1} %t, 1
- %ret = select i1 %obit, i32 %v1, i32 %v2
- ret i32 %ret
-}
-
-define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: saddo.select.i64
-; CHECK: cmn x0, x1
-; CHECK-NEXT: csel x0, x0, x1, vs
- %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: uaddo.select.i32
-; CHECK: cmn w0, w1
-; CHECK-NEXT: csel w0, w0, w1, cs
- %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
- %obit = extractvalue {i32, i1} %t, 1
- %ret = select i1 %obit, i32 %v1, i32 %v2
- ret i32 %ret
-}
-
-define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: uaddo.select.i64
-; CHECK: cmn x0, x1
-; CHECK-NEXT: csel x0, x0, x1, cs
- %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-define i32 @ssubo.select.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: ssubo.select.i32
-; CHECK: cmp w0, w1
-; CHECK-NEXT: csel w0, w0, w1, vs
- %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
- %obit = extractvalue {i32, i1} %t, 1
- %ret = select i1 %obit, i32 %v1, i32 %v2
- ret i32 %ret
-}
-
-define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: ssubo.select.i64
-; CHECK: cmp x0, x1
-; CHECK-NEXT: csel x0, x0, x1, vs
- %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: usubo.select.i32
-; CHECK: cmp w0, w1
-; CHECK-NEXT: csel w0, w0, w1, cc
- %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
- %obit = extractvalue {i32, i1} %t, 1
- %ret = select i1 %obit, i32 %v1, i32 %v2
- ret i32 %ret
-}
-
-define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: usubo.select.i64
-; CHECK: cmp x0, x1
-; CHECK-NEXT: csel x0, x0, x1, cc
- %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: smulo.select.i32
-; CHECK: smull x8, w0, w1
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: cmp w9, w8, asr #31
-; CHECK-NEXT: csel w0, w0, w1, ne
- %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
- %obit = extractvalue {i32, i1} %t, 1
- %ret = select i1 %obit, i32 %v1, i32 %v2
- ret i32 %ret
-}
-
-define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: smulo.select.i64
-; CHECK: mul x8, x0, x1
-; CHECK-NEXT: smulh x9, x0, x1
-; CHECK-NEXT: cmp x9, x8, asr #63
-; CHECK-NEXT: csel x0, x0, x1, ne
- %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: umulo.select.i32
-; CHECK: umull x8, w0, w1
-; CHECK-NEXT: cmp xzr, x8, lsr #32
-; CHECK-NEXT: csel w0, w0, w1, ne
- %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
- %obit = extractvalue {i32, i1} %t, 1
- %ret = select i1 %obit, i32 %v1, i32 %v2
- ret i32 %ret
-}
-
-define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: umulo.select.i64
-; CHECK: umulh x8, x0, x1
-; CHECK-NEXT: cmp xzr, x8
-; CHECK-NEXT: csel x0, x0, x1, ne
- %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-
-;
-; Check the use of the overflow bit in combination with a branch instruction.
-;
-define i1 @saddo.br.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: saddo.br.i32
-; CHECK: cmn w0, w1
-; CHECK-NEXT: b.vc
- %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @saddo.br.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: saddo.br.i64
-; CHECK: cmn x0, x1
-; CHECK-NEXT: b.vc
- %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: uaddo.br.i32
-; CHECK: cmn w0, w1
-; CHECK-NEXT: b.cc
- %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: uaddo.br.i64
-; CHECK: cmn x0, x1
-; CHECK-NEXT: b.cc
- %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @ssubo.br.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: ssubo.br.i32
-; CHECK: cmp w0, w1
-; CHECK-NEXT: b.vc
- %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: ssubo.br.i64
-; CHECK: cmp x0, x1
-; CHECK-NEXT: b.vc
- %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @usubo.br.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: usubo.br.i32
-; CHECK: cmp w0, w1
-; CHECK-NEXT: b.cs
- %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @usubo.br.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: usubo.br.i64
-; CHECK: cmp x0, x1
-; CHECK-NEXT: b.cs
- %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @smulo.br.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: smulo.br.i32
-; CHECK: smull x8, w0, w1
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: cmp w9, w8, asr #31
-; CHECK-NEXT: b.eq
- %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @smulo.br.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: smulo.br.i64
-; CHECK: mul x8, x0, x1
-; CHECK-NEXT: smulh x9, x0, x1
-; CHECK-NEXT: cmp x9, x8, asr #63
-; CHECK-NEXT: b.eq
- %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @umulo.br.i32(i32 %v1, i32 %v2) {
-entry:
-; CHECK-LABEL: umulo.br.i32
-; CHECK: umull x8, w0, w1
-; CHECK-NEXT: cmp xzr, x8, lsr #32
-; CHECK-NEXT: b.eq
- %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
- %val = extractvalue {i32, i1} %t, 0
- %obit = extractvalue {i32, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-define i1 @umulo.br.i64(i64 %v1, i64 %v2) {
-entry:
-; CHECK-LABEL: umulo.br.i64
-; CHECK: umulh x8, x0, x1
-; CHECK-NEXT: cbz
- %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
- %val = extractvalue {i64, i1} %t, 0
- %obit = extractvalue {i64, i1} %t, 1
- br i1 %obit, label %overflow, label %continue
-
-overflow:
- ret i1 false
-
-continue:
- ret i1 true
-}
-
-declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
-declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
-declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
-declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
-declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
-declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
-declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
-declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
-declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
-declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
-
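Editorial aside (not part of the patch; the function name below is hypothetical): the three groups above consume the same overflow flag three ways — materialized into a register with csinc, fed into a conditional select with csel, and branched on with b.cc — without reloading it. A minimal self-contained form of the select pattern:

define i32 @saddo_select_sketch(i32 %a, i32 %b) {
entry:
  ; signed add that also yields the overflow bit
  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  ; return 0 on overflow, the sum otherwise
  %ret = select i1 %obit, i32 0, i32 %val
  ret i32 %ret
}

declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone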
diff --git a/test/CodeGen/ARM64/zip.ll b/test/CodeGen/ARM64/zip.ll
deleted file mode 100644
index d06a9f8..0000000
--- a/test/CodeGen/ARM64/zip.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
-
-define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vzipi8:
-;CHECK: zip1.8b
-;CHECK: zip2.8b
-;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-;CHECK-LABEL: vzipi16:
-;CHECK: zip1.4h
-;CHECK: zip2.4h
-;CHECK-NEXT: add.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %tmp5 = add <4 x i16> %tmp3, %tmp4
- ret <4 x i16> %tmp5
-}
-
-define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vzipQi8:
-;CHECK: zip1.16b
-;CHECK: zip2.16b
-;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
-
-define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: vzipQi16:
-;CHECK: zip1.8h
-;CHECK: zip2.8h
-;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- %tmp5 = add <8 x i16> %tmp3, %tmp4
- ret <8 x i16> %tmp5
-}
-
-define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
-;CHECK-LABEL: vzipQi32:
-;CHECK: zip1.4s
-;CHECK: zip2.4s
-;CHECK-NEXT: add.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %tmp5 = add <4 x i32> %tmp3, %tmp4
- ret <4 x i32> %tmp5
-}
-
-define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
-;CHECK-LABEL: vzipQf:
-;CHECK: zip1.4s
-;CHECK: zip2.4s
-;CHECK-NEXT: fadd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %tmp5 = fadd <4 x float> %tmp3, %tmp4
- ret <4 x float> %tmp5
-}
-
-; Undef shuffle indices should not prevent matching to VZIP:
-
-define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-;CHECK-LABEL: vzipi8_undef:
-;CHECK: zip1.8b
-;CHECK: zip2.8b
-;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
- %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
- ret <8 x i8> %tmp5
-}
-
-define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK-LABEL: vzipQi8_undef:
-;CHECK: zip1.16b
-;CHECK: zip2.16b
-;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
- %tmp5 = add <16 x i8> %tmp3, %tmp4
- ret <16 x i8> %tmp5
-}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index 4e858f7..9537489 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -46,8 +46,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!8 = metadata !{null, metadata !9, metadata !9}
!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{metadata !12}
-!12 = metadata !{metadata !13, metadata !14, metadata !15}
+!11 = metadata !{metadata !13, metadata !14, metadata !15}
!13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
!14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
!15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll
index b6ba22e..2559e23 100644
--- a/test/CodeGen/MSP430/fp.ll
+++ b/test/CodeGen/MSP430/fp.ll
@@ -21,7 +21,7 @@ entry:
; does not happen anymore. Note that the only reason an ISR is used here is that
; the register allocator selects r4 first, rather than fifth as it would in a
; normal function.
define msp430_intrcc void @fpb_alloced() #0 {
-; CHECK_LABEL: fpb_alloced:
+; CHECK-LABEL: fpb_alloced:
; CHECK-NOT: mov.b #0, r4
; CHECK: nop
call void asm sideeffect "nop", "r"(i8 0)
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 38d7b7e..5c84077 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -2,10 +2,14 @@
; RUN: FileCheck %s -check-prefix=STATIC-O32
; RUN: llc < %s -march=mips -relocation-model=pic | \
; RUN: FileCheck %s -check-prefix=PIC-O32
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips4 | \
+; RUN: FileCheck %s -check-prefix=N64
+; RUN: llc < %s -march=mips64 -relocation-model=static -mcpu=mips4 | \
+; RUN: FileCheck %s -check-prefix=N64
; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 | \
-; RUN: FileCheck %s -check-prefix=N64
+; RUN: FileCheck %s -check-prefix=N64
; RUN: llc < %s -march=mips64 -relocation-model=static -mcpu=mips64 | \
-; RUN: FileCheck %s -check-prefix=N64
+; RUN: FileCheck %s -check-prefix=N64
define i32 @main() nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
new file mode 100644
index 0000000..eeaff87
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define void @foo() {
+entry:
+ ret void
+; CHECK: jr $ra
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestore.ll b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
new file mode 100644
index 0000000..5d52481
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s
+
+@abcd = external global i32
+
+; Function Attrs: nounwind
+define void @foo() {
+entry:
+ store i32 12345, i32* @abcd, align 4
+; CHECK: addiu $[[REG1:[0-9]+]], $zero, 12345
+; CHECK: lw $[[REG2:[0-9]+]], %got(abcd)(${{[0-9]+}})
+; CHECK: sw $[[REG1]], 0($[[REG2]])
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
new file mode 100644
index 0000000..7d2c8e7
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s
+
+@ijk = external global i32
+
+; Function Attrs: nounwind
+define void @si2_1() #0 {
+entry:
+ store i32 32767, i32* @ijk, align 4
+; CHECK: .ent si2_1
+; CHECK: addiu $[[REG1:[0-9]+]], $zero, 32767
+; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: sw $[[REG1]], 0($[[REG2]])
+
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @si2_2() #0 {
+entry:
+ store i32 -32768, i32* @ijk, align 4
+; CHECK: .ent si2_2
+; CHECK: addiu $[[REG1:[0-9]+]], $zero, -32768
+; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: sw $[[REG1]], 0($[[REG2]])
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @ui2_1() #0 {
+entry:
+ store i32 65535, i32* @ijk, align 4
+; CHECK: .ent ui2_1
+; CHECK: ori $[[REG1:[0-9]+]], $zero, 65535
+; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: sw $[[REG1]], 0($[[REG2]])
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @ui4_1() #0 {
+entry:
+ store i32 983040, i32* @ijk, align 4
+; CHECK: .ent ui4_1
+; CHECK: lui $[[REG1:[0-9]+]], 15
+; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: sw $[[REG1]], 0($[[REG2]])
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @ui4_2() #0 {
+entry:
+ store i32 719566, i32* @ijk, align 4
+; CHECK: .ent ui4_2
+; CHECK: lui $[[REG1:[0-9]+]], 10
+; CHECK: ori $[[REG1]], $[[REG1]], 64206
+; CHECK: lw $[[REG2:[0-9]+]], %got(ijk)(${{[0-9]+}})
+; CHECK: sw $[[REG1]], 0($[[REG2]])
+ ret void
+}
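+
+; A worked sketch of the immediate materialisation above (illustrative only,
+; not extra checks): 32767 and -32768 fit a signed 16-bit immediate (one
+; addiu); 65535 fits an unsigned 16-bit immediate (one ori); 983040 = 15 << 16
+; needs only a lui of 15; and 719566 = (10 << 16) | 64206 needs a lui of 10
+; followed by an ori with 64206.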
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
diff --git a/test/CodeGen/Mips/abicalls.ll b/test/CodeGen/Mips/abicalls.ll
index 7b98b02..6fa33aa 100644
--- a/test/CodeGen/Mips/abicalls.ll
+++ b/test/CodeGen/Mips/abicalls.ll
@@ -7,6 +7,7 @@
; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-STATIC %s
; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
; CHECK-STATIC: .abicalls
diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll
new file mode 100644
index 0000000..e2119ec
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-float.ll
@@ -0,0 +1,222 @@
+; RUN: llc -march=mips -relocation-model=static -soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static -soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+
+; Test the floating point arguments for all ABIs and byte orders as specified
+; by section 5 of MD00305 (MIPS ABIs Described).
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [11 x i8] zeroinitializer
+@dwords = global [11 x i64] zeroinitializer
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, double %b, double %c, double %d, double %e,
+ double %f, double %g, double %h, double %i) nounwind {
+entry:
+ %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ store volatile double %a, double* %0
+ %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+ store volatile double %b, double* %1
+ %2 = getelementptr [11 x double]* @doubles, i32 0, i32 3
+ store volatile double %c, double* %2
+ %3 = getelementptr [11 x double]* @doubles, i32 0, i32 4
+ store volatile double %d, double* %3
+ %4 = getelementptr [11 x double]* @doubles, i32 0, i32 5
+ store volatile double %e, double* %4
+ %5 = getelementptr [11 x double]* @doubles, i32 0, i32 6
+ store volatile double %f, double* %5
+ %6 = getelementptr [11 x double]* @doubles, i32 0, i32 7
+ store volatile double %g, double* %6
+ %7 = getelementptr [11 x double]* @doubles, i32 0, i32 8
+ store volatile double %h, double* %7
+ %8 = getelementptr [11 x double]* @doubles, i32 0, i32 9
+ store volatile double %i, double* %8
+ ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
+; O32-DAG: sw $4, 8([[R2]])
+; O32-DAG: sw $5, 12([[R2]])
+; NEW-DAG: sd $4, 8([[R2]])
+
+; O32-DAG: sw $6, 16([[R2]])
+; O32-DAG: sw $7, 20([[R2]])
+; NEW-DAG: sd $5, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp)
+; O32-DAG: sw [[R3]], 24([[R2]])
+; O32-DAG: sw [[R4]], 28([[R2]])
+; NEW-DAG: sd $6, 24([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG: lw [[R4:\$[0-9]+]], 36($sp)
+; O32-DAG: sw [[R3]], 32([[R2]])
+; O32-DAG: sw [[R4]], 36([[R2]])
+; NEW-DAG: sd $7, 32([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 40($sp)
+; O32-DAG: lw [[R4:\$[0-9]+]], 44($sp)
+; O32-DAG: sw [[R3]], 40([[R2]])
+; O32-DAG: sw [[R4]], 44([[R2]])
+; NEW-DAG: sd $8, 40([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp)
+; O32-DAG: lw [[R4:\$[0-9]+]], 52($sp)
+; O32-DAG: sw [[R3]], 48([[R2]])
+; O32-DAG: sw [[R4]], 52([[R2]])
+; NEW-DAG: sd $9, 48([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp)
+; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp)
+; O32-DAG: sw [[R3]], 56([[R2]])
+; O32-DAG: sw [[R4]], 60([[R2]])
+; NEW-DAG: sd $10, 56([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG: lw [[R3:\$[0-9]+]], 64($sp)
+; O32-DAG: lw [[R4:\$[0-9]+]], 68($sp)
+; O32-DAG: sw [[R3]], 64([[R2]])
+; O32-DAG: sw [[R4]], 68([[R2]])
+; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: sd $11, 64([[R2]])
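+
+; A reader's aid for the mapping above (no new checks): with soft-float, O32
+; passes %a in $4/$5 and %b in $6/$7 and takes the remaining doubles from the
+; stack, while N32/N64 pass %a..%h in $4..$11 and take %i from 0($sp).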
+
+define void @float_args(float %a, float %b, float %c, float %d, float %e,
+ float %f, float %g, float %h, float %i, float %j)
+ nounwind {
+entry:
+ %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ store volatile float %a, float* %0
+ %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+ store volatile float %b, float* %1
+ %2 = getelementptr [11 x float]* @floats, i32 0, i32 3
+ store volatile float %c, float* %2
+ %3 = getelementptr [11 x float]* @floats, i32 0, i32 4
+ store volatile float %d, float* %3
+ %4 = getelementptr [11 x float]* @floats, i32 0, i32 5
+ store volatile float %e, float* %4
+ %5 = getelementptr [11 x float]* @floats, i32 0, i32 6
+ store volatile float %f, float* %5
+ %6 = getelementptr [11 x float]* @floats, i32 0, i32 7
+ store volatile float %g, float* %6
+ %7 = getelementptr [11 x float]* @floats, i32 0, i32 8
+ store volatile float %h, float* %7
+ %8 = getelementptr [11 x float]* @floats, i32 0, i32 9
+ store volatile float %i, float* %8
+ %9 = getelementptr [11 x float]* @floats, i32 0, i32 10
+ store volatile float %j, float* %9
+ ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC treats them as 4-byte
+; aligned, occupying one slot. We'll use GCC's definition.
+; ALL-DAG: sw $4, 4([[R2]])
+; ALL-DAG: sw $5, 8([[R2]])
+; ALL-DAG: sw $6, 12([[R2]])
+; ALL-DAG: sw $7, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp)
+; O32-DAG: sw [[R3]], 20([[R2]])
+; NEW-DAG: sw $8, 20([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
+; O32-DAG: sw [[R3]], 24([[R2]])
+; NEW-DAG: sw $9, 24([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 24($sp)
+; O32-DAG: sw [[R3]], 28([[R2]])
+; NEW-DAG: sw $10, 28([[R2]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 28($sp)
+; O32-DAG: sw [[R3]], 32([[R2]])
+; NEW-DAG: sw $11, 32([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG: sw [[R3]], 36([[R2]])
+; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: sw [[R3]], 36([[R2]])
+
+define void @double_arg2(i8 %a, double %b) nounwind {
+entry:
+ %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ store volatile i8 %a, i8* %0
+ %1 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ store volatile double %b, double* %1
+ ret void
+}
+
+; ALL-LABEL: double_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument isn't floating point so floating point registers are not
+; used.
+; The second slot is insufficiently aligned for double on O32 so it is skipped.
+; Also, double occupies two slots on O32 and only one for N32/N64.
+; ALL-DAG: sb $4, 1([[R1]])
+; O32-DAG: sw $6, 8([[R2]])
+; O32-DAG: sw $7, 12([[R2]])
+; NEW-DAG: sd $5, 8([[R2]])
+
+define void @float_arg2(i8 %a, float %b) nounwind {
+entry:
+ %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ store volatile i8 %a, i8* %0
+ %1 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ store volatile float %b, float* %1
+ ret void
+}
+
+; ALL-LABEL: float_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The first argument isn't floating point so floating point registers are not
+; used.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC treats them as 4-byte
+; aligned, occupying one slot. We'll use GCC's definition.
+; ALL-DAG: sb $4, 1([[R1]])
+; ALL-DAG: sw $5, 4([[R2]])
diff --git a/test/CodeGen/Mips/cconv/arguments-fp128.ll b/test/CodeGen/Mips/cconv/arguments-fp128.ll
new file mode 100644
index 0000000..c8cd8fd
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-fp128.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+
+; Test the fp128 arguments for all ABIs and byte orders as specified
+; by section 2 of the MIPSpro N32 Handbook.
+;
+; O32 is not tested because long double is the same as double on O32.
+
+@ldoubles = global [11 x fp128] zeroinitializer
+
+define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
+entry:
+ %0 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 1
+ store volatile fp128 %a, fp128* %0
+ %1 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 2
+ store volatile fp128 %b, fp128* %1
+ %2 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 3
+ store volatile fp128 %c, fp128* %2
+ %3 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 4
+ store volatile fp128 %d, fp128* %3
+ %4 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 5
+ store volatile fp128 %e, fp128* %4
+ ret void
+}
+
+; ALL-LABEL: ldouble_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(ldoubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(ldoubles)(
+
+; The first four arguments are the same in N32/N64.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
+; ALL-DAG: sd $4, 16([[R2]])
+; ALL-DAG: sd $5, 24([[R2]])
+; ALL-DAG: sd $6, 32([[R2]])
+; ALL-DAG: sd $7, 40([[R2]])
+; ALL-DAG: sd $8, 48([[R2]])
+; ALL-DAG: sd $9, 56([[R2]])
+; ALL-DAG: sd $10, 64([[R2]])
+; ALL-DAG: sd $11, 72([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; ALL-DAG: ld [[R3:\$[0-9]+]], 0($sp)
+; ALL-DAG: ld [[R4:\$[0-9]+]], 8($sp)
+; ALL-DAG: sd [[R3]], 80([[R2]])
+; ALL-DAG: sd [[R4]], 88([[R2]])
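+
+; A reader's aid (no new checks): under soft-float each fp128 occupies two
+; 8-byte GPRs, so %a..%d arrive in $4..$11 and %e comes from the stack at
+; 0($sp) and 8($sp).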
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
new file mode 100644
index 0000000..aadf7d1
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
@@ -0,0 +1,157 @@
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+
+; Test the effect of varargs on floating point types in the non-variable part
+; of the argument list as specified by section 2 of the MIPSpro N32 Handbook.
+;
+; N32/N64 are almost identical in this area so many of their checks have been
+; combined into the 'NEW' prefix (the N stands for New).
+;
+; On O32, varargs prevents all FPU argument register usage. This contradicts
+; the N32 handbook, but agrees with the SYSV ABI and GCC's behaviour.
+
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, ...)
+ nounwind {
+entry:
+ %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ store volatile double %a, double* %0
+
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+ %b = va_arg i8** %ap, double
+ %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+ store volatile double %b, double* %1
+ ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; O32 forbids using floating point registers for the non-variable portion.
+; N32/N64 allow it.
+; O32BE-DAG: mtc1 $5, [[FTMP1:\$f[0-9]*[02468]+]]
+; O32BE-DAG: mtc1 $4, [[FTMP2:\$f[0-9]*[13579]+]]
+; O32LE-DAG: mtc1 $4, [[FTMP1:\$f[0-9]*[02468]+]]
+; O32LE-DAG: mtc1 $5, [[FTMP2:\$f[0-9]*[13579]+]]
+; O32-DAG: sdc1 [[FTMP1]], 8([[R2]])
+; NEW-DAG: sdc1 $f12, 8([[R2]])
+
+; The varargs portion is dumped to stack
+; O32-DAG: sw $6, 16($sp)
+; O32-DAG: sw $7, 20($sp)
+; NEW-DAG: sd $5, 8($sp)
+; NEW-DAG: sd $6, 16($sp)
+; NEW-DAG: sd $7, 24($sp)
+; NEW-DAG: sd $8, 32($sp)
+; NEW-DAG: sd $9, 40($sp)
+; NEW-DAG: sd $10, 48($sp)
+; NEW-DAG: sd $11, 56($sp)
+
+; Get the varargs pointer
+; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and 8 bytes reserved
+; for arguments 1 and 2.
+; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
+; O32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 16
+; O32-DAG: sw [[VAPTR]], 4($sp)
+; N32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N32-DAG: sw [[VAPTR]], 4($sp)
+; N64-DAG: daddiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N64-DAG: sd [[VAPTR]], 0($sp)
+
+; Increment the pointer then get the varargs arg
+; LLVM will rebind the load to the stack pointer instead of the varargs pointer
+; during lowering. This is fine and doesn't change the behaviour.
+; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
+; O32-DAG: sw [[VAPTR]], 4($sp)
+; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
+; N32-DAG: sw [[VAPTR]], 4($sp)
+; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
+; N64-DAG: sd [[VAPTR]], 0($sp)
+; O32-DAG: ldc1 [[FTMP1:\$f[0-9]+]], 16($sp)
+; NEW-DAG: ldc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; ALL-DAG: sdc1 [[FTMP1]], 16([[R2]])
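+
+; A worked view of the O32 frame implied by the checks above (illustrative
+; only): bytes 0..3 are padding, 4..7 hold the saved varargs pointer, 8..15
+; are reserved for the fixed double in $4/$5, and the varargs area begins at
+; 16($sp), which is why the initial varargs pointer is $sp+16.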
+
+define void @float_args(float %a, ...) nounwind {
+entry:
+ %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ store volatile float %a, float* %0
+
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+ %b = va_arg i8** %ap, float
+ %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+ store volatile float %b, float* %1
+ ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first four arguments are the same in O32/N32/N64.
+; The non-variable portion should be unaffected.
+; O32-DAG: sw $4, 4([[R2]])
+; NEW-DAG: swc1 $f12, 4([[R2]])
+
+; The varargs portion is dumped to stack
+; O32-DAG: sw $5, 12($sp)
+; O32-DAG: sw $6, 16($sp)
+; O32-DAG: sw $7, 20($sp)
+; NEW-DAG: sd $5, 8($sp)
+; NEW-DAG: sd $6, 16($sp)
+; NEW-DAG: sd $7, 24($sp)
+; NEW-DAG: sd $8, 32($sp)
+; NEW-DAG: sd $9, 40($sp)
+; NEW-DAG: sd $10, 48($sp)
+; NEW-DAG: sd $11, 56($sp)
+
+; Get the varargs pointer
+; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and should have 8
+; bytes reserved for arguments 1 and 2 (the first float arg), but as discussed
+; in arguments-float.ll, GCC doesn't agree with MD00305 and treats floats as 4
+; bytes, so we only have 12 bytes in total.
+; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
+; O32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 12
+; O32-DAG: sw [[VAPTR]], 4($sp)
+; N32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N32-DAG: sw [[VAPTR]], 4($sp)
+; N64-DAG: daddiu [[VAPTR:\$[0-9]+]], $sp, 8
+; N64-DAG: sd [[VAPTR]], 0($sp)
+
+; Increment the pointer then get the varargs arg
+; LLVM will rebind the load to the stack pointer instead of the varargs pointer
+; during lowering. This is fine and doesn't change the behaviour.
+; N32/N64 use ori instead of addiu/daddiu, which (although odd) is fine since
+; the stack is always aligned.
+; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4
+; O32-DAG: sw [[VAPTR]], 4($sp)
+; N32-DAG: ori [[VAPTR]], [[VAPTR]], 4
+; N32-DAG: sw [[VAPTR]], 4($sp)
+; N64-DAG: ori [[VAPTR]], [[VAPTR]], 4
+; N64-DAG: sd [[VAPTR]], 0($sp)
+; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
+; NEW-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; ALL-DAG: swc1 [[FTMP1]], 8([[R2]])
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float.ll b/test/CodeGen/Mips/cconv/arguments-hard-float.ll
new file mode 100644
index 0000000..9837f7e
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float.ll
@@ -0,0 +1,211 @@
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+
+; Test the floating point arguments for all ABIs and byte orders as specified
+; by section 5 of MD00305 (MIPS ABIs Described).
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [11 x i8] zeroinitializer
+@dwords = global [11 x i64] zeroinitializer
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @double_args(double %a, double %b, double %c, double %d, double %e,
+ double %f, double %g, double %h, double %i) nounwind {
+entry:
+ %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ store volatile double %a, double* %0
+ %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+ store volatile double %b, double* %1
+ %2 = getelementptr [11 x double]* @doubles, i32 0, i32 3
+ store volatile double %c, double* %2
+ %3 = getelementptr [11 x double]* @doubles, i32 0, i32 4
+ store volatile double %d, double* %3
+ %4 = getelementptr [11 x double]* @doubles, i32 0, i32 5
+ store volatile double %e, double* %4
+ %5 = getelementptr [11 x double]* @doubles, i32 0, i32 6
+ store volatile double %f, double* %5
+ %6 = getelementptr [11 x double]* @doubles, i32 0, i32 7
+ store volatile double %g, double* %6
+ %7 = getelementptr [11 x double]* @doubles, i32 0, i32 8
+ store volatile double %h, double* %7
+ %8 = getelementptr [11 x double]* @doubles, i32 0, i32 9
+ store volatile double %i, double* %8
+ ret void
+}
+
+; ALL-LABEL: double_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first argument is floating point so floating point registers are used.
+; The first argument is the same for O32/N32/N64, but the second differs by
+; register: O32 uses $f14 where N32/N64 use $f13.
+; ALL-DAG: sdc1 $f12, 8([[R2]])
+; O32-DAG: sdc1 $f14, 16([[R2]])
+; NEW-DAG: sdc1 $f13, 16([[R2]])
+
+; O32 has run out of argument registers and starts using the stack
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 16($sp)
+; O32-DAG: sdc1 [[F1]], 24([[R2]])
+; NEW-DAG: sdc1 $f14, 24([[R2]])
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 24($sp)
+; O32-DAG: sdc1 [[F1]], 32([[R2]])
+; NEW-DAG: sdc1 $f15, 32([[R2]])
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 32($sp)
+; O32-DAG: sdc1 [[F1]], 40([[R2]])
+; NEW-DAG: sdc1 $f16, 40([[R2]])
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 40($sp)
+; O32-DAG: sdc1 [[F1]], 48([[R2]])
+; NEW-DAG: sdc1 $f17, 48([[R2]])
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 48($sp)
+; O32-DAG: sdc1 [[F1]], 56([[R2]])
+; NEW-DAG: sdc1 $f18, 56([[R2]])
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 56($sp)
+; O32-DAG: sdc1 [[F1]], 64([[R2]])
+; NEW-DAG: sdc1 $f19, 64([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG: ldc1 [[F1:\$f[0-9]+]], 64($sp)
+; O32-DAG: sdc1 [[F1]], 72([[R2]])
+; NEW-DAG: ldc1 [[F1:\$f[0-9]+]], 0($sp)
+; NEW-DAG: sdc1 [[F1]], 72([[R2]])
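+
+; A reader's aid for the mapping above (no new checks): with hard-float, O32
+; passes %a in $f12 and %b in $f14 and takes the remaining doubles from the
+; stack, while N32/N64 pass %a..%h in $f12..$f19 and take %i from 0($sp).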
+
+define void @float_args(float %a, float %b, float %c, float %d, float %e,
+ float %f, float %g, float %h, float %i) nounwind {
+entry:
+ %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ store volatile float %a, float* %0
+ %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+ store volatile float %b, float* %1
+ %2 = getelementptr [11 x float]* @floats, i32 0, i32 3
+ store volatile float %c, float* %2
+ %3 = getelementptr [11 x float]* @floats, i32 0, i32 4
+ store volatile float %d, float* %3
+ %4 = getelementptr [11 x float]* @floats, i32 0, i32 5
+ store volatile float %e, float* %4
+ %5 = getelementptr [11 x float]* @floats, i32 0, i32 6
+ store volatile float %f, float* %5
+ %6 = getelementptr [11 x float]* @floats, i32 0, i32 7
+ store volatile float %g, float* %6
+ %7 = getelementptr [11 x float]* @floats, i32 0, i32 8
+ store volatile float %h, float* %7
+ %8 = getelementptr [11 x float]* @floats, i32 0, i32 9
+ store volatile float %i, float* %8
+ ret void
+}
+
+; ALL-LABEL: float_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(floats)(
+
+; The first argument is floating point so floating point registers are used.
+; The first argument is the same for O32/N32/N64, but the second differs by
+; register: O32 uses $f14 where N32/N64 use $f13.
+; ALL-DAG: swc1 $f12, 4([[R1]])
+; O32-DAG: swc1 $f14, 8([[R1]])
+; NEW-DAG: swc1 $f13, 8([[R1]])
+
+; O32 has run out of argument registers and (in theory) starts using the stack
+; I've yet to find a reference in the documentation about this, but GCC uses up
+; the remaining two argument slots in the GPRs first. We'll do the same for
+; compatibility.
+; O32-DAG: sw $6, 12([[R1]])
+; NEW-DAG: swc1 $f14, 12([[R1]])
+; O32-DAG: sw $7, 16([[R1]])
+; NEW-DAG: swc1 $f15, 16([[R1]])
+
+; O32 is definitely out of registers now and switches to the stack.
+; O32-DAG: lwc1 [[F1:\$f[0-9]+]], 16($sp)
+; O32-DAG: swc1 [[F1]], 20([[R1]])
+; NEW-DAG: swc1 $f16, 20([[R1]])
+; O32-DAG: lwc1 [[F1:\$f[0-9]+]], 20($sp)
+; O32-DAG: swc1 [[F1]], 24([[R1]])
+; NEW-DAG: swc1 $f17, 24([[R1]])
+; O32-DAG: lwc1 [[F1:\$f[0-9]+]], 24($sp)
+; O32-DAG: swc1 [[F1]], 28([[R1]])
+; NEW-DAG: swc1 $f18, 28([[R1]])
+; O32-DAG: lwc1 [[F1:\$f[0-9]+]], 28($sp)
+; O32-DAG: swc1 [[F1]], 32([[R1]])
+; NEW-DAG: swc1 $f19, 32([[R1]])
+
+; N32/N64 have run out of registers and start using the stack too
+; O32-DAG: lwc1 [[F1:\$f[0-9]+]], 32($sp)
+; O32-DAG: swc1 [[F1]], 36([[R1]])
+; NEW-DAG: lwc1 [[F1:\$f[0-9]+]], 0($sp)
+; NEW-DAG: swc1 [[F1]], 36([[R1]])
+
+
+define void @double_arg2(i8 %a, double %b) nounwind {
+entry:
+ %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ store volatile i8 %a, i8* %0
+ %1 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ store volatile double %b, double* %1
+ ret void
+}
+
+; ALL-LABEL: double_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(doubles)(
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG: sb $4, 1([[R1]])
+
+; The first argument isn't floating point so floating point registers are not
+; used in O32, but N32/N64 will still use them.
+; The second slot is insufficiently aligned for double on O32 so it is skipped.
+; Also, double occupies two slots on O32 and only one for N32/N64.
+; O32LE-DAG: mtc1 $6, [[F1:\$f[0-9]*[02468]+]]
+; O32LE-DAG: mtc1 $7, [[F2:\$f[0-9]*[13579]+]]
+; O32BE-DAG: mtc1 $6, [[F2:\$f[0-9]*[13579]+]]
+; O32BE-DAG: mtc1 $7, [[F1:\$f[0-9]*[02468]+]]
+; O32-DAG: sdc1 [[F1]], 8([[R2]])
+; NEW-DAG: sdc1 $f13, 8([[R2]])
+
+define void @float_arg2(i8 %a, float %b) nounwind {
+entry:
+ %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ store volatile i8 %a, i8* %0
+ %1 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ store volatile float %b, float* %1
+ ret void
+}
+
+; ALL-LABEL: float_arg2:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)(
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG: sb $4, 1([[R1]])
+
+; The first argument isn't floating point so floating point registers are not
+; used in O32, but N32/N64 will still use them.
+; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
+; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
+; aligned and occupying one slot. We'll use GCC's definition.
+; O32-DAG: sw $5, 4([[R2]])
+; NEW-DAG: swc1 $f13, 4([[R2]])
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll b/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
new file mode 100644
index 0000000..5e3f403
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+
+; Test the fp128 arguments for all ABIs and byte orders as specified
+; by section 2 of the MIPSpro N32 Handbook.
+;
+; O32 is not tested because long double is the same as double on O32.
+
+@ldoubles = global [11 x fp128] zeroinitializer
+
+define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
+entry:
+ %0 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 1
+ store volatile fp128 %a, fp128* %0
+ %1 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 2
+ store volatile fp128 %b, fp128* %1
+ %2 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 3
+ store volatile fp128 %c, fp128* %2
+ %3 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 4
+ store volatile fp128 %d, fp128* %3
+ %4 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 5
+ store volatile fp128 %e, fp128* %4
+ ret void
+}
+
+; ALL-LABEL: ldouble_args:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(ldoubles)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(ldoubles)(
+
+; The first four arguments are the same in N32/N64.
+; ALL-DAG: sdc1 $f12, 16([[R2]])
+; ALL-DAG: sdc1 $f13, 24([[R2]])
+; ALL-DAG: sdc1 $f14, 32([[R2]])
+; ALL-DAG: sdc1 $f15, 40([[R2]])
+; ALL-DAG: sdc1 $f16, 48([[R2]])
+; ALL-DAG: sdc1 $f17, 56([[R2]])
+; ALL-DAG: sdc1 $f18, 64([[R2]])
+; ALL-DAG: sdc1 $f19, 72([[R2]])
+
+; N32/N64 have run out of registers and start using the stack too
+; ALL-DAG: ld [[R3:\$[0-9]+]], 0($sp)
+; ALL-DAG: ld [[R4:\$[0-9]+]], 8($sp)
+; ALL-DAG: sd [[R3]], 80([[R2]])
+; ALL-DAG: sd [[R4]], 88([[R2]])
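+
+; A reader's aid (no new checks): under hard-float each fp128 arrives in a
+; pair of FPU registers, so %a..%d occupy $f12..$f19 and %e spills to the
+; stack at 0($sp) and 8($sp).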
diff --git a/test/CodeGen/Mips/cconv/arguments.ll b/test/CodeGen/Mips/cconv/arguments.ll
new file mode 100644
index 0000000..8fe29f3
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments.ll
@@ -0,0 +1,170 @@
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+
+; Test the integer arguments for all ABIs and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+;
+; Varargs are covered in arguments-hard-float-varargs.ll.
+
+@bytes = global [11 x i8] zeroinitializer
+@dwords = global [11 x i64] zeroinitializer
+@floats = global [11 x float] zeroinitializer
+@doubles = global [11 x double] zeroinitializer
+
+define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g,
+ i8 %h, i8 %i, i8 %j) nounwind {
+entry:
+ %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ store volatile i8 %a, i8* %0
+ %1 = getelementptr [11 x i8]* @bytes, i32 0, i32 2
+ store volatile i8 %b, i8* %1
+ %2 = getelementptr [11 x i8]* @bytes, i32 0, i32 3
+ store volatile i8 %c, i8* %2
+ %3 = getelementptr [11 x i8]* @bytes, i32 0, i32 4
+ store volatile i8 %d, i8* %3
+ %4 = getelementptr [11 x i8]* @bytes, i32 0, i32 5
+ store volatile i8 %e, i8* %4
+ %5 = getelementptr [11 x i8]* @bytes, i32 0, i32 6
+ store volatile i8 %f, i8* %5
+ %6 = getelementptr [11 x i8]* @bytes, i32 0, i32 7
+ store volatile i8 %g, i8* %6
+ %7 = getelementptr [11 x i8]* @bytes, i32 0, i32 8
+ store volatile i8 %h, i8* %7
+ %8 = getelementptr [11 x i8]* @bytes, i32 0, i32 9
+ store volatile i8 %i, i8* %8
+ %9 = getelementptr [11 x i8]* @bytes, i32 0, i32 10
+ store volatile i8 %j, i8* %9
+ ret void
+}
+
+; ALL-LABEL: align_to_arg_slots:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+
+; The first four arguments are the same in O32/N32/N64
+; ALL-DAG: sb $4, 1([[R1]])
+; ALL-DAG: sb $5, 2([[R1]])
+; ALL-DAG: sb $6, 3([[R1]])
+; ALL-DAG: sb $7, 4([[R1]])
+
+; N32/N64 get an extra four arguments in registers
+; O32 starts loading from the stack. The addresses start at 16 because space is
+; always reserved for the first four arguments.
+; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp)
+; O32-DAG: sb [[R3]], 5([[R1]])
+; NEW-DAG: sb $8, 5([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
+; O32-DAG: sb [[R3]], 6([[R1]])
+; NEW-DAG: sb $9, 6([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 24($sp)
+; O32-DAG: sb [[R3]], 7([[R1]])
+; NEW-DAG: sb $10, 7([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 28($sp)
+; O32-DAG: sb [[R3]], 8([[R1]])
+; NEW-DAG: sb $11, 8([[R1]])
+
+; O32/N32/N64 are accessing the stack at this point.
+; Unlike O32, N32/N64 do not reserve space for the arguments.
+; The stack offsets increase by 4 for O32 and by 8 for N32/N64.
+; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG: sb [[R3]], 9([[R1]])
+; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: sb [[R3]], 9([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 36($sp)
+; O32-DAG: sb [[R3]], 10([[R1]])
+; NEW-DAG: lw [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG: sb [[R3]], 10([[R1]])
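+
+; A worked view of the slot arithmetic above (illustrative only): O32 passes
+; %a..%d in $4..$7 and reads %e onwards from 16($sp) in 4-byte steps (the
+; first 16 bytes being the reserved register-argument area), while N32/N64
+; pass %a..%h in $4..$11 and read %i and %j from 0($sp) and 8($sp).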
+
+define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d,
+ i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind {
+entry:
+ %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ store volatile i8 %a, i8* %0
+ %1 = getelementptr [11 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %b, i64* %1
+ %2 = getelementptr [11 x i8]* @bytes, i32 0, i32 2
+ store volatile i8 %c, i8* %2
+ %3 = getelementptr [11 x i8]* @bytes, i32 0, i32 3
+ store volatile i8 %d, i8* %3
+ %4 = getelementptr [11 x i8]* @bytes, i32 0, i32 4
+ store volatile i8 %e, i8* %4
+ %5 = getelementptr [11 x i8]* @bytes, i32 0, i32 5
+ store volatile i8 %f, i8* %5
+ %6 = getelementptr [11 x i8]* @bytes, i32 0, i32 6
+ store volatile i8 %g, i8* %6
+ %7 = getelementptr [11 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %i, i64* %7
+ %8 = getelementptr [11 x i8]* @bytes, i32 0, i32 7
+ store volatile i8 %j, i8* %8
+ ret void
+}
+
+; ALL-LABEL: slot_skipping:
+; We won't test the way the global address is calculated in this test. This is
+; just to get the register number for the other checks.
+; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
+; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(dwords)(
+
+; The first argument is the same in O32/N32/N64.
+; ALL-DAG: sb $4, 1([[R1]])
+
+; The second slot is insufficiently aligned for i64 on O32 so it is skipped.
+; Also, i64 occupies two slots on O32 and only one for N32/N64.
+; O32-DAG: sw $6, 8([[R2]])
+; O32-DAG: sw $7, 12([[R2]])
+; NEW-DAG: sd $5, 8([[R2]])
+
+; N32/N64 get an extra four arguments in registers and still have two left from
+; the first four.
+; O32 starts loading from the stack. The addresses start at 16 because space is
+; always reserved for the first four arguments.
+; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll
+; accept it for now. The only IR difference is that this argument has
+; anyext from i8 and align 8 on it.
+; O32LE-DAG: lbu [[R3:\$[0-9]+]], 16($sp)
+; O32BE-DAG: lbu [[R3:\$[0-9]+]], 19($sp)
+; O32-DAG: sb [[R3]], 2([[R1]])
+; NEW-DAG: sb $6, 2([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
+; O32-DAG: sb [[R3]], 3([[R1]])
+; NEW-DAG: sb $7, 3([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 24($sp)
+; O32-DAG: sb [[R3]], 4([[R1]])
+; NEW-DAG: sb $8, 4([[R1]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 28($sp)
+; O32-DAG: sb [[R3]], 5([[R1]])
+; NEW-DAG: sb $9, 5([[R1]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
+; O32-DAG: sb [[R3]], 6([[R1]])
+; NEW-DAG: sb $10, 6([[R1]])
+
+; O32-DAG: lw [[R3:\$[0-9]+]], 40($sp)
+; O32-DAG: sw [[R3]], 16([[R2]])
+; O32-DAG: lw [[R3:\$[0-9]+]], 44($sp)
+; O32-DAG: sw [[R3]], 20([[R2]])
+; NEW-DAG: sd $11, 16([[R2]])
+
+; O32/N32/N64 are accessing the stack at this point.
+; Unlike O32, N32/N64 do not reserve space for the arguments.
+; The stack offsets increase by 4 for O32 and by 8 for N32/N64.
+; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp)
+; O32-DAG: sb [[R3]], 7([[R1]])
+; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: sb [[R3]], 7([[R1]])
diff --git a/test/CodeGen/Mips/cconv/callee-saved-float.ll b/test/CodeGen/Mips/cconv/callee-saved-float.ll
new file mode 100644
index 0000000..de4d917
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved-float.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
+
+; Test that the callee-saved registers are preserved as specified by section
+; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec.
+
+define void @fpu_clobber() nounwind {
+entry:
+ call void asm "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f13},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ ret void
+}
+
+; ALL-LABEL: fpu_clobber:
+; ALL-INV-NOT: sdc1 $f0,
+; ALL-INV-NOT: sdc1 $f1,
+; ALL-INV-NOT: sdc1 $f2,
+; ALL-INV-NOT: sdc1 $f3,
+; ALL-INV-NOT: sdc1 $f4,
+; ALL-INV-NOT: sdc1 $f5,
+; ALL-INV-NOT: sdc1 $f6,
+; ALL-INV-NOT: sdc1 $f7,
+; ALL-INV-NOT: sdc1 $f8,
+; ALL-INV-NOT: sdc1 $f9,
+; ALL-INV-NOT: sdc1 $f10,
+; ALL-INV-NOT: sdc1 $f11,
+; ALL-INV-NOT: sdc1 $f12,
+; ALL-INV-NOT: sdc1 $f13,
+; ALL-INV-NOT: sdc1 $f14,
+; ALL-INV-NOT: sdc1 $f15,
+; ALL-INV-NOT: sdc1 $f16,
+; ALL-INV-NOT: sdc1 $f17,
+; ALL-INV-NOT: sdc1 $f18,
+; ALL-INV-NOT: sdc1 $f19,
+; ALL-INV-NOT: sdc1 $f21,
+; ALL-INV-NOT: sdc1 $f23,
+
+; O32: addiu $sp, $sp, -48
+; O32-DAG: sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; O32-DAG: sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; O32-DAG: sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; O32-DAG: sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; O32-DAG: sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; O32-DAG: sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; O32-DAG: ldc1 [[F20]], [[OFF20]]($sp)
+; O32-DAG: ldc1 [[F22]], [[OFF22]]($sp)
+; O32-DAG: ldc1 [[F24]], [[OFF24]]($sp)
+; O32-INV-NOT: sdc1 $f25,
+; O32-DAG: ldc1 [[F26]], [[OFF26]]($sp)
+; O32-INV-NOT: sdc1 $f27,
+; O32-DAG: ldc1 [[F28]], [[OFF28]]($sp)
+; O32-INV-NOT: sdc1 $f29,
+; O32-DAG: ldc1 [[F30]], [[OFF30]]($sp)
+; O32-INV-NOT: sdc1 $f31,
+; O32: addiu $sp, $sp, 48
+
+; N32: addiu $sp, $sp, -48
+; N32-DAG: sdc1 [[F20:\$f20]], [[OFF20:[0-9]+]]($sp)
+; N32-DAG: sdc1 [[F22:\$f22]], [[OFF22:[0-9]+]]($sp)
+; N32-DAG: sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; N32-DAG: sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; N32-DAG: sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; N32-DAG: sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; N32-DAG: ldc1 [[F20]], [[OFF20]]($sp)
+; N32-DAG: ldc1 [[F22]], [[OFF22]]($sp)
+; N32-DAG: ldc1 [[F24]], [[OFF24]]($sp)
+; N32-INV-NOT: sdc1 $f25,
+; N32-DAG: ldc1 [[F26]], [[OFF26]]($sp)
+; N32-INV-NOT: sdc1 $f27,
+; N32-DAG: ldc1 [[F28]], [[OFF28]]($sp)
+; N32-INV-NOT: sdc1 $f29,
+; N32-DAG: ldc1 [[F30]], [[OFF30]]($sp)
+; N32-INV-NOT: sdc1 $f31,
+; N32: addiu $sp, $sp, 48
+
+; N64: addiu $sp, $sp, -64
+; N64-INV-NOT: sdc1 $f20,
+; N64-INV-NOT: sdc1 $f22,
+; N64-DAG: sdc1 [[F24:\$f24]], [[OFF24:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F25:\$f25]], [[OFF25:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F26:\$f26]], [[OFF26:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F27:\$f27]], [[OFF27:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F28:\$f28]], [[OFF28:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F29:\$f29]], [[OFF29:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F30:\$f30]], [[OFF30:[0-9]+]]($sp)
+; N64-DAG: sdc1 [[F31:\$f31]], [[OFF31:[0-9]+]]($sp)
+; N64-DAG: ldc1 [[F24]], [[OFF24]]($sp)
+; N64-DAG: ldc1 [[F25]], [[OFF25]]($sp)
+; N64-DAG: ldc1 [[F26]], [[OFF26]]($sp)
+; N64-DAG: ldc1 [[F27]], [[OFF27]]($sp)
+; N64-DAG: ldc1 [[F28]], [[OFF28]]($sp)
+; N64-DAG: ldc1 [[F29]], [[OFF29]]($sp)
+; N64-DAG: ldc1 [[F30]], [[OFF30]]($sp)
+; N64-DAG: ldc1 [[F31]], [[OFF31]]($sp)
+; N64: addiu $sp, $sp, 64
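+
+; As a worked note, the frame sizes above follow from the save areas: O32 and
+; N32 save the six even-numbered registers $f20..$f30, 6 x 8 = 48 bytes, while
+; N64 saves the eight registers $f24..$f31, 8 x 8 = 64 bytes.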
diff --git a/test/CodeGen/Mips/cconv/callee-saved.ll b/test/CodeGen/Mips/cconv/callee-saved.ll
new file mode 100644
index 0000000..293e99f
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/callee-saved.ll
@@ -0,0 +1,167 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
+
+; Test that the callee-saved registers are preserved as specified by section
+; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec.
+
+define void @gpr_clobber() nounwind {
+entry:
+ ; Clobbering the stack pointer is a bad idea so we'll skip that one
+ call void asm "# Clobber", "~{$0},~{$1},~{$2},~{$3},~{$4},~{$5},~{$6},~{$7},~{$8},~{$9},~{$10},~{$11},~{$12},~{$13},~{$14},~{$15},~{$16},~{$17},~{$18},~{$19},~{$20},~{$21},~{$22},~{$23},~{$24},~{$25},~{$26},~{$27},~{$28},~{$30},~{$31}"()
+ ret void
+}
+
+; ALL-LABEL: gpr_clobber:
+; O32: addiu $sp, $sp, -40
+; O32-INV-NOT: sw $0,
+; O32-INV-NOT: sw $1,
+; O32-INV-NOT: sw $2,
+; O32-INV-NOT: sw $3,
+; O32-INV-NOT: sw $4,
+; O32-INV-NOT: sw $5,
+; O32-INV-NOT: sw $6,
+; O32-INV-NOT: sw $7,
+; O32-INV-NOT: sw $8,
+; O32-INV-NOT: sw $9,
+; O32-INV-NOT: sw $10,
+; O32-INV-NOT: sw $11,
+; O32-INV-NOT: sw $12,
+; O32-INV-NOT: sw $13,
+; O32-INV-NOT: sw $14,
+; O32-INV-NOT: sw $15,
+; O32-DAG: sw [[G16:\$16]], [[OFF16:[0-9]+]]($sp)
+; O32-DAG: sw [[G17:\$17]], [[OFF17:[0-9]+]]($sp)
+; O32-DAG: sw [[G18:\$18]], [[OFF18:[0-9]+]]($sp)
+; O32-DAG: sw [[G19:\$19]], [[OFF19:[0-9]+]]($sp)
+; O32-DAG: sw [[G20:\$20]], [[OFF20:[0-9]+]]($sp)
+; O32-DAG: sw [[G21:\$21]], [[OFF21:[0-9]+]]($sp)
+; O32-DAG: sw [[G22:\$22]], [[OFF22:[0-9]+]]($sp)
+; O32-DAG: sw [[G23:\$23]], [[OFF23:[0-9]+]]($sp)
+; O32-INV-NOT: sw $24,
+; O32-INV-NOT: sw $25,
+; O32-INV-NOT: sw $26,
+; O32-INV-NOT: sw $27,
+; O32-INV-NOT: sw $28,
+; O32-INV-NOT: sw $29,
+; O32-DAG: sw [[G30:\$fp]], [[OFF30:[0-9]+]]($sp)
+; O32-DAG:        sw [[G31:\$ra]], [[OFF31:[0-9]+]]($sp)
+; O32-DAG: lw [[G16]], [[OFF16]]($sp)
+; O32-DAG: lw [[G17]], [[OFF17]]($sp)
+; O32-DAG: lw [[G18]], [[OFF18]]($sp)
+; O32-DAG: lw [[G19]], [[OFF19]]($sp)
+; O32-DAG: lw [[G20]], [[OFF20]]($sp)
+; O32-DAG: lw [[G21]], [[OFF21]]($sp)
+; O32-DAG: lw [[G22]], [[OFF22]]($sp)
+; O32-DAG: lw [[G23]], [[OFF23]]($sp)
+; O32-DAG: lw [[G30]], [[OFF30]]($sp)
+; O32-DAG: lw [[G31]], [[OFF31]]($sp)
+; O32: addiu $sp, $sp, 40
+
+; N32: addiu $sp, $sp, -96
+; N32-INV-NOT: sd $0,
+; N32-INV-NOT: sd $1,
+; N32-INV-NOT: sd $2,
+; N32-INV-NOT: sd $3,
+; N32-INV-NOT: sd $4,
+; N32-INV-NOT: sd $5,
+; N32-INV-NOT: sd $6,
+; N32-INV-NOT: sd $7,
+; N32-INV-NOT: sd $8,
+; N32-INV-NOT: sd $9,
+; N32-INV-NOT: sd $10,
+; N32-INV-NOT: sd $11,
+; N32-INV-NOT: sd $12,
+; N32-INV-NOT: sd $13,
+; N32-INV-NOT: sd $14,
+; N32-INV-NOT: sd $15,
+; N32-DAG: sd [[G16:\$16]], [[OFF16:[0-9]+]]($sp)
+; N32-DAG: sd [[G17:\$17]], [[OFF17:[0-9]+]]($sp)
+; N32-DAG: sd [[G18:\$18]], [[OFF18:[0-9]+]]($sp)
+; N32-DAG: sd [[G19:\$19]], [[OFF19:[0-9]+]]($sp)
+; N32-DAG: sd [[G20:\$20]], [[OFF20:[0-9]+]]($sp)
+; N32-DAG: sd [[G21:\$21]], [[OFF21:[0-9]+]]($sp)
+; N32-DAG: sd [[G22:\$22]], [[OFF22:[0-9]+]]($sp)
+; N32-DAG: sd [[G23:\$23]], [[OFF23:[0-9]+]]($sp)
+; N32-INV-NOT: sd $24,
+; N32-INV-NOT: sd $25,
+; N32-INV-NOT: sd $26,
+; N32-INV-NOT: sd $27,
+; N32-DAG: sd [[G28:\$gp]], [[OFF28:[0-9]+]]($sp)
+; N32-INV-NOT: sd $29,
+; N32-DAG: sd [[G30:\$fp]], [[OFF30:[0-9]+]]($sp)
+; N32-DAG:        sd [[G31:\$ra]], [[OFF31:[0-9]+]]($sp)
+; N32-DAG: ld [[G16]], [[OFF16]]($sp)
+; N32-DAG: ld [[G17]], [[OFF17]]($sp)
+; N32-DAG: ld [[G18]], [[OFF18]]($sp)
+; N32-DAG: ld [[G19]], [[OFF19]]($sp)
+; N32-DAG: ld [[G20]], [[OFF20]]($sp)
+; N32-DAG: ld [[G21]], [[OFF21]]($sp)
+; N32-DAG: ld [[G22]], [[OFF22]]($sp)
+; N32-DAG: ld [[G23]], [[OFF23]]($sp)
+; N32-DAG: ld [[G28]], [[OFF28]]($sp)
+; N32-DAG: ld [[G30]], [[OFF30]]($sp)
+; N32-DAG: ld [[G31]], [[OFF31]]($sp)
+; N32: addiu $sp, $sp, 96
+
+; N64: daddiu $sp, $sp, -96
+; N64-INV-NOT: sd $0,
+; N64-INV-NOT: sd $1,
+; N64-INV-NOT: sd $2,
+; N64-INV-NOT: sd $3,
+; N64-INV-NOT: sd $4,
+; N64-INV-NOT: sd $5,
+; N64-INV-NOT: sd $6,
+; N64-INV-NOT: sd $7,
+; N64-INV-NOT: sd $8,
+; N64-INV-NOT: sd $9,
+; N64-INV-NOT: sd $10,
+; N64-INV-NOT: sd $11,
+; N64-INV-NOT: sd $12,
+; N64-INV-NOT: sd $13,
+; N64-INV-NOT: sd $14,
+; N64-INV-NOT: sd $15,
+; N64-DAG: sd [[G16:\$16]], [[OFF16:[0-9]+]]($sp)
+; N64-DAG: sd [[G17:\$17]], [[OFF17:[0-9]+]]($sp)
+; N64-DAG: sd [[G18:\$18]], [[OFF18:[0-9]+]]($sp)
+; N64-DAG: sd [[G19:\$19]], [[OFF19:[0-9]+]]($sp)
+; N64-DAG: sd [[G20:\$20]], [[OFF20:[0-9]+]]($sp)
+; N64-DAG: sd [[G21:\$21]], [[OFF21:[0-9]+]]($sp)
+; N64-DAG: sd [[G22:\$22]], [[OFF22:[0-9]+]]($sp)
+; N64-DAG: sd [[G23:\$23]], [[OFF23:[0-9]+]]($sp)
+; N64-DAG: sd [[G30:\$fp]], [[OFF30:[0-9]+]]($sp)
+; N64-DAG:        sd [[G31:\$ra]], [[OFF31:[0-9]+]]($sp)
+; N64-INV-NOT: sd $24,
+; N64-INV-NOT: sd $25,
+; N64-INV-NOT: sd $26,
+; N64-INV-NOT: sd $27,
+; N64-DAG: sd [[G28:\$gp]], [[OFF28:[0-9]+]]($sp)
+; N64-INV-NOT: sd $29,
+; N64-DAG: ld [[G16]], [[OFF16]]($sp)
+; N64-DAG: ld [[G17]], [[OFF17]]($sp)
+; N64-DAG: ld [[G18]], [[OFF18]]($sp)
+; N64-DAG: ld [[G19]], [[OFF19]]($sp)
+; N64-DAG: ld [[G20]], [[OFF20]]($sp)
+; N64-DAG: ld [[G21]], [[OFF21]]($sp)
+; N64-DAG: ld [[G22]], [[OFF22]]($sp)
+; N64-DAG: ld [[G23]], [[OFF23]]($sp)
+; N64-DAG: ld [[G28]], [[OFF28]]($sp)
+; N64-DAG: ld [[G30]], [[OFF30]]($sp)
+; N64-DAG: ld [[G31]], [[OFF31]]($sp)
+; N64: daddiu $sp, $sp, 96
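+
+; As a worked note, the frame sizes above follow from the save areas: O32
+; saves ten 4-byte words ($16..$23, $fp and $ra), 10 x 4 = 40 bytes, while
+; N32/N64 save eleven 8-byte dwords ($16..$23, $gp, $fp and $ra),
+; 11 x 8 = 88 bytes, presumably rounded up to 96 to keep 16-byte stack
+; alignment.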
diff --git a/test/CodeGen/Mips/cconv/memory-layout.ll b/test/CodeGen/Mips/cconv/memory-layout.ll
new file mode 100644
index 0000000..0c3cc9e
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/memory-layout.ll
@@ -0,0 +1,140 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the memory layout for all ABIs and byte orders as specified by section
+; 4 of MD00305 (MIPS ABIs Described).
+; Bitfields are not covered since they are not available as a type in LLVM IR.
+;
+; The assembly directives deal with endianness so we don't need to account for
+; that.
+
+; Deliberately request alignments that are too small for the target so we get
+; the minimum alignment instead of the preferred alignment.
+@byte = global i8 1, align 1
+@halfword = global i16 258, align 1
+@word = global i32 16909060, align 1
+@float = global float 1.0, align 1
+@dword = global i64 283686952306183, align 1
+@double = global double 1.0, align 1
+@pointer = global i8* @byte
+
+; ALL-NOT: .align
+; ALL-LABEL: byte:
+; ALL: .byte 1
+; ALL: .size byte, 1
+
+; ALL: .align 1
+; ALL-LABEL: halfword:
+; ALL: .2byte 258
+; ALL: .size halfword, 2
+
+; ALL: .align 2
+; ALL-LABEL: word:
+; ALL: .4byte 16909060
+; ALL: .size word, 4
+
+; ALL: .align 2
+; ALL-LABEL: float:
+; ALL: .4byte 1065353216
+; ALL: .size float, 4
+
+; ALL: .align 3
+; ALL-LABEL: dword:
+; ALL: .8byte 283686952306183
+; ALL: .size dword, 8
+
+; ALL: .align 3
+; ALL-LABEL: double:
+; ALL: .8byte 4607182418800017408
+; ALL: .size double, 8
+
+; O32: .align 2
+; N32: .align 2
+; N64: .align 3
+; ALL-LABEL: pointer:
+; O32: .4byte byte
+; O32: .size pointer, 4
+; N32: .4byte byte
+; N32: .size pointer, 4
+; N64: .8byte byte
+; N64: .size pointer, 8
+
+@byte_array = global [2 x i8] [i8 1, i8 2], align 1
+@halfword_array = global [2 x i16] [i16 1, i16 2], align 1
+@word_array = global [2 x i32] [i32 1, i32 2], align 1
+@float_array = global [2 x float] [float 1.0, float 2.0], align 1
+@dword_array = global [2 x i64] [i64 1, i64 2], align 1
+@double_array = global [2 x double] [double 1.0, double 2.0], align 1
+@pointer_array = global [2 x i8*] [i8* @byte, i8* @byte]
+
+; ALL-NOT: .align
+; ALL-LABEL: byte_array:
+; ALL: .ascii "\001\002"
+; ALL: .size byte_array, 2
+
+; ALL: .align 1
+; ALL-LABEL: halfword_array:
+; ALL: .2byte 1
+; ALL: .2byte 2
+; ALL: .size halfword_array, 4
+
+; ALL: .align 2
+; ALL-LABEL: word_array:
+; ALL: .4byte 1
+; ALL: .4byte 2
+; ALL: .size word_array, 8
+
+; ALL: .align 2
+; ALL-LABEL: float_array:
+; ALL: .4byte 1065353216
+; ALL: .4byte 1073741824
+; ALL: .size float_array, 8
+
+; ALL: .align 3
+; ALL-LABEL: dword_array:
+; ALL: .8byte 1
+; ALL: .8byte 2
+; ALL: .size dword_array, 16
+
+; ALL: .align 3
+; ALL-LABEL: double_array:
+; ALL: .8byte 4607182418800017408
+; ALL: .8byte 4611686018427387904
+; ALL: .size double_array, 16
+
+; O32: .align 2
+; N32: .align 2
+; N64: .align 3
+; ALL-LABEL: pointer_array:
+; O32: .4byte byte
+; O32: .4byte byte
+; O32: .size pointer_array, 8
+; N32: .4byte byte
+; N32: .4byte byte
+; N32: .size pointer_array, 8
+; N64: .8byte byte
+; N64: .8byte byte
+; N64: .size pointer_array, 16
+
+%mixed = type { i8, double, i16 }
+@mixed = global %mixed { i8 1, double 1.0, i16 515 }, align 1
+
+; ALL: .align 3
+; ALL-LABEL: mixed:
+; ALL: .byte 1
+; ALL: .space 7
+; ALL: .8byte 4607182418800017408
+; ALL: .2byte 515
+; ALL: .space 6
+; ALL: .size mixed, 24
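+
+; Worked layout, derived from the directives above: the i8 sits at offset 0,
+; 7 bytes of padding align the double to offset 8, the i16 lands at offset 16,
+; and 6 bytes of tail padding round the size up to 24 (a multiple of the
+; struct's 8-byte alignment).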
+
+; Bitfields are not available in LLVM IR so we can't test them here.
diff --git a/test/CodeGen/Mips/cconv/reserved-space.ll b/test/CodeGen/Mips/cconv/reserved-space.ll
new file mode 100644
index 0000000..b36f89e
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/reserved-space.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test that O32 correctly reserves space for the four arguments, even when
+; there aren't any, as per section 5 of MD00305 (MIPS ABIs Described).
+
+declare void @foo() nounwind
+
+define void @reserved_space() nounwind {
+entry:
+ tail call void @foo()
+ ret void
+}
+
+; ALL-LABEL: reserved_space:
+; O32: addiu $sp, $sp, -24
+; O32: sw $ra, 20($sp)
+; O32: lw $ra, 20($sp)
+; O32: addiu $sp, $sp, 24
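+; The 24-byte O32 frame is made up of the 16-byte argument save area, 4 bytes
+; for $ra, and padding to reach the 8-byte stack alignment.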
+; Despite pointers being 32 bits wide on N32, the return address is saved as a
+; 64-bit value. I've yet to find a documentation reference for this quirk, but
+; the behaviour matches GCC so I have considered it to be correct.
+; N32: addiu $sp, $sp, -16
+; N32: sd $ra, 8($sp)
+; N32: ld $ra, 8($sp)
+; N32: addiu $sp, $sp, 16
+; N64: daddiu $sp, $sp, -16
+; N64: sd $ra, 8($sp)
+; N64: ld $ra, 8($sp)
+; N64: daddiu $sp, $sp, 16
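+; N32 and N64 do not reserve an argument save area, so their frames contain
+; only the $ra slot rounded up to the 16-byte stack alignment.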
diff --git a/test/CodeGen/Mips/cconv/return-float.ll b/test/CodeGen/Mips/cconv/return-float.ll
new file mode 100644
index 0000000..28cf83d
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-float.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=mips-linux-gnu -soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the float returns for all ABIs and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+; We only test Linux because other OSes use different relocations and I don't
+; know if this is correct.
+
+@float = global float zeroinitializer
+@double = global double zeroinitializer
+
+define float @retfloat() nounwind {
+entry:
+ %0 = load volatile float* @float
+ ret float %0
+}
+
+; ALL-LABEL: retfloat:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
+; O32-DAG: lw $2, %lo(float)([[R1]])
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
+; N32-DAG: lw $2, %lo(float)([[R1]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG: lw $2, 0([[R1]])
+
+define double @retdouble() nounwind {
+entry:
+ %0 = load volatile double* @double
+ ret double %0
+}
+
+; ALL-LABEL: retdouble:
+; O32-DAG: lw $2, %lo(double)([[R1:\$[0-9]+]])
+; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], %lo(double)
+; O32-DAG: lw $3, 4([[R2]])
+; N32-DAG: ld $2, %lo(double)([[R1:\$[0-9]+]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG: ld $2, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll
new file mode 100644
index 0000000..371b3a5
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-hard-float.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the float returns for all ABIs and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+; We only test Linux because other OSes use different relocations and I don't
+; know if this is correct.
+
+@float = global float zeroinitializer
+@double = global double zeroinitializer
+
+define float @retfloat() nounwind {
+entry:
+ %0 = load volatile float* @float
+ ret float %0
+}
+
+; ALL-LABEL: retfloat:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
+; O32-DAG: lwc1 $f0, %lo(float)([[R1]])
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
+; N32-DAG: lwc1 $f0, %lo(float)([[R1]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG: lwc1 $f0, 0([[R1]])
+
+define double @retdouble() nounwind {
+entry:
+ %0 = load volatile double* @double
+ ret double %0
+}
+
+; ALL-LABEL: retdouble:
+; O32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
+; N32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG: ldc1 $f0, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-hard-fp128.ll b/test/CodeGen/Mips/cconv/return-hard-fp128.ll
new file mode 100644
index 0000000..0da59ef
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the fp128 returns for N32/N64 and all byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+;
+; O32 is not tested because long double is the same as double on O32.
+;
+@fp128 = global fp128 zeroinitializer
+
+define fp128 @retldouble() nounwind {
+entry:
+ %0 = load volatile fp128* @fp128
+ ret fp128 %0
+}
+
+; ALL-LABEL: retldouble:
+; N32-DAG: ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
+; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
+; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: dmtc1 [[R4]], $f2
+
+; N64-DAG: ld [[R2:\$[0-9]+]], %got_disp(fp128)([[R1:\$[0-9]+]])
+; N64-DAG: ld [[R3:\$[0-9]+]], 0([[R2]])
+; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R2]])
+; N64-DAG: dmtc1 [[R3]], $f0
+; N64-DAG: dmtc1 [[R4]], $f2
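+
+; In both ABIs, the fp128 return value is built as two 64-bit halves in the
+; integer registers and moved into $f0 and $f2 with dmtc1; the -DAG checks
+; allow the two halves to be loaded in either order.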
diff --git a/test/CodeGen/Mips/cconv/return.ll b/test/CodeGen/Mips/cconv/return.ll
new file mode 100644
index 0000000..76ce5e4
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the integer returns for all ABIs and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+; We only test Linux because other OSes use different relocations and I don't
+; know if this is correct.
+
+@byte = global i8 zeroinitializer
+@word = global i32 zeroinitializer
+@dword = global i64 zeroinitializer
+@float = global float zeroinitializer
+@double = global double zeroinitializer
+
+define i8 @reti8() nounwind {
+entry:
+ %0 = load volatile i8* @byte
+ ret i8 %0
+}
+
+; ALL-LABEL: reti8:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(byte)
+; O32-DAG: lbu $2, %lo(byte)([[R1]])
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(byte)
+; N32-DAG: lbu $2, %lo(byte)([[R1]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)($1)
+; N64-DAG: lbu $2, 0([[R1]])
+
+define i32 @reti32() nounwind {
+entry:
+ %0 = load volatile i32* @word
+ ret i32 %0
+}
+
+; ALL-LABEL: reti32:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(word)
+; O32-DAG: lw $2, %lo(word)([[R1]])
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(word)
+; N32-DAG: lw $2, %lo(word)([[R1]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)($1)
+; N64-DAG: lw $2, 0([[R1]])
+
+define i64 @reti64() nounwind {
+entry:
+ %0 = load volatile i64* @dword
+ ret i64 %0
+}
+
+; ALL-LABEL: reti64:
+; On O32, we must use v0 and v1 for the return value
+; O32-DAG: lw $2, %lo(dword)([[R1:\$[0-9]+]])
+; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], %lo(dword)
+; O32-DAG: lw $3, 4([[R2]])
+; N32-DAG: ld $2, %lo(dword)([[R1:\$[0-9]+]])
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(dword)($1)
+; N64-DAG: ld $2, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/stack-alignment.ll b/test/CodeGen/Mips/cconv/stack-alignment.ll
new file mode 100644
index 0000000..834033b
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/stack-alignment.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test the stack alignment for all ABIs and byte orders as specified by
+; section 5 of MD00305 (MIPS ABIs Described).
+
+define void @local_bytes_1() nounwind {
+entry:
+ %0 = alloca i8
+ ret void
+}
+
+; ALL-LABEL: local_bytes_1:
+; O32: addiu $sp, $sp, -8
+; O32: addiu $sp, $sp, 8
+; N32: addiu $sp, $sp, -16
+; N32: addiu $sp, $sp, 16
+; N64: daddiu $sp, $sp, -16
+; N64: daddiu $sp, $sp, 16
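+
+; A 1-byte local still produces these frame sizes because each ABI mandates a
+; minimum stack alignment: 8 bytes for O32 and 16 bytes for N32/N64.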
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index f2009fa..b9732eb 100644
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
@@ -238,4 +239,4 @@ define i32 @slti6(i32 %a) nounwind readnone {
; O32-DAG: xori [[R1]], [[R1]], 1
; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
; O32-NOT: movn
-; O32:.size slti6 \ No newline at end of file
+; O32:.size slti6
diff --git a/test/CodeGen/Mips/eh-dwarf-cfa.ll b/test/CodeGen/Mips/eh-dwarf-cfa.ll
index 3a21332..6554974 100644
--- a/test/CodeGen/Mips/eh-dwarf-cfa.ll
+++ b/test/CodeGen/Mips/eh-dwarf-cfa.ll
@@ -1,4 +1,6 @@
; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-MIPS64
; RUN: llc -march=mips64el -mcpu=mips64 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-MIPS64
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 32fc5e6..8c5af50 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
declare void @llvm.eh.return.i64(i64, i8*)
diff --git a/test/CodeGen/Mips/elf_eflags.ll b/test/CodeGen/Mips/elf_eflags.ll
index 336ed7b..00d8584 100644
--- a/test/CodeGen/Mips/elf_eflags.ll
+++ b/test/CodeGen/Mips/elf_eflags.ll
@@ -23,6 +23,9 @@
; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MICROMIPS %s
; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | FileCheck -check-prefix=CHECK-LE32R2-MICROMIPS_PIC %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE64 %s
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips4 %s -o - | FileCheck -check-prefix=CHECK-LE64_PIC %s
+
; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE64 %s
; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | FileCheck -check-prefix=CHECK-LE64_PIC %s
; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-LE64R2 %s
diff --git a/test/CodeGen/Mips/elf_st_other.ll b/test/CodeGen/Mips/elf_st_other.ll
deleted file mode 100644
index 8a5f20d..0000000
--- a/test/CodeGen/Mips/elf_st_other.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; This tests value of ELF st_other field for function symbol table entries.
-; For microMIPS value should be equal to STO_MIPS_MICROMIPS.
-
-; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | FileCheck %s
-
-define i32 @main() nounwind {
-entry:
- ret i32 0
-}
-
-; CHECK: .set micromips
-; CHECK: main:
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
index 49d8a72..ce1a9a6 100644
--- a/test/CodeGen/Mips/fabs.ll
+++ b/test/CodeGen/Mips/fabs.ll
@@ -1,21 +1,23 @@
-; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s -check-prefix=32
-; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
-; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
-; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+; Check that abs.[ds] is selected and does not depend on -enable-no-nans-fp-math.
+; These instructions obey the Has2008 and ABS2008 configuration bits, which
+; govern conformance to IEEE 754 (1985) and IEEE 754 (2008). When these bits
+; are not present, the instructions conform to the 1985 behaviour.
+; In 1985 mode, abs.[ds] are arithmetic (i.e. they raise invalid operation
+; exceptions when given NaNs). In 2008 mode, they are non-arithmetic (i.e.
+; they are copies and don't raise any exceptions).
-define float @foo0(float %a) nounwind readnone {
-entry:
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s
-; 32: lui $[[T0:[0-9]+]], 32767
-; 32: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; 32: and $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 32: mtc1 $[[AND]], $f0
+; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 | FileCheck %s
+; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -enable-no-nans-fp-math | FileCheck %s
-; 32R2: ins $[[INS:[0-9]+]], $zero, 31, 1
-; 32R2: mtc1 $[[INS]], $f0
+define float @foo0(float %a) nounwind readnone {
+entry:
-; NO-NAN: abs.s
+; CHECK-LABEL: foo0
+; CHECK: abs.s
%call = tail call float @fabsf(float %a) nounwind readnone
ret float %call
@@ -26,24 +28,8 @@ declare float @fabsf(float) nounwind readnone
define double @foo1(double %a) nounwind readnone {
entry:
-; 32: lui $[[T0:[0-9]+]], 32767
-; 32: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; 32: and $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 32: mtc1 $[[AND]], $f1
-
-; 32R2: ins $[[INS:[0-9]+]], $zero, 31, 1
-; 32R2: mtc1 $[[INS]], $f1
-
-; 64: daddiu $[[T0:[0-9]+]], $zero, 1
-; 64: dsll $[[T1:[0-9]+]], ${{[0-9]+}}, 63
-; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
-; 64: and $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
-; 64: dmtc1 $[[AND]], $f0
-
-; 64R2: dins $[[INS:[0-9]+]], $zero, 63, 1
-; 64R2: dmtc1 $[[INS]], $f0
-
-; NO-NAN: abs.d
+; CHECK-LABEL: foo1:
+; CHECK: abs.d
%call = tail call double @fabs(double %a) nounwind readnone
ret double %call
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
index 9f88d0c..148a780 100644
--- a/test/CodeGen/Mips/fcopysign-f32-f64.ll
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s -check-prefix=64
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 1c57eca..44c4117 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s -check-prefix=64
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index 6768ed6..a9a8e21 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -1,3 +1,10 @@
+; Check that madd.[ds], msub.[ds], nmadd.[ds], and nmsub.[ds] are supported
+; correctly.
+; The spec for nmadd.[ds] and nmsub.[ds] does not state that they obey the
+; Has2008 and ABS2008 configuration bits which govern the conformance to
+; IEEE 754 (1985) and IEEE 754 (2008). These instructions are therefore only
+; available when -enable-no-nans-fp-math is given.
+
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2 -check-prefix=CHECK
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2 -check-prefix=CHECK
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN -check-prefix=CHECK
@@ -5,6 +12,7 @@
define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO0float:
; CHECK: madd.s
%mul = fmul float %a, %b
%add = fadd float %mul, %c
@@ -14,6 +22,7 @@ entry:
define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO1float:
; CHECK: msub.s
%mul = fmul float %a, %b
%sub = fsub float %mul, %c
@@ -23,6 +32,7 @@ entry:
define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO2float:
; 32R2: nmadd.s
; 64R2: nmadd.s
; 32R2NAN: madd.s
@@ -35,6 +45,7 @@ entry:
define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO3float:
; 32R2: nmsub.s
; 64R2: nmsub.s
; 32R2NAN: msub.s
@@ -47,6 +58,7 @@ entry:
define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO10double:
; CHECK: madd.d
%mul = fmul double %a, %b
%add = fadd double %mul, %c
@@ -56,6 +68,7 @@ entry:
define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO11double:
; CHECK: msub.d
%mul = fmul double %a, %b
%sub = fsub double %mul, %c
@@ -65,6 +78,7 @@ entry:
define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO12double:
; 32R2: nmadd.d
; 64R2: nmadd.d
; 32R2NAN: madd.d
@@ -77,6 +91,7 @@ entry:
define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
entry:
+; CHECK-LABEL: FOO13double:
; 32R2: nmsub.d
; 64R2: nmsub.d
; 32R2NAN: msub.d
diff --git a/test/CodeGen/Mips/fneg.ll b/test/CodeGen/Mips/fneg.ll
index b322abd..4fb80fd 100644
--- a/test/CodeGen/Mips/fneg.ll
+++ b/test/CodeGen/Mips/fneg.ll
@@ -1,17 +1,30 @@
-; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s
+; Check that neg.[ds] is selected and does not depend on -enable-no-nans-fp-math.
+; These instructions obey the Has2008 and ABS2008 configuration bits, which
+; govern conformance to IEEE 754 (1985) and IEEE 754 (2008). When these bits
+; are not present, the instructions conform to the 1985 behaviour.
+; In 1985 mode, neg.[ds] are arithmetic (i.e. they raise invalid operation
+; exceptions when given NaNs). In 2008 mode, they are non-arithmetic (i.e.
+; they are copies and don't raise any exceptions).
-define float @foo0(i32 %a, float %d) nounwind readnone {
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s
+; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s
+
+; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 | FileCheck %s
+; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -enable-no-nans-fp-math | FileCheck %s
+
+define float @foo0(float %d) nounwind readnone {
entry:
-; CHECK-NOT: neg.s
+; CHECK-LABEL: foo0:
+; CHECK: neg.s
%sub = fsub float -0.000000e+00, %d
ret float %sub
}
-define double @foo1(i32 %a, double %d) nounwind readnone {
+define double @foo1(double %d) nounwind readnone {
entry:
-; CHECK: foo1
-; CHECK-NOT: neg.d
-; CHECK: jr
+; CHECK-LABEL: foo1:
+; CHECK: neg.d
%sub = fsub double -0.000000e+00, %d
ret double %sub
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
index f9e53cb..c09108d 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind {
entry:
;CHECK-ERRORS: error: invalid operand for inline asm constraint 'I'
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 1048576) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
index 1fdf672..2b24b0f 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
@@ -10,7 +10,7 @@ entry:
;CHECK-ERRORS: error: invalid operand for inline asm constraint 'J'
- tail call i32 asm "addi $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind
+ tail call i32 asm "addiu $0,$1,$2", "=r,r,J"(i32 1024, i32 3) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
index 49dcc87..5edb3e2 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
@@ -10,7 +10,7 @@ entry:
;CHECK-ERRORS: error: invalid operand for inline asm constraint 'L'
- tail call i32 asm "addi $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind
+ tail call i32 asm "addiu $0,$1,$2", "=r,r,L"(i32 7, i32 1048579) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
index 770669d..eaa540a 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
@@ -11,7 +11,7 @@ entry:
;CHECK-ERRORS: error: invalid operand for inline asm constraint 'N'
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 3) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
index cd4431a..56afbaa 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
@@ -11,6 +11,6 @@ entry:
;CHECK-ERRORS: error: invalid operand for inline asm constraint 'O'
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 undef, i32 16384) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
index 0a4739e..0a55cb5 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
@@ -11,6 +11,6 @@ entry:
;CHECK-ERRORS: error: invalid operand for inline asm constraint 'P'
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 undef, i32 655536) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
index 94ded30..9464918 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -7,27 +7,27 @@ entry:
; r with char
;CHECK: #APP
-;CHECK: addi ${{[0-9]+}},${{[0-9]+}},23
+;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},23
;CHECK: #NO_APP
- tail call i8 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
+ tail call i8 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
; r with short
;CHECK: #APP
-;CHECK: addi ${{[0-9]+}},${{[0-9]+}},13
+;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},13
;CHECK: #NO_APP
- tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
+ tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
; r with int
;CHECK: #APP
-;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},3
;CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
; Now c with 1024: make sure register $25 is picked
; CHECK: #APP
-; CHECK: addi $25,${{[0-9]+}},1024
+; CHECK: addiu $25,${{[0-9]+}},1024
; CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
; Now l with 1024: make sure register lo is picked. We do this by checking the instruction
; after the inline expression for a mflo to pull the value out of lo.
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
index 7870666..a7ba762 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
@@ -12,9 +12,9 @@ entry:
; r with long long
;CHECK: #APP
-;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},3
;CHECK: #NO_APP
- tail call i64 asm sideeffect "addi $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
+ tail call i64 asm sideeffect "addiu $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index 7bb4adc..6512851 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -12,9 +12,9 @@ define i32 @constraint_X() nounwind {
entry:
;CHECK_LITTLE_32-LABEL: constraint_X:
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
+ tail call i32 asm sideeffect "addiu $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
@@ -23,9 +23,9 @@ define i32 @constraint_x() nounwind {
entry:
;CHECK_LITTLE_32-LABEL: constraint_x:
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},0xfffd
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffd
;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
+ tail call i32 asm sideeffect "addiu $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
@@ -34,9 +34,9 @@ define i32 @constraint_d() nounwind {
entry:
;CHECK_LITTLE_32-LABEL: constraint_d:
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-3
;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
+ tail call i32 asm sideeffect "addiu $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
@@ -45,9 +45,9 @@ define i32 @constraint_m() nounwind {
entry:
;CHECK_LITTLE_32-LABEL: constraint_m:
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-4
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-4
;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
+ tail call i32 asm sideeffect "addiu $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
@@ -56,15 +56,15 @@ define i32 @constraint_z() nounwind {
entry:
;CHECK_LITTLE_32-LABEL: constraint_z:
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},-3
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-3
;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
+ tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
; z with 0
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},$0
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},$0
;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
ret i32 0
}
@@ -73,9 +73,9 @@ define i32 @constraint_longlong() nounwind {
entry:
;CHECK_LITTLE_32-LABEL: constraint_longlong:
;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addi ${{[0-9]+}},${{[0-9]+}},3
+;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},3
;CHECK_LITTLE_32: #NO_APP
- tail call i64 asm sideeffect "addi $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
+ tail call i64 asm sideeffect "addiu $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
index 8d30f45..76b73dc 100644
--- a/test/CodeGen/Mips/inlineasm_constraint.ll
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -5,21 +5,21 @@ entry:
; First I with short
; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},4096
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},4096
; CHECK: #NO_APP
- tail call i16 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
+ tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
; Then I with int
; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
; CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
; Now J with 0
; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},0
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},0
; CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
; Now K with 64
; CHECK: #APP
@@ -35,29 +35,29 @@ entry:
; Now N with -3
; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
; CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
; Now O with -3
; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
; CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
; Now P with 65535
; CHECK: #APP
-; CHECK: addi ${{[0-9]+}},${{[0-9]+}},65535
+; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},65535
; CHECK: #NO_APP
- tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
+ tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
; Now R Which takes the address of c
%c = alloca i32, align 4
store i32 -4469539, i32* %c, align 4
- %8 = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
+ %8 = call i32 asm sideeffect "lw $0, 1 + $1\0A\09lw $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
; CHECK: #APP
-; CHECK: lwl ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
-; CHECK: lwr ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
+; CHECK: lw ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
+; CHECK: lw ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
; CHECK: #NO_APP
ret i32 0
diff --git a/test/CodeGen/Mips/int-to-float-conversion.ll b/test/CodeGen/Mips/int-to-float-conversion.ll
index c2baf44..d226b48 100644
--- a/test/CodeGen/Mips/int-to-float-conversion.ll
+++ b/test/CodeGen/Mips/int-to-float-conversion.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 09fee3d..0e9c91f 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,6 +1,8 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | \
+; RUN: FileCheck %s -check-prefix=64
; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | \
-; RUN: FileCheck %s -check-prefix=64
+; RUN: FileCheck %s -check-prefix=64
%struct.S1 = type { [65536 x i8] }
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
index d0928ee..a3f5ebf 100644
--- a/test/CodeGen/Mips/load-store-left-right.ll
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -1,29 +1,439 @@
-; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=EL %s
-; RUN: llc -march=mips < %s | FileCheck -check-prefix=EB %s
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s
+; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EL %s
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EB %s
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64 -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EL %s
+; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EB %s
+%struct.SLL = type { i64 }
%struct.SI = type { i32 }
+%struct.SUI = type { i32 }
+@sll = common global %struct.SLL zeroinitializer, align 1
@si = common global %struct.SI zeroinitializer, align 1
+@sui = common global %struct.SUI zeroinitializer, align 1
-define i32 @foo_load_i() nounwind readonly {
+define i32 @load_SI() nounwind readonly {
entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
+; ALL-LABEL: load_SI:
+
+; MIPS32-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL: lwr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB: lwr $[[R0]], 3($[[R1]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6: lw $2, 0($[[PTR]])
+
+; MIPS64-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL: lwr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6: lw $2, 0($[[PTR]])
%0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
ret i32 %0
}
-define void @foo_store_i(i32 %a) nounwind {
+define void @store_SI(i32 %a) nounwind {
entry:
-; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: swr $[[R0]], 0($[[R1]])
-; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: swr $[[R0]], 3($[[R1]])
+; ALL-LABEL: store_SI:
+
+; MIPS32-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL: swr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB: swr $[[R0]], 3($[[R1]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6: sw $4, 0($[[PTR]])
+
+; MIPS64-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL: swr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: swr $[[R0]], 3($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6: sw $4, 0($[[PTR]])
store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
ret void
}
+define i64 @load_SLL() nounwind readonly {
+entry:
+; ALL-LABEL: load_SLL:
+
+; MIPS32-EL: lwl $2, 3($[[R1:[0-9]+]])
+; MIPS32-EL: lwr $2, 0($[[R1]])
+; MIPS32-EL: lwl $3, 7($[[R1:[0-9]+]])
+; MIPS32-EL: lwr $3, 4($[[R1]])
+
+; MIPS32-EB: lwl $2, 0($[[R1:[0-9]+]])
+; MIPS32-EB: lwr $2, 3($[[R1]])
+; MIPS32-EB: lwl $3, 4($[[R1:[0-9]+]])
+; MIPS32-EB: lwr $3, 7($[[R1]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(sll)(
+; MIPS32R6-DAG: lw $2, 0($[[PTR]])
+; MIPS32R6-DAG: lw $3, 4($[[PTR]])
+
+; MIPS64-EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; MIPS64-EL: ldr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: ldr $[[R0]], 7($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sll)(
+; MIPS64R6: ld $2, 0($[[PTR]])
+
+ %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+ ret i64 %0
+}
+
+define i64 @load_SI_sext_to_i64() nounwind readonly {
+entry:
+; ALL-LABEL: load_SI_sext_to_i64:
+
+; MIPS32-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL: lwr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB: lwr $[[R0]], 3($[[R1]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6-EL: lw $2, 0($[[PTR]])
+; MIPS32R6-EL: sra $3, $2, 31
+; MIPS32R6-EB: lw $3, 0($[[PTR]])
+; MIPS32R6-EB: sra $2, $3, 31
+
+; MIPS64-EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL: lwr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6: lw $2, 0($[[PTR]])
+
+ %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+}
+
+define i64 @load_UI() nounwind readonly {
+entry:
+; ALL-LABEL: load_UI:
+
+; MIPS32-EL-DAG: lwl $[[R2:2]], 3($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: lwr $[[R2]], 0($[[R1]])
+; MIPS32-EL-DAG: addiu $3, $zero, 0
+
+; MIPS32-EB-DAG: lwl $[[R2:3]], 0($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: lwr $[[R2]], 3($[[R1]])
+; MIPS32-EB-DAG: addiu $2, $zero, 0
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(sui)(
+; MIPS32R6-EL-DAG: lw $2, 0($[[PTR]])
+; MIPS32R6-EL-DAG: addiu $3, $zero, 0
+; MIPS32R6-EB-DAG: lw $3, 0($[[PTR]])
+; MIPS32R6-EB-DAG: addiu $2, $zero, 0
+
+; MIPS64-EL-DAG: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL-DAG: lwr $[[R0]], 0($[[R1]])
+; MIPS64-EL-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
+; MIPS64-EL-DAG: dsll $[[R3:[0-9]+]], $[[R2]], 32
+; MIPS64-EL-DAG: daddiu $[[R4:[0-9]+]], $[[R3]], -1
+; MIPS64-EL-DAG: and ${{[0-9]+}}, $[[R0]], $[[R4]]
+
+; MIPS64-EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sui)(
+; MIPS64R6: lwu $2, 0($[[PTR]])
+
+ %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+ %conv = zext i32 %0 to i64
+ ret i64 %conv
+}
+
+define void @store_SLL(i64 %a) nounwind {
+entry:
+; ALL-LABEL: store_SLL:
+
+; MIPS32-EL-DAG: swl $[[A1:4]], 3($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: swr $[[A1]], 0($[[R1]])
+; MIPS32-EL-DAG: swl $[[A2:5]], 7($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: swr $[[A2]], 4($[[R1]])
+
+; MIPS32-EB-DAG: swl $[[A1:4]], 0($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: swr $[[A1]], 3($[[R1]])
+; MIPS32-EB-DAG: swl $[[A2:5]], 4($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: swr $[[A2]], 7($[[R1]])
+
+; MIPS32R6-DAG: lw $[[PTR:[0-9]+]], %got(sll)(
+; MIPS32R6-DAG: sw $4, 0($[[PTR]])
+; MIPS32R6-DAG: sw $5, 4($[[PTR]])
+
+; MIPS64-EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; MIPS64-EL: sdr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: sdr $[[R0]], 7($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sll)(
+; MIPS64R6: sd $4, 0($[[PTR]])
+
+ store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+ ret void
+}
+
+define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+entry:
+; ALL-LABEL: store_SI_trunc_from_i64:
+
+; MIPS32-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL: swr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB: swr $[[R0]], 3($[[R1]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6: sw $4, 0($[[PTR]])
+
+; MIPS64-EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL: swr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB: swr $[[R0]], 3($[[R1]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6: sw $4, 0($[[PTR]])
+
+ store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+ ret void
+}
+
+;
+; Structures are simply concatenations of their members. They are unaffected
+; by endianness.
+;
+
+%struct.S0 = type { i8, i8 }
+@struct_s0 = common global %struct.S0 zeroinitializer, align 1
+%struct.S1 = type { i16, i16 }
+@struct_s1 = common global %struct.S1 zeroinitializer, align 1
+%struct.S2 = type { i32, i32 }
+@struct_s2 = common global %struct.S2 zeroinitializer, align 1
+
+define void @copy_struct_S0() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S0:
+
+; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+
+; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since
+; we have unaligned halfword load/store available.
+; ALL-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; ALL-DAG: sb $[[R1]], 2($[[PTR]])
+; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; ALL-DAG: sb $[[R1]], 3($[[PTR]])
+
+ %0 = load %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1
+ store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 1), align 1
+ ret void
+}
+
+define void @copy_struct_S1() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S1:
+
+; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32-DAG: sb $[[R1]], 4($[[PTR]])
+; MIPS32-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; MIPS32-DAG: sb $[[R1]], 5($[[PTR]])
+; MIPS32-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS32-DAG: sb $[[R1]], 6($[[PTR]])
+; MIPS32-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS32-DAG: sb $[[R1]], 7($[[PTR]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32R6-DAG: sh $[[R1]], 4($[[PTR]])
+; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS32R6-DAG: sh $[[R1]], 6($[[PTR]])
+
+; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-DAG: sb $[[R1]], 4($[[PTR]])
+; MIPS64-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; MIPS64-DAG: sb $[[R1]], 5($[[PTR]])
+; MIPS64-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS64-DAG: sb $[[R1]], 6($[[PTR]])
+; MIPS64-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-DAG: sb $[[R1]], 7($[[PTR]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64R6-DAG: sh $[[R1]], 4($[[PTR]])
+; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]])
+
+ %0 = load %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1
+ store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 1), align 1
+ ret void
+}
+
+define void @copy_struct_S2() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S2:
+
+; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]])
+; MIPS32-EL-DAG: swl $[[R1]], 11($[[PTR]])
+; MIPS32-EL-DAG: swr $[[R1]], 8($[[PTR]])
+; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]], 4($[[PTR]])
+; MIPS32-EL-DAG: swl $[[R1]], 15($[[PTR]])
+; MIPS32-EL-DAG: swr $[[R1]], 12($[[PTR]])
+
+; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]])
+; MIPS32-EB-DAG: swl $[[R1]], 8($[[PTR]])
+; MIPS32-EB-DAG: swr $[[R1]], 11($[[PTR]])
+; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]], 7($[[PTR]])
+; MIPS32-EB-DAG: swl $[[R1]], 12($[[PTR]])
+; MIPS32-EB-DAG: swr $[[R1]], 15($[[PTR]])
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32R6-DAG: sw $[[R1]], 8($[[PTR]])
+; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-DAG: sw $[[R1]], 12($[[PTR]])
+
+; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]])
+; MIPS64-EL-DAG: swl $[[R1]], 11($[[PTR]])
+; MIPS64-EL-DAG: swr $[[R1]], 8($[[PTR]])
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]], 4($[[PTR]])
+; MIPS64-EL-DAG: swl $[[R1]], 15($[[PTR]])
+; MIPS64-EL-DAG: swr $[[R1]], 12($[[PTR]])
+
+; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
+; MIPS64-EB-DAG: swl $[[R1]], 8($[[PTR]])
+; MIPS64-EB-DAG: swr $[[R1]], 11($[[PTR]])
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]], 7($[[PTR]])
+; MIPS64-EB-DAG: swl $[[R1]], 12($[[PTR]])
+; MIPS64-EB-DAG: swr $[[R1]], 15($[[PTR]])
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64R6-DAG: sw $[[R1]], 8($[[PTR]])
+; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS64R6-DAG: sw $[[R1]], 12($[[PTR]])
+
+ %0 = load %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1
+ store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 1), align 1
+ ret void
+}
+
+;
+; Arrays are simply concatenations of their elements. They are unaffected by
+; endianness.
+;
+
+@arr = common global [7 x i8] zeroinitializer, align 1
+
+define void @pass_array_byval() nounwind {
+entry:
+; ALL-LABEL: pass_array_byval:
+
+; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(arr)(
+; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]])
+; MIPS32-EL-DAG: lbu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32-EL-DAG: lbu $[[R3:[0-9]+]], 5($[[PTR]])
+; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS32-EL-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS32-EL-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS32-EL-DAG: sll $[[T2:[0-9]+]], $[[R4]], 16
+; MIPS32-EL-DAG: or $5, $[[T1]], $[[T2]]
+
+; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(arr)(
+; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]])
+; MIPS32-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]])
+; MIPS32-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]])
+; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS32-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS32-EB-DAG: sll $[[T1]], $[[T1]], 16
+; MIPS32-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS32-EB-DAG: sll $[[T2:[0-9]+]], $[[R4]], 8
+; MIPS32-EB-DAG: or $5, $[[T1]], $[[T2]]
+
+; MIPS32R6: lw $[[PTR:[0-9]+]], %got(arr)(
+; MIPS32R6-DAG: lw $4, 0($[[PTR]])
+; MIPS32R6-EL-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-EL-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]])
+; MIPS32R6-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 16
+; MIPS32R6-EL-DAG: or $5, $[[R2]], $[[T0]]
+
+; MIPS32R6-EB-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-EB-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]])
+; MIPS32R6-EB-DAG: sll $[[T0:[0-9]+]], $[[R2]], 16
+; MIPS32R6-EB-DAG: or $5, $[[T0]], $[[R3]]
+
+; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(arr)(
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]])
+
+; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(arr)(
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32
+; MIPS64-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]])
+; MIPS64-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS64-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16
+; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R1]], $[[T1]]
+; MIPS64-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8
+; MIPS64-EB-DAG: or $4, $[[T3]], $[[T4]]
+
+; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(arr)(
+
+ tail call void @extern_func([7 x i8]* byval @arr) nounwind
+ ret void
+}
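+
+; In each case above, the 7-byte byval array is packed into the integer
+; argument registers: a $4/$5 pair on the 32-bit targets and a single 64-bit
+; $4 on the 64-bit targets.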
+
+declare void @extern_func([7 x i8]* byval)
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index af192d0..c7fe6fd 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -1,35 +1,129 @@
-; RUN: llc -march=mipsel -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel -force-mips-long-branch -O3 < %s \
+; RUN: | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 -force-mips-long-branch -O3 \
+; RUN: < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -O3 \
+; RUN: < %s | FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=micromips \
+; RUN: -force-mips-long-branch -O3 < %s | FileCheck %s -check-prefix=MICROMIPS
-@g0 = external global i32
-define void @foo1(i32 %s) nounwind {
+@x = external global i32
+
+define void @test1(i32 %s) {
entry:
-; O32: nop
-; O32: addiu $sp, $sp, -8
-; O32: bal
-; O32: lui $1, 0
-; O32: addiu $1, $1, {{[0-9]+}}
-; N64: nop
-; N64: daddiu $sp, $sp, -16
-; N64: lui $1, 0
-; N64: daddiu $1, $1, 0
-; N64: dsll $1, $1, 16
-; N64: daddiu $1, $1, 0
-; N64: bal
-; N64: dsll $1, $1, 16
-; N64: daddiu $1, $1, {{[0-9]+}}
-
- %tobool = icmp eq i32 %s, 0
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- %0 = load i32* @g0, align 4
- %add = add nsw i32 %0, 12
- store i32 %add, i32* @g0, align 4
- br label %if.end
-
-if.end: ; preds = %entry, %if.then
+ %cmp = icmp eq i32 %s, 0
+ br i1 %cmp, label %end, label %then
+
+then:
+ store i32 1, i32* @x, align 4
+ br label %end
+
+end:
ret void
-}
+
+; First check the normal version (without long branch). beqz jumps to return,
+; and fallthrough block stores 1 to global variable.
+
+; CHECK: lui $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[R0]], $[[R0]], %lo(_gp_disp)
+; CHECK: beqz $4, $[[BB0:BB[0-9_]+]]
+; CHECK: addu $[[GP:[0-9]+]], $[[R0]], $25
+; CHECK: lw $[[R1:[0-9]+]], %got(x)($[[GP]])
+; CHECK: addiu $[[R2:[0-9]+]], $zero, 1
+; CHECK: sw $[[R2]], 0($[[R1]])
+; CHECK: $[[BB0]]:
+; CHECK: jr $ra
+; CHECK: nop
+
+
+; Check the MIPS32 version. Check that the branch logic is inverted, so that
+; the target of the new branch (bnez) is the fallthrough block of the original
+; branch. Check that the fallthrough block of the new branch contains the long
+; branch expansion, which at the end indirectly jumps to the target of the
+; original branch.
+
+; O32: lui $[[R0:[0-9]+]], %hi(_gp_disp)
+; O32: addiu $[[R0]], $[[R0]], %lo(_gp_disp)
+; O32: bnez $4, $[[BB0:BB[0-9_]+]]
+; O32: addu $[[GP:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; O32: addiu $sp, $sp, -8
+; O32-NEXT: sw $ra, 0($sp)
+; O32-NEXT: lui $1, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; O32-NEXT: bal $[[BB1]]
+; O32-NEXT: addiu $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; O32-NEXT: $[[BB1]]:
+; O32-NEXT: addu $1, $ra, $1
+; O32-NEXT: lw $ra, 0($sp)
+; O32-NEXT: jr $1
+; O32-NEXT: addiu $sp, $sp, 8
+
+; O32: $[[BB0]]:
+; O32: lw $[[R1:[0-9]+]], %got(x)($[[GP]])
+; O32: addiu $[[R2:[0-9]+]], $zero, 1
+; O32: sw $[[R2]], 0($[[R1]])
+; O32: $[[BB2]]:
+; O32: jr $ra
+; O32: nop
+
+
+; Check the MIPS64 version.
+
+; N64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(test1)))
+; N64: bnez $4, $[[BB0:BB[0-9_]+]]
+; N64: daddu $[[R1:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; N64: daddiu $sp, $sp, -16
+; N64-NEXT: sd $ra, 0($sp)
+; N64-NEXT: daddiu $1, $zero, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; N64-NEXT: dsll $1, $1, 16
+; N64-NEXT: bal $[[BB1]]
+; N64-NEXT: daddiu $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; N64-NEXT: $[[BB1]]:
+; N64-NEXT: daddu $1, $ra, $1
+; N64-NEXT: ld $ra, 0($sp)
+; N64-NEXT: jr $1
+; N64-NEXT: daddiu $sp, $sp, 16
+
+; N64: $[[BB0]]:
+; N64: daddiu $[[GP:[0-9]+]], $[[R1]], %lo(%neg(%gp_rel(test1)))
+; N64: ld $[[R2:[0-9]+]], %got_disp(x)($[[GP]])
+; N64: addiu $[[R3:[0-9]+]], $zero, 1
+; N64: sw $[[R3]], 0($[[R2]])
+; N64: $[[BB2]]:
+; N64: jr $ra
+; N64: nop
+
+
+; Check the microMIPS version.
+
+; MICROMIPS: lui $[[R0:[0-9]+]], %hi(_gp_disp)
+; MICROMIPS: addiu $[[R0]], $[[R0]], %lo(_gp_disp)
+; MICROMIPS: bnez $4, $[[BB0:BB[0-9_]+]]
+; MICROMIPS: addu $[[GP:[0-9]+]], $[[R0]], $25
+
+; Check for long branch expansion:
+; MICROMIPS: addiu $sp, $sp, -8
+; MICROMIPS-NEXT: sw $ra, 0($sp)
+; MICROMIPS-NEXT: lui $1, %hi(($[[BB2:BB[0-9_]+]])-($[[BB1:BB[0-9_]+]]))
+; MICROMIPS-NEXT: bal $[[BB1]]
+; MICROMIPS-NEXT: addiu $1, $1, %lo(($[[BB2]])-($[[BB1]]))
+; MICROMIPS-NEXT: $[[BB1]]:
+; MICROMIPS-NEXT: addu $1, $ra, $1
+; MICROMIPS-NEXT: lw $ra, 0($sp)
+; MICROMIPS-NEXT: jr $1
+; MICROMIPS-NEXT: addiu $sp, $sp, 8
+
+; MICROMIPS: $[[BB0]]:
+; MICROMIPS: lw $[[R1:[0-9]+]], %got(x)($[[GP]])
+; MICROMIPS: addiu $[[R2:[0-9]+]], $zero, 1
+; MICROMIPS: sw $[[R2]], 0($[[R1]])
+; MICROMIPS: $[[BB2]]:
+; MICROMIPS: jr $ra
+; MICROMIPS: nop
+}
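
The long branch expansion checked above is entirely PC-relative: bal $BB1 writes the address of BB1 into $ra, the lui/addiu pair materializes the byte offset BB2-BB1, and addu/jr then reach BB2 without any absolute relocation (the N64 variant builds the same offset 16 bits at a time with daddiu/dsll). A rough C model of that computation, assuming the usual MIPS convention that %hi pre-compensates for addiu sign-extending the %lo addend; the function names are illustrative only:

    #include <stdint.h>

    /* %hi/%lo split of a 32-bit offset: addiu sign-extends its 16-bit
     * immediate, so %hi rounds up when bit 15 of the offset is set. */
    static uint32_t hi_part(int32_t off) { return ((uint32_t)off + 0x8000) >> 16; }
    static int32_t  lo_part(int32_t off) { return (int16_t)((uint32_t)off & 0xffff); }

    static uint32_t long_branch_target(uint32_t ra_at_bb1, int32_t off) {
        uint32_t r1 = hi_part(off) << 16;  /* lui   $1, %hi(BB2-BB1)     */
        r1 += (uint32_t)lo_part(off);      /* addiu $1, $1, %lo(BB2-BB1) */
        return ra_at_bb1 + r1;             /* addu  $1, $ra, $1; jr $1   */
    }

For any 32-bit off, long_branch_target(ra, off) == ra + off, which is exactly the invariant the O32 and MICROMIPS check blocks pin down.
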
diff --git a/test/CodeGen/Mips/micromips-directives.ll b/test/CodeGen/Mips/micromips-directives.ll
new file mode 100644
index 0000000..dd0bd58
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-directives.ll
@@ -0,0 +1,17 @@
+; This test checks that the '.set [no]micromips' directives
+; are emitted before a function's entry label.
+
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | \
+; RUN: FileCheck %s -check-prefix=CHECK-MM
+; RUN: llc -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=-micromips %s -o - | \
+; RUN: FileCheck %s -check-prefix=CHECK-NO-MM
+
+define i32 @main() nounwind {
+entry:
+ ret i32 0
+}
+
+; CHECK-MM: .set micromips
+; CHECK-MM: main:
+; CHECK-NO-MM: .set nomicromips
+; CHECK-NO-MM: main:
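
Note that once a RUN line passes an explicit -check-prefix, FileCheck no longer matches the bare CHECK prefix, so the label check has to be spelled under each custom prefix. The property being pinned down is an ordering one: the mode directive must precede the function's entry label. A tiny C model of that emission order, assuming a boolean subtarget feature; this is an illustration, not the MCStreamer API:

    #include <stdio.h>

    /* Emit the per-function header in the order the test expects:
     * first the mode directive, then the entry label. */
    static void emit_function_header(const char *name, int in_micromips_mode) {
        printf("\t.set\t%smicromips\n", in_micromips_mode ? "" : "no");
        printf("%s:\n", name);  /* label comes after the directive */
    }

    int main(void) {
        emit_function_header("main", 1);  /* prints ".set micromips" first */
        return 0;
    }
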
diff --git a/test/CodeGen/Mips/micromips-long-branch.ll b/test/CodeGen/Mips/micromips-long-branch.ll
deleted file mode 100644
index 3267f4a..0000000
--- a/test/CodeGen/Mips/micromips-long-branch.ll
+++ /dev/null
@@ -1,16437 +0,0 @@
-; RUN: llc %s -march=mipsel -mcpu=mips32r2 -mattr=micromips -filetype=asm \
-; RUN: -relocation-model=pic -O3 -o - | FileCheck %s
-
-@a = common global [10 x i32] zeroinitializer, align 16
-
-; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
-entry:
- %retval = alloca i32, align 4
- %i = alloca i32, align 4
- store i32 0, i32* %retval
- store i32 0, i32* %i, align 4
- br label %for.cond
-
-for.cond:
- %0 = load i32* %i, align 4
- %cmp = icmp slt i32 %0, 10
- br i1 %cmp, label %for.body, label %for.end
-
-; CHECK: addiu $sp, $sp, -8
-; CHECK: sw $ra, 0($sp)
-; CHECK: lui $[[REG1:[0-9]+]], 2
-; CHECK: addiu $[[REG1]], $[[REG1]], 0
-; CHECK: addu $[[REG1]], $ra, $[[REG1]]
-; CHECK: lw $ra, 0($sp)
-; CHECK: jr $[[REG1]]
-; CHECK: addiu $sp, $sp, 8
-
-for.body:
- %1 = load i32* %i, align 4
- %2 = load i32* %i, align 4
- %idxprom = sext i32 %2 to i64
- %arrayidx = getelementptr inbounds [10 x i32]* @a, i32 0, i64 %idxprom
- store i32 %1, i32* %arrayidx, align 4
- %nop0 = alloca i1, i1 0
- %nop1 = alloca i1, i1 0
- %nop2 = alloca i1, i1 0
- %nop3 = alloca i1, i1 0
- %nop4 = alloca i1, i1 0
- %nop5 = alloca i1, i1 0
- %nop6 = alloca i1, i1 0
- %nop7 = alloca i1, i1 0
- %nop8 = alloca i1, i1 0
- %nop9 = alloca i1, i1 0
- %nop10 = alloca i1, i1 0
- %nop11 = alloca i1, i1 0
- %nop12 = alloca i1, i1 0
- %nop13 = alloca i1, i1 0
- %nop14 = alloca i1, i1 0
- %nop15 = alloca i1, i1 0
- %nop16 = alloca i1, i1 0
- %nop17 = alloca i1, i1 0
- %nop18 = alloca i1, i1 0
- %nop19 = alloca i1, i1 0
- %nop20 = alloca i1, i1 0
- %nop21 = alloca i1, i1 0
- %nop22 = alloca i1, i1 0
- %nop23 = alloca i1, i1 0
- %nop24 = alloca i1, i1 0
- %nop25 = alloca i1, i1 0
- %nop26 = alloca i1, i1 0
- %nop27 = alloca i1, i1 0
- %nop28 = alloca i1, i1 0
- %nop29 = alloca i1, i1 0
- %nop30 = alloca i1, i1 0
- %nop31 = alloca i1, i1 0
- %nop32 = alloca i1, i1 0
- %nop33 = alloca i1, i1 0
- %nop34 = alloca i1, i1 0
- %nop35 = alloca i1, i1 0
- %nop36 = alloca i1, i1 0
- %nop37 = alloca i1, i1 0
- %nop38 = alloca i1, i1 0
- %nop39 = alloca i1, i1 0
- %nop40 = alloca i1, i1 0
- %nop41 = alloca i1, i1 0
- %nop42 = alloca i1, i1 0
- %nop43 = alloca i1, i1 0
- %nop44 = alloca i1, i1 0
- %nop45 = alloca i1, i1 0
- %nop46 = alloca i1, i1 0
- %nop47 = alloca i1, i1 0
- %nop48 = alloca i1, i1 0
- %nop49 = alloca i1, i1 0
- %nop50 = alloca i1, i1 0
- %nop51 = alloca i1, i1 0
- %nop52 = alloca i1, i1 0
- %nop53 = alloca i1, i1 0
- %nop54 = alloca i1, i1 0
- %nop55 = alloca i1, i1 0
- %nop56 = alloca i1, i1 0
- %nop57 = alloca i1, i1 0
- %nop58 = alloca i1, i1 0
- %nop59 = alloca i1, i1 0
- %nop60 = alloca i1, i1 0
- %nop61 = alloca i1, i1 0
- %nop62 = alloca i1, i1 0
- %nop63 = alloca i1, i1 0
- %nop64 = alloca i1, i1 0
- %nop65 = alloca i1, i1 0
- %nop66 = alloca i1, i1 0
- %nop67 = alloca i1, i1 0
- %nop68 = alloca i1, i1 0
- %nop69 = alloca i1, i1 0
- %nop70 = alloca i1, i1 0
- %nop71 = alloca i1, i1 0
- %nop72 = alloca i1, i1 0
- %nop73 = alloca i1, i1 0
- %nop74 = alloca i1, i1 0
- %nop75 = alloca i1, i1 0
- %nop76 = alloca i1, i1 0
- %nop77 = alloca i1, i1 0
- %nop78 = alloca i1, i1 0
- %nop79 = alloca i1, i1 0
- %nop80 = alloca i1, i1 0
- %nop81 = alloca i1, i1 0
- %nop82 = alloca i1, i1 0
- %nop83 = alloca i1, i1 0
- %nop84 = alloca i1, i1 0
- %nop85 = alloca i1, i1 0
- %nop86 = alloca i1, i1 0
- %nop87 = alloca i1, i1 0
- %nop88 = alloca i1, i1 0
- %nop89 = alloca i1, i1 0
- %nop90 = alloca i1, i1 0
- %nop91 = alloca i1, i1 0
- %nop92 = alloca i1, i1 0
- %nop93 = alloca i1, i1 0
- %nop94 = alloca i1, i1 0
- %nop95 = alloca i1, i1 0
- %nop96 = alloca i1, i1 0
- %nop97 = alloca i1, i1 0
- %nop98 = alloca i1, i1 0
- %nop99 = alloca i1, i1 0
- %nop100 = alloca i1, i1 0
- %nop101 = alloca i1, i1 0
- %nop102 = alloca i1, i1 0
- %nop103 = alloca i1, i1 0
- %nop104 = alloca i1, i1 0
- %nop105 = alloca i1, i1 0
- %nop106 = alloca i1, i1 0
- %nop107 = alloca i1, i1 0
- %nop108 = alloca i1, i1 0
- %nop109 = alloca i1, i1 0
- %nop110 = alloca i1, i1 0
- %nop111 = alloca i1, i1 0
- %nop112 = alloca i1, i1 0
- %nop113 = alloca i1, i1 0
- %nop114 = alloca i1, i1 0
- %nop115 = alloca i1, i1 0
- %nop116 = alloca i1, i1 0
- %nop117 = alloca i1, i1 0
- %nop118 = alloca i1, i1 0
- %nop119 = alloca i1, i1 0
- %nop120 = alloca i1, i1 0
- %nop121 = alloca i1, i1 0
- %nop122 = alloca i1, i1 0
- %nop123 = alloca i1, i1 0
- %nop124 = alloca i1, i1 0
- %nop125 = alloca i1, i1 0
- %nop126 = alloca i1, i1 0
- %nop127 = alloca i1, i1 0
- %nop128 = alloca i1, i1 0
- %nop129 = alloca i1, i1 0
- %nop130 = alloca i1, i1 0
- %nop131 = alloca i1, i1 0
- %nop132 = alloca i1, i1 0
- %nop133 = alloca i1, i1 0
- %nop134 = alloca i1, i1 0
- %nop135 = alloca i1, i1 0
- %nop136 = alloca i1, i1 0
- %nop137 = alloca i1, i1 0
- %nop138 = alloca i1, i1 0
- %nop139 = alloca i1, i1 0
- %nop140 = alloca i1, i1 0
- %nop141 = alloca i1, i1 0
- %nop142 = alloca i1, i1 0
- %nop143 = alloca i1, i1 0
- %nop144 = alloca i1, i1 0
- %nop145 = alloca i1, i1 0
- %nop146 = alloca i1, i1 0
- %nop147 = alloca i1, i1 0
- %nop148 = alloca i1, i1 0
- %nop149 = alloca i1, i1 0
- %nop150 = alloca i1, i1 0
- %nop151 = alloca i1, i1 0
- %nop152 = alloca i1, i1 0
- %nop153 = alloca i1, i1 0
- %nop154 = alloca i1, i1 0
- %nop155 = alloca i1, i1 0
- %nop156 = alloca i1, i1 0
- %nop157 = alloca i1, i1 0
- %nop158 = alloca i1, i1 0
- %nop159 = alloca i1, i1 0
- %nop160 = alloca i1, i1 0
- %nop161 = alloca i1, i1 0
- %nop162 = alloca i1, i1 0
- %nop163 = alloca i1, i1 0
- %nop164 = alloca i1, i1 0
- %nop165 = alloca i1, i1 0
- %nop166 = alloca i1, i1 0
- %nop167 = alloca i1, i1 0
- %nop168 = alloca i1, i1 0
- %nop169 = alloca i1, i1 0
- %nop170 = alloca i1, i1 0
- %nop171 = alloca i1, i1 0
- %nop172 = alloca i1, i1 0
- %nop173 = alloca i1, i1 0
- %nop174 = alloca i1, i1 0
- %nop175 = alloca i1, i1 0
- %nop176 = alloca i1, i1 0
- %nop177 = alloca i1, i1 0
- %nop178 = alloca i1, i1 0
- %nop179 = alloca i1, i1 0
- %nop180 = alloca i1, i1 0
- %nop181 = alloca i1, i1 0
- %nop182 = alloca i1, i1 0
- %nop183 = alloca i1, i1 0
- %nop184 = alloca i1, i1 0
- %nop185 = alloca i1, i1 0
- %nop186 = alloca i1, i1 0
- %nop187 = alloca i1, i1 0
- %nop188 = alloca i1, i1 0
- %nop189 = alloca i1, i1 0
- %nop190 = alloca i1, i1 0
- %nop191 = alloca i1, i1 0
- %nop192 = alloca i1, i1 0
- %nop193 = alloca i1, i1 0
- %nop194 = alloca i1, i1 0
- %nop195 = alloca i1, i1 0
- %nop196 = alloca i1, i1 0
- %nop197 = alloca i1, i1 0
- %nop198 = alloca i1, i1 0
- %nop199 = alloca i1, i1 0
- %nop200 = alloca i1, i1 0
- %nop201 = alloca i1, i1 0
- %nop202 = alloca i1, i1 0
- %nop203 = alloca i1, i1 0
- %nop204 = alloca i1, i1 0
- %nop205 = alloca i1, i1 0
- %nop206 = alloca i1, i1 0
- %nop207 = alloca i1, i1 0
- %nop208 = alloca i1, i1 0
- %nop209 = alloca i1, i1 0
- %nop210 = alloca i1, i1 0
- %nop211 = alloca i1, i1 0
- %nop212 = alloca i1, i1 0
- %nop213 = alloca i1, i1 0
- %nop214 = alloca i1, i1 0
- %nop215 = alloca i1, i1 0
- %nop216 = alloca i1, i1 0
- %nop217 = alloca i1, i1 0
- %nop218 = alloca i1, i1 0
- %nop219 = alloca i1, i1 0
- %nop220 = alloca i1, i1 0
- %nop221 = alloca i1, i1 0
- %nop222 = alloca i1, i1 0
- %nop223 = alloca i1, i1 0
- %nop224 = alloca i1, i1 0
- %nop225 = alloca i1, i1 0
- %nop226 = alloca i1, i1 0
- %nop227 = alloca i1, i1 0
- %nop228 = alloca i1, i1 0
- %nop229 = alloca i1, i1 0
- %nop230 = alloca i1, i1 0
- %nop231 = alloca i1, i1 0
- %nop232 = alloca i1, i1 0
- %nop233 = alloca i1, i1 0
- %nop234 = alloca i1, i1 0
- %nop235 = alloca i1, i1 0
- %nop236 = alloca i1, i1 0
- %nop237 = alloca i1, i1 0
- %nop238 = alloca i1, i1 0
- %nop239 = alloca i1, i1 0
- %nop240 = alloca i1, i1 0
- %nop241 = alloca i1, i1 0
- %nop242 = alloca i1, i1 0
- %nop243 = alloca i1, i1 0
- %nop244 = alloca i1, i1 0
- %nop245 = alloca i1, i1 0
- %nop246 = alloca i1, i1 0
- %nop247 = alloca i1, i1 0
- %nop248 = alloca i1, i1 0
- %nop249 = alloca i1, i1 0
- %nop250 = alloca i1, i1 0
- %nop251 = alloca i1, i1 0
- %nop252 = alloca i1, i1 0
- %nop253 = alloca i1, i1 0
- %nop254 = alloca i1, i1 0
- %nop255 = alloca i1, i1 0
- %nop256 = alloca i1, i1 0
- %nop257 = alloca i1, i1 0
- %nop258 = alloca i1, i1 0
- %nop259 = alloca i1, i1 0
- %nop260 = alloca i1, i1 0
- %nop261 = alloca i1, i1 0
- %nop262 = alloca i1, i1 0
- %nop263 = alloca i1, i1 0
- %nop264 = alloca i1, i1 0
- %nop265 = alloca i1, i1 0
- %nop266 = alloca i1, i1 0
- %nop267 = alloca i1, i1 0
- %nop268 = alloca i1, i1 0
- %nop269 = alloca i1, i1 0
- %nop270 = alloca i1, i1 0
- %nop271 = alloca i1, i1 0
- %nop272 = alloca i1, i1 0
- %nop273 = alloca i1, i1 0
- %nop274 = alloca i1, i1 0
- %nop275 = alloca i1, i1 0
- %nop276 = alloca i1, i1 0
- %nop277 = alloca i1, i1 0
- %nop278 = alloca i1, i1 0
- %nop279 = alloca i1, i1 0
- %nop280 = alloca i1, i1 0
- %nop281 = alloca i1, i1 0
- %nop282 = alloca i1, i1 0
- %nop283 = alloca i1, i1 0
- %nop284 = alloca i1, i1 0
- %nop285 = alloca i1, i1 0
- %nop286 = alloca i1, i1 0
- %nop287 = alloca i1, i1 0
- %nop288 = alloca i1, i1 0
- %nop289 = alloca i1, i1 0
- %nop290 = alloca i1, i1 0
- %nop291 = alloca i1, i1 0
- %nop292 = alloca i1, i1 0
- %nop293 = alloca i1, i1 0
- %nop294 = alloca i1, i1 0
- %nop295 = alloca i1, i1 0
- %nop296 = alloca i1, i1 0
- %nop297 = alloca i1, i1 0
- %nop298 = alloca i1, i1 0
- %nop299 = alloca i1, i1 0
- %nop300 = alloca i1, i1 0
- %nop301 = alloca i1, i1 0
- %nop302 = alloca i1, i1 0
- %nop303 = alloca i1, i1 0
- %nop304 = alloca i1, i1 0
- %nop305 = alloca i1, i1 0
- %nop306 = alloca i1, i1 0
- %nop307 = alloca i1, i1 0
- %nop308 = alloca i1, i1 0
- %nop309 = alloca i1, i1 0
- %nop310 = alloca i1, i1 0
- %nop311 = alloca i1, i1 0
- %nop312 = alloca i1, i1 0
- %nop313 = alloca i1, i1 0
- %nop314 = alloca i1, i1 0
- %nop315 = alloca i1, i1 0
- %nop316 = alloca i1, i1 0
- %nop317 = alloca i1, i1 0
- %nop318 = alloca i1, i1 0
- %nop319 = alloca i1, i1 0
- %nop320 = alloca i1, i1 0
- %nop321 = alloca i1, i1 0
- %nop322 = alloca i1, i1 0
- %nop323 = alloca i1, i1 0
- %nop324 = alloca i1, i1 0
- %nop325 = alloca i1, i1 0
- %nop326 = alloca i1, i1 0
- %nop327 = alloca i1, i1 0
- %nop328 = alloca i1, i1 0
- %nop329 = alloca i1, i1 0
- %nop330 = alloca i1, i1 0
- %nop331 = alloca i1, i1 0
- %nop332 = alloca i1, i1 0
- %nop333 = alloca i1, i1 0
- %nop334 = alloca i1, i1 0
- %nop335 = alloca i1, i1 0
- %nop336 = alloca i1, i1 0
- %nop337 = alloca i1, i1 0
- %nop338 = alloca i1, i1 0
- %nop339 = alloca i1, i1 0
- %nop340 = alloca i1, i1 0
- %nop341 = alloca i1, i1 0
- %nop342 = alloca i1, i1 0
- %nop343 = alloca i1, i1 0
- %nop344 = alloca i1, i1 0
- %nop345 = alloca i1, i1 0
- %nop346 = alloca i1, i1 0
- %nop347 = alloca i1, i1 0
- %nop348 = alloca i1, i1 0
- %nop349 = alloca i1, i1 0
- %nop350 = alloca i1, i1 0
- %nop351 = alloca i1, i1 0
- %nop352 = alloca i1, i1 0
- %nop353 = alloca i1, i1 0
- %nop354 = alloca i1, i1 0
- %nop355 = alloca i1, i1 0
- %nop356 = alloca i1, i1 0
- %nop357 = alloca i1, i1 0
- %nop358 = alloca i1, i1 0
- %nop359 = alloca i1, i1 0
- %nop360 = alloca i1, i1 0
- %nop361 = alloca i1, i1 0
- %nop362 = alloca i1, i1 0
- %nop363 = alloca i1, i1 0
- %nop364 = alloca i1, i1 0
- %nop365 = alloca i1, i1 0
- %nop366 = alloca i1, i1 0
- %nop367 = alloca i1, i1 0
- %nop368 = alloca i1, i1 0
- %nop369 = alloca i1, i1 0
- %nop370 = alloca i1, i1 0
- %nop371 = alloca i1, i1 0
- %nop372 = alloca i1, i1 0
- %nop373 = alloca i1, i1 0
- %nop374 = alloca i1, i1 0
- %nop375 = alloca i1, i1 0
- %nop376 = alloca i1, i1 0
- %nop377 = alloca i1, i1 0
- %nop378 = alloca i1, i1 0
- %nop379 = alloca i1, i1 0
- %nop380 = alloca i1, i1 0
- %nop381 = alloca i1, i1 0
- %nop382 = alloca i1, i1 0
- %nop383 = alloca i1, i1 0
- %nop384 = alloca i1, i1 0
- %nop385 = alloca i1, i1 0
- %nop386 = alloca i1, i1 0
- %nop387 = alloca i1, i1 0
- %nop388 = alloca i1, i1 0
- %nop389 = alloca i1, i1 0
- %nop390 = alloca i1, i1 0
- %nop391 = alloca i1, i1 0
- %nop392 = alloca i1, i1 0
- %nop393 = alloca i1, i1 0
- %nop394 = alloca i1, i1 0
- %nop395 = alloca i1, i1 0
- %nop396 = alloca i1, i1 0
- %nop397 = alloca i1, i1 0
- %nop398 = alloca i1, i1 0
- %nop399 = alloca i1, i1 0
- %nop400 = alloca i1, i1 0
- %nop401 = alloca i1, i1 0
- %nop402 = alloca i1, i1 0
- %nop403 = alloca i1, i1 0
- %nop404 = alloca i1, i1 0
- %nop405 = alloca i1, i1 0
- %nop406 = alloca i1, i1 0
- %nop407 = alloca i1, i1 0
- %nop408 = alloca i1, i1 0
- %nop409 = alloca i1, i1 0
- %nop410 = alloca i1, i1 0
- %nop411 = alloca i1, i1 0
- %nop412 = alloca i1, i1 0
- %nop413 = alloca i1, i1 0
- %nop414 = alloca i1, i1 0
- %nop415 = alloca i1, i1 0
- %nop416 = alloca i1, i1 0
- %nop417 = alloca i1, i1 0
- %nop418 = alloca i1, i1 0
- %nop419 = alloca i1, i1 0
- %nop420 = alloca i1, i1 0
- %nop421 = alloca i1, i1 0
- %nop422 = alloca i1, i1 0
- %nop423 = alloca i1, i1 0
- %nop424 = alloca i1, i1 0
- %nop425 = alloca i1, i1 0
- %nop426 = alloca i1, i1 0
- %nop427 = alloca i1, i1 0
- %nop428 = alloca i1, i1 0
- %nop429 = alloca i1, i1 0
- %nop430 = alloca i1, i1 0
- %nop431 = alloca i1, i1 0
- %nop432 = alloca i1, i1 0
- %nop433 = alloca i1, i1 0
- %nop434 = alloca i1, i1 0
- %nop435 = alloca i1, i1 0
- %nop436 = alloca i1, i1 0
- %nop437 = alloca i1, i1 0
- %nop438 = alloca i1, i1 0
- %nop439 = alloca i1, i1 0
- %nop440 = alloca i1, i1 0
- %nop441 = alloca i1, i1 0
- %nop442 = alloca i1, i1 0
- %nop443 = alloca i1, i1 0
- %nop444 = alloca i1, i1 0
- %nop445 = alloca i1, i1 0
- %nop446 = alloca i1, i1 0
- %nop447 = alloca i1, i1 0
- %nop448 = alloca i1, i1 0
- %nop449 = alloca i1, i1 0
- %nop450 = alloca i1, i1 0
- %nop451 = alloca i1, i1 0
- %nop452 = alloca i1, i1 0
- %nop453 = alloca i1, i1 0
- %nop454 = alloca i1, i1 0
- %nop455 = alloca i1, i1 0
- %nop456 = alloca i1, i1 0
- %nop457 = alloca i1, i1 0
- %nop458 = alloca i1, i1 0
- %nop459 = alloca i1, i1 0
- %nop460 = alloca i1, i1 0
- %nop461 = alloca i1, i1 0
- %nop462 = alloca i1, i1 0
- %nop463 = alloca i1, i1 0
- %nop464 = alloca i1, i1 0
- %nop465 = alloca i1, i1 0
- %nop466 = alloca i1, i1 0
- %nop467 = alloca i1, i1 0
- %nop468 = alloca i1, i1 0
- %nop469 = alloca i1, i1 0
- %nop470 = alloca i1, i1 0
- %nop471 = alloca i1, i1 0
- %nop472 = alloca i1, i1 0
- %nop473 = alloca i1, i1 0
- %nop474 = alloca i1, i1 0
- %nop475 = alloca i1, i1 0
- %nop476 = alloca i1, i1 0
- %nop477 = alloca i1, i1 0
- %nop478 = alloca i1, i1 0
- %nop479 = alloca i1, i1 0
- %nop480 = alloca i1, i1 0
- %nop481 = alloca i1, i1 0
- %nop482 = alloca i1, i1 0
- %nop483 = alloca i1, i1 0
- %nop484 = alloca i1, i1 0
- %nop485 = alloca i1, i1 0
- %nop486 = alloca i1, i1 0
- %nop487 = alloca i1, i1 0
- %nop488 = alloca i1, i1 0
- %nop489 = alloca i1, i1 0
- %nop490 = alloca i1, i1 0
- %nop491 = alloca i1, i1 0
- %nop492 = alloca i1, i1 0
- %nop493 = alloca i1, i1 0
- %nop494 = alloca i1, i1 0
- %nop495 = alloca i1, i1 0
- %nop496 = alloca i1, i1 0
- %nop497 = alloca i1, i1 0
- %nop498 = alloca i1, i1 0
- %nop499 = alloca i1, i1 0
- %nop500 = alloca i1, i1 0
- %nop501 = alloca i1, i1 0
- %nop502 = alloca i1, i1 0
- %nop503 = alloca i1, i1 0
- %nop504 = alloca i1, i1 0
- %nop505 = alloca i1, i1 0
- %nop506 = alloca i1, i1 0
- %nop507 = alloca i1, i1 0
- %nop508 = alloca i1, i1 0
- %nop509 = alloca i1, i1 0
- %nop510 = alloca i1, i1 0
- %nop511 = alloca i1, i1 0
- %nop512 = alloca i1, i1 0
- %nop513 = alloca i1, i1 0
- %nop514 = alloca i1, i1 0
- %nop515 = alloca i1, i1 0
- %nop516 = alloca i1, i1 0
- %nop517 = alloca i1, i1 0
- %nop518 = alloca i1, i1 0
- %nop519 = alloca i1, i1 0
- %nop520 = alloca i1, i1 0
- %nop521 = alloca i1, i1 0
- %nop522 = alloca i1, i1 0
- %nop523 = alloca i1, i1 0
- %nop524 = alloca i1, i1 0
- %nop525 = alloca i1, i1 0
- %nop526 = alloca i1, i1 0
- %nop527 = alloca i1, i1 0
- %nop528 = alloca i1, i1 0
- %nop529 = alloca i1, i1 0
- %nop530 = alloca i1, i1 0
- %nop531 = alloca i1, i1 0
- %nop532 = alloca i1, i1 0
- %nop533 = alloca i1, i1 0
- %nop534 = alloca i1, i1 0
- %nop535 = alloca i1, i1 0
- %nop536 = alloca i1, i1 0
- %nop537 = alloca i1, i1 0
- %nop538 = alloca i1, i1 0
- %nop539 = alloca i1, i1 0
- %nop540 = alloca i1, i1 0
- %nop541 = alloca i1, i1 0
- %nop542 = alloca i1, i1 0
- %nop543 = alloca i1, i1 0
- %nop544 = alloca i1, i1 0
- %nop545 = alloca i1, i1 0
- %nop546 = alloca i1, i1 0
- %nop547 = alloca i1, i1 0
- %nop548 = alloca i1, i1 0
- %nop549 = alloca i1, i1 0
- %nop550 = alloca i1, i1 0
- %nop551 = alloca i1, i1 0
- %nop552 = alloca i1, i1 0
- %nop553 = alloca i1, i1 0
- %nop554 = alloca i1, i1 0
- %nop555 = alloca i1, i1 0
- %nop556 = alloca i1, i1 0
- %nop557 = alloca i1, i1 0
- %nop558 = alloca i1, i1 0
- %nop559 = alloca i1, i1 0
- %nop560 = alloca i1, i1 0
- %nop561 = alloca i1, i1 0
- %nop562 = alloca i1, i1 0
- %nop563 = alloca i1, i1 0
- %nop564 = alloca i1, i1 0
- %nop565 = alloca i1, i1 0
- %nop566 = alloca i1, i1 0
- %nop567 = alloca i1, i1 0
- %nop568 = alloca i1, i1 0
- %nop569 = alloca i1, i1 0
- %nop570 = alloca i1, i1 0
- %nop571 = alloca i1, i1 0
- %nop572 = alloca i1, i1 0
- %nop573 = alloca i1, i1 0
- %nop574 = alloca i1, i1 0
- %nop575 = alloca i1, i1 0
- %nop576 = alloca i1, i1 0
- %nop577 = alloca i1, i1 0
- %nop578 = alloca i1, i1 0
- %nop579 = alloca i1, i1 0
- %nop580 = alloca i1, i1 0
- %nop581 = alloca i1, i1 0
- %nop582 = alloca i1, i1 0
- %nop583 = alloca i1, i1 0
- %nop584 = alloca i1, i1 0
- %nop585 = alloca i1, i1 0
- %nop586 = alloca i1, i1 0
- %nop587 = alloca i1, i1 0
- %nop588 = alloca i1, i1 0
- %nop589 = alloca i1, i1 0
- %nop590 = alloca i1, i1 0
- %nop591 = alloca i1, i1 0
- %nop592 = alloca i1, i1 0
- %nop593 = alloca i1, i1 0
- %nop594 = alloca i1, i1 0
- %nop595 = alloca i1, i1 0
- %nop596 = alloca i1, i1 0
- %nop597 = alloca i1, i1 0
- %nop598 = alloca i1, i1 0
- %nop599 = alloca i1, i1 0
- %nop600 = alloca i1, i1 0
- %nop601 = alloca i1, i1 0
- %nop602 = alloca i1, i1 0
- %nop603 = alloca i1, i1 0
- %nop604 = alloca i1, i1 0
- %nop605 = alloca i1, i1 0
- %nop606 = alloca i1, i1 0
- %nop607 = alloca i1, i1 0
- %nop608 = alloca i1, i1 0
- %nop609 = alloca i1, i1 0
- %nop610 = alloca i1, i1 0
- %nop611 = alloca i1, i1 0
- %nop612 = alloca i1, i1 0
- %nop613 = alloca i1, i1 0
- %nop614 = alloca i1, i1 0
- %nop615 = alloca i1, i1 0
- %nop616 = alloca i1, i1 0
- %nop617 = alloca i1, i1 0
- %nop618 = alloca i1, i1 0
- %nop619 = alloca i1, i1 0
- %nop620 = alloca i1, i1 0
- %nop621 = alloca i1, i1 0
- %nop622 = alloca i1, i1 0
- %nop623 = alloca i1, i1 0
- %nop624 = alloca i1, i1 0
- %nop625 = alloca i1, i1 0
- %nop626 = alloca i1, i1 0
- %nop627 = alloca i1, i1 0
- %nop628 = alloca i1, i1 0
- %nop629 = alloca i1, i1 0
- %nop630 = alloca i1, i1 0
- %nop631 = alloca i1, i1 0
- %nop632 = alloca i1, i1 0
- %nop633 = alloca i1, i1 0
- %nop634 = alloca i1, i1 0
- %nop635 = alloca i1, i1 0
- %nop636 = alloca i1, i1 0
- %nop637 = alloca i1, i1 0
- %nop638 = alloca i1, i1 0
- %nop639 = alloca i1, i1 0
- %nop640 = alloca i1, i1 0
- %nop641 = alloca i1, i1 0
- %nop642 = alloca i1, i1 0
- %nop643 = alloca i1, i1 0
- %nop644 = alloca i1, i1 0
- %nop645 = alloca i1, i1 0
- %nop646 = alloca i1, i1 0
- %nop647 = alloca i1, i1 0
- %nop648 = alloca i1, i1 0
- %nop649 = alloca i1, i1 0
- %nop650 = alloca i1, i1 0
- %nop651 = alloca i1, i1 0
- %nop652 = alloca i1, i1 0
- %nop653 = alloca i1, i1 0
- %nop654 = alloca i1, i1 0
- %nop655 = alloca i1, i1 0
- %nop656 = alloca i1, i1 0
- %nop657 = alloca i1, i1 0
- %nop658 = alloca i1, i1 0
- %nop659 = alloca i1, i1 0
- %nop660 = alloca i1, i1 0
- %nop661 = alloca i1, i1 0
- %nop662 = alloca i1, i1 0
- %nop663 = alloca i1, i1 0
- %nop664 = alloca i1, i1 0
- %nop665 = alloca i1, i1 0
- %nop666 = alloca i1, i1 0
- %nop667 = alloca i1, i1 0
- %nop668 = alloca i1, i1 0
- %nop669 = alloca i1, i1 0
- %nop670 = alloca i1, i1 0
- %nop671 = alloca i1, i1 0
- %nop672 = alloca i1, i1 0
- %nop673 = alloca i1, i1 0
- %nop674 = alloca i1, i1 0
- %nop675 = alloca i1, i1 0
- %nop676 = alloca i1, i1 0
- %nop677 = alloca i1, i1 0
- %nop678 = alloca i1, i1 0
- %nop679 = alloca i1, i1 0
- %nop680 = alloca i1, i1 0
- %nop681 = alloca i1, i1 0
- %nop682 = alloca i1, i1 0
- %nop683 = alloca i1, i1 0
- %nop684 = alloca i1, i1 0
- %nop685 = alloca i1, i1 0
- %nop686 = alloca i1, i1 0
- %nop687 = alloca i1, i1 0
- %nop688 = alloca i1, i1 0
- %nop689 = alloca i1, i1 0
- %nop690 = alloca i1, i1 0
- %nop691 = alloca i1, i1 0
- %nop692 = alloca i1, i1 0
- %nop693 = alloca i1, i1 0
- %nop694 = alloca i1, i1 0
- %nop695 = alloca i1, i1 0
- %nop696 = alloca i1, i1 0
- %nop697 = alloca i1, i1 0
- %nop698 = alloca i1, i1 0
- %nop699 = alloca i1, i1 0
- %nop700 = alloca i1, i1 0
- %nop701 = alloca i1, i1 0
- %nop702 = alloca i1, i1 0
- %nop703 = alloca i1, i1 0
- %nop704 = alloca i1, i1 0
- %nop705 = alloca i1, i1 0
- %nop706 = alloca i1, i1 0
- %nop707 = alloca i1, i1 0
- %nop708 = alloca i1, i1 0
- %nop709 = alloca i1, i1 0
- %nop710 = alloca i1, i1 0
- %nop711 = alloca i1, i1 0
- %nop712 = alloca i1, i1 0
- %nop713 = alloca i1, i1 0
- %nop714 = alloca i1, i1 0
- %nop715 = alloca i1, i1 0
- %nop716 = alloca i1, i1 0
- %nop717 = alloca i1, i1 0
- %nop718 = alloca i1, i1 0
- %nop719 = alloca i1, i1 0
- %nop720 = alloca i1, i1 0
- %nop721 = alloca i1, i1 0
- %nop722 = alloca i1, i1 0
- %nop723 = alloca i1, i1 0
- %nop724 = alloca i1, i1 0
- %nop725 = alloca i1, i1 0
- %nop726 = alloca i1, i1 0
- %nop727 = alloca i1, i1 0
- %nop728 = alloca i1, i1 0
- %nop729 = alloca i1, i1 0
- %nop730 = alloca i1, i1 0
- %nop731 = alloca i1, i1 0
- %nop732 = alloca i1, i1 0
- %nop733 = alloca i1, i1 0
- %nop734 = alloca i1, i1 0
- %nop735 = alloca i1, i1 0
- %nop736 = alloca i1, i1 0
- %nop737 = alloca i1, i1 0
- %nop738 = alloca i1, i1 0
- %nop739 = alloca i1, i1 0
- %nop740 = alloca i1, i1 0
- %nop741 = alloca i1, i1 0
- %nop742 = alloca i1, i1 0
- %nop743 = alloca i1, i1 0
- %nop744 = alloca i1, i1 0
- %nop745 = alloca i1, i1 0
- %nop746 = alloca i1, i1 0
- %nop747 = alloca i1, i1 0
- %nop748 = alloca i1, i1 0
- %nop749 = alloca i1, i1 0
- %nop750 = alloca i1, i1 0
- %nop751 = alloca i1, i1 0
- %nop752 = alloca i1, i1 0
- %nop753 = alloca i1, i1 0
- %nop754 = alloca i1, i1 0
- %nop755 = alloca i1, i1 0
- %nop756 = alloca i1, i1 0
- %nop757 = alloca i1, i1 0
- %nop758 = alloca i1, i1 0
- %nop759 = alloca i1, i1 0
- %nop760 = alloca i1, i1 0
- %nop761 = alloca i1, i1 0
- %nop762 = alloca i1, i1 0
- %nop763 = alloca i1, i1 0
- %nop764 = alloca i1, i1 0
- %nop765 = alloca i1, i1 0
- %nop766 = alloca i1, i1 0
- %nop767 = alloca i1, i1 0
- %nop768 = alloca i1, i1 0
- %nop769 = alloca i1, i1 0
- %nop770 = alloca i1, i1 0
- %nop771 = alloca i1, i1 0
- %nop772 = alloca i1, i1 0
- %nop773 = alloca i1, i1 0
- %nop774 = alloca i1, i1 0
- %nop775 = alloca i1, i1 0
- %nop776 = alloca i1, i1 0
- %nop777 = alloca i1, i1 0
- %nop778 = alloca i1, i1 0
- %nop779 = alloca i1, i1 0
- %nop780 = alloca i1, i1 0
- %nop781 = alloca i1, i1 0
- %nop782 = alloca i1, i1 0
- %nop783 = alloca i1, i1 0
- %nop784 = alloca i1, i1 0
- %nop785 = alloca i1, i1 0
- %nop786 = alloca i1, i1 0
- %nop787 = alloca i1, i1 0
- %nop788 = alloca i1, i1 0
- %nop789 = alloca i1, i1 0
- %nop790 = alloca i1, i1 0
- %nop791 = alloca i1, i1 0
- %nop792 = alloca i1, i1 0
- %nop793 = alloca i1, i1 0
- %nop794 = alloca i1, i1 0
- %nop795 = alloca i1, i1 0
- %nop796 = alloca i1, i1 0
- %nop797 = alloca i1, i1 0
- %nop798 = alloca i1, i1 0
- %nop799 = alloca i1, i1 0
- %nop800 = alloca i1, i1 0
- %nop801 = alloca i1, i1 0
- %nop802 = alloca i1, i1 0
- %nop803 = alloca i1, i1 0
- %nop804 = alloca i1, i1 0
- %nop805 = alloca i1, i1 0
- %nop806 = alloca i1, i1 0
- %nop807 = alloca i1, i1 0
- %nop808 = alloca i1, i1 0
- %nop809 = alloca i1, i1 0
- %nop810 = alloca i1, i1 0
- %nop811 = alloca i1, i1 0
- %nop812 = alloca i1, i1 0
- %nop813 = alloca i1, i1 0
- %nop814 = alloca i1, i1 0
- %nop815 = alloca i1, i1 0
- %nop816 = alloca i1, i1 0
- %nop817 = alloca i1, i1 0
- %nop818 = alloca i1, i1 0
- %nop819 = alloca i1, i1 0
- %nop820 = alloca i1, i1 0
- %nop821 = alloca i1, i1 0
- %nop822 = alloca i1, i1 0
- %nop823 = alloca i1, i1 0
- %nop824 = alloca i1, i1 0
- %nop825 = alloca i1, i1 0
- %nop826 = alloca i1, i1 0
- %nop827 = alloca i1, i1 0
- %nop828 = alloca i1, i1 0
- %nop829 = alloca i1, i1 0
- %nop830 = alloca i1, i1 0
- %nop831 = alloca i1, i1 0
- %nop832 = alloca i1, i1 0
- %nop833 = alloca i1, i1 0
- %nop834 = alloca i1, i1 0
- %nop835 = alloca i1, i1 0
- %nop836 = alloca i1, i1 0
- %nop837 = alloca i1, i1 0
- %nop838 = alloca i1, i1 0
- %nop839 = alloca i1, i1 0
- %nop840 = alloca i1, i1 0
- %nop841 = alloca i1, i1 0
- %nop842 = alloca i1, i1 0
- %nop843 = alloca i1, i1 0
- %nop844 = alloca i1, i1 0
- %nop845 = alloca i1, i1 0
- %nop846 = alloca i1, i1 0
- %nop847 = alloca i1, i1 0
- %nop848 = alloca i1, i1 0
- %nop849 = alloca i1, i1 0
- %nop850 = alloca i1, i1 0
- %nop851 = alloca i1, i1 0
- %nop852 = alloca i1, i1 0
- %nop853 = alloca i1, i1 0
- %nop854 = alloca i1, i1 0
- %nop855 = alloca i1, i1 0
- %nop856 = alloca i1, i1 0
- %nop857 = alloca i1, i1 0
- %nop858 = alloca i1, i1 0
- %nop859 = alloca i1, i1 0
- %nop860 = alloca i1, i1 0
- %nop861 = alloca i1, i1 0
- %nop862 = alloca i1, i1 0
- %nop863 = alloca i1, i1 0
- %nop864 = alloca i1, i1 0
- %nop865 = alloca i1, i1 0
- %nop866 = alloca i1, i1 0
- %nop867 = alloca i1, i1 0
- %nop868 = alloca i1, i1 0
- %nop869 = alloca i1, i1 0
- %nop870 = alloca i1, i1 0
- %nop871 = alloca i1, i1 0
- %nop872 = alloca i1, i1 0
- %nop873 = alloca i1, i1 0
- %nop874 = alloca i1, i1 0
- %nop875 = alloca i1, i1 0
- %nop876 = alloca i1, i1 0
- %nop877 = alloca i1, i1 0
- %nop878 = alloca i1, i1 0
- %nop879 = alloca i1, i1 0
- %nop880 = alloca i1, i1 0
- %nop881 = alloca i1, i1 0
- %nop882 = alloca i1, i1 0
- %nop883 = alloca i1, i1 0
- %nop884 = alloca i1, i1 0
- %nop885 = alloca i1, i1 0
- %nop886 = alloca i1, i1 0
- %nop887 = alloca i1, i1 0
- %nop888 = alloca i1, i1 0
- %nop889 = alloca i1, i1 0
- %nop890 = alloca i1, i1 0
- %nop891 = alloca i1, i1 0
- %nop892 = alloca i1, i1 0
- %nop893 = alloca i1, i1 0
- %nop894 = alloca i1, i1 0
- %nop895 = alloca i1, i1 0
- %nop896 = alloca i1, i1 0
- %nop897 = alloca i1, i1 0
- %nop898 = alloca i1, i1 0
- %nop899 = alloca i1, i1 0
- %nop900 = alloca i1, i1 0
- %nop901 = alloca i1, i1 0
- %nop902 = alloca i1, i1 0
- %nop903 = alloca i1, i1 0
- %nop904 = alloca i1, i1 0
- %nop905 = alloca i1, i1 0
- %nop906 = alloca i1, i1 0
- %nop907 = alloca i1, i1 0
- %nop908 = alloca i1, i1 0
- %nop909 = alloca i1, i1 0
- %nop910 = alloca i1, i1 0
- %nop911 = alloca i1, i1 0
- %nop912 = alloca i1, i1 0
- %nop913 = alloca i1, i1 0
- %nop914 = alloca i1, i1 0
- %nop915 = alloca i1, i1 0
- %nop916 = alloca i1, i1 0
- %nop917 = alloca i1, i1 0
- %nop918 = alloca i1, i1 0
- %nop919 = alloca i1, i1 0
- %nop920 = alloca i1, i1 0
- %nop921 = alloca i1, i1 0
- %nop922 = alloca i1, i1 0
- %nop923 = alloca i1, i1 0
- %nop924 = alloca i1, i1 0
- %nop925 = alloca i1, i1 0
- %nop926 = alloca i1, i1 0
- %nop927 = alloca i1, i1 0
- %nop928 = alloca i1, i1 0
- %nop929 = alloca i1, i1 0
- %nop930 = alloca i1, i1 0
- %nop931 = alloca i1, i1 0
- %nop932 = alloca i1, i1 0
- %nop933 = alloca i1, i1 0
- %nop934 = alloca i1, i1 0
- %nop935 = alloca i1, i1 0
- %nop936 = alloca i1, i1 0
- %nop937 = alloca i1, i1 0
- %nop938 = alloca i1, i1 0
- %nop939 = alloca i1, i1 0
- %nop940 = alloca i1, i1 0
- %nop941 = alloca i1, i1 0
- %nop942 = alloca i1, i1 0
- %nop943 = alloca i1, i1 0
- %nop944 = alloca i1, i1 0
- %nop945 = alloca i1, i1 0
- %nop946 = alloca i1, i1 0
- %nop947 = alloca i1, i1 0
- %nop948 = alloca i1, i1 0
- %nop949 = alloca i1, i1 0
- %nop950 = alloca i1, i1 0
- %nop951 = alloca i1, i1 0
- %nop952 = alloca i1, i1 0
- %nop953 = alloca i1, i1 0
- %nop954 = alloca i1, i1 0
- %nop955 = alloca i1, i1 0
- %nop956 = alloca i1, i1 0
- %nop957 = alloca i1, i1 0
- %nop958 = alloca i1, i1 0
- %nop959 = alloca i1, i1 0
- %nop960 = alloca i1, i1 0
- %nop961 = alloca i1, i1 0
- %nop962 = alloca i1, i1 0
- %nop963 = alloca i1, i1 0
- %nop964 = alloca i1, i1 0
- %nop965 = alloca i1, i1 0
- %nop966 = alloca i1, i1 0
- %nop967 = alloca i1, i1 0
- %nop968 = alloca i1, i1 0
- %nop969 = alloca i1, i1 0
- %nop970 = alloca i1, i1 0
- %nop971 = alloca i1, i1 0
- %nop972 = alloca i1, i1 0
- %nop973 = alloca i1, i1 0
- %nop974 = alloca i1, i1 0
- %nop975 = alloca i1, i1 0
- %nop976 = alloca i1, i1 0
- %nop977 = alloca i1, i1 0
- %nop978 = alloca i1, i1 0
- %nop979 = alloca i1, i1 0
- %nop980 = alloca i1, i1 0
- %nop981 = alloca i1, i1 0
- %nop982 = alloca i1, i1 0
- %nop983 = alloca i1, i1 0
- %nop984 = alloca i1, i1 0
- %nop985 = alloca i1, i1 0
- %nop986 = alloca i1, i1 0
- %nop987 = alloca i1, i1 0
- %nop988 = alloca i1, i1 0
- %nop989 = alloca i1, i1 0
- %nop990 = alloca i1, i1 0
- %nop991 = alloca i1, i1 0
- %nop992 = alloca i1, i1 0
- %nop993 = alloca i1, i1 0
- %nop994 = alloca i1, i1 0
- %nop995 = alloca i1, i1 0
- %nop996 = alloca i1, i1 0
- %nop997 = alloca i1, i1 0
- %nop998 = alloca i1, i1 0
- %nop999 = alloca i1, i1 0
- %nop1000 = alloca i1, i1 0
- %nop1001 = alloca i1, i1 0
- %nop1002 = alloca i1, i1 0
- %nop1003 = alloca i1, i1 0
- %nop1004 = alloca i1, i1 0
- %nop1005 = alloca i1, i1 0
- %nop1006 = alloca i1, i1 0
- %nop1007 = alloca i1, i1 0
- %nop1008 = alloca i1, i1 0
- %nop1009 = alloca i1, i1 0
- %nop1010 = alloca i1, i1 0
- %nop1011 = alloca i1, i1 0
- %nop1012 = alloca i1, i1 0
- %nop1013 = alloca i1, i1 0
- %nop1014 = alloca i1, i1 0
- %nop1015 = alloca i1, i1 0
- %nop1016 = alloca i1, i1 0
- %nop1017 = alloca i1, i1 0
- %nop1018 = alloca i1, i1 0
- %nop1019 = alloca i1, i1 0
- %nop1020 = alloca i1, i1 0
- %nop1021 = alloca i1, i1 0
- %nop1022 = alloca i1, i1 0
- %nop1023 = alloca i1, i1 0
- %nop1024 = alloca i1, i1 0
- %nop1025 = alloca i1, i1 0
- %nop1026 = alloca i1, i1 0
- %nop1027 = alloca i1, i1 0
- %nop1028 = alloca i1, i1 0
- %nop1029 = alloca i1, i1 0
- %nop1030 = alloca i1, i1 0
- %nop1031 = alloca i1, i1 0
- %nop1032 = alloca i1, i1 0
- %nop1033 = alloca i1, i1 0
- %nop1034 = alloca i1, i1 0
- %nop1035 = alloca i1, i1 0
- %nop1036 = alloca i1, i1 0
- %nop1037 = alloca i1, i1 0
- %nop1038 = alloca i1, i1 0
- %nop1039 = alloca i1, i1 0
- %nop1040 = alloca i1, i1 0
- %nop1041 = alloca i1, i1 0
- %nop1042 = alloca i1, i1 0
- %nop1043 = alloca i1, i1 0
- %nop1044 = alloca i1, i1 0
- %nop1045 = alloca i1, i1 0
- %nop1046 = alloca i1, i1 0
- %nop1047 = alloca i1, i1 0
- %nop1048 = alloca i1, i1 0
- %nop1049 = alloca i1, i1 0
- %nop1050 = alloca i1, i1 0
- %nop1051 = alloca i1, i1 0
- %nop1052 = alloca i1, i1 0
- %nop1053 = alloca i1, i1 0
- %nop1054 = alloca i1, i1 0
- %nop1055 = alloca i1, i1 0
- %nop1056 = alloca i1, i1 0
- %nop1057 = alloca i1, i1 0
- %nop1058 = alloca i1, i1 0
- %nop1059 = alloca i1, i1 0
- %nop1060 = alloca i1, i1 0
- %nop1061 = alloca i1, i1 0
- %nop1062 = alloca i1, i1 0
- %nop1063 = alloca i1, i1 0
- %nop1064 = alloca i1, i1 0
- %nop1065 = alloca i1, i1 0
- %nop1066 = alloca i1, i1 0
- %nop1067 = alloca i1, i1 0
- %nop1068 = alloca i1, i1 0
- %nop1069 = alloca i1, i1 0
- %nop1070 = alloca i1, i1 0
- %nop1071 = alloca i1, i1 0
- %nop1072 = alloca i1, i1 0
- %nop1073 = alloca i1, i1 0
- %nop1074 = alloca i1, i1 0
- %nop1075 = alloca i1, i1 0
- %nop1076 = alloca i1, i1 0
- %nop1077 = alloca i1, i1 0
- %nop1078 = alloca i1, i1 0
- %nop1079 = alloca i1, i1 0
- %nop1080 = alloca i1, i1 0
- %nop1081 = alloca i1, i1 0
- %nop1082 = alloca i1, i1 0
- %nop1083 = alloca i1, i1 0
- %nop1084 = alloca i1, i1 0
- %nop1085 = alloca i1, i1 0
- %nop1086 = alloca i1, i1 0
- %nop1087 = alloca i1, i1 0
- %nop1088 = alloca i1, i1 0
- %nop1089 = alloca i1, i1 0
- %nop1090 = alloca i1, i1 0
- %nop1091 = alloca i1, i1 0
- %nop1092 = alloca i1, i1 0
- %nop1093 = alloca i1, i1 0
- %nop1094 = alloca i1, i1 0
- %nop1095 = alloca i1, i1 0
- %nop1096 = alloca i1, i1 0
- %nop1097 = alloca i1, i1 0
- %nop1098 = alloca i1, i1 0
- %nop1099 = alloca i1, i1 0
- %nop1100 = alloca i1, i1 0
- %nop1101 = alloca i1, i1 0
- %nop1102 = alloca i1, i1 0
- %nop1103 = alloca i1, i1 0
- %nop1104 = alloca i1, i1 0
- %nop1105 = alloca i1, i1 0
- %nop1106 = alloca i1, i1 0
- %nop1107 = alloca i1, i1 0
- %nop1108 = alloca i1, i1 0
- %nop1109 = alloca i1, i1 0
- %nop1110 = alloca i1, i1 0
- %nop1111 = alloca i1, i1 0
- %nop1112 = alloca i1, i1 0
- %nop1113 = alloca i1, i1 0
- %nop1114 = alloca i1, i1 0
- %nop1115 = alloca i1, i1 0
- %nop1116 = alloca i1, i1 0
- %nop1117 = alloca i1, i1 0
- %nop1118 = alloca i1, i1 0
- %nop1119 = alloca i1, i1 0
- %nop1120 = alloca i1, i1 0
- %nop1121 = alloca i1, i1 0
- %nop1122 = alloca i1, i1 0
- %nop1123 = alloca i1, i1 0
- %nop1124 = alloca i1, i1 0
- %nop1125 = alloca i1, i1 0
- %nop1126 = alloca i1, i1 0
- %nop1127 = alloca i1, i1 0
- %nop1128 = alloca i1, i1 0
- %nop1129 = alloca i1, i1 0
- %nop1130 = alloca i1, i1 0
- %nop1131 = alloca i1, i1 0
- %nop1132 = alloca i1, i1 0
- %nop1133 = alloca i1, i1 0
- %nop1134 = alloca i1, i1 0
- %nop1135 = alloca i1, i1 0
- %nop1136 = alloca i1, i1 0
- %nop1137 = alloca i1, i1 0
- %nop1138 = alloca i1, i1 0
- %nop1139 = alloca i1, i1 0
- %nop1140 = alloca i1, i1 0
- %nop1141 = alloca i1, i1 0
- %nop1142 = alloca i1, i1 0
- %nop1143 = alloca i1, i1 0
- %nop1144 = alloca i1, i1 0
- %nop1145 = alloca i1, i1 0
- %nop1146 = alloca i1, i1 0
- %nop1147 = alloca i1, i1 0
- %nop1148 = alloca i1, i1 0
- %nop1149 = alloca i1, i1 0
- %nop1150 = alloca i1, i1 0
- %nop1151 = alloca i1, i1 0
- %nop1152 = alloca i1, i1 0
- %nop1153 = alloca i1, i1 0
- %nop1154 = alloca i1, i1 0
- %nop1155 = alloca i1, i1 0
- %nop1156 = alloca i1, i1 0
- %nop1157 = alloca i1, i1 0
- %nop1158 = alloca i1, i1 0
- %nop1159 = alloca i1, i1 0
- %nop1160 = alloca i1, i1 0
- %nop1161 = alloca i1, i1 0
- %nop1162 = alloca i1, i1 0
- %nop1163 = alloca i1, i1 0
- %nop1164 = alloca i1, i1 0
- %nop1165 = alloca i1, i1 0
- %nop1166 = alloca i1, i1 0
- %nop1167 = alloca i1, i1 0
- %nop1168 = alloca i1, i1 0
- %nop1169 = alloca i1, i1 0
- %nop1170 = alloca i1, i1 0
- %nop1171 = alloca i1, i1 0
- %nop1172 = alloca i1, i1 0
- %nop1173 = alloca i1, i1 0
- %nop1174 = alloca i1, i1 0
- %nop1175 = alloca i1, i1 0
- %nop1176 = alloca i1, i1 0
- %nop1177 = alloca i1, i1 0
- %nop1178 = alloca i1, i1 0
- %nop1179 = alloca i1, i1 0
- %nop1180 = alloca i1, i1 0
- %nop1181 = alloca i1, i1 0
- %nop1182 = alloca i1, i1 0
- %nop1183 = alloca i1, i1 0
- %nop1184 = alloca i1, i1 0
- %nop1185 = alloca i1, i1 0
- %nop1186 = alloca i1, i1 0
- %nop1187 = alloca i1, i1 0
- %nop1188 = alloca i1, i1 0
- %nop1189 = alloca i1, i1 0
- %nop1190 = alloca i1, i1 0
- %nop1191 = alloca i1, i1 0
- %nop1192 = alloca i1, i1 0
- %nop1193 = alloca i1, i1 0
- %nop1194 = alloca i1, i1 0
- %nop1195 = alloca i1, i1 0
- %nop1196 = alloca i1, i1 0
- %nop1197 = alloca i1, i1 0
- %nop1198 = alloca i1, i1 0
- %nop1199 = alloca i1, i1 0
- %nop1200 = alloca i1, i1 0
- %nop1201 = alloca i1, i1 0
- %nop1202 = alloca i1, i1 0
- %nop1203 = alloca i1, i1 0
- %nop1204 = alloca i1, i1 0
- %nop1205 = alloca i1, i1 0
- %nop1206 = alloca i1, i1 0
- %nop1207 = alloca i1, i1 0
- %nop1208 = alloca i1, i1 0
- %nop1209 = alloca i1, i1 0
- %nop1210 = alloca i1, i1 0
- %nop1211 = alloca i1, i1 0
- %nop1212 = alloca i1, i1 0
- %nop1213 = alloca i1, i1 0
- %nop1214 = alloca i1, i1 0
- %nop1215 = alloca i1, i1 0
- %nop1216 = alloca i1, i1 0
- %nop1217 = alloca i1, i1 0
- %nop1218 = alloca i1, i1 0
- %nop1219 = alloca i1, i1 0
- %nop1220 = alloca i1, i1 0
- %nop1221 = alloca i1, i1 0
- %nop1222 = alloca i1, i1 0
- %nop1223 = alloca i1, i1 0
- %nop1224 = alloca i1, i1 0
- %nop1225 = alloca i1, i1 0
- %nop1226 = alloca i1, i1 0
- %nop1227 = alloca i1, i1 0
- %nop1228 = alloca i1, i1 0
- %nop1229 = alloca i1, i1 0
- %nop1230 = alloca i1, i1 0
- %nop1231 = alloca i1, i1 0
- %nop1232 = alloca i1, i1 0
- %nop1233 = alloca i1, i1 0
- %nop1234 = alloca i1, i1 0
- %nop1235 = alloca i1, i1 0
- %nop1236 = alloca i1, i1 0
- %nop1237 = alloca i1, i1 0
- %nop1238 = alloca i1, i1 0
- %nop1239 = alloca i1, i1 0
- %nop1240 = alloca i1, i1 0
- %nop1241 = alloca i1, i1 0
- %nop1242 = alloca i1, i1 0
- %nop1243 = alloca i1, i1 0
- %nop1244 = alloca i1, i1 0
- %nop1245 = alloca i1, i1 0
- %nop1246 = alloca i1, i1 0
- %nop1247 = alloca i1, i1 0
- %nop1248 = alloca i1, i1 0
- %nop1249 = alloca i1, i1 0
- %nop1250 = alloca i1, i1 0
- %nop1251 = alloca i1, i1 0
- %nop1252 = alloca i1, i1 0
- %nop1253 = alloca i1, i1 0
- %nop1254 = alloca i1, i1 0
- %nop1255 = alloca i1, i1 0
- %nop1256 = alloca i1, i1 0
- %nop1257 = alloca i1, i1 0
- %nop1258 = alloca i1, i1 0
- %nop1259 = alloca i1, i1 0
- %nop1260 = alloca i1, i1 0
- %nop1261 = alloca i1, i1 0
- %nop1262 = alloca i1, i1 0
- %nop1263 = alloca i1, i1 0
- %nop1264 = alloca i1, i1 0
- %nop1265 = alloca i1, i1 0
- %nop1266 = alloca i1, i1 0
- %nop1267 = alloca i1, i1 0
- %nop1268 = alloca i1, i1 0
- %nop1269 = alloca i1, i1 0
- %nop1270 = alloca i1, i1 0
- %nop1271 = alloca i1, i1 0
- %nop1272 = alloca i1, i1 0
- %nop1273 = alloca i1, i1 0
- %nop1274 = alloca i1, i1 0
- %nop1275 = alloca i1, i1 0
- %nop1276 = alloca i1, i1 0
- %nop1277 = alloca i1, i1 0
- %nop1278 = alloca i1, i1 0
- %nop1279 = alloca i1, i1 0
- %nop1280 = alloca i1, i1 0
- %nop1281 = alloca i1, i1 0
- %nop1282 = alloca i1, i1 0
- %nop1283 = alloca i1, i1 0
- %nop1284 = alloca i1, i1 0
- %nop1285 = alloca i1, i1 0
- %nop1286 = alloca i1, i1 0
- %nop1287 = alloca i1, i1 0
- %nop1288 = alloca i1, i1 0
- %nop1289 = alloca i1, i1 0
- %nop1290 = alloca i1, i1 0
- %nop1291 = alloca i1, i1 0
- %nop1292 = alloca i1, i1 0
- %nop1293 = alloca i1, i1 0
- %nop1294 = alloca i1, i1 0
- %nop1295 = alloca i1, i1 0
- %nop1296 = alloca i1, i1 0
- %nop1297 = alloca i1, i1 0
- %nop1298 = alloca i1, i1 0
- %nop1299 = alloca i1, i1 0
- %nop1300 = alloca i1, i1 0
- %nop1301 = alloca i1, i1 0
- %nop1302 = alloca i1, i1 0
- %nop1303 = alloca i1, i1 0
- %nop1304 = alloca i1, i1 0
- %nop1305 = alloca i1, i1 0
- %nop1306 = alloca i1, i1 0
- %nop1307 = alloca i1, i1 0
- %nop1308 = alloca i1, i1 0
- %nop1309 = alloca i1, i1 0
- %nop1310 = alloca i1, i1 0
- %nop1311 = alloca i1, i1 0
- %nop1312 = alloca i1, i1 0
- %nop1313 = alloca i1, i1 0
- %nop1314 = alloca i1, i1 0
- %nop1315 = alloca i1, i1 0
- %nop1316 = alloca i1, i1 0
- %nop1317 = alloca i1, i1 0
- %nop1318 = alloca i1, i1 0
- %nop1319 = alloca i1, i1 0
- %nop1320 = alloca i1, i1 0
- %nop1321 = alloca i1, i1 0
- %nop1322 = alloca i1, i1 0
- %nop1323 = alloca i1, i1 0
- %nop1324 = alloca i1, i1 0
- %nop1325 = alloca i1, i1 0
- %nop1326 = alloca i1, i1 0
- %nop1327 = alloca i1, i1 0
- %nop1328 = alloca i1, i1 0
- %nop1329 = alloca i1, i1 0
- %nop1330 = alloca i1, i1 0
- %nop1331 = alloca i1, i1 0
- %nop1332 = alloca i1, i1 0
- %nop1333 = alloca i1, i1 0
- %nop1334 = alloca i1, i1 0
- %nop1335 = alloca i1, i1 0
- %nop1336 = alloca i1, i1 0
- %nop1337 = alloca i1, i1 0
- %nop1338 = alloca i1, i1 0
- %nop1339 = alloca i1, i1 0
- %nop1340 = alloca i1, i1 0
- %nop1341 = alloca i1, i1 0
- %nop1342 = alloca i1, i1 0
- %nop1343 = alloca i1, i1 0
- %nop1344 = alloca i1, i1 0
- %nop1345 = alloca i1, i1 0
- %nop1346 = alloca i1, i1 0
- %nop1347 = alloca i1, i1 0
- %nop1348 = alloca i1, i1 0
- %nop1349 = alloca i1, i1 0
- %nop1350 = alloca i1, i1 0
- %nop1351 = alloca i1, i1 0
- %nop1352 = alloca i1, i1 0
- %nop1353 = alloca i1, i1 0
- %nop1354 = alloca i1, i1 0
- %nop1355 = alloca i1, i1 0
- %nop1356 = alloca i1, i1 0
- %nop1357 = alloca i1, i1 0
- %nop1358 = alloca i1, i1 0
- %nop1359 = alloca i1, i1 0
- %nop1360 = alloca i1, i1 0
- %nop1361 = alloca i1, i1 0
- %nop1362 = alloca i1, i1 0
- %nop1363 = alloca i1, i1 0
- %nop1364 = alloca i1, i1 0
- %nop1365 = alloca i1, i1 0
- %nop1366 = alloca i1, i1 0
- %nop1367 = alloca i1, i1 0
- %nop1368 = alloca i1, i1 0
- %nop1369 = alloca i1, i1 0
- %nop1370 = alloca i1, i1 0
- %nop1371 = alloca i1, i1 0
- %nop1372 = alloca i1, i1 0
- %nop1373 = alloca i1, i1 0
- %nop1374 = alloca i1, i1 0
- %nop1375 = alloca i1, i1 0
- %nop1376 = alloca i1, i1 0
- %nop1377 = alloca i1, i1 0
- %nop1378 = alloca i1, i1 0
- %nop1379 = alloca i1, i1 0
- %nop1380 = alloca i1, i1 0
- %nop1381 = alloca i1, i1 0
- %nop1382 = alloca i1, i1 0
- %nop1383 = alloca i1, i1 0
- %nop1384 = alloca i1, i1 0
- %nop1385 = alloca i1, i1 0
- %nop1386 = alloca i1, i1 0
- %nop1387 = alloca i1, i1 0
- %nop1388 = alloca i1, i1 0
- %nop1389 = alloca i1, i1 0
- %nop1390 = alloca i1, i1 0
- %nop1391 = alloca i1, i1 0
- %nop1392 = alloca i1, i1 0
- %nop1393 = alloca i1, i1 0
- %nop1394 = alloca i1, i1 0
- %nop1395 = alloca i1, i1 0
- %nop1396 = alloca i1, i1 0
- %nop1397 = alloca i1, i1 0
- %nop1398 = alloca i1, i1 0
- %nop1399 = alloca i1, i1 0
- %nop1400 = alloca i1, i1 0
- %nop1401 = alloca i1, i1 0
- %nop1402 = alloca i1, i1 0
- %nop1403 = alloca i1, i1 0
- %nop1404 = alloca i1, i1 0
- %nop1405 = alloca i1, i1 0
- %nop1406 = alloca i1, i1 0
- %nop1407 = alloca i1, i1 0
- %nop1408 = alloca i1, i1 0
- %nop1409 = alloca i1, i1 0
- %nop1410 = alloca i1, i1 0
- %nop1411 = alloca i1, i1 0
- %nop1412 = alloca i1, i1 0
- %nop1413 = alloca i1, i1 0
- %nop1414 = alloca i1, i1 0
- %nop1415 = alloca i1, i1 0
- %nop1416 = alloca i1, i1 0
- %nop1417 = alloca i1, i1 0
- %nop1418 = alloca i1, i1 0
- %nop1419 = alloca i1, i1 0
- %nop1420 = alloca i1, i1 0
- %nop1421 = alloca i1, i1 0
- %nop1422 = alloca i1, i1 0
- %nop1423 = alloca i1, i1 0
- %nop1424 = alloca i1, i1 0
- %nop1425 = alloca i1, i1 0
- %nop1426 = alloca i1, i1 0
- %nop1427 = alloca i1, i1 0
- %nop1428 = alloca i1, i1 0
- %nop1429 = alloca i1, i1 0
- %nop1430 = alloca i1, i1 0
- %nop1431 = alloca i1, i1 0
- %nop1432 = alloca i1, i1 0
- %nop1433 = alloca i1, i1 0
- %nop1434 = alloca i1, i1 0
- %nop1435 = alloca i1, i1 0
- %nop1436 = alloca i1, i1 0
- %nop1437 = alloca i1, i1 0
- %nop1438 = alloca i1, i1 0
- %nop1439 = alloca i1, i1 0
- %nop1440 = alloca i1, i1 0
- %nop1441 = alloca i1, i1 0
- %nop1442 = alloca i1, i1 0
- %nop1443 = alloca i1, i1 0
- %nop1444 = alloca i1, i1 0
- %nop1445 = alloca i1, i1 0
- %nop1446 = alloca i1, i1 0
- %nop1447 = alloca i1, i1 0
- %nop1448 = alloca i1, i1 0
- %nop1449 = alloca i1, i1 0
- %nop1450 = alloca i1, i1 0
- %nop1451 = alloca i1, i1 0
- %nop1452 = alloca i1, i1 0
- %nop1453 = alloca i1, i1 0
- %nop1454 = alloca i1, i1 0
- %nop1455 = alloca i1, i1 0
- %nop1456 = alloca i1, i1 0
- %nop1457 = alloca i1, i1 0
- %nop1458 = alloca i1, i1 0
- %nop1459 = alloca i1, i1 0
- %nop1460 = alloca i1, i1 0
- %nop1461 = alloca i1, i1 0
- %nop1462 = alloca i1, i1 0
- %nop1463 = alloca i1, i1 0
- %nop1464 = alloca i1, i1 0
- %nop1465 = alloca i1, i1 0
- %nop1466 = alloca i1, i1 0
- %nop1467 = alloca i1, i1 0
- %nop1468 = alloca i1, i1 0
- %nop1469 = alloca i1, i1 0
- %nop1470 = alloca i1, i1 0
- %nop1471 = alloca i1, i1 0
- %nop1472 = alloca i1, i1 0
- %nop1473 = alloca i1, i1 0
- %nop1474 = alloca i1, i1 0
- %nop1475 = alloca i1, i1 0
- %nop1476 = alloca i1, i1 0
- %nop1477 = alloca i1, i1 0
- %nop1478 = alloca i1, i1 0
- %nop1479 = alloca i1, i1 0
- %nop1480 = alloca i1, i1 0
- %nop1481 = alloca i1, i1 0
- %nop1482 = alloca i1, i1 0
- %nop1483 = alloca i1, i1 0
- %nop1484 = alloca i1, i1 0
- %nop1485 = alloca i1, i1 0
- %nop1486 = alloca i1, i1 0
- %nop1487 = alloca i1, i1 0
- %nop1488 = alloca i1, i1 0
- %nop1489 = alloca i1, i1 0
- %nop1490 = alloca i1, i1 0
- %nop1491 = alloca i1, i1 0
- %nop1492 = alloca i1, i1 0
- %nop1493 = alloca i1, i1 0
- %nop1494 = alloca i1, i1 0
- %nop1495 = alloca i1, i1 0
- %nop1496 = alloca i1, i1 0
- %nop1497 = alloca i1, i1 0
- %nop1498 = alloca i1, i1 0
- %nop1499 = alloca i1, i1 0
- %nop1500 = alloca i1, i1 0
- %nop1501 = alloca i1, i1 0
- %nop1502 = alloca i1, i1 0
- %nop1503 = alloca i1, i1 0
- %nop1504 = alloca i1, i1 0
- %nop1505 = alloca i1, i1 0
- %nop1506 = alloca i1, i1 0
- %nop1507 = alloca i1, i1 0
- %nop1508 = alloca i1, i1 0
- %nop1509 = alloca i1, i1 0
- %nop1510 = alloca i1, i1 0
- %nop1511 = alloca i1, i1 0
- %nop1512 = alloca i1, i1 0
- %nop1513 = alloca i1, i1 0
- %nop1514 = alloca i1, i1 0
- %nop1515 = alloca i1, i1 0
- %nop1516 = alloca i1, i1 0
- %nop1517 = alloca i1, i1 0
- %nop1518 = alloca i1, i1 0
- %nop1519 = alloca i1, i1 0
- %nop1520 = alloca i1, i1 0
- %nop1521 = alloca i1, i1 0
- %nop1522 = alloca i1, i1 0
- %nop1523 = alloca i1, i1 0
- %nop1524 = alloca i1, i1 0
- %nop1525 = alloca i1, i1 0
- %nop1526 = alloca i1, i1 0
- %nop1527 = alloca i1, i1 0
- %nop1528 = alloca i1, i1 0
- %nop1529 = alloca i1, i1 0
- %nop1530 = alloca i1, i1 0
- %nop1531 = alloca i1, i1 0
- %nop1532 = alloca i1, i1 0
- %nop1533 = alloca i1, i1 0
- %nop1534 = alloca i1, i1 0
- %nop1535 = alloca i1, i1 0
- %nop1536 = alloca i1, i1 0
- %nop1537 = alloca i1, i1 0
- %nop1538 = alloca i1, i1 0
- %nop1539 = alloca i1, i1 0
- %nop1540 = alloca i1, i1 0
- %nop1541 = alloca i1, i1 0
- %nop1542 = alloca i1, i1 0
- %nop1543 = alloca i1, i1 0
- %nop1544 = alloca i1, i1 0
- %nop1545 = alloca i1, i1 0
- %nop1546 = alloca i1, i1 0
- %nop1547 = alloca i1, i1 0
- %nop1548 = alloca i1, i1 0
- %nop1549 = alloca i1, i1 0
- %nop1550 = alloca i1, i1 0
- %nop1551 = alloca i1, i1 0
- %nop1552 = alloca i1, i1 0
- %nop1553 = alloca i1, i1 0
- %nop1554 = alloca i1, i1 0
- %nop1555 = alloca i1, i1 0
- %nop1556 = alloca i1, i1 0
- %nop1557 = alloca i1, i1 0
- %nop1558 = alloca i1, i1 0
- %nop1559 = alloca i1, i1 0
- %nop1560 = alloca i1, i1 0
- %nop1561 = alloca i1, i1 0
- %nop1562 = alloca i1, i1 0
- %nop1563 = alloca i1, i1 0
- %nop1564 = alloca i1, i1 0
- %nop1565 = alloca i1, i1 0
- %nop1566 = alloca i1, i1 0
- %nop1567 = alloca i1, i1 0
- %nop1568 = alloca i1, i1 0
- %nop1569 = alloca i1, i1 0
- %nop1570 = alloca i1, i1 0
- %nop1571 = alloca i1, i1 0
- %nop1572 = alloca i1, i1 0
- %nop1573 = alloca i1, i1 0
- %nop1574 = alloca i1, i1 0
- %nop1575 = alloca i1, i1 0
- %nop1576 = alloca i1, i1 0
- %nop1577 = alloca i1, i1 0
- %nop1578 = alloca i1, i1 0
- %nop1579 = alloca i1, i1 0
- %nop1580 = alloca i1, i1 0
- %nop1581 = alloca i1, i1 0
- %nop1582 = alloca i1, i1 0
- %nop1583 = alloca i1, i1 0
- %nop1584 = alloca i1, i1 0
- %nop1585 = alloca i1, i1 0
- %nop1586 = alloca i1, i1 0
- %nop1587 = alloca i1, i1 0
- %nop1588 = alloca i1, i1 0
- %nop1589 = alloca i1, i1 0
- %nop1590 = alloca i1, i1 0
- %nop1591 = alloca i1, i1 0
- %nop1592 = alloca i1, i1 0
- %nop1593 = alloca i1, i1 0
- %nop1594 = alloca i1, i1 0
- %nop1595 = alloca i1, i1 0
- %nop1596 = alloca i1, i1 0
- %nop1597 = alloca i1, i1 0
- %nop1598 = alloca i1, i1 0
- %nop1599 = alloca i1, i1 0
- %nop1600 = alloca i1, i1 0
- %nop1601 = alloca i1, i1 0
- %nop1602 = alloca i1, i1 0
- %nop1603 = alloca i1, i1 0
- %nop1604 = alloca i1, i1 0
- %nop1605 = alloca i1, i1 0
- %nop1606 = alloca i1, i1 0
- %nop1607 = alloca i1, i1 0
- %nop1608 = alloca i1, i1 0
- %nop1609 = alloca i1, i1 0
- %nop1610 = alloca i1, i1 0
- %nop1611 = alloca i1, i1 0
- %nop1612 = alloca i1, i1 0
- %nop1613 = alloca i1, i1 0
- %nop1614 = alloca i1, i1 0
- %nop1615 = alloca i1, i1 0
- %nop1616 = alloca i1, i1 0
- %nop1617 = alloca i1, i1 0
- %nop1618 = alloca i1, i1 0
- %nop1619 = alloca i1, i1 0
- %nop1620 = alloca i1, i1 0
- %nop1621 = alloca i1, i1 0
- %nop1622 = alloca i1, i1 0
- %nop1623 = alloca i1, i1 0
- %nop1624 = alloca i1, i1 0
- %nop1625 = alloca i1, i1 0
- %nop1626 = alloca i1, i1 0
- %nop1627 = alloca i1, i1 0
- %nop1628 = alloca i1, i1 0
- %nop1629 = alloca i1, i1 0
- %nop1630 = alloca i1, i1 0
- %nop1631 = alloca i1, i1 0
- %nop1632 = alloca i1, i1 0
- %nop1633 = alloca i1, i1 0
- %nop1634 = alloca i1, i1 0
- %nop1635 = alloca i1, i1 0
- %nop1636 = alloca i1, i1 0
- %nop1637 = alloca i1, i1 0
- %nop1638 = alloca i1, i1 0
- %nop1639 = alloca i1, i1 0
- %nop1640 = alloca i1, i1 0
- %nop1641 = alloca i1, i1 0
- %nop1642 = alloca i1, i1 0
- %nop1643 = alloca i1, i1 0
- %nop1644 = alloca i1, i1 0
- %nop1645 = alloca i1, i1 0
- %nop1646 = alloca i1, i1 0
- %nop1647 = alloca i1, i1 0
- %nop1648 = alloca i1, i1 0
- %nop1649 = alloca i1, i1 0
- %nop1650 = alloca i1, i1 0
- %nop1651 = alloca i1, i1 0
- %nop1652 = alloca i1, i1 0
- %nop1653 = alloca i1, i1 0
- %nop1654 = alloca i1, i1 0
- %nop1655 = alloca i1, i1 0
- %nop1656 = alloca i1, i1 0
- %nop1657 = alloca i1, i1 0
- %nop1658 = alloca i1, i1 0
- %nop1659 = alloca i1, i1 0
- %nop1660 = alloca i1, i1 0
- %nop1661 = alloca i1, i1 0
- %nop1662 = alloca i1, i1 0
- %nop1663 = alloca i1, i1 0
- %nop1664 = alloca i1, i1 0
- %nop1665 = alloca i1, i1 0
- %nop1666 = alloca i1, i1 0
- %nop1667 = alloca i1, i1 0
- %nop1668 = alloca i1, i1 0
- %nop1669 = alloca i1, i1 0
- %nop1670 = alloca i1, i1 0
- %nop1671 = alloca i1, i1 0
- %nop1672 = alloca i1, i1 0
- %nop1673 = alloca i1, i1 0
- %nop1674 = alloca i1, i1 0
- %nop1675 = alloca i1, i1 0
- %nop1676 = alloca i1, i1 0
- %nop1677 = alloca i1, i1 0
- %nop1678 = alloca i1, i1 0
- %nop1679 = alloca i1, i1 0
- %nop1680 = alloca i1, i1 0
- %nop1681 = alloca i1, i1 0
- %nop1682 = alloca i1, i1 0
- %nop1683 = alloca i1, i1 0
- %nop1684 = alloca i1, i1 0
- %nop1685 = alloca i1, i1 0
- %nop1686 = alloca i1, i1 0
- %nop1687 = alloca i1, i1 0
- %nop1688 = alloca i1, i1 0
- %nop1689 = alloca i1, i1 0
- %nop1690 = alloca i1, i1 0
- %nop1691 = alloca i1, i1 0
- %nop1692 = alloca i1, i1 0
- %nop1693 = alloca i1, i1 0
- %nop1694 = alloca i1, i1 0
- %nop1695 = alloca i1, i1 0
- %nop1696 = alloca i1, i1 0
- %nop1697 = alloca i1, i1 0
- %nop1698 = alloca i1, i1 0
- %nop1699 = alloca i1, i1 0
- %nop1700 = alloca i1, i1 0
- %nop1701 = alloca i1, i1 0
- %nop1702 = alloca i1, i1 0
- %nop1703 = alloca i1, i1 0
- %nop1704 = alloca i1, i1 0
- %nop1705 = alloca i1, i1 0
- %nop1706 = alloca i1, i1 0
- %nop1707 = alloca i1, i1 0
- %nop1708 = alloca i1, i1 0
- %nop1709 = alloca i1, i1 0
- %nop1710 = alloca i1, i1 0
- %nop1711 = alloca i1, i1 0
- %nop1712 = alloca i1, i1 0
- %nop1713 = alloca i1, i1 0
- %nop1714 = alloca i1, i1 0
- %nop1715 = alloca i1, i1 0
- %nop1716 = alloca i1, i1 0
- %nop1717 = alloca i1, i1 0
- %nop1718 = alloca i1, i1 0
- %nop1719 = alloca i1, i1 0
- %nop1720 = alloca i1, i1 0
- %nop1721 = alloca i1, i1 0
- %nop1722 = alloca i1, i1 0
- %nop1723 = alloca i1, i1 0
- %nop1724 = alloca i1, i1 0
- %nop1725 = alloca i1, i1 0
- %nop1726 = alloca i1, i1 0
  [... 2,206 identical removed lines elided: %nop1727 through %nop3932, each of the form "%nopN = alloca i1, i1 0" ...]
- %nop3933 = alloca i1, i1 0
- %nop3934 = alloca i1, i1 0
- %nop3935 = alloca i1, i1 0
- %nop3936 = alloca i1, i1 0
- %nop3937 = alloca i1, i1 0
- %nop3938 = alloca i1, i1 0
- %nop3939 = alloca i1, i1 0
- %nop3940 = alloca i1, i1 0
- %nop3941 = alloca i1, i1 0
- %nop3942 = alloca i1, i1 0
- %nop3943 = alloca i1, i1 0
- %nop3944 = alloca i1, i1 0
- %nop3945 = alloca i1, i1 0
- %nop3946 = alloca i1, i1 0
- %nop3947 = alloca i1, i1 0
- %nop3948 = alloca i1, i1 0
- %nop3949 = alloca i1, i1 0
- %nop3950 = alloca i1, i1 0
- %nop3951 = alloca i1, i1 0
- %nop3952 = alloca i1, i1 0
- %nop3953 = alloca i1, i1 0
- %nop3954 = alloca i1, i1 0
- %nop3955 = alloca i1, i1 0
- %nop3956 = alloca i1, i1 0
- %nop3957 = alloca i1, i1 0
- %nop3958 = alloca i1, i1 0
- %nop3959 = alloca i1, i1 0
- %nop3960 = alloca i1, i1 0
- %nop3961 = alloca i1, i1 0
- %nop3962 = alloca i1, i1 0
- %nop3963 = alloca i1, i1 0
- %nop3964 = alloca i1, i1 0
- %nop3965 = alloca i1, i1 0
- %nop3966 = alloca i1, i1 0
- %nop3967 = alloca i1, i1 0
- %nop3968 = alloca i1, i1 0
- %nop3969 = alloca i1, i1 0
- %nop3970 = alloca i1, i1 0
- %nop3971 = alloca i1, i1 0
- %nop3972 = alloca i1, i1 0
- %nop3973 = alloca i1, i1 0
- %nop3974 = alloca i1, i1 0
- %nop3975 = alloca i1, i1 0
- %nop3976 = alloca i1, i1 0
- %nop3977 = alloca i1, i1 0
- %nop3978 = alloca i1, i1 0
- %nop3979 = alloca i1, i1 0
- %nop3980 = alloca i1, i1 0
- %nop3981 = alloca i1, i1 0
- %nop3982 = alloca i1, i1 0
- %nop3983 = alloca i1, i1 0
- %nop3984 = alloca i1, i1 0
- %nop3985 = alloca i1, i1 0
- %nop3986 = alloca i1, i1 0
- %nop3987 = alloca i1, i1 0
- %nop3988 = alloca i1, i1 0
- %nop3989 = alloca i1, i1 0
- %nop3990 = alloca i1, i1 0
- %nop3991 = alloca i1, i1 0
- %nop3992 = alloca i1, i1 0
- %nop3993 = alloca i1, i1 0
- %nop3994 = alloca i1, i1 0
- %nop3995 = alloca i1, i1 0
- %nop3996 = alloca i1, i1 0
- %nop3997 = alloca i1, i1 0
- %nop3998 = alloca i1, i1 0
- %nop3999 = alloca i1, i1 0
- %nop4000 = alloca i1, i1 0
- %nop4001 = alloca i1, i1 0
- %nop4002 = alloca i1, i1 0
- %nop4003 = alloca i1, i1 0
- %nop4004 = alloca i1, i1 0
- %nop4005 = alloca i1, i1 0
- %nop4006 = alloca i1, i1 0
- %nop4007 = alloca i1, i1 0
- %nop4008 = alloca i1, i1 0
- %nop4009 = alloca i1, i1 0
- %nop4010 = alloca i1, i1 0
- %nop4011 = alloca i1, i1 0
- %nop4012 = alloca i1, i1 0
- %nop4013 = alloca i1, i1 0
- %nop4014 = alloca i1, i1 0
- %nop4015 = alloca i1, i1 0
- %nop4016 = alloca i1, i1 0
- %nop4017 = alloca i1, i1 0
- %nop4018 = alloca i1, i1 0
- %nop4019 = alloca i1, i1 0
- %nop4020 = alloca i1, i1 0
- %nop4021 = alloca i1, i1 0
- %nop4022 = alloca i1, i1 0
- %nop4023 = alloca i1, i1 0
- %nop4024 = alloca i1, i1 0
- %nop4025 = alloca i1, i1 0
- %nop4026 = alloca i1, i1 0
- %nop4027 = alloca i1, i1 0
- %nop4028 = alloca i1, i1 0
- %nop4029 = alloca i1, i1 0
- %nop4030 = alloca i1, i1 0
- %nop4031 = alloca i1, i1 0
- %nop4032 = alloca i1, i1 0
- %nop4033 = alloca i1, i1 0
- %nop4034 = alloca i1, i1 0
- %nop4035 = alloca i1, i1 0
- %nop4036 = alloca i1, i1 0
- %nop4037 = alloca i1, i1 0
- %nop4038 = alloca i1, i1 0
- %nop4039 = alloca i1, i1 0
- %nop4040 = alloca i1, i1 0
- %nop4041 = alloca i1, i1 0
- %nop4042 = alloca i1, i1 0
- %nop4043 = alloca i1, i1 0
- %nop4044 = alloca i1, i1 0
- %nop4045 = alloca i1, i1 0
- %nop4046 = alloca i1, i1 0
- %nop4047 = alloca i1, i1 0
- %nop4048 = alloca i1, i1 0
- %nop4049 = alloca i1, i1 0
- %nop4050 = alloca i1, i1 0
- %nop4051 = alloca i1, i1 0
- %nop4052 = alloca i1, i1 0
- %nop4053 = alloca i1, i1 0
- %nop4054 = alloca i1, i1 0
- %nop4055 = alloca i1, i1 0
- %nop4056 = alloca i1, i1 0
- %nop4057 = alloca i1, i1 0
- %nop4058 = alloca i1, i1 0
- %nop4059 = alloca i1, i1 0
- %nop4060 = alloca i1, i1 0
- %nop4061 = alloca i1, i1 0
- %nop4062 = alloca i1, i1 0
- %nop4063 = alloca i1, i1 0
- %nop4064 = alloca i1, i1 0
- %nop4065 = alloca i1, i1 0
- %nop4066 = alloca i1, i1 0
- %nop4067 = alloca i1, i1 0
- %nop4068 = alloca i1, i1 0
- %nop4069 = alloca i1, i1 0
- %nop4070 = alloca i1, i1 0
- %nop4071 = alloca i1, i1 0
- %nop4072 = alloca i1, i1 0
- %nop4073 = alloca i1, i1 0
- %nop4074 = alloca i1, i1 0
- %nop4075 = alloca i1, i1 0
- %nop4076 = alloca i1, i1 0
- %nop4077 = alloca i1, i1 0
- %nop4078 = alloca i1, i1 0
- %nop4079 = alloca i1, i1 0
- %nop4080 = alloca i1, i1 0
- %nop4081 = alloca i1, i1 0
- %nop4082 = alloca i1, i1 0
- %nop4083 = alloca i1, i1 0
- %nop4084 = alloca i1, i1 0
- %nop4085 = alloca i1, i1 0
- %nop4086 = alloca i1, i1 0
- %nop4087 = alloca i1, i1 0
- %nop4088 = alloca i1, i1 0
- %nop4089 = alloca i1, i1 0
- %nop4090 = alloca i1, i1 0
- %nop4091 = alloca i1, i1 0
- %nop4092 = alloca i1, i1 0
- %nop4093 = alloca i1, i1 0
- %nop4094 = alloca i1, i1 0
- %nop4095 = alloca i1, i1 0
- %nop4096 = alloca i1, i1 0
- %nop4097 = alloca i1, i1 0
- %nop4098 = alloca i1, i1 0
- %nop4099 = alloca i1, i1 0
- %nop4100 = alloca i1, i1 0
- %nop4101 = alloca i1, i1 0
- %nop4102 = alloca i1, i1 0
- %nop4103 = alloca i1, i1 0
- %nop4104 = alloca i1, i1 0
- %nop4105 = alloca i1, i1 0
- %nop4106 = alloca i1, i1 0
- %nop4107 = alloca i1, i1 0
- %nop4108 = alloca i1, i1 0
- %nop4109 = alloca i1, i1 0
- %nop4110 = alloca i1, i1 0
- %nop4111 = alloca i1, i1 0
- %nop4112 = alloca i1, i1 0
- %nop4113 = alloca i1, i1 0
- %nop4114 = alloca i1, i1 0
- %nop4115 = alloca i1, i1 0
- %nop4116 = alloca i1, i1 0
- %nop4117 = alloca i1, i1 0
- %nop4118 = alloca i1, i1 0
- %nop4119 = alloca i1, i1 0
- %nop4120 = alloca i1, i1 0
- %nop4121 = alloca i1, i1 0
- %nop4122 = alloca i1, i1 0
- %nop4123 = alloca i1, i1 0
- %nop4124 = alloca i1, i1 0
- %nop4125 = alloca i1, i1 0
- %nop4126 = alloca i1, i1 0
- %nop4127 = alloca i1, i1 0
- %nop4128 = alloca i1, i1 0
- %nop4129 = alloca i1, i1 0
- %nop4130 = alloca i1, i1 0
- %nop4131 = alloca i1, i1 0
- %nop4132 = alloca i1, i1 0
- %nop4133 = alloca i1, i1 0
- %nop4134 = alloca i1, i1 0
- %nop4135 = alloca i1, i1 0
- %nop4136 = alloca i1, i1 0
- %nop4137 = alloca i1, i1 0
- %nop4138 = alloca i1, i1 0
- %nop4139 = alloca i1, i1 0
- %nop4140 = alloca i1, i1 0
- %nop4141 = alloca i1, i1 0
- %nop4142 = alloca i1, i1 0
- %nop4143 = alloca i1, i1 0
- %nop4144 = alloca i1, i1 0
- %nop4145 = alloca i1, i1 0
- %nop4146 = alloca i1, i1 0
- %nop4147 = alloca i1, i1 0
- %nop4148 = alloca i1, i1 0
- %nop4149 = alloca i1, i1 0
- %nop4150 = alloca i1, i1 0
- %nop4151 = alloca i1, i1 0
- %nop4152 = alloca i1, i1 0
- %nop4153 = alloca i1, i1 0
- %nop4154 = alloca i1, i1 0
- %nop4155 = alloca i1, i1 0
- %nop4156 = alloca i1, i1 0
- %nop4157 = alloca i1, i1 0
- %nop4158 = alloca i1, i1 0
- %nop4159 = alloca i1, i1 0
- %nop4160 = alloca i1, i1 0
- %nop4161 = alloca i1, i1 0
- %nop4162 = alloca i1, i1 0
- %nop4163 = alloca i1, i1 0
- %nop4164 = alloca i1, i1 0
- %nop4165 = alloca i1, i1 0
- %nop4166 = alloca i1, i1 0
- %nop4167 = alloca i1, i1 0
- %nop4168 = alloca i1, i1 0
- %nop4169 = alloca i1, i1 0
- %nop4170 = alloca i1, i1 0
- %nop4171 = alloca i1, i1 0
- %nop4172 = alloca i1, i1 0
- %nop4173 = alloca i1, i1 0
- %nop4174 = alloca i1, i1 0
- %nop4175 = alloca i1, i1 0
- %nop4176 = alloca i1, i1 0
- %nop4177 = alloca i1, i1 0
- %nop4178 = alloca i1, i1 0
- %nop4179 = alloca i1, i1 0
- %nop4180 = alloca i1, i1 0
- %nop4181 = alloca i1, i1 0
- %nop4182 = alloca i1, i1 0
- %nop4183 = alloca i1, i1 0
- %nop4184 = alloca i1, i1 0
- %nop4185 = alloca i1, i1 0
- %nop4186 = alloca i1, i1 0
- %nop4187 = alloca i1, i1 0
- %nop4188 = alloca i1, i1 0
- %nop4189 = alloca i1, i1 0
- %nop4190 = alloca i1, i1 0
- %nop4191 = alloca i1, i1 0
- %nop4192 = alloca i1, i1 0
- %nop4193 = alloca i1, i1 0
- %nop4194 = alloca i1, i1 0
- %nop4195 = alloca i1, i1 0
- %nop4196 = alloca i1, i1 0
- %nop4197 = alloca i1, i1 0
- %nop4198 = alloca i1, i1 0
- %nop4199 = alloca i1, i1 0
- %nop4200 = alloca i1, i1 0
- %nop4201 = alloca i1, i1 0
- %nop4202 = alloca i1, i1 0
- %nop4203 = alloca i1, i1 0
- %nop4204 = alloca i1, i1 0
- %nop4205 = alloca i1, i1 0
- %nop4206 = alloca i1, i1 0
- %nop4207 = alloca i1, i1 0
- %nop4208 = alloca i1, i1 0
- %nop4209 = alloca i1, i1 0
- %nop4210 = alloca i1, i1 0
- %nop4211 = alloca i1, i1 0
- %nop4212 = alloca i1, i1 0
- %nop4213 = alloca i1, i1 0
- %nop4214 = alloca i1, i1 0
- %nop4215 = alloca i1, i1 0
- %nop4216 = alloca i1, i1 0
- %nop4217 = alloca i1, i1 0
- %nop4218 = alloca i1, i1 0
- %nop4219 = alloca i1, i1 0
- %nop4220 = alloca i1, i1 0
- %nop4221 = alloca i1, i1 0
- %nop4222 = alloca i1, i1 0
- %nop4223 = alloca i1, i1 0
- %nop4224 = alloca i1, i1 0
- %nop4225 = alloca i1, i1 0
- %nop4226 = alloca i1, i1 0
- %nop4227 = alloca i1, i1 0
- %nop4228 = alloca i1, i1 0
- %nop4229 = alloca i1, i1 0
- %nop4230 = alloca i1, i1 0
- %nop4231 = alloca i1, i1 0
- %nop4232 = alloca i1, i1 0
- %nop4233 = alloca i1, i1 0
- %nop4234 = alloca i1, i1 0
- %nop4235 = alloca i1, i1 0
- %nop4236 = alloca i1, i1 0
- %nop4237 = alloca i1, i1 0
- %nop4238 = alloca i1, i1 0
- %nop4239 = alloca i1, i1 0
- %nop4240 = alloca i1, i1 0
- %nop4241 = alloca i1, i1 0
- %nop4242 = alloca i1, i1 0
- %nop4243 = alloca i1, i1 0
- %nop4244 = alloca i1, i1 0
- %nop4245 = alloca i1, i1 0
- %nop4246 = alloca i1, i1 0
- %nop4247 = alloca i1, i1 0
- %nop4248 = alloca i1, i1 0
- %nop4249 = alloca i1, i1 0
- %nop4250 = alloca i1, i1 0
- %nop4251 = alloca i1, i1 0
- %nop4252 = alloca i1, i1 0
- %nop4253 = alloca i1, i1 0
- %nop4254 = alloca i1, i1 0
- %nop4255 = alloca i1, i1 0
- %nop4256 = alloca i1, i1 0
- %nop4257 = alloca i1, i1 0
- %nop4258 = alloca i1, i1 0
- %nop4259 = alloca i1, i1 0
- %nop4260 = alloca i1, i1 0
- %nop4261 = alloca i1, i1 0
- %nop4262 = alloca i1, i1 0
- %nop4263 = alloca i1, i1 0
- %nop4264 = alloca i1, i1 0
- %nop4265 = alloca i1, i1 0
- %nop4266 = alloca i1, i1 0
- %nop4267 = alloca i1, i1 0
- %nop4268 = alloca i1, i1 0
- %nop4269 = alloca i1, i1 0
- %nop4270 = alloca i1, i1 0
- %nop4271 = alloca i1, i1 0
- %nop4272 = alloca i1, i1 0
- %nop4273 = alloca i1, i1 0
- %nop4274 = alloca i1, i1 0
- %nop4275 = alloca i1, i1 0
- %nop4276 = alloca i1, i1 0
- %nop4277 = alloca i1, i1 0
- %nop4278 = alloca i1, i1 0
- %nop4279 = alloca i1, i1 0
- %nop4280 = alloca i1, i1 0
- %nop4281 = alloca i1, i1 0
- %nop4282 = alloca i1, i1 0
- %nop4283 = alloca i1, i1 0
- %nop4284 = alloca i1, i1 0
- %nop4285 = alloca i1, i1 0
- %nop4286 = alloca i1, i1 0
- %nop4287 = alloca i1, i1 0
- %nop4288 = alloca i1, i1 0
- %nop4289 = alloca i1, i1 0
- %nop4290 = alloca i1, i1 0
- %nop4291 = alloca i1, i1 0
- %nop4292 = alloca i1, i1 0
- %nop4293 = alloca i1, i1 0
- %nop4294 = alloca i1, i1 0
- %nop4295 = alloca i1, i1 0
- %nop4296 = alloca i1, i1 0
- %nop4297 = alloca i1, i1 0
- %nop4298 = alloca i1, i1 0
- %nop4299 = alloca i1, i1 0
- %nop4300 = alloca i1, i1 0
- %nop4301 = alloca i1, i1 0
- %nop4302 = alloca i1, i1 0
- %nop4303 = alloca i1, i1 0
- %nop4304 = alloca i1, i1 0
- %nop4305 = alloca i1, i1 0
- %nop4306 = alloca i1, i1 0
- %nop4307 = alloca i1, i1 0
- %nop4308 = alloca i1, i1 0
- %nop4309 = alloca i1, i1 0
- %nop4310 = alloca i1, i1 0
- %nop4311 = alloca i1, i1 0
- %nop4312 = alloca i1, i1 0
- %nop4313 = alloca i1, i1 0
- %nop4314 = alloca i1, i1 0
- %nop4315 = alloca i1, i1 0
- %nop4316 = alloca i1, i1 0
- %nop4317 = alloca i1, i1 0
- %nop4318 = alloca i1, i1 0
- %nop4319 = alloca i1, i1 0
- %nop4320 = alloca i1, i1 0
- %nop4321 = alloca i1, i1 0
- %nop4322 = alloca i1, i1 0
- %nop4323 = alloca i1, i1 0
- %nop4324 = alloca i1, i1 0
- %nop4325 = alloca i1, i1 0
- %nop4326 = alloca i1, i1 0
- %nop4327 = alloca i1, i1 0
- %nop4328 = alloca i1, i1 0
- %nop4329 = alloca i1, i1 0
- %nop4330 = alloca i1, i1 0
- %nop4331 = alloca i1, i1 0
- %nop4332 = alloca i1, i1 0
- %nop4333 = alloca i1, i1 0
- %nop4334 = alloca i1, i1 0
- %nop4335 = alloca i1, i1 0
- %nop4336 = alloca i1, i1 0
- %nop4337 = alloca i1, i1 0
- %nop4338 = alloca i1, i1 0
- %nop4339 = alloca i1, i1 0
- %nop4340 = alloca i1, i1 0
- %nop4341 = alloca i1, i1 0
- %nop4342 = alloca i1, i1 0
- %nop4343 = alloca i1, i1 0
- %nop4344 = alloca i1, i1 0
- %nop4345 = alloca i1, i1 0
- %nop4346 = alloca i1, i1 0
- %nop4347 = alloca i1, i1 0
- %nop4348 = alloca i1, i1 0
- %nop4349 = alloca i1, i1 0
- %nop4350 = alloca i1, i1 0
- %nop4351 = alloca i1, i1 0
- %nop4352 = alloca i1, i1 0
- %nop4353 = alloca i1, i1 0
- %nop4354 = alloca i1, i1 0
- %nop4355 = alloca i1, i1 0
- %nop4356 = alloca i1, i1 0
- %nop4357 = alloca i1, i1 0
- %nop4358 = alloca i1, i1 0
- %nop4359 = alloca i1, i1 0
- %nop4360 = alloca i1, i1 0
- %nop4361 = alloca i1, i1 0
- %nop4362 = alloca i1, i1 0
- %nop4363 = alloca i1, i1 0
- %nop4364 = alloca i1, i1 0
- %nop4365 = alloca i1, i1 0
- %nop4366 = alloca i1, i1 0
- %nop4367 = alloca i1, i1 0
- %nop4368 = alloca i1, i1 0
- %nop4369 = alloca i1, i1 0
- %nop4370 = alloca i1, i1 0
- %nop4371 = alloca i1, i1 0
- %nop4372 = alloca i1, i1 0
- %nop4373 = alloca i1, i1 0
- %nop4374 = alloca i1, i1 0
- %nop4375 = alloca i1, i1 0
- %nop4376 = alloca i1, i1 0
- %nop4377 = alloca i1, i1 0
- %nop4378 = alloca i1, i1 0
- %nop4379 = alloca i1, i1 0
- %nop4380 = alloca i1, i1 0
- %nop4381 = alloca i1, i1 0
- %nop4382 = alloca i1, i1 0
- %nop4383 = alloca i1, i1 0
- %nop4384 = alloca i1, i1 0
- %nop4385 = alloca i1, i1 0
- %nop4386 = alloca i1, i1 0
- %nop4387 = alloca i1, i1 0
- %nop4388 = alloca i1, i1 0
- %nop4389 = alloca i1, i1 0
- %nop4390 = alloca i1, i1 0
- %nop4391 = alloca i1, i1 0
- %nop4392 = alloca i1, i1 0
- %nop4393 = alloca i1, i1 0
- %nop4394 = alloca i1, i1 0
- %nop4395 = alloca i1, i1 0
- %nop4396 = alloca i1, i1 0
- %nop4397 = alloca i1, i1 0
- %nop4398 = alloca i1, i1 0
- %nop4399 = alloca i1, i1 0
- %nop4400 = alloca i1, i1 0
- %nop4401 = alloca i1, i1 0
- %nop4402 = alloca i1, i1 0
- %nop4403 = alloca i1, i1 0
- %nop4404 = alloca i1, i1 0
- %nop4405 = alloca i1, i1 0
- %nop4406 = alloca i1, i1 0
- %nop4407 = alloca i1, i1 0
- %nop4408 = alloca i1, i1 0
- %nop4409 = alloca i1, i1 0
- %nop4410 = alloca i1, i1 0
- %nop4411 = alloca i1, i1 0
- %nop4412 = alloca i1, i1 0
- %nop4413 = alloca i1, i1 0
- %nop4414 = alloca i1, i1 0
- %nop4415 = alloca i1, i1 0
- %nop4416 = alloca i1, i1 0
- %nop4417 = alloca i1, i1 0
- %nop4418 = alloca i1, i1 0
- %nop4419 = alloca i1, i1 0
- %nop4420 = alloca i1, i1 0
- %nop4421 = alloca i1, i1 0
- %nop4422 = alloca i1, i1 0
- %nop4423 = alloca i1, i1 0
- %nop4424 = alloca i1, i1 0
- %nop4425 = alloca i1, i1 0
- %nop4426 = alloca i1, i1 0
- %nop4427 = alloca i1, i1 0
- %nop4428 = alloca i1, i1 0
- %nop4429 = alloca i1, i1 0
- %nop4430 = alloca i1, i1 0
- %nop4431 = alloca i1, i1 0
- %nop4432 = alloca i1, i1 0
- %nop4433 = alloca i1, i1 0
- %nop4434 = alloca i1, i1 0
- %nop4435 = alloca i1, i1 0
- %nop4436 = alloca i1, i1 0
- %nop4437 = alloca i1, i1 0
- %nop4438 = alloca i1, i1 0
- %nop4439 = alloca i1, i1 0
- %nop4440 = alloca i1, i1 0
- %nop4441 = alloca i1, i1 0
- %nop4442 = alloca i1, i1 0
- %nop4443 = alloca i1, i1 0
- %nop4444 = alloca i1, i1 0
- %nop4445 = alloca i1, i1 0
- %nop4446 = alloca i1, i1 0
- %nop4447 = alloca i1, i1 0
- %nop4448 = alloca i1, i1 0
- %nop4449 = alloca i1, i1 0
- %nop4450 = alloca i1, i1 0
- %nop4451 = alloca i1, i1 0
- %nop4452 = alloca i1, i1 0
- %nop4453 = alloca i1, i1 0
- %nop4454 = alloca i1, i1 0
- %nop4455 = alloca i1, i1 0
- %nop4456 = alloca i1, i1 0
- %nop4457 = alloca i1, i1 0
- %nop4458 = alloca i1, i1 0
- %nop4459 = alloca i1, i1 0
- %nop4460 = alloca i1, i1 0
- %nop4461 = alloca i1, i1 0
- %nop4462 = alloca i1, i1 0
- %nop4463 = alloca i1, i1 0
- %nop4464 = alloca i1, i1 0
- %nop4465 = alloca i1, i1 0
- %nop4466 = alloca i1, i1 0
- %nop4467 = alloca i1, i1 0
- %nop4468 = alloca i1, i1 0
- %nop4469 = alloca i1, i1 0
- %nop4470 = alloca i1, i1 0
- %nop4471 = alloca i1, i1 0
- %nop4472 = alloca i1, i1 0
- %nop4473 = alloca i1, i1 0
- %nop4474 = alloca i1, i1 0
- %nop4475 = alloca i1, i1 0
- %nop4476 = alloca i1, i1 0
- %nop4477 = alloca i1, i1 0
- %nop4478 = alloca i1, i1 0
- %nop4479 = alloca i1, i1 0
- %nop4480 = alloca i1, i1 0
- %nop4481 = alloca i1, i1 0
- %nop4482 = alloca i1, i1 0
- %nop4483 = alloca i1, i1 0
- %nop4484 = alloca i1, i1 0
- %nop4485 = alloca i1, i1 0
- %nop4486 = alloca i1, i1 0
- %nop4487 = alloca i1, i1 0
- %nop4488 = alloca i1, i1 0
- %nop4489 = alloca i1, i1 0
- %nop4490 = alloca i1, i1 0
- %nop4491 = alloca i1, i1 0
- %nop4492 = alloca i1, i1 0
- %nop4493 = alloca i1, i1 0
- %nop4494 = alloca i1, i1 0
- %nop4495 = alloca i1, i1 0
- %nop4496 = alloca i1, i1 0
- %nop4497 = alloca i1, i1 0
- %nop4498 = alloca i1, i1 0
- %nop4499 = alloca i1, i1 0
- %nop4500 = alloca i1, i1 0
- %nop4501 = alloca i1, i1 0
- %nop4502 = alloca i1, i1 0
- %nop4503 = alloca i1, i1 0
- %nop4504 = alloca i1, i1 0
- %nop4505 = alloca i1, i1 0
- %nop4506 = alloca i1, i1 0
- %nop4507 = alloca i1, i1 0
- %nop4508 = alloca i1, i1 0
- %nop4509 = alloca i1, i1 0
- %nop4510 = alloca i1, i1 0
- %nop4511 = alloca i1, i1 0
- %nop4512 = alloca i1, i1 0
- %nop4513 = alloca i1, i1 0
- %nop4514 = alloca i1, i1 0
- %nop4515 = alloca i1, i1 0
- %nop4516 = alloca i1, i1 0
- %nop4517 = alloca i1, i1 0
- %nop4518 = alloca i1, i1 0
- %nop4519 = alloca i1, i1 0
- %nop4520 = alloca i1, i1 0
- %nop4521 = alloca i1, i1 0
- %nop4522 = alloca i1, i1 0
- %nop4523 = alloca i1, i1 0
- %nop4524 = alloca i1, i1 0
- %nop4525 = alloca i1, i1 0
- %nop4526 = alloca i1, i1 0
- %nop4527 = alloca i1, i1 0
- %nop4528 = alloca i1, i1 0
- %nop4529 = alloca i1, i1 0
- %nop4530 = alloca i1, i1 0
- %nop4531 = alloca i1, i1 0
- %nop4532 = alloca i1, i1 0
- %nop4533 = alloca i1, i1 0
- %nop4534 = alloca i1, i1 0
- %nop4535 = alloca i1, i1 0
- %nop4536 = alloca i1, i1 0
- %nop4537 = alloca i1, i1 0
- %nop4538 = alloca i1, i1 0
- %nop4539 = alloca i1, i1 0
- %nop4540 = alloca i1, i1 0
- %nop4541 = alloca i1, i1 0
- %nop4542 = alloca i1, i1 0
- %nop4543 = alloca i1, i1 0
- %nop4544 = alloca i1, i1 0
- %nop4545 = alloca i1, i1 0
- %nop4546 = alloca i1, i1 0
- %nop4547 = alloca i1, i1 0
- %nop4548 = alloca i1, i1 0
- %nop4549 = alloca i1, i1 0
- %nop4550 = alloca i1, i1 0
- %nop4551 = alloca i1, i1 0
- %nop4552 = alloca i1, i1 0
- %nop4553 = alloca i1, i1 0
- %nop4554 = alloca i1, i1 0
- %nop4555 = alloca i1, i1 0
- %nop4556 = alloca i1, i1 0
- %nop4557 = alloca i1, i1 0
- %nop4558 = alloca i1, i1 0
- %nop4559 = alloca i1, i1 0
- %nop4560 = alloca i1, i1 0
- %nop4561 = alloca i1, i1 0
- %nop4562 = alloca i1, i1 0
- %nop4563 = alloca i1, i1 0
- %nop4564 = alloca i1, i1 0
- %nop4565 = alloca i1, i1 0
- %nop4566 = alloca i1, i1 0
- %nop4567 = alloca i1, i1 0
- %nop4568 = alloca i1, i1 0
- %nop4569 = alloca i1, i1 0
- %nop4570 = alloca i1, i1 0
- %nop4571 = alloca i1, i1 0
- %nop4572 = alloca i1, i1 0
- %nop4573 = alloca i1, i1 0
- %nop4574 = alloca i1, i1 0
- %nop4575 = alloca i1, i1 0
- %nop4576 = alloca i1, i1 0
- %nop4577 = alloca i1, i1 0
- %nop4578 = alloca i1, i1 0
- %nop4579 = alloca i1, i1 0
- %nop4580 = alloca i1, i1 0
- %nop4581 = alloca i1, i1 0
- %nop4582 = alloca i1, i1 0
- %nop4583 = alloca i1, i1 0
- %nop4584 = alloca i1, i1 0
- %nop4585 = alloca i1, i1 0
- %nop4586 = alloca i1, i1 0
- %nop4587 = alloca i1, i1 0
- %nop4588 = alloca i1, i1 0
- %nop4589 = alloca i1, i1 0
- %nop4590 = alloca i1, i1 0
- %nop4591 = alloca i1, i1 0
- %nop4592 = alloca i1, i1 0
- %nop4593 = alloca i1, i1 0
- %nop4594 = alloca i1, i1 0
- %nop4595 = alloca i1, i1 0
- %nop4596 = alloca i1, i1 0
- %nop4597 = alloca i1, i1 0
- %nop4598 = alloca i1, i1 0
- %nop4599 = alloca i1, i1 0
- %nop4600 = alloca i1, i1 0
- %nop4601 = alloca i1, i1 0
- %nop4602 = alloca i1, i1 0
- %nop4603 = alloca i1, i1 0
- %nop4604 = alloca i1, i1 0
- %nop4605 = alloca i1, i1 0
- %nop4606 = alloca i1, i1 0
- %nop4607 = alloca i1, i1 0
- %nop4608 = alloca i1, i1 0
- %nop4609 = alloca i1, i1 0
- %nop4610 = alloca i1, i1 0
- %nop4611 = alloca i1, i1 0
- %nop4612 = alloca i1, i1 0
- %nop4613 = alloca i1, i1 0
- %nop4614 = alloca i1, i1 0
- %nop4615 = alloca i1, i1 0
- %nop4616 = alloca i1, i1 0
- %nop4617 = alloca i1, i1 0
- %nop4618 = alloca i1, i1 0
- %nop4619 = alloca i1, i1 0
- %nop4620 = alloca i1, i1 0
- %nop4621 = alloca i1, i1 0
- %nop4622 = alloca i1, i1 0
- %nop4623 = alloca i1, i1 0
- %nop4624 = alloca i1, i1 0
- %nop4625 = alloca i1, i1 0
- %nop4626 = alloca i1, i1 0
- %nop4627 = alloca i1, i1 0
- %nop4628 = alloca i1, i1 0
- %nop4629 = alloca i1, i1 0
- %nop4630 = alloca i1, i1 0
- %nop4631 = alloca i1, i1 0
- %nop4632 = alloca i1, i1 0
- %nop4633 = alloca i1, i1 0
- %nop4634 = alloca i1, i1 0
- %nop4635 = alloca i1, i1 0
- %nop4636 = alloca i1, i1 0
- %nop4637 = alloca i1, i1 0
- %nop4638 = alloca i1, i1 0
- %nop4639 = alloca i1, i1 0
- %nop4640 = alloca i1, i1 0
- %nop4641 = alloca i1, i1 0
- %nop4642 = alloca i1, i1 0
- %nop4643 = alloca i1, i1 0
- %nop4644 = alloca i1, i1 0
- %nop4645 = alloca i1, i1 0
- %nop4646 = alloca i1, i1 0
- %nop4647 = alloca i1, i1 0
- %nop4648 = alloca i1, i1 0
- %nop4649 = alloca i1, i1 0
- %nop4650 = alloca i1, i1 0
- %nop4651 = alloca i1, i1 0
- %nop4652 = alloca i1, i1 0
- %nop4653 = alloca i1, i1 0
- %nop4654 = alloca i1, i1 0
- %nop4655 = alloca i1, i1 0
- %nop4656 = alloca i1, i1 0
- %nop4657 = alloca i1, i1 0
- %nop4658 = alloca i1, i1 0
- %nop4659 = alloca i1, i1 0
- %nop4660 = alloca i1, i1 0
- %nop4661 = alloca i1, i1 0
- %nop4662 = alloca i1, i1 0
- %nop4663 = alloca i1, i1 0
- %nop4664 = alloca i1, i1 0
- %nop4665 = alloca i1, i1 0
- %nop4666 = alloca i1, i1 0
- %nop4667 = alloca i1, i1 0
- %nop4668 = alloca i1, i1 0
- %nop4669 = alloca i1, i1 0
- %nop4670 = alloca i1, i1 0
- %nop4671 = alloca i1, i1 0
- %nop4672 = alloca i1, i1 0
- %nop4673 = alloca i1, i1 0
- %nop4674 = alloca i1, i1 0
- %nop4675 = alloca i1, i1 0
- %nop4676 = alloca i1, i1 0
- %nop4677 = alloca i1, i1 0
- %nop4678 = alloca i1, i1 0
- %nop4679 = alloca i1, i1 0
- %nop4680 = alloca i1, i1 0
- %nop4681 = alloca i1, i1 0
- %nop4682 = alloca i1, i1 0
- %nop4683 = alloca i1, i1 0
- %nop4684 = alloca i1, i1 0
- %nop4685 = alloca i1, i1 0
- %nop4686 = alloca i1, i1 0
- %nop4687 = alloca i1, i1 0
- %nop4688 = alloca i1, i1 0
- %nop4689 = alloca i1, i1 0
- %nop4690 = alloca i1, i1 0
- %nop4691 = alloca i1, i1 0
- %nop4692 = alloca i1, i1 0
- %nop4693 = alloca i1, i1 0
- %nop4694 = alloca i1, i1 0
- %nop4695 = alloca i1, i1 0
- %nop4696 = alloca i1, i1 0
- %nop4697 = alloca i1, i1 0
- %nop4698 = alloca i1, i1 0
- %nop4699 = alloca i1, i1 0
- %nop4700 = alloca i1, i1 0
- %nop4701 = alloca i1, i1 0
- %nop4702 = alloca i1, i1 0
- %nop4703 = alloca i1, i1 0
- %nop4704 = alloca i1, i1 0
- %nop4705 = alloca i1, i1 0
- %nop4706 = alloca i1, i1 0
- %nop4707 = alloca i1, i1 0
- %nop4708 = alloca i1, i1 0
- %nop4709 = alloca i1, i1 0
- %nop4710 = alloca i1, i1 0
- %nop4711 = alloca i1, i1 0
- %nop4712 = alloca i1, i1 0
- %nop4713 = alloca i1, i1 0
- %nop4714 = alloca i1, i1 0
- %nop4715 = alloca i1, i1 0
- %nop4716 = alloca i1, i1 0
- %nop4717 = alloca i1, i1 0
- %nop4718 = alloca i1, i1 0
- %nop4719 = alloca i1, i1 0
- %nop4720 = alloca i1, i1 0
- %nop4721 = alloca i1, i1 0
- %nop4722 = alloca i1, i1 0
- %nop4723 = alloca i1, i1 0
- %nop4724 = alloca i1, i1 0
- %nop4725 = alloca i1, i1 0
- %nop4726 = alloca i1, i1 0
- %nop4727 = alloca i1, i1 0
- %nop4728 = alloca i1, i1 0
- %nop4729 = alloca i1, i1 0
- %nop4730 = alloca i1, i1 0
- %nop4731 = alloca i1, i1 0
- %nop4732 = alloca i1, i1 0
- %nop4733 = alloca i1, i1 0
- %nop4734 = alloca i1, i1 0
- %nop4735 = alloca i1, i1 0
- %nop4736 = alloca i1, i1 0
- %nop4737 = alloca i1, i1 0
- %nop4738 = alloca i1, i1 0
- %nop4739 = alloca i1, i1 0
- %nop4740 = alloca i1, i1 0
- %nop4741 = alloca i1, i1 0
- %nop4742 = alloca i1, i1 0
- %nop4743 = alloca i1, i1 0
- %nop4744 = alloca i1, i1 0
- %nop4745 = alloca i1, i1 0
- %nop4746 = alloca i1, i1 0
- %nop4747 = alloca i1, i1 0
- %nop4748 = alloca i1, i1 0
- %nop4749 = alloca i1, i1 0
- %nop4750 = alloca i1, i1 0
- %nop4751 = alloca i1, i1 0
- %nop4752 = alloca i1, i1 0
- %nop4753 = alloca i1, i1 0
- %nop4754 = alloca i1, i1 0
- %nop4755 = alloca i1, i1 0
- %nop4756 = alloca i1, i1 0
- %nop4757 = alloca i1, i1 0
- %nop4758 = alloca i1, i1 0
- %nop4759 = alloca i1, i1 0
- %nop4760 = alloca i1, i1 0
- %nop4761 = alloca i1, i1 0
- %nop4762 = alloca i1, i1 0
- %nop4763 = alloca i1, i1 0
- %nop4764 = alloca i1, i1 0
- %nop4765 = alloca i1, i1 0
- %nop4766 = alloca i1, i1 0
- %nop4767 = alloca i1, i1 0
- %nop4768 = alloca i1, i1 0
- %nop4769 = alloca i1, i1 0
- %nop4770 = alloca i1, i1 0
- %nop4771 = alloca i1, i1 0
- %nop4772 = alloca i1, i1 0
- %nop4773 = alloca i1, i1 0
- %nop4774 = alloca i1, i1 0
- %nop4775 = alloca i1, i1 0
- %nop4776 = alloca i1, i1 0
- %nop4777 = alloca i1, i1 0
- %nop4778 = alloca i1, i1 0
- %nop4779 = alloca i1, i1 0
- %nop4780 = alloca i1, i1 0
- %nop4781 = alloca i1, i1 0
- %nop4782 = alloca i1, i1 0
- %nop4783 = alloca i1, i1 0
- %nop4784 = alloca i1, i1 0
- %nop4785 = alloca i1, i1 0
- %nop4786 = alloca i1, i1 0
- %nop4787 = alloca i1, i1 0
- %nop4788 = alloca i1, i1 0
- %nop4789 = alloca i1, i1 0
- %nop4790 = alloca i1, i1 0
- %nop4791 = alloca i1, i1 0
- %nop4792 = alloca i1, i1 0
- %nop4793 = alloca i1, i1 0
- %nop4794 = alloca i1, i1 0
- %nop4795 = alloca i1, i1 0
- %nop4796 = alloca i1, i1 0
- %nop4797 = alloca i1, i1 0
- %nop4798 = alloca i1, i1 0
- %nop4799 = alloca i1, i1 0
- %nop4800 = alloca i1, i1 0
- %nop4801 = alloca i1, i1 0
- %nop4802 = alloca i1, i1 0
- %nop4803 = alloca i1, i1 0
- %nop4804 = alloca i1, i1 0
- %nop4805 = alloca i1, i1 0
- %nop4806 = alloca i1, i1 0
- %nop4807 = alloca i1, i1 0
- %nop4808 = alloca i1, i1 0
- %nop4809 = alloca i1, i1 0
- %nop4810 = alloca i1, i1 0
- %nop4811 = alloca i1, i1 0
- %nop4812 = alloca i1, i1 0
- %nop4813 = alloca i1, i1 0
- %nop4814 = alloca i1, i1 0
- %nop4815 = alloca i1, i1 0
- %nop4816 = alloca i1, i1 0
- %nop4817 = alloca i1, i1 0
- %nop4818 = alloca i1, i1 0
- %nop4819 = alloca i1, i1 0
- %nop4820 = alloca i1, i1 0
- %nop4821 = alloca i1, i1 0
- %nop4822 = alloca i1, i1 0
- %nop4823 = alloca i1, i1 0
- %nop4824 = alloca i1, i1 0
- %nop4825 = alloca i1, i1 0
- %nop4826 = alloca i1, i1 0
- %nop4827 = alloca i1, i1 0
- %nop4828 = alloca i1, i1 0
- %nop4829 = alloca i1, i1 0
- %nop4830 = alloca i1, i1 0
- %nop4831 = alloca i1, i1 0
- %nop4832 = alloca i1, i1 0
- %nop4833 = alloca i1, i1 0
- %nop4834 = alloca i1, i1 0
- %nop4835 = alloca i1, i1 0
- %nop4836 = alloca i1, i1 0
- %nop4837 = alloca i1, i1 0
- %nop4838 = alloca i1, i1 0
- %nop4839 = alloca i1, i1 0
- %nop4840 = alloca i1, i1 0
- %nop4841 = alloca i1, i1 0
- %nop4842 = alloca i1, i1 0
- %nop4843 = alloca i1, i1 0
- %nop4844 = alloca i1, i1 0
- %nop4845 = alloca i1, i1 0
- %nop4846 = alloca i1, i1 0
- %nop4847 = alloca i1, i1 0
- %nop4848 = alloca i1, i1 0
- %nop4849 = alloca i1, i1 0
- %nop4850 = alloca i1, i1 0
- %nop4851 = alloca i1, i1 0
- %nop4852 = alloca i1, i1 0
- %nop4853 = alloca i1, i1 0
- %nop4854 = alloca i1, i1 0
- %nop4855 = alloca i1, i1 0
- %nop4856 = alloca i1, i1 0
- %nop4857 = alloca i1, i1 0
- %nop4858 = alloca i1, i1 0
- %nop4859 = alloca i1, i1 0
- %nop4860 = alloca i1, i1 0
- %nop4861 = alloca i1, i1 0
- %nop4862 = alloca i1, i1 0
- %nop4863 = alloca i1, i1 0
- %nop4864 = alloca i1, i1 0
- %nop4865 = alloca i1, i1 0
- %nop4866 = alloca i1, i1 0
- %nop4867 = alloca i1, i1 0
- %nop4868 = alloca i1, i1 0
- %nop4869 = alloca i1, i1 0
- %nop4870 = alloca i1, i1 0
- %nop4871 = alloca i1, i1 0
- %nop4872 = alloca i1, i1 0
- %nop4873 = alloca i1, i1 0
- %nop4874 = alloca i1, i1 0
- %nop4875 = alloca i1, i1 0
- %nop4876 = alloca i1, i1 0
- %nop4877 = alloca i1, i1 0
- %nop4878 = alloca i1, i1 0
- %nop4879 = alloca i1, i1 0
- %nop4880 = alloca i1, i1 0
- %nop4881 = alloca i1, i1 0
- %nop4882 = alloca i1, i1 0
- %nop4883 = alloca i1, i1 0
- %nop4884 = alloca i1, i1 0
- %nop4885 = alloca i1, i1 0
- %nop4886 = alloca i1, i1 0
- %nop4887 = alloca i1, i1 0
- %nop4888 = alloca i1, i1 0
- %nop4889 = alloca i1, i1 0
- %nop4890 = alloca i1, i1 0
- %nop4891 = alloca i1, i1 0
- %nop4892 = alloca i1, i1 0
- %nop4893 = alloca i1, i1 0
- %nop4894 = alloca i1, i1 0
- %nop4895 = alloca i1, i1 0
- %nop4896 = alloca i1, i1 0
- %nop4897 = alloca i1, i1 0
- %nop4898 = alloca i1, i1 0
- %nop4899 = alloca i1, i1 0
- %nop4900 = alloca i1, i1 0
- %nop4901 = alloca i1, i1 0
- %nop4902 = alloca i1, i1 0
- %nop4903 = alloca i1, i1 0
- %nop4904 = alloca i1, i1 0
- %nop4905 = alloca i1, i1 0
- %nop4906 = alloca i1, i1 0
- %nop4907 = alloca i1, i1 0
- %nop4908 = alloca i1, i1 0
- %nop4909 = alloca i1, i1 0
- %nop4910 = alloca i1, i1 0
- %nop4911 = alloca i1, i1 0
- %nop4912 = alloca i1, i1 0
- %nop4913 = alloca i1, i1 0
- %nop4914 = alloca i1, i1 0
- %nop4915 = alloca i1, i1 0
- %nop4916 = alloca i1, i1 0
- %nop4917 = alloca i1, i1 0
- %nop4918 = alloca i1, i1 0
- %nop4919 = alloca i1, i1 0
- %nop4920 = alloca i1, i1 0
- %nop4921 = alloca i1, i1 0
- %nop4922 = alloca i1, i1 0
- %nop4923 = alloca i1, i1 0
- %nop4924 = alloca i1, i1 0
- %nop4925 = alloca i1, i1 0
- %nop4926 = alloca i1, i1 0
- %nop4927 = alloca i1, i1 0
- %nop4928 = alloca i1, i1 0
- %nop4929 = alloca i1, i1 0
- %nop4930 = alloca i1, i1 0
- %nop4931 = alloca i1, i1 0
- %nop4932 = alloca i1, i1 0
- %nop4933 = alloca i1, i1 0
- %nop4934 = alloca i1, i1 0
- %nop4935 = alloca i1, i1 0
- %nop4936 = alloca i1, i1 0
- %nop4937 = alloca i1, i1 0
- %nop4938 = alloca i1, i1 0
- %nop4939 = alloca i1, i1 0
- %nop4940 = alloca i1, i1 0
- %nop4941 = alloca i1, i1 0
- %nop4942 = alloca i1, i1 0
- %nop4943 = alloca i1, i1 0
- %nop4944 = alloca i1, i1 0
- %nop4945 = alloca i1, i1 0
- %nop4946 = alloca i1, i1 0
- %nop4947 = alloca i1, i1 0
- %nop4948 = alloca i1, i1 0
- %nop4949 = alloca i1, i1 0
- %nop4950 = alloca i1, i1 0
- %nop4951 = alloca i1, i1 0
- %nop4952 = alloca i1, i1 0
- %nop4953 = alloca i1, i1 0
- %nop4954 = alloca i1, i1 0
- %nop4955 = alloca i1, i1 0
- %nop4956 = alloca i1, i1 0
- %nop4957 = alloca i1, i1 0
- %nop4958 = alloca i1, i1 0
- %nop4959 = alloca i1, i1 0
- %nop4960 = alloca i1, i1 0
- %nop4961 = alloca i1, i1 0
- %nop4962 = alloca i1, i1 0
- %nop4963 = alloca i1, i1 0
- %nop4964 = alloca i1, i1 0
- %nop4965 = alloca i1, i1 0
- %nop4966 = alloca i1, i1 0
- %nop4967 = alloca i1, i1 0
- %nop4968 = alloca i1, i1 0
- %nop4969 = alloca i1, i1 0
- %nop4970 = alloca i1, i1 0
- %nop4971 = alloca i1, i1 0
- %nop4972 = alloca i1, i1 0
- %nop4973 = alloca i1, i1 0
- %nop4974 = alloca i1, i1 0
- %nop4975 = alloca i1, i1 0
- %nop4976 = alloca i1, i1 0
- %nop4977 = alloca i1, i1 0
- %nop4978 = alloca i1, i1 0
- %nop4979 = alloca i1, i1 0
- %nop4980 = alloca i1, i1 0
- %nop4981 = alloca i1, i1 0
- %nop4982 = alloca i1, i1 0
- %nop4983 = alloca i1, i1 0
- %nop4984 = alloca i1, i1 0
- %nop4985 = alloca i1, i1 0
- %nop4986 = alloca i1, i1 0
- %nop4987 = alloca i1, i1 0
- %nop4988 = alloca i1, i1 0
- %nop4989 = alloca i1, i1 0
- %nop4990 = alloca i1, i1 0
- %nop4991 = alloca i1, i1 0
- %nop4992 = alloca i1, i1 0
- %nop4993 = alloca i1, i1 0
- %nop4994 = alloca i1, i1 0
- %nop4995 = alloca i1, i1 0
- %nop4996 = alloca i1, i1 0
- %nop4997 = alloca i1, i1 0
- %nop4998 = alloca i1, i1 0
- %nop4999 = alloca i1, i1 0
- %nop5000 = alloca i1, i1 0
- %nop5001 = alloca i1, i1 0
- %nop5002 = alloca i1, i1 0
- %nop5003 = alloca i1, i1 0
- %nop5004 = alloca i1, i1 0
- %nop5005 = alloca i1, i1 0
- %nop5006 = alloca i1, i1 0
- %nop5007 = alloca i1, i1 0
- %nop5008 = alloca i1, i1 0
- %nop5009 = alloca i1, i1 0
- %nop5010 = alloca i1, i1 0
- %nop5011 = alloca i1, i1 0
- %nop5012 = alloca i1, i1 0
- %nop5013 = alloca i1, i1 0
- %nop5014 = alloca i1, i1 0
- %nop5015 = alloca i1, i1 0
- %nop5016 = alloca i1, i1 0
- %nop5017 = alloca i1, i1 0
- %nop5018 = alloca i1, i1 0
- %nop5019 = alloca i1, i1 0
- %nop5020 = alloca i1, i1 0
- %nop5021 = alloca i1, i1 0
- %nop5022 = alloca i1, i1 0
- %nop5023 = alloca i1, i1 0
- %nop5024 = alloca i1, i1 0
- %nop5025 = alloca i1, i1 0
- %nop5026 = alloca i1, i1 0
- %nop5027 = alloca i1, i1 0
- %nop5028 = alloca i1, i1 0
- %nop5029 = alloca i1, i1 0
- %nop5030 = alloca i1, i1 0
- %nop5031 = alloca i1, i1 0
- %nop5032 = alloca i1, i1 0
- %nop5033 = alloca i1, i1 0
- %nop5034 = alloca i1, i1 0
- %nop5035 = alloca i1, i1 0
- %nop5036 = alloca i1, i1 0
- %nop5037 = alloca i1, i1 0
- %nop5038 = alloca i1, i1 0
- %nop5039 = alloca i1, i1 0
- %nop5040 = alloca i1, i1 0
- %nop5041 = alloca i1, i1 0
- %nop5042 = alloca i1, i1 0
- %nop5043 = alloca i1, i1 0
- %nop5044 = alloca i1, i1 0
- %nop5045 = alloca i1, i1 0
- %nop5046 = alloca i1, i1 0
- %nop5047 = alloca i1, i1 0
- %nop5048 = alloca i1, i1 0
- %nop5049 = alloca i1, i1 0
- %nop5050 = alloca i1, i1 0
- %nop5051 = alloca i1, i1 0
- %nop5052 = alloca i1, i1 0
- %nop5053 = alloca i1, i1 0
- %nop5054 = alloca i1, i1 0
- %nop5055 = alloca i1, i1 0
- %nop5056 = alloca i1, i1 0
- %nop5057 = alloca i1, i1 0
- %nop5058 = alloca i1, i1 0
- %nop5059 = alloca i1, i1 0
- %nop5060 = alloca i1, i1 0
- %nop5061 = alloca i1, i1 0
- %nop5062 = alloca i1, i1 0
- %nop5063 = alloca i1, i1 0
- %nop5064 = alloca i1, i1 0
- %nop5065 = alloca i1, i1 0
- %nop5066 = alloca i1, i1 0
- %nop5067 = alloca i1, i1 0
- %nop5068 = alloca i1, i1 0
- %nop5069 = alloca i1, i1 0
- %nop5070 = alloca i1, i1 0
- %nop5071 = alloca i1, i1 0
- %nop5072 = alloca i1, i1 0
- %nop5073 = alloca i1, i1 0
- %nop5074 = alloca i1, i1 0
- %nop5075 = alloca i1, i1 0
- %nop5076 = alloca i1, i1 0
- %nop5077 = alloca i1, i1 0
- %nop5078 = alloca i1, i1 0
- %nop5079 = alloca i1, i1 0
- %nop5080 = alloca i1, i1 0
- %nop5081 = alloca i1, i1 0
- %nop5082 = alloca i1, i1 0
- %nop5083 = alloca i1, i1 0
- %nop5084 = alloca i1, i1 0
- %nop5085 = alloca i1, i1 0
- %nop5086 = alloca i1, i1 0
- %nop5087 = alloca i1, i1 0
- %nop5088 = alloca i1, i1 0
- %nop5089 = alloca i1, i1 0
- %nop5090 = alloca i1, i1 0
- %nop5091 = alloca i1, i1 0
- %nop5092 = alloca i1, i1 0
- %nop5093 = alloca i1, i1 0
- %nop5094 = alloca i1, i1 0
- %nop5095 = alloca i1, i1 0
- %nop5096 = alloca i1, i1 0
- %nop5097 = alloca i1, i1 0
- %nop5098 = alloca i1, i1 0
- %nop5099 = alloca i1, i1 0
- %nop5100 = alloca i1, i1 0
- %nop5101 = alloca i1, i1 0
- %nop5102 = alloca i1, i1 0
- %nop5103 = alloca i1, i1 0
- %nop5104 = alloca i1, i1 0
- %nop5105 = alloca i1, i1 0
- %nop5106 = alloca i1, i1 0
- %nop5107 = alloca i1, i1 0
- %nop5108 = alloca i1, i1 0
- %nop5109 = alloca i1, i1 0
- %nop5110 = alloca i1, i1 0
- %nop5111 = alloca i1, i1 0
- %nop5112 = alloca i1, i1 0
- %nop5113 = alloca i1, i1 0
- %nop5114 = alloca i1, i1 0
- %nop5115 = alloca i1, i1 0
- %nop5116 = alloca i1, i1 0
- %nop5117 = alloca i1, i1 0
- %nop5118 = alloca i1, i1 0
- %nop5119 = alloca i1, i1 0
- %nop5120 = alloca i1, i1 0
- %nop5121 = alloca i1, i1 0
- %nop5122 = alloca i1, i1 0
- %nop5123 = alloca i1, i1 0
- %nop5124 = alloca i1, i1 0
- %nop5125 = alloca i1, i1 0
- %nop5126 = alloca i1, i1 0
- %nop5127 = alloca i1, i1 0
- %nop5128 = alloca i1, i1 0
- %nop5129 = alloca i1, i1 0
- %nop5130 = alloca i1, i1 0
- %nop5131 = alloca i1, i1 0
- %nop5132 = alloca i1, i1 0
- %nop5133 = alloca i1, i1 0
- %nop5134 = alloca i1, i1 0
- %nop5135 = alloca i1, i1 0
- %nop5136 = alloca i1, i1 0
- %nop5137 = alloca i1, i1 0
- %nop5138 = alloca i1, i1 0
- %nop5139 = alloca i1, i1 0
- %nop5140 = alloca i1, i1 0
- %nop5141 = alloca i1, i1 0
- %nop5142 = alloca i1, i1 0
- %nop5143 = alloca i1, i1 0
- %nop5144 = alloca i1, i1 0
- %nop5145 = alloca i1, i1 0
- %nop5146 = alloca i1, i1 0
- %nop5147 = alloca i1, i1 0
- %nop5148 = alloca i1, i1 0
- %nop5149 = alloca i1, i1 0
- %nop5150 = alloca i1, i1 0
- %nop5151 = alloca i1, i1 0
- %nop5152 = alloca i1, i1 0
- %nop5153 = alloca i1, i1 0
- %nop5154 = alloca i1, i1 0
- %nop5155 = alloca i1, i1 0
- %nop5156 = alloca i1, i1 0
- %nop5157 = alloca i1, i1 0
- %nop5158 = alloca i1, i1 0
- %nop5159 = alloca i1, i1 0
- %nop5160 = alloca i1, i1 0
- %nop5161 = alloca i1, i1 0
- %nop5162 = alloca i1, i1 0
- %nop5163 = alloca i1, i1 0
- %nop5164 = alloca i1, i1 0
- %nop5165 = alloca i1, i1 0
- %nop5166 = alloca i1, i1 0
- %nop5167 = alloca i1, i1 0
- %nop5168 = alloca i1, i1 0
- %nop5169 = alloca i1, i1 0
- %nop5170 = alloca i1, i1 0
- %nop5171 = alloca i1, i1 0
- %nop5172 = alloca i1, i1 0
- %nop5173 = alloca i1, i1 0
- %nop5174 = alloca i1, i1 0
- %nop5175 = alloca i1, i1 0
- %nop5176 = alloca i1, i1 0
- %nop5177 = alloca i1, i1 0
- %nop5178 = alloca i1, i1 0
- %nop5179 = alloca i1, i1 0
- %nop5180 = alloca i1, i1 0
- %nop5181 = alloca i1, i1 0
- %nop5182 = alloca i1, i1 0
- %nop5183 = alloca i1, i1 0
- %nop5184 = alloca i1, i1 0
- %nop5185 = alloca i1, i1 0
- %nop5186 = alloca i1, i1 0
- %nop5187 = alloca i1, i1 0
- %nop5188 = alloca i1, i1 0
- %nop5189 = alloca i1, i1 0
- %nop5190 = alloca i1, i1 0
- %nop5191 = alloca i1, i1 0
- %nop5192 = alloca i1, i1 0
- %nop5193 = alloca i1, i1 0
- %nop5194 = alloca i1, i1 0
- %nop5195 = alloca i1, i1 0
- %nop5196 = alloca i1, i1 0
- %nop5197 = alloca i1, i1 0
- %nop5198 = alloca i1, i1 0
- %nop5199 = alloca i1, i1 0
- %nop5200 = alloca i1, i1 0
- %nop5201 = alloca i1, i1 0
- %nop5202 = alloca i1, i1 0
- %nop5203 = alloca i1, i1 0
- %nop5204 = alloca i1, i1 0
- %nop5205 = alloca i1, i1 0
- %nop5206 = alloca i1, i1 0
- %nop5207 = alloca i1, i1 0
- %nop5208 = alloca i1, i1 0
- %nop5209 = alloca i1, i1 0
- %nop5210 = alloca i1, i1 0
- %nop5211 = alloca i1, i1 0
- %nop5212 = alloca i1, i1 0
- %nop5213 = alloca i1, i1 0
- %nop5214 = alloca i1, i1 0
- %nop5215 = alloca i1, i1 0
- %nop5216 = alloca i1, i1 0
- %nop5217 = alloca i1, i1 0
- %nop5218 = alloca i1, i1 0
- %nop5219 = alloca i1, i1 0
- %nop5220 = alloca i1, i1 0
- %nop5221 = alloca i1, i1 0
- %nop5222 = alloca i1, i1 0
- %nop5223 = alloca i1, i1 0
- %nop5224 = alloca i1, i1 0
- %nop5225 = alloca i1, i1 0
- %nop5226 = alloca i1, i1 0
- %nop5227 = alloca i1, i1 0
- %nop5228 = alloca i1, i1 0
- %nop5229 = alloca i1, i1 0
- %nop5230 = alloca i1, i1 0
- %nop5231 = alloca i1, i1 0
- %nop5232 = alloca i1, i1 0
- %nop5233 = alloca i1, i1 0
- %nop5234 = alloca i1, i1 0
- %nop5235 = alloca i1, i1 0
- %nop5236 = alloca i1, i1 0
- %nop5237 = alloca i1, i1 0
- %nop5238 = alloca i1, i1 0
- %nop5239 = alloca i1, i1 0
- %nop5240 = alloca i1, i1 0
- %nop5241 = alloca i1, i1 0
- %nop5242 = alloca i1, i1 0
- %nop5243 = alloca i1, i1 0
- %nop5244 = alloca i1, i1 0
- %nop5245 = alloca i1, i1 0
- %nop5246 = alloca i1, i1 0
- %nop5247 = alloca i1, i1 0
- %nop5248 = alloca i1, i1 0
- %nop5249 = alloca i1, i1 0
- %nop5250 = alloca i1, i1 0
- %nop5251 = alloca i1, i1 0
- %nop5252 = alloca i1, i1 0
- %nop5253 = alloca i1, i1 0
- %nop5254 = alloca i1, i1 0
- %nop5255 = alloca i1, i1 0
- %nop5256 = alloca i1, i1 0
- %nop5257 = alloca i1, i1 0
- %nop5258 = alloca i1, i1 0
- %nop5259 = alloca i1, i1 0
- %nop5260 = alloca i1, i1 0
- %nop5261 = alloca i1, i1 0
- %nop5262 = alloca i1, i1 0
- %nop5263 = alloca i1, i1 0
- %nop5264 = alloca i1, i1 0
- %nop5265 = alloca i1, i1 0
- %nop5266 = alloca i1, i1 0
- %nop5267 = alloca i1, i1 0
- %nop5268 = alloca i1, i1 0
- %nop5269 = alloca i1, i1 0
- %nop5270 = alloca i1, i1 0
- %nop5271 = alloca i1, i1 0
- %nop5272 = alloca i1, i1 0
- %nop5273 = alloca i1, i1 0
- %nop5274 = alloca i1, i1 0
- %nop5275 = alloca i1, i1 0
- %nop5276 = alloca i1, i1 0
- %nop5277 = alloca i1, i1 0
- %nop5278 = alloca i1, i1 0
- %nop5279 = alloca i1, i1 0
- %nop5280 = alloca i1, i1 0
- %nop5281 = alloca i1, i1 0
- %nop5282 = alloca i1, i1 0
- %nop5283 = alloca i1, i1 0
- %nop5284 = alloca i1, i1 0
- %nop5285 = alloca i1, i1 0
- %nop5286 = alloca i1, i1 0
- %nop5287 = alloca i1, i1 0
- %nop5288 = alloca i1, i1 0
- %nop5289 = alloca i1, i1 0
- %nop5290 = alloca i1, i1 0
- %nop5291 = alloca i1, i1 0
- %nop5292 = alloca i1, i1 0
- %nop5293 = alloca i1, i1 0
- %nop5294 = alloca i1, i1 0
- %nop5295 = alloca i1, i1 0
- %nop5296 = alloca i1, i1 0
- %nop5297 = alloca i1, i1 0
- %nop5298 = alloca i1, i1 0
- %nop5299 = alloca i1, i1 0
- %nop5300 = alloca i1, i1 0
- %nop5301 = alloca i1, i1 0
- %nop5302 = alloca i1, i1 0
- %nop5303 = alloca i1, i1 0
- %nop5304 = alloca i1, i1 0
- %nop5305 = alloca i1, i1 0
- %nop5306 = alloca i1, i1 0
- %nop5307 = alloca i1, i1 0
- %nop5308 = alloca i1, i1 0
- %nop5309 = alloca i1, i1 0
- %nop5310 = alloca i1, i1 0
- %nop5311 = alloca i1, i1 0
- %nop5312 = alloca i1, i1 0
- %nop5313 = alloca i1, i1 0
- %nop5314 = alloca i1, i1 0
- %nop5315 = alloca i1, i1 0
- %nop5316 = alloca i1, i1 0
- %nop5317 = alloca i1, i1 0
- %nop5318 = alloca i1, i1 0
- %nop5319 = alloca i1, i1 0
- %nop5320 = alloca i1, i1 0
- %nop5321 = alloca i1, i1 0
- %nop5322 = alloca i1, i1 0
- %nop5323 = alloca i1, i1 0
- %nop5324 = alloca i1, i1 0
- %nop5325 = alloca i1, i1 0
- %nop5326 = alloca i1, i1 0
- %nop5327 = alloca i1, i1 0
- %nop5328 = alloca i1, i1 0
- %nop5329 = alloca i1, i1 0
- %nop5330 = alloca i1, i1 0
- %nop5331 = alloca i1, i1 0
- %nop5332 = alloca i1, i1 0
- %nop5333 = alloca i1, i1 0
- %nop5334 = alloca i1, i1 0
- %nop5335 = alloca i1, i1 0
- %nop5336 = alloca i1, i1 0
- %nop5337 = alloca i1, i1 0
- %nop5338 = alloca i1, i1 0
- %nop5339 = alloca i1, i1 0
- %nop5340 = alloca i1, i1 0
- %nop5341 = alloca i1, i1 0
- %nop5342 = alloca i1, i1 0
- %nop5343 = alloca i1, i1 0
- %nop5344 = alloca i1, i1 0
- %nop5345 = alloca i1, i1 0
- %nop5346 = alloca i1, i1 0
- %nop5347 = alloca i1, i1 0
- %nop5348 = alloca i1, i1 0
- %nop5349 = alloca i1, i1 0
- %nop5350 = alloca i1, i1 0
- %nop5351 = alloca i1, i1 0
- %nop5352 = alloca i1, i1 0
- %nop5353 = alloca i1, i1 0
- %nop5354 = alloca i1, i1 0
- %nop5355 = alloca i1, i1 0
- %nop5356 = alloca i1, i1 0
- %nop5357 = alloca i1, i1 0
- %nop5358 = alloca i1, i1 0
- %nop5359 = alloca i1, i1 0
- %nop5360 = alloca i1, i1 0
- %nop5361 = alloca i1, i1 0
- %nop5362 = alloca i1, i1 0
- %nop5363 = alloca i1, i1 0
- %nop5364 = alloca i1, i1 0
- %nop5365 = alloca i1, i1 0
- %nop5366 = alloca i1, i1 0
- %nop5367 = alloca i1, i1 0
- %nop5368 = alloca i1, i1 0
- %nop5369 = alloca i1, i1 0
- %nop5370 = alloca i1, i1 0
- %nop5371 = alloca i1, i1 0
- %nop5372 = alloca i1, i1 0
- %nop5373 = alloca i1, i1 0
- %nop5374 = alloca i1, i1 0
- %nop5375 = alloca i1, i1 0
- %nop5376 = alloca i1, i1 0
- %nop5377 = alloca i1, i1 0
- %nop5378 = alloca i1, i1 0
- %nop5379 = alloca i1, i1 0
- %nop5380 = alloca i1, i1 0
- %nop5381 = alloca i1, i1 0
- %nop5382 = alloca i1, i1 0
- %nop5383 = alloca i1, i1 0
- %nop5384 = alloca i1, i1 0
- %nop5385 = alloca i1, i1 0
- %nop5386 = alloca i1, i1 0
- %nop5387 = alloca i1, i1 0
- %nop5388 = alloca i1, i1 0
- %nop5389 = alloca i1, i1 0
- %nop5390 = alloca i1, i1 0
- %nop5391 = alloca i1, i1 0
- %nop5392 = alloca i1, i1 0
- %nop5393 = alloca i1, i1 0
- %nop5394 = alloca i1, i1 0
- %nop5395 = alloca i1, i1 0
- %nop5396 = alloca i1, i1 0
- %nop5397 = alloca i1, i1 0
- %nop5398 = alloca i1, i1 0
- %nop5399 = alloca i1, i1 0
- %nop5400 = alloca i1, i1 0
- %nop5401 = alloca i1, i1 0
- %nop5402 = alloca i1, i1 0
- %nop5403 = alloca i1, i1 0
- %nop5404 = alloca i1, i1 0
- %nop5405 = alloca i1, i1 0
- %nop5406 = alloca i1, i1 0
- %nop5407 = alloca i1, i1 0
- %nop5408 = alloca i1, i1 0
- %nop5409 = alloca i1, i1 0
- %nop5410 = alloca i1, i1 0
- %nop5411 = alloca i1, i1 0
- %nop5412 = alloca i1, i1 0
- %nop5413 = alloca i1, i1 0
- %nop5414 = alloca i1, i1 0
- %nop5415 = alloca i1, i1 0
- %nop5416 = alloca i1, i1 0
- %nop5417 = alloca i1, i1 0
- %nop5418 = alloca i1, i1 0
- %nop5419 = alloca i1, i1 0
- %nop5420 = alloca i1, i1 0
- %nop5421 = alloca i1, i1 0
- %nop5422 = alloca i1, i1 0
- %nop5423 = alloca i1, i1 0
- %nop5424 = alloca i1, i1 0
- %nop5425 = alloca i1, i1 0
- %nop5426 = alloca i1, i1 0
- %nop5427 = alloca i1, i1 0
- %nop5428 = alloca i1, i1 0
- %nop5429 = alloca i1, i1 0
- %nop5430 = alloca i1, i1 0
- %nop5431 = alloca i1, i1 0
- %nop5432 = alloca i1, i1 0
- %nop5433 = alloca i1, i1 0
- %nop5434 = alloca i1, i1 0
- %nop5435 = alloca i1, i1 0
- %nop5436 = alloca i1, i1 0
- %nop5437 = alloca i1, i1 0
- %nop5438 = alloca i1, i1 0
- %nop5439 = alloca i1, i1 0
- %nop5440 = alloca i1, i1 0
- %nop5441 = alloca i1, i1 0
- %nop5442 = alloca i1, i1 0
- %nop5443 = alloca i1, i1 0
- %nop5444 = alloca i1, i1 0
- %nop5445 = alloca i1, i1 0
- %nop5446 = alloca i1, i1 0
- %nop5447 = alloca i1, i1 0
- %nop5448 = alloca i1, i1 0
- %nop5449 = alloca i1, i1 0
- %nop5450 = alloca i1, i1 0
- %nop5451 = alloca i1, i1 0
- %nop5452 = alloca i1, i1 0
- %nop5453 = alloca i1, i1 0
- %nop5454 = alloca i1, i1 0
- %nop5455 = alloca i1, i1 0
- %nop5456 = alloca i1, i1 0
- %nop5457 = alloca i1, i1 0
- %nop5458 = alloca i1, i1 0
- %nop5459 = alloca i1, i1 0
- %nop5460 = alloca i1, i1 0
- %nop5461 = alloca i1, i1 0
- %nop5462 = alloca i1, i1 0
- %nop5463 = alloca i1, i1 0
- %nop5464 = alloca i1, i1 0
- %nop5465 = alloca i1, i1 0
- %nop5466 = alloca i1, i1 0
- %nop5467 = alloca i1, i1 0
- %nop5468 = alloca i1, i1 0
- %nop5469 = alloca i1, i1 0
- %nop5470 = alloca i1, i1 0
- %nop5471 = alloca i1, i1 0
- %nop5472 = alloca i1, i1 0
- %nop5473 = alloca i1, i1 0
- %nop5474 = alloca i1, i1 0
- %nop5475 = alloca i1, i1 0
- %nop5476 = alloca i1, i1 0
- %nop5477 = alloca i1, i1 0
- %nop5478 = alloca i1, i1 0
- %nop5479 = alloca i1, i1 0
- %nop5480 = alloca i1, i1 0
- %nop5481 = alloca i1, i1 0
- %nop5482 = alloca i1, i1 0
- %nop5483 = alloca i1, i1 0
- %nop5484 = alloca i1, i1 0
- %nop5485 = alloca i1, i1 0
- %nop5486 = alloca i1, i1 0
- %nop5487 = alloca i1, i1 0
- %nop5488 = alloca i1, i1 0
- %nop5489 = alloca i1, i1 0
- %nop5490 = alloca i1, i1 0
- %nop5491 = alloca i1, i1 0
- %nop5492 = alloca i1, i1 0
- %nop5493 = alloca i1, i1 0
- %nop5494 = alloca i1, i1 0
- %nop5495 = alloca i1, i1 0
- %nop5496 = alloca i1, i1 0
- %nop5497 = alloca i1, i1 0
- %nop5498 = alloca i1, i1 0
- %nop5499 = alloca i1, i1 0
- %nop5500 = alloca i1, i1 0
- %nop5501 = alloca i1, i1 0
- %nop5502 = alloca i1, i1 0
- %nop5503 = alloca i1, i1 0
- %nop5504 = alloca i1, i1 0
- %nop5505 = alloca i1, i1 0
- %nop5506 = alloca i1, i1 0
- %nop5507 = alloca i1, i1 0
- %nop5508 = alloca i1, i1 0
- %nop5509 = alloca i1, i1 0
- %nop5510 = alloca i1, i1 0
- %nop5511 = alloca i1, i1 0
- %nop5512 = alloca i1, i1 0
- %nop5513 = alloca i1, i1 0
- %nop5514 = alloca i1, i1 0
- %nop5515 = alloca i1, i1 0
- %nop5516 = alloca i1, i1 0
- %nop5517 = alloca i1, i1 0
- %nop5518 = alloca i1, i1 0
- %nop5519 = alloca i1, i1 0
- %nop5520 = alloca i1, i1 0
- %nop5521 = alloca i1, i1 0
- %nop5522 = alloca i1, i1 0
- %nop5523 = alloca i1, i1 0
- %nop5524 = alloca i1, i1 0
- %nop5525 = alloca i1, i1 0
- %nop5526 = alloca i1, i1 0
- %nop5527 = alloca i1, i1 0
- %nop5528 = alloca i1, i1 0
- %nop5529 = alloca i1, i1 0
- %nop5530 = alloca i1, i1 0
- %nop5531 = alloca i1, i1 0
- %nop5532 = alloca i1, i1 0
- %nop5533 = alloca i1, i1 0
- %nop5534 = alloca i1, i1 0
- %nop5535 = alloca i1, i1 0
- %nop5536 = alloca i1, i1 0
- %nop5537 = alloca i1, i1 0
- %nop5538 = alloca i1, i1 0
- %nop5539 = alloca i1, i1 0
- %nop5540 = alloca i1, i1 0
- %nop5541 = alloca i1, i1 0
- %nop5542 = alloca i1, i1 0
- %nop5543 = alloca i1, i1 0
- %nop5544 = alloca i1, i1 0
- %nop5545 = alloca i1, i1 0
- %nop5546 = alloca i1, i1 0
- %nop5547 = alloca i1, i1 0
- %nop5548 = alloca i1, i1 0
- %nop5549 = alloca i1, i1 0
- %nop5550 = alloca i1, i1 0
- %nop5551 = alloca i1, i1 0
- %nop5552 = alloca i1, i1 0
- %nop5553 = alloca i1, i1 0
- %nop5554 = alloca i1, i1 0
- %nop5555 = alloca i1, i1 0
- %nop5556 = alloca i1, i1 0
- %nop5557 = alloca i1, i1 0
- %nop5558 = alloca i1, i1 0
- %nop5559 = alloca i1, i1 0
- %nop5560 = alloca i1, i1 0
- %nop5561 = alloca i1, i1 0
- %nop5562 = alloca i1, i1 0
- %nop5563 = alloca i1, i1 0
- %nop5564 = alloca i1, i1 0
- %nop5565 = alloca i1, i1 0
- %nop5566 = alloca i1, i1 0
- %nop5567 = alloca i1, i1 0
- %nop5568 = alloca i1, i1 0
- %nop5569 = alloca i1, i1 0
- %nop5570 = alloca i1, i1 0
- %nop5571 = alloca i1, i1 0
- %nop5572 = alloca i1, i1 0
- %nop5573 = alloca i1, i1 0
- %nop5574 = alloca i1, i1 0
- %nop5575 = alloca i1, i1 0
- %nop5576 = alloca i1, i1 0
- %nop5577 = alloca i1, i1 0
- %nop5578 = alloca i1, i1 0
- %nop5579 = alloca i1, i1 0
- %nop5580 = alloca i1, i1 0
- %nop5581 = alloca i1, i1 0
- %nop5582 = alloca i1, i1 0
- %nop5583 = alloca i1, i1 0
- %nop5584 = alloca i1, i1 0
- %nop5585 = alloca i1, i1 0
- %nop5586 = alloca i1, i1 0
- %nop5587 = alloca i1, i1 0
- %nop5588 = alloca i1, i1 0
- %nop5589 = alloca i1, i1 0
- %nop5590 = alloca i1, i1 0
- %nop5591 = alloca i1, i1 0
- %nop5592 = alloca i1, i1 0
- %nop5593 = alloca i1, i1 0
- %nop5594 = alloca i1, i1 0
- %nop5595 = alloca i1, i1 0
- %nop5596 = alloca i1, i1 0
- %nop5597 = alloca i1, i1 0
- %nop5598 = alloca i1, i1 0
- %nop5599 = alloca i1, i1 0
- %nop5600 = alloca i1, i1 0
- %nop5601 = alloca i1, i1 0
- %nop5602 = alloca i1, i1 0
- %nop5603 = alloca i1, i1 0
- %nop5604 = alloca i1, i1 0
- %nop5605 = alloca i1, i1 0
- %nop5606 = alloca i1, i1 0
- %nop5607 = alloca i1, i1 0
- %nop5608 = alloca i1, i1 0
- %nop5609 = alloca i1, i1 0
- %nop5610 = alloca i1, i1 0
- %nop5611 = alloca i1, i1 0
- %nop5612 = alloca i1, i1 0
- %nop5613 = alloca i1, i1 0
- %nop5614 = alloca i1, i1 0
- %nop5615 = alloca i1, i1 0
- %nop5616 = alloca i1, i1 0
- %nop5617 = alloca i1, i1 0
- %nop5618 = alloca i1, i1 0
- %nop5619 = alloca i1, i1 0
- %nop5620 = alloca i1, i1 0
- %nop5621 = alloca i1, i1 0
- %nop5622 = alloca i1, i1 0
- %nop5623 = alloca i1, i1 0
- %nop5624 = alloca i1, i1 0
- %nop5625 = alloca i1, i1 0
- %nop5626 = alloca i1, i1 0
- %nop5627 = alloca i1, i1 0
- %nop5628 = alloca i1, i1 0
- %nop5629 = alloca i1, i1 0
- %nop5630 = alloca i1, i1 0
- %nop5631 = alloca i1, i1 0
- %nop5632 = alloca i1, i1 0
- %nop5633 = alloca i1, i1 0
- %nop5634 = alloca i1, i1 0
- %nop5635 = alloca i1, i1 0
- %nop5636 = alloca i1, i1 0
- %nop5637 = alloca i1, i1 0
- %nop5638 = alloca i1, i1 0
- %nop5639 = alloca i1, i1 0
- %nop5640 = alloca i1, i1 0
- %nop5641 = alloca i1, i1 0
- %nop5642 = alloca i1, i1 0
- %nop5643 = alloca i1, i1 0
- %nop5644 = alloca i1, i1 0
- %nop5645 = alloca i1, i1 0
- %nop5646 = alloca i1, i1 0
- %nop5647 = alloca i1, i1 0
- %nop5648 = alloca i1, i1 0
- %nop5649 = alloca i1, i1 0
- %nop5650 = alloca i1, i1 0
- %nop5651 = alloca i1, i1 0
- %nop5652 = alloca i1, i1 0
- %nop5653 = alloca i1, i1 0
- %nop5654 = alloca i1, i1 0
- %nop5655 = alloca i1, i1 0
- %nop5656 = alloca i1, i1 0
- %nop5657 = alloca i1, i1 0
- %nop5658 = alloca i1, i1 0
- %nop5659 = alloca i1, i1 0
- %nop5660 = alloca i1, i1 0
- %nop5661 = alloca i1, i1 0
- %nop5662 = alloca i1, i1 0
- %nop5663 = alloca i1, i1 0
- %nop5664 = alloca i1, i1 0
- %nop5665 = alloca i1, i1 0
- %nop5666 = alloca i1, i1 0
- %nop5667 = alloca i1, i1 0
- %nop5668 = alloca i1, i1 0
- %nop5669 = alloca i1, i1 0
- %nop5670 = alloca i1, i1 0
- %nop5671 = alloca i1, i1 0
- %nop5672 = alloca i1, i1 0
- %nop5673 = alloca i1, i1 0
- %nop5674 = alloca i1, i1 0
- %nop5675 = alloca i1, i1 0
- %nop5676 = alloca i1, i1 0
- %nop5677 = alloca i1, i1 0
- %nop5678 = alloca i1, i1 0
- %nop5679 = alloca i1, i1 0
- %nop5680 = alloca i1, i1 0
- %nop5681 = alloca i1, i1 0
- %nop5682 = alloca i1, i1 0
- %nop5683 = alloca i1, i1 0
- %nop5684 = alloca i1, i1 0
- %nop5685 = alloca i1, i1 0
- %nop5686 = alloca i1, i1 0
- %nop5687 = alloca i1, i1 0
- %nop5688 = alloca i1, i1 0
- %nop5689 = alloca i1, i1 0
- %nop5690 = alloca i1, i1 0
- %nop5691 = alloca i1, i1 0
- %nop5692 = alloca i1, i1 0
- %nop5693 = alloca i1, i1 0
- %nop5694 = alloca i1, i1 0
- %nop5695 = alloca i1, i1 0
- %nop5696 = alloca i1, i1 0
- %nop5697 = alloca i1, i1 0
- %nop5698 = alloca i1, i1 0
- %nop5699 = alloca i1, i1 0
- %nop5700 = alloca i1, i1 0
- %nop5701 = alloca i1, i1 0
- %nop5702 = alloca i1, i1 0
- %nop5703 = alloca i1, i1 0
- %nop5704 = alloca i1, i1 0
- %nop5705 = alloca i1, i1 0
- %nop5706 = alloca i1, i1 0
- %nop5707 = alloca i1, i1 0
- %nop5708 = alloca i1, i1 0
- %nop5709 = alloca i1, i1 0
- %nop5710 = alloca i1, i1 0
- %nop5711 = alloca i1, i1 0
- %nop5712 = alloca i1, i1 0
- %nop5713 = alloca i1, i1 0
- %nop5714 = alloca i1, i1 0
- %nop5715 = alloca i1, i1 0
- %nop5716 = alloca i1, i1 0
- %nop5717 = alloca i1, i1 0
- %nop5718 = alloca i1, i1 0
- %nop5719 = alloca i1, i1 0
- %nop5720 = alloca i1, i1 0
- %nop5721 = alloca i1, i1 0
- %nop5722 = alloca i1, i1 0
- %nop5723 = alloca i1, i1 0
- %nop5724 = alloca i1, i1 0
- %nop5725 = alloca i1, i1 0
- %nop5726 = alloca i1, i1 0
- %nop5727 = alloca i1, i1 0
- %nop5728 = alloca i1, i1 0
- %nop5729 = alloca i1, i1 0
- %nop5730 = alloca i1, i1 0
- %nop5731 = alloca i1, i1 0
- %nop5732 = alloca i1, i1 0
- %nop5733 = alloca i1, i1 0
- %nop5734 = alloca i1, i1 0
- %nop5735 = alloca i1, i1 0
- %nop5736 = alloca i1, i1 0
- %nop5737 = alloca i1, i1 0
- %nop5738 = alloca i1, i1 0
- %nop5739 = alloca i1, i1 0
- %nop5740 = alloca i1, i1 0
- %nop5741 = alloca i1, i1 0
- %nop5742 = alloca i1, i1 0
- %nop5743 = alloca i1, i1 0
- %nop5744 = alloca i1, i1 0
- %nop5745 = alloca i1, i1 0
- %nop5746 = alloca i1, i1 0
- %nop5747 = alloca i1, i1 0
- %nop5748 = alloca i1, i1 0
- %nop5749 = alloca i1, i1 0
- %nop5750 = alloca i1, i1 0
- %nop5751 = alloca i1, i1 0
- %nop5752 = alloca i1, i1 0
- %nop5753 = alloca i1, i1 0
- %nop5754 = alloca i1, i1 0
- %nop5755 = alloca i1, i1 0
- %nop5756 = alloca i1, i1 0
- %nop5757 = alloca i1, i1 0
- %nop5758 = alloca i1, i1 0
- %nop5759 = alloca i1, i1 0
- %nop5760 = alloca i1, i1 0
- %nop5761 = alloca i1, i1 0
- %nop5762 = alloca i1, i1 0
- %nop5763 = alloca i1, i1 0
- %nop5764 = alloca i1, i1 0
- %nop5765 = alloca i1, i1 0
- %nop5766 = alloca i1, i1 0
- %nop5767 = alloca i1, i1 0
- %nop5768 = alloca i1, i1 0
- %nop5769 = alloca i1, i1 0
- %nop5770 = alloca i1, i1 0
- %nop5771 = alloca i1, i1 0
- %nop5772 = alloca i1, i1 0
- %nop5773 = alloca i1, i1 0
- %nop5774 = alloca i1, i1 0
- %nop5775 = alloca i1, i1 0
- %nop5776 = alloca i1, i1 0
- %nop5777 = alloca i1, i1 0
- %nop5778 = alloca i1, i1 0
- %nop5779 = alloca i1, i1 0
- %nop5780 = alloca i1, i1 0
- %nop5781 = alloca i1, i1 0
- %nop5782 = alloca i1, i1 0
- %nop5783 = alloca i1, i1 0
- %nop5784 = alloca i1, i1 0
- %nop5785 = alloca i1, i1 0
- %nop5786 = alloca i1, i1 0
- %nop5787 = alloca i1, i1 0
- %nop5788 = alloca i1, i1 0
- %nop5789 = alloca i1, i1 0
- %nop5790 = alloca i1, i1 0
- %nop5791 = alloca i1, i1 0
- %nop5792 = alloca i1, i1 0
- %nop5793 = alloca i1, i1 0
- %nop5794 = alloca i1, i1 0
- %nop5795 = alloca i1, i1 0
- %nop5796 = alloca i1, i1 0
- %nop5797 = alloca i1, i1 0
- %nop5798 = alloca i1, i1 0
- %nop5799 = alloca i1, i1 0
- %nop5800 = alloca i1, i1 0
- %nop5801 = alloca i1, i1 0
- %nop5802 = alloca i1, i1 0
- %nop5803 = alloca i1, i1 0
- %nop5804 = alloca i1, i1 0
- %nop5805 = alloca i1, i1 0
- %nop5806 = alloca i1, i1 0
- %nop5807 = alloca i1, i1 0
- %nop5808 = alloca i1, i1 0
- %nop5809 = alloca i1, i1 0
- %nop5810 = alloca i1, i1 0
- %nop5811 = alloca i1, i1 0
- %nop5812 = alloca i1, i1 0
- %nop5813 = alloca i1, i1 0
- %nop5814 = alloca i1, i1 0
- %nop5815 = alloca i1, i1 0
- %nop5816 = alloca i1, i1 0
- %nop5817 = alloca i1, i1 0
- %nop5818 = alloca i1, i1 0
- %nop5819 = alloca i1, i1 0
- %nop5820 = alloca i1, i1 0
- %nop5821 = alloca i1, i1 0
- %nop5822 = alloca i1, i1 0
- %nop5823 = alloca i1, i1 0
- %nop5824 = alloca i1, i1 0
- %nop5825 = alloca i1, i1 0
- %nop5826 = alloca i1, i1 0
- %nop5827 = alloca i1, i1 0
- %nop5828 = alloca i1, i1 0
- %nop5829 = alloca i1, i1 0
- %nop5830 = alloca i1, i1 0
- %nop5831 = alloca i1, i1 0
- %nop5832 = alloca i1, i1 0
- %nop5833 = alloca i1, i1 0
- %nop5834 = alloca i1, i1 0
- %nop5835 = alloca i1, i1 0
- %nop5836 = alloca i1, i1 0
- %nop5837 = alloca i1, i1 0
- %nop5838 = alloca i1, i1 0
- %nop5839 = alloca i1, i1 0
- %nop5840 = alloca i1, i1 0
- %nop5841 = alloca i1, i1 0
- %nop5842 = alloca i1, i1 0
- %nop5843 = alloca i1, i1 0
- %nop5844 = alloca i1, i1 0
- %nop5845 = alloca i1, i1 0
- %nop5846 = alloca i1, i1 0
- %nop5847 = alloca i1, i1 0
- %nop5848 = alloca i1, i1 0
- %nop5849 = alloca i1, i1 0
- %nop5850 = alloca i1, i1 0
- %nop5851 = alloca i1, i1 0
- %nop5852 = alloca i1, i1 0
- %nop5853 = alloca i1, i1 0
- %nop5854 = alloca i1, i1 0
- %nop5855 = alloca i1, i1 0
- %nop5856 = alloca i1, i1 0
- %nop5857 = alloca i1, i1 0
- %nop5858 = alloca i1, i1 0
- %nop5859 = alloca i1, i1 0
- %nop5860 = alloca i1, i1 0
- %nop5861 = alloca i1, i1 0
- %nop5862 = alloca i1, i1 0
- %nop5863 = alloca i1, i1 0
- %nop5864 = alloca i1, i1 0
- %nop5865 = alloca i1, i1 0
- %nop5866 = alloca i1, i1 0
- %nop5867 = alloca i1, i1 0
- %nop5868 = alloca i1, i1 0
- %nop5869 = alloca i1, i1 0
- %nop5870 = alloca i1, i1 0
- %nop5871 = alloca i1, i1 0
- %nop5872 = alloca i1, i1 0
- %nop5873 = alloca i1, i1 0
- %nop5874 = alloca i1, i1 0
- %nop5875 = alloca i1, i1 0
- %nop5876 = alloca i1, i1 0
- %nop5877 = alloca i1, i1 0
- %nop5878 = alloca i1, i1 0
- %nop5879 = alloca i1, i1 0
- %nop5880 = alloca i1, i1 0
- %nop5881 = alloca i1, i1 0
- %nop5882 = alloca i1, i1 0
- %nop5883 = alloca i1, i1 0
- %nop5884 = alloca i1, i1 0
- %nop5885 = alloca i1, i1 0
- %nop5886 = alloca i1, i1 0
- %nop5887 = alloca i1, i1 0
- %nop5888 = alloca i1, i1 0
- %nop5889 = alloca i1, i1 0
- %nop5890 = alloca i1, i1 0
- %nop5891 = alloca i1, i1 0
- %nop5892 = alloca i1, i1 0
- %nop5893 = alloca i1, i1 0
- %nop5894 = alloca i1, i1 0
- %nop5895 = alloca i1, i1 0
- %nop5896 = alloca i1, i1 0
- %nop5897 = alloca i1, i1 0
- %nop5898 = alloca i1, i1 0
- %nop5899 = alloca i1, i1 0
- %nop5900 = alloca i1, i1 0
- %nop5901 = alloca i1, i1 0
- %nop5902 = alloca i1, i1 0
- %nop5903 = alloca i1, i1 0
- %nop5904 = alloca i1, i1 0
- %nop5905 = alloca i1, i1 0
- %nop5906 = alloca i1, i1 0
- %nop5907 = alloca i1, i1 0
- %nop5908 = alloca i1, i1 0
- %nop5909 = alloca i1, i1 0
- %nop5910 = alloca i1, i1 0
- %nop5911 = alloca i1, i1 0
- %nop5912 = alloca i1, i1 0
- %nop5913 = alloca i1, i1 0
- %nop5914 = alloca i1, i1 0
- %nop5915 = alloca i1, i1 0
- %nop5916 = alloca i1, i1 0
- %nop5917 = alloca i1, i1 0
- %nop5918 = alloca i1, i1 0
- %nop5919 = alloca i1, i1 0
- %nop5920 = alloca i1, i1 0
- %nop5921 = alloca i1, i1 0
- %nop5922 = alloca i1, i1 0
- %nop5923 = alloca i1, i1 0
- %nop5924 = alloca i1, i1 0
- %nop5925 = alloca i1, i1 0
- %nop5926 = alloca i1, i1 0
- %nop5927 = alloca i1, i1 0
- %nop5928 = alloca i1, i1 0
- %nop5929 = alloca i1, i1 0
- %nop5930 = alloca i1, i1 0
- %nop5931 = alloca i1, i1 0
- %nop5932 = alloca i1, i1 0
- %nop5933 = alloca i1, i1 0
- %nop5934 = alloca i1, i1 0
- %nop5935 = alloca i1, i1 0
- %nop5936 = alloca i1, i1 0
- %nop5937 = alloca i1, i1 0
- %nop5938 = alloca i1, i1 0
- %nop5939 = alloca i1, i1 0
- %nop5940 = alloca i1, i1 0
- %nop5941 = alloca i1, i1 0
- %nop5942 = alloca i1, i1 0
- %nop5943 = alloca i1, i1 0
- %nop5944 = alloca i1, i1 0
- %nop5945 = alloca i1, i1 0
- %nop5946 = alloca i1, i1 0
- %nop5947 = alloca i1, i1 0
- %nop5948 = alloca i1, i1 0
- %nop5949 = alloca i1, i1 0
- %nop5950 = alloca i1, i1 0
- %nop5951 = alloca i1, i1 0
- %nop5952 = alloca i1, i1 0
- %nop5953 = alloca i1, i1 0
- %nop5954 = alloca i1, i1 0
- %nop5955 = alloca i1, i1 0
- %nop5956 = alloca i1, i1 0
- %nop5957 = alloca i1, i1 0
- %nop5958 = alloca i1, i1 0
- %nop5959 = alloca i1, i1 0
- %nop5960 = alloca i1, i1 0
- %nop5961 = alloca i1, i1 0
- %nop5962 = alloca i1, i1 0
- %nop5963 = alloca i1, i1 0
- %nop5964 = alloca i1, i1 0
- %nop5965 = alloca i1, i1 0
- %nop5966 = alloca i1, i1 0
- %nop5967 = alloca i1, i1 0
- %nop5968 = alloca i1, i1 0
- %nop5969 = alloca i1, i1 0
- %nop5970 = alloca i1, i1 0
- %nop5971 = alloca i1, i1 0
- %nop5972 = alloca i1, i1 0
- %nop5973 = alloca i1, i1 0
- %nop5974 = alloca i1, i1 0
- %nop5975 = alloca i1, i1 0
- %nop5976 = alloca i1, i1 0
- %nop5977 = alloca i1, i1 0
- %nop5978 = alloca i1, i1 0
- %nop5979 = alloca i1, i1 0
- %nop5980 = alloca i1, i1 0
- %nop5981 = alloca i1, i1 0
- %nop5982 = alloca i1, i1 0
- %nop5983 = alloca i1, i1 0
- %nop5984 = alloca i1, i1 0
- %nop5985 = alloca i1, i1 0
- %nop5986 = alloca i1, i1 0
- %nop5987 = alloca i1, i1 0
- %nop5988 = alloca i1, i1 0
- %nop5989 = alloca i1, i1 0
- %nop5990 = alloca i1, i1 0
- %nop5991 = alloca i1, i1 0
- %nop5992 = alloca i1, i1 0
- %nop5993 = alloca i1, i1 0
- %nop5994 = alloca i1, i1 0
- %nop5995 = alloca i1, i1 0
- %nop5996 = alloca i1, i1 0
- %nop5997 = alloca i1, i1 0
- %nop5998 = alloca i1, i1 0
- %nop5999 = alloca i1, i1 0
- %nop6000 = alloca i1, i1 0
- %nop6001 = alloca i1, i1 0
- %nop6002 = alloca i1, i1 0
- %nop6003 = alloca i1, i1 0
- %nop6004 = alloca i1, i1 0
- %nop6005 = alloca i1, i1 0
- %nop6006 = alloca i1, i1 0
- %nop6007 = alloca i1, i1 0
- %nop6008 = alloca i1, i1 0
- %nop6009 = alloca i1, i1 0
- %nop6010 = alloca i1, i1 0
- %nop6011 = alloca i1, i1 0
- %nop6012 = alloca i1, i1 0
- %nop6013 = alloca i1, i1 0
- %nop6014 = alloca i1, i1 0
- %nop6015 = alloca i1, i1 0
- %nop6016 = alloca i1, i1 0
- %nop6017 = alloca i1, i1 0
- %nop6018 = alloca i1, i1 0
- %nop6019 = alloca i1, i1 0
- %nop6020 = alloca i1, i1 0
- %nop6021 = alloca i1, i1 0
- %nop6022 = alloca i1, i1 0
- %nop6023 = alloca i1, i1 0
- %nop6024 = alloca i1, i1 0
- %nop6025 = alloca i1, i1 0
- %nop6026 = alloca i1, i1 0
- %nop6027 = alloca i1, i1 0
- %nop6028 = alloca i1, i1 0
- %nop6029 = alloca i1, i1 0
- %nop6030 = alloca i1, i1 0
- %nop6031 = alloca i1, i1 0
- %nop6032 = alloca i1, i1 0
- %nop6033 = alloca i1, i1 0
- %nop6034 = alloca i1, i1 0
- %nop6035 = alloca i1, i1 0
- %nop6036 = alloca i1, i1 0
- %nop6037 = alloca i1, i1 0
- %nop6038 = alloca i1, i1 0
- %nop6039 = alloca i1, i1 0
- %nop6040 = alloca i1, i1 0
- %nop6041 = alloca i1, i1 0
- %nop6042 = alloca i1, i1 0
- %nop6043 = alloca i1, i1 0
- %nop6044 = alloca i1, i1 0
- %nop6045 = alloca i1, i1 0
- %nop6046 = alloca i1, i1 0
- %nop6047 = alloca i1, i1 0
- %nop6048 = alloca i1, i1 0
- %nop6049 = alloca i1, i1 0
- %nop6050 = alloca i1, i1 0
- %nop6051 = alloca i1, i1 0
- %nop6052 = alloca i1, i1 0
- %nop6053 = alloca i1, i1 0
- %nop6054 = alloca i1, i1 0
- %nop6055 = alloca i1, i1 0
- %nop6056 = alloca i1, i1 0
- %nop6057 = alloca i1, i1 0
- %nop6058 = alloca i1, i1 0
- %nop6059 = alloca i1, i1 0
- %nop6060 = alloca i1, i1 0
- %nop6061 = alloca i1, i1 0
- %nop6062 = alloca i1, i1 0
- %nop6063 = alloca i1, i1 0
- %nop6064 = alloca i1, i1 0
- %nop6065 = alloca i1, i1 0
- %nop6066 = alloca i1, i1 0
- %nop6067 = alloca i1, i1 0
- %nop6068 = alloca i1, i1 0
- %nop6069 = alloca i1, i1 0
- %nop6070 = alloca i1, i1 0
- %nop6071 = alloca i1, i1 0
- %nop6072 = alloca i1, i1 0
- %nop6073 = alloca i1, i1 0
- %nop6074 = alloca i1, i1 0
- %nop6075 = alloca i1, i1 0
- %nop6076 = alloca i1, i1 0
- %nop6077 = alloca i1, i1 0
- %nop6078 = alloca i1, i1 0
- %nop6079 = alloca i1, i1 0
- %nop6080 = alloca i1, i1 0
- %nop6081 = alloca i1, i1 0
- %nop6082 = alloca i1, i1 0
- %nop6083 = alloca i1, i1 0
- %nop6084 = alloca i1, i1 0
- %nop6085 = alloca i1, i1 0
- %nop6086 = alloca i1, i1 0
- %nop6087 = alloca i1, i1 0
- %nop6088 = alloca i1, i1 0
- %nop6089 = alloca i1, i1 0
- %nop6090 = alloca i1, i1 0
- %nop6091 = alloca i1, i1 0
- %nop6092 = alloca i1, i1 0
- %nop6093 = alloca i1, i1 0
- %nop6094 = alloca i1, i1 0
- %nop6095 = alloca i1, i1 0
- %nop6096 = alloca i1, i1 0
- %nop6097 = alloca i1, i1 0
- %nop6098 = alloca i1, i1 0
- %nop6099 = alloca i1, i1 0
- %nop6100 = alloca i1, i1 0
- %nop6101 = alloca i1, i1 0
- %nop6102 = alloca i1, i1 0
- %nop6103 = alloca i1, i1 0
- %nop6104 = alloca i1, i1 0
- %nop6105 = alloca i1, i1 0
- %nop6106 = alloca i1, i1 0
- %nop6107 = alloca i1, i1 0
- %nop6108 = alloca i1, i1 0
- %nop6109 = alloca i1, i1 0
- %nop6110 = alloca i1, i1 0
- %nop6111 = alloca i1, i1 0
- %nop6112 = alloca i1, i1 0
- %nop6113 = alloca i1, i1 0
- %nop6114 = alloca i1, i1 0
- %nop6115 = alloca i1, i1 0
- %nop6116 = alloca i1, i1 0
- %nop6117 = alloca i1, i1 0
- %nop6118 = alloca i1, i1 0
- %nop6119 = alloca i1, i1 0
- %nop6120 = alloca i1, i1 0
- %nop6121 = alloca i1, i1 0
- %nop6122 = alloca i1, i1 0
- %nop6123 = alloca i1, i1 0
- %nop6124 = alloca i1, i1 0
- %nop6125 = alloca i1, i1 0
- %nop6126 = alloca i1, i1 0
- %nop6127 = alloca i1, i1 0
- %nop6128 = alloca i1, i1 0
- %nop6129 = alloca i1, i1 0
- %nop6130 = alloca i1, i1 0
- %nop6131 = alloca i1, i1 0
- %nop6132 = alloca i1, i1 0
- %nop6133 = alloca i1, i1 0
- %nop6134 = alloca i1, i1 0
- %nop6135 = alloca i1, i1 0
- %nop6136 = alloca i1, i1 0
- %nop6137 = alloca i1, i1 0
- %nop6138 = alloca i1, i1 0
- %nop6139 = alloca i1, i1 0
- %nop6140 = alloca i1, i1 0
- %nop6141 = alloca i1, i1 0
- %nop6142 = alloca i1, i1 0
- %nop6143 = alloca i1, i1 0
- %nop6144 = alloca i1, i1 0
- %nop6145 = alloca i1, i1 0
- %nop6146 = alloca i1, i1 0
- %nop6147 = alloca i1, i1 0
- %nop6148 = alloca i1, i1 0
- %nop6149 = alloca i1, i1 0
- %nop6150 = alloca i1, i1 0
- %nop6151 = alloca i1, i1 0
- %nop6152 = alloca i1, i1 0
- %nop6153 = alloca i1, i1 0
- %nop6154 = alloca i1, i1 0
- %nop6155 = alloca i1, i1 0
- %nop6156 = alloca i1, i1 0
- %nop6157 = alloca i1, i1 0
- %nop6158 = alloca i1, i1 0
- %nop6159 = alloca i1, i1 0
- %nop6160 = alloca i1, i1 0
- %nop6161 = alloca i1, i1 0
- %nop6162 = alloca i1, i1 0
- %nop6163 = alloca i1, i1 0
- %nop6164 = alloca i1, i1 0
- %nop6165 = alloca i1, i1 0
- %nop6166 = alloca i1, i1 0
- %nop6167 = alloca i1, i1 0
- %nop6168 = alloca i1, i1 0
- %nop6169 = alloca i1, i1 0
- %nop6170 = alloca i1, i1 0
- %nop6171 = alloca i1, i1 0
- %nop6172 = alloca i1, i1 0
- %nop6173 = alloca i1, i1 0
- %nop6174 = alloca i1, i1 0
- %nop6175 = alloca i1, i1 0
- %nop6176 = alloca i1, i1 0
- %nop6177 = alloca i1, i1 0
- %nop6178 = alloca i1, i1 0
- %nop6179 = alloca i1, i1 0
- %nop6180 = alloca i1, i1 0
- %nop6181 = alloca i1, i1 0
- %nop6182 = alloca i1, i1 0
- %nop6183 = alloca i1, i1 0
- %nop6184 = alloca i1, i1 0
- %nop6185 = alloca i1, i1 0
- %nop6186 = alloca i1, i1 0
- %nop6187 = alloca i1, i1 0
- %nop6188 = alloca i1, i1 0
- %nop6189 = alloca i1, i1 0
- %nop6190 = alloca i1, i1 0
- %nop6191 = alloca i1, i1 0
- %nop6192 = alloca i1, i1 0
- %nop6193 = alloca i1, i1 0
- %nop6194 = alloca i1, i1 0
- %nop6195 = alloca i1, i1 0
- %nop6196 = alloca i1, i1 0
- %nop6197 = alloca i1, i1 0
- %nop6198 = alloca i1, i1 0
- %nop6199 = alloca i1, i1 0
- %nop6200 = alloca i1, i1 0
- %nop6201 = alloca i1, i1 0
- %nop6202 = alloca i1, i1 0
- %nop6203 = alloca i1, i1 0
- %nop6204 = alloca i1, i1 0
- %nop6205 = alloca i1, i1 0
- %nop6206 = alloca i1, i1 0
- %nop6207 = alloca i1, i1 0
- %nop6208 = alloca i1, i1 0
- %nop6209 = alloca i1, i1 0
- %nop6210 = alloca i1, i1 0
- %nop6211 = alloca i1, i1 0
- %nop6212 = alloca i1, i1 0
- %nop6213 = alloca i1, i1 0
- %nop6214 = alloca i1, i1 0
- %nop6215 = alloca i1, i1 0
- %nop6216 = alloca i1, i1 0
- %nop6217 = alloca i1, i1 0
- %nop6218 = alloca i1, i1 0
- %nop6219 = alloca i1, i1 0
- %nop6220 = alloca i1, i1 0
- %nop6221 = alloca i1, i1 0
- %nop6222 = alloca i1, i1 0
- %nop6223 = alloca i1, i1 0
- %nop6224 = alloca i1, i1 0
- %nop6225 = alloca i1, i1 0
- %nop6226 = alloca i1, i1 0
- %nop6227 = alloca i1, i1 0
- %nop6228 = alloca i1, i1 0
- %nop6229 = alloca i1, i1 0
- %nop6230 = alloca i1, i1 0
- %nop6231 = alloca i1, i1 0
- %nop6232 = alloca i1, i1 0
- %nop6233 = alloca i1, i1 0
- %nop6234 = alloca i1, i1 0
- %nop6235 = alloca i1, i1 0
- %nop6236 = alloca i1, i1 0
- %nop6237 = alloca i1, i1 0
- %nop6238 = alloca i1, i1 0
- %nop6239 = alloca i1, i1 0
- %nop6240 = alloca i1, i1 0
- %nop6241 = alloca i1, i1 0
- %nop6242 = alloca i1, i1 0
- %nop6243 = alloca i1, i1 0
- %nop6244 = alloca i1, i1 0
- %nop6245 = alloca i1, i1 0
- %nop6246 = alloca i1, i1 0
- %nop6247 = alloca i1, i1 0
- %nop6248 = alloca i1, i1 0
- %nop6249 = alloca i1, i1 0
- %nop6250 = alloca i1, i1 0
- %nop6251 = alloca i1, i1 0
- %nop6252 = alloca i1, i1 0
- %nop6253 = alloca i1, i1 0
- %nop6254 = alloca i1, i1 0
- %nop6255 = alloca i1, i1 0
- %nop6256 = alloca i1, i1 0
- %nop6257 = alloca i1, i1 0
- %nop6258 = alloca i1, i1 0
- %nop6259 = alloca i1, i1 0
- %nop6260 = alloca i1, i1 0
- %nop6261 = alloca i1, i1 0
- %nop6262 = alloca i1, i1 0
- %nop6263 = alloca i1, i1 0
- %nop6264 = alloca i1, i1 0
- %nop6265 = alloca i1, i1 0
- %nop6266 = alloca i1, i1 0
- %nop6267 = alloca i1, i1 0
- %nop6268 = alloca i1, i1 0
- %nop6269 = alloca i1, i1 0
- %nop6270 = alloca i1, i1 0
- %nop6271 = alloca i1, i1 0
- %nop6272 = alloca i1, i1 0
- %nop6273 = alloca i1, i1 0
- %nop6274 = alloca i1, i1 0
- %nop6275 = alloca i1, i1 0
- %nop6276 = alloca i1, i1 0
- %nop6277 = alloca i1, i1 0
- %nop6278 = alloca i1, i1 0
- %nop6279 = alloca i1, i1 0
- %nop6280 = alloca i1, i1 0
- %nop6281 = alloca i1, i1 0
- %nop6282 = alloca i1, i1 0
- %nop6283 = alloca i1, i1 0
- %nop6284 = alloca i1, i1 0
- %nop6285 = alloca i1, i1 0
- %nop6286 = alloca i1, i1 0
- %nop6287 = alloca i1, i1 0
- %nop6288 = alloca i1, i1 0
- %nop6289 = alloca i1, i1 0
- %nop6290 = alloca i1, i1 0
- %nop6291 = alloca i1, i1 0
- %nop6292 = alloca i1, i1 0
- %nop6293 = alloca i1, i1 0
- %nop6294 = alloca i1, i1 0
- %nop6295 = alloca i1, i1 0
- %nop6296 = alloca i1, i1 0
- %nop6297 = alloca i1, i1 0
- %nop6298 = alloca i1, i1 0
- %nop6299 = alloca i1, i1 0
- %nop6300 = alloca i1, i1 0
- %nop6301 = alloca i1, i1 0
- %nop6302 = alloca i1, i1 0
- %nop6303 = alloca i1, i1 0
- %nop6304 = alloca i1, i1 0
- %nop6305 = alloca i1, i1 0
- %nop6306 = alloca i1, i1 0
- %nop6307 = alloca i1, i1 0
- %nop6308 = alloca i1, i1 0
- %nop6309 = alloca i1, i1 0
- %nop6310 = alloca i1, i1 0
- %nop6311 = alloca i1, i1 0
- %nop6312 = alloca i1, i1 0
- %nop6313 = alloca i1, i1 0
- %nop6314 = alloca i1, i1 0
- %nop6315 = alloca i1, i1 0
- %nop6316 = alloca i1, i1 0
- %nop6317 = alloca i1, i1 0
- %nop6318 = alloca i1, i1 0
- %nop6319 = alloca i1, i1 0
- %nop6320 = alloca i1, i1 0
- %nop6321 = alloca i1, i1 0
- %nop6322 = alloca i1, i1 0
- %nop6323 = alloca i1, i1 0
- %nop6324 = alloca i1, i1 0
- %nop6325 = alloca i1, i1 0
- %nop6326 = alloca i1, i1 0
- %nop6327 = alloca i1, i1 0
- %nop6328 = alloca i1, i1 0
- %nop6329 = alloca i1, i1 0
- %nop6330 = alloca i1, i1 0
- %nop6331 = alloca i1, i1 0
- %nop6332 = alloca i1, i1 0
- %nop6333 = alloca i1, i1 0
- %nop6334 = alloca i1, i1 0
- %nop6335 = alloca i1, i1 0
- %nop6336 = alloca i1, i1 0
- %nop6337 = alloca i1, i1 0
- %nop6338 = alloca i1, i1 0
- %nop6339 = alloca i1, i1 0
- %nop6340 = alloca i1, i1 0
- %nop6341 = alloca i1, i1 0
- %nop6342 = alloca i1, i1 0
- %nop6343 = alloca i1, i1 0
- %nop6344 = alloca i1, i1 0
- %nop6345 = alloca i1, i1 0
- %nop6346 = alloca i1, i1 0
- %nop6347 = alloca i1, i1 0
- %nop6348 = alloca i1, i1 0
- %nop6349 = alloca i1, i1 0
- %nop6350 = alloca i1, i1 0
- %nop6351 = alloca i1, i1 0
- %nop6352 = alloca i1, i1 0
- %nop6353 = alloca i1, i1 0
- %nop6354 = alloca i1, i1 0
- %nop6355 = alloca i1, i1 0
- %nop6356 = alloca i1, i1 0
- %nop6357 = alloca i1, i1 0
- %nop6358 = alloca i1, i1 0
- %nop6359 = alloca i1, i1 0
- %nop6360 = alloca i1, i1 0
- %nop6361 = alloca i1, i1 0
- %nop6362 = alloca i1, i1 0
- %nop6363 = alloca i1, i1 0
- %nop6364 = alloca i1, i1 0
- %nop6365 = alloca i1, i1 0
- %nop6366 = alloca i1, i1 0
- %nop6367 = alloca i1, i1 0
- %nop6368 = alloca i1, i1 0
- %nop6369 = alloca i1, i1 0
- %nop6370 = alloca i1, i1 0
- %nop6371 = alloca i1, i1 0
- %nop6372 = alloca i1, i1 0
- %nop6373 = alloca i1, i1 0
- %nop6374 = alloca i1, i1 0
- %nop6375 = alloca i1, i1 0
- %nop6376 = alloca i1, i1 0
- %nop6377 = alloca i1, i1 0
- %nop6378 = alloca i1, i1 0
- %nop6379 = alloca i1, i1 0
- %nop6380 = alloca i1, i1 0
- %nop6381 = alloca i1, i1 0
- %nop6382 = alloca i1, i1 0
- %nop6383 = alloca i1, i1 0
- %nop6384 = alloca i1, i1 0
- %nop6385 = alloca i1, i1 0
- %nop6386 = alloca i1, i1 0
- %nop6387 = alloca i1, i1 0
- %nop6388 = alloca i1, i1 0
- %nop6389 = alloca i1, i1 0
- %nop6390 = alloca i1, i1 0
- %nop6391 = alloca i1, i1 0
- %nop6392 = alloca i1, i1 0
- %nop6393 = alloca i1, i1 0
- %nop6394 = alloca i1, i1 0
- %nop6395 = alloca i1, i1 0
- %nop6396 = alloca i1, i1 0
- %nop6397 = alloca i1, i1 0
- %nop6398 = alloca i1, i1 0
- %nop6399 = alloca i1, i1 0
- %nop6400 = alloca i1, i1 0
- %nop6401 = alloca i1, i1 0
- %nop6402 = alloca i1, i1 0
- %nop6403 = alloca i1, i1 0
- %nop6404 = alloca i1, i1 0
- %nop6405 = alloca i1, i1 0
- %nop6406 = alloca i1, i1 0
- %nop6407 = alloca i1, i1 0
- %nop6408 = alloca i1, i1 0
- %nop6409 = alloca i1, i1 0
- %nop6410 = alloca i1, i1 0
- %nop6411 = alloca i1, i1 0
- %nop6412 = alloca i1, i1 0
- %nop6413 = alloca i1, i1 0
- %nop6414 = alloca i1, i1 0
- %nop6415 = alloca i1, i1 0
- %nop6416 = alloca i1, i1 0
- %nop6417 = alloca i1, i1 0
- %nop6418 = alloca i1, i1 0
- %nop6419 = alloca i1, i1 0
- %nop6420 = alloca i1, i1 0
- %nop6421 = alloca i1, i1 0
- %nop6422 = alloca i1, i1 0
- %nop6423 = alloca i1, i1 0
- %nop6424 = alloca i1, i1 0
- %nop6425 = alloca i1, i1 0
- %nop6426 = alloca i1, i1 0
- %nop6427 = alloca i1, i1 0
- %nop6428 = alloca i1, i1 0
- %nop6429 = alloca i1, i1 0
- %nop6430 = alloca i1, i1 0
- %nop6431 = alloca i1, i1 0
- %nop6432 = alloca i1, i1 0
- %nop6433 = alloca i1, i1 0
- %nop6434 = alloca i1, i1 0
- %nop6435 = alloca i1, i1 0
- %nop6436 = alloca i1, i1 0
- %nop6437 = alloca i1, i1 0
- %nop6438 = alloca i1, i1 0
- %nop6439 = alloca i1, i1 0
- %nop6440 = alloca i1, i1 0
- %nop6441 = alloca i1, i1 0
- %nop6442 = alloca i1, i1 0
- %nop6443 = alloca i1, i1 0
- %nop6444 = alloca i1, i1 0
- %nop6445 = alloca i1, i1 0
- %nop6446 = alloca i1, i1 0
- %nop6447 = alloca i1, i1 0
- %nop6448 = alloca i1, i1 0
- %nop6449 = alloca i1, i1 0
- %nop6450 = alloca i1, i1 0
- %nop6451 = alloca i1, i1 0
- %nop6452 = alloca i1, i1 0
- %nop6453 = alloca i1, i1 0
- %nop6454 = alloca i1, i1 0
- %nop6455 = alloca i1, i1 0
- %nop6456 = alloca i1, i1 0
- %nop6457 = alloca i1, i1 0
- %nop6458 = alloca i1, i1 0
- %nop6459 = alloca i1, i1 0
- %nop6460 = alloca i1, i1 0
- %nop6461 = alloca i1, i1 0
- %nop6462 = alloca i1, i1 0
- %nop6463 = alloca i1, i1 0
- %nop6464 = alloca i1, i1 0
- %nop6465 = alloca i1, i1 0
- %nop6466 = alloca i1, i1 0
- %nop6467 = alloca i1, i1 0
- %nop6468 = alloca i1, i1 0
- %nop6469 = alloca i1, i1 0
- %nop6470 = alloca i1, i1 0
- %nop6471 = alloca i1, i1 0
- %nop6472 = alloca i1, i1 0
- %nop6473 = alloca i1, i1 0
- %nop6474 = alloca i1, i1 0
- %nop6475 = alloca i1, i1 0
- %nop6476 = alloca i1, i1 0
- %nop6477 = alloca i1, i1 0
- %nop6478 = alloca i1, i1 0
- %nop6479 = alloca i1, i1 0
- %nop6480 = alloca i1, i1 0
- %nop6481 = alloca i1, i1 0
- %nop6482 = alloca i1, i1 0
- %nop6483 = alloca i1, i1 0
- %nop6484 = alloca i1, i1 0
- %nop6485 = alloca i1, i1 0
- %nop6486 = alloca i1, i1 0
- %nop6487 = alloca i1, i1 0
- %nop6488 = alloca i1, i1 0
- %nop6489 = alloca i1, i1 0
- %nop6490 = alloca i1, i1 0
- %nop6491 = alloca i1, i1 0
- %nop6492 = alloca i1, i1 0
- %nop6493 = alloca i1, i1 0
- %nop6494 = alloca i1, i1 0
- %nop6495 = alloca i1, i1 0
- %nop6496 = alloca i1, i1 0
- %nop6497 = alloca i1, i1 0
- %nop6498 = alloca i1, i1 0
- %nop6499 = alloca i1, i1 0
- %nop6500 = alloca i1, i1 0
- %nop6501 = alloca i1, i1 0
- %nop6502 = alloca i1, i1 0
- %nop6503 = alloca i1, i1 0
- %nop6504 = alloca i1, i1 0
- %nop6505 = alloca i1, i1 0
- %nop6506 = alloca i1, i1 0
- %nop6507 = alloca i1, i1 0
- %nop6508 = alloca i1, i1 0
- %nop6509 = alloca i1, i1 0
- %nop6510 = alloca i1, i1 0
- %nop6511 = alloca i1, i1 0
- %nop6512 = alloca i1, i1 0
- %nop6513 = alloca i1, i1 0
- %nop6514 = alloca i1, i1 0
- %nop6515 = alloca i1, i1 0
- %nop6516 = alloca i1, i1 0
- %nop6517 = alloca i1, i1 0
- %nop6518 = alloca i1, i1 0
- %nop6519 = alloca i1, i1 0
- %nop6520 = alloca i1, i1 0
- %nop6521 = alloca i1, i1 0
- %nop6522 = alloca i1, i1 0
- %nop6523 = alloca i1, i1 0
- %nop6524 = alloca i1, i1 0
- %nop6525 = alloca i1, i1 0
- %nop6526 = alloca i1, i1 0
- %nop6527 = alloca i1, i1 0
- %nop6528 = alloca i1, i1 0
- %nop6529 = alloca i1, i1 0
- %nop6530 = alloca i1, i1 0
- %nop6531 = alloca i1, i1 0
- %nop6532 = alloca i1, i1 0
- %nop6533 = alloca i1, i1 0
- %nop6534 = alloca i1, i1 0
- %nop6535 = alloca i1, i1 0
- %nop6536 = alloca i1, i1 0
- %nop6537 = alloca i1, i1 0
- %nop6538 = alloca i1, i1 0
- %nop6539 = alloca i1, i1 0
- %nop6540 = alloca i1, i1 0
- %nop6541 = alloca i1, i1 0
- %nop6542 = alloca i1, i1 0
- %nop6543 = alloca i1, i1 0
- %nop6544 = alloca i1, i1 0
- %nop6545 = alloca i1, i1 0
- %nop6546 = alloca i1, i1 0
- %nop6547 = alloca i1, i1 0
- %nop6548 = alloca i1, i1 0
- %nop6549 = alloca i1, i1 0
- %nop6550 = alloca i1, i1 0
- %nop6551 = alloca i1, i1 0
- %nop6552 = alloca i1, i1 0
- %nop6553 = alloca i1, i1 0
- %nop6554 = alloca i1, i1 0
- %nop6555 = alloca i1, i1 0
- %nop6556 = alloca i1, i1 0
- %nop6557 = alloca i1, i1 0
- %nop6558 = alloca i1, i1 0
- %nop6559 = alloca i1, i1 0
- %nop6560 = alloca i1, i1 0
- %nop6561 = alloca i1, i1 0
- %nop6562 = alloca i1, i1 0
- %nop6563 = alloca i1, i1 0
- %nop6564 = alloca i1, i1 0
- %nop6565 = alloca i1, i1 0
- %nop6566 = alloca i1, i1 0
- %nop6567 = alloca i1, i1 0
- %nop6568 = alloca i1, i1 0
- %nop6569 = alloca i1, i1 0
- %nop6570 = alloca i1, i1 0
- %nop6571 = alloca i1, i1 0
- %nop6572 = alloca i1, i1 0
- %nop6573 = alloca i1, i1 0
- %nop6574 = alloca i1, i1 0
- %nop6575 = alloca i1, i1 0
- %nop6576 = alloca i1, i1 0
- %nop6577 = alloca i1, i1 0
- %nop6578 = alloca i1, i1 0
- %nop6579 = alloca i1, i1 0
- %nop6580 = alloca i1, i1 0
- %nop6581 = alloca i1, i1 0
- %nop6582 = alloca i1, i1 0
- %nop6583 = alloca i1, i1 0
- %nop6584 = alloca i1, i1 0
- %nop6585 = alloca i1, i1 0
- %nop6586 = alloca i1, i1 0
- %nop6587 = alloca i1, i1 0
- %nop6588 = alloca i1, i1 0
- %nop6589 = alloca i1, i1 0
- %nop6590 = alloca i1, i1 0
- %nop6591 = alloca i1, i1 0
- %nop6592 = alloca i1, i1 0
- %nop6593 = alloca i1, i1 0
- %nop6594 = alloca i1, i1 0
- %nop6595 = alloca i1, i1 0
- %nop6596 = alloca i1, i1 0
- %nop6597 = alloca i1, i1 0
- %nop6598 = alloca i1, i1 0
- %nop6599 = alloca i1, i1 0
- %nop6600 = alloca i1, i1 0
- %nop6601 = alloca i1, i1 0
- %nop6602 = alloca i1, i1 0
- %nop6603 = alloca i1, i1 0
- %nop6604 = alloca i1, i1 0
- %nop6605 = alloca i1, i1 0
- %nop6606 = alloca i1, i1 0
- %nop6607 = alloca i1, i1 0
- %nop6608 = alloca i1, i1 0
- %nop6609 = alloca i1, i1 0
- %nop6610 = alloca i1, i1 0
- %nop6611 = alloca i1, i1 0
- %nop6612 = alloca i1, i1 0
- %nop6613 = alloca i1, i1 0
- %nop6614 = alloca i1, i1 0
- %nop6615 = alloca i1, i1 0
- %nop6616 = alloca i1, i1 0
- %nop6617 = alloca i1, i1 0
- %nop6618 = alloca i1, i1 0
- %nop6619 = alloca i1, i1 0
- %nop6620 = alloca i1, i1 0
- %nop6621 = alloca i1, i1 0
- %nop6622 = alloca i1, i1 0
- %nop6623 = alloca i1, i1 0
- %nop6624 = alloca i1, i1 0
- %nop6625 = alloca i1, i1 0
- %nop6626 = alloca i1, i1 0
- %nop6627 = alloca i1, i1 0
- %nop6628 = alloca i1, i1 0
- %nop6629 = alloca i1, i1 0
- %nop6630 = alloca i1, i1 0
- %nop6631 = alloca i1, i1 0
- %nop6632 = alloca i1, i1 0
- %nop6633 = alloca i1, i1 0
- %nop6634 = alloca i1, i1 0
- %nop6635 = alloca i1, i1 0
- %nop6636 = alloca i1, i1 0
- %nop6637 = alloca i1, i1 0
- %nop6638 = alloca i1, i1 0
- %nop6639 = alloca i1, i1 0
- %nop6640 = alloca i1, i1 0
- %nop6641 = alloca i1, i1 0
- %nop6642 = alloca i1, i1 0
- %nop6643 = alloca i1, i1 0
- %nop6644 = alloca i1, i1 0
- %nop6645 = alloca i1, i1 0
- %nop6646 = alloca i1, i1 0
- %nop6647 = alloca i1, i1 0
- %nop6648 = alloca i1, i1 0
- %nop6649 = alloca i1, i1 0
- %nop6650 = alloca i1, i1 0
- %nop6651 = alloca i1, i1 0
- %nop6652 = alloca i1, i1 0
- %nop6653 = alloca i1, i1 0
- %nop6654 = alloca i1, i1 0
- %nop6655 = alloca i1, i1 0
- %nop6656 = alloca i1, i1 0
- %nop6657 = alloca i1, i1 0
- %nop6658 = alloca i1, i1 0
- %nop6659 = alloca i1, i1 0
- %nop6660 = alloca i1, i1 0
- %nop6661 = alloca i1, i1 0
- %nop6662 = alloca i1, i1 0
- %nop6663 = alloca i1, i1 0
- %nop6664 = alloca i1, i1 0
- %nop6665 = alloca i1, i1 0
- %nop6666 = alloca i1, i1 0
- %nop6667 = alloca i1, i1 0
- %nop6668 = alloca i1, i1 0
- %nop6669 = alloca i1, i1 0
- %nop6670 = alloca i1, i1 0
- %nop6671 = alloca i1, i1 0
- %nop6672 = alloca i1, i1 0
- %nop6673 = alloca i1, i1 0
- %nop6674 = alloca i1, i1 0
- %nop6675 = alloca i1, i1 0
- %nop6676 = alloca i1, i1 0
- %nop6677 = alloca i1, i1 0
- %nop6678 = alloca i1, i1 0
- %nop6679 = alloca i1, i1 0
- %nop6680 = alloca i1, i1 0
- %nop6681 = alloca i1, i1 0
- %nop6682 = alloca i1, i1 0
- %nop6683 = alloca i1, i1 0
- %nop6684 = alloca i1, i1 0
- %nop6685 = alloca i1, i1 0
- %nop6686 = alloca i1, i1 0
- %nop6687 = alloca i1, i1 0
- %nop6688 = alloca i1, i1 0
- %nop6689 = alloca i1, i1 0
- %nop6690 = alloca i1, i1 0
- %nop6691 = alloca i1, i1 0
- %nop6692 = alloca i1, i1 0
- %nop6693 = alloca i1, i1 0
- %nop6694 = alloca i1, i1 0
- %nop6695 = alloca i1, i1 0
- %nop6696 = alloca i1, i1 0
- %nop6697 = alloca i1, i1 0
- %nop6698 = alloca i1, i1 0
- %nop6699 = alloca i1, i1 0
- %nop6700 = alloca i1, i1 0
- %nop6701 = alloca i1, i1 0
- %nop6702 = alloca i1, i1 0
- %nop6703 = alloca i1, i1 0
- %nop6704 = alloca i1, i1 0
- %nop6705 = alloca i1, i1 0
- %nop6706 = alloca i1, i1 0
- %nop6707 = alloca i1, i1 0
- %nop6708 = alloca i1, i1 0
- %nop6709 = alloca i1, i1 0
- %nop6710 = alloca i1, i1 0
- %nop6711 = alloca i1, i1 0
- %nop6712 = alloca i1, i1 0
- %nop6713 = alloca i1, i1 0
- %nop6714 = alloca i1, i1 0
- %nop6715 = alloca i1, i1 0
- %nop6716 = alloca i1, i1 0
- %nop6717 = alloca i1, i1 0
- %nop6718 = alloca i1, i1 0
- %nop6719 = alloca i1, i1 0
- %nop6720 = alloca i1, i1 0
- %nop6721 = alloca i1, i1 0
- %nop6722 = alloca i1, i1 0
- %nop6723 = alloca i1, i1 0
- %nop6724 = alloca i1, i1 0
- %nop6725 = alloca i1, i1 0
- %nop6726 = alloca i1, i1 0
- %nop6727 = alloca i1, i1 0
- %nop6728 = alloca i1, i1 0
- %nop6729 = alloca i1, i1 0
- %nop6730 = alloca i1, i1 0
- %nop6731 = alloca i1, i1 0
- %nop6732 = alloca i1, i1 0
- %nop6733 = alloca i1, i1 0
- %nop6734 = alloca i1, i1 0
- %nop6735 = alloca i1, i1 0
- %nop6736 = alloca i1, i1 0
- %nop6737 = alloca i1, i1 0
- %nop6738 = alloca i1, i1 0
- %nop6739 = alloca i1, i1 0
- %nop6740 = alloca i1, i1 0
- %nop6741 = alloca i1, i1 0
- %nop6742 = alloca i1, i1 0
- %nop6743 = alloca i1, i1 0
- %nop6744 = alloca i1, i1 0
- %nop6745 = alloca i1, i1 0
- %nop6746 = alloca i1, i1 0
- %nop6747 = alloca i1, i1 0
- %nop6748 = alloca i1, i1 0
- %nop6749 = alloca i1, i1 0
- %nop6750 = alloca i1, i1 0
- %nop6751 = alloca i1, i1 0
- %nop6752 = alloca i1, i1 0
- %nop6753 = alloca i1, i1 0
- %nop6754 = alloca i1, i1 0
- %nop6755 = alloca i1, i1 0
- %nop6756 = alloca i1, i1 0
- %nop6757 = alloca i1, i1 0
- %nop6758 = alloca i1, i1 0
- %nop6759 = alloca i1, i1 0
- %nop6760 = alloca i1, i1 0
- %nop6761 = alloca i1, i1 0
- %nop6762 = alloca i1, i1 0
- %nop6763 = alloca i1, i1 0
- %nop6764 = alloca i1, i1 0
- %nop6765 = alloca i1, i1 0
- %nop6766 = alloca i1, i1 0
- %nop6767 = alloca i1, i1 0
- %nop6768 = alloca i1, i1 0
- %nop6769 = alloca i1, i1 0
- %nop6770 = alloca i1, i1 0
- %nop6771 = alloca i1, i1 0
- %nop6772 = alloca i1, i1 0
- %nop6773 = alloca i1, i1 0
- %nop6774 = alloca i1, i1 0
- %nop6775 = alloca i1, i1 0
- %nop6776 = alloca i1, i1 0
- %nop6777 = alloca i1, i1 0
- %nop6778 = alloca i1, i1 0
- %nop6779 = alloca i1, i1 0
- %nop6780 = alloca i1, i1 0
- %nop6781 = alloca i1, i1 0
- %nop6782 = alloca i1, i1 0
- %nop6783 = alloca i1, i1 0
- %nop6784 = alloca i1, i1 0
- %nop6785 = alloca i1, i1 0
- %nop6786 = alloca i1, i1 0
- %nop6787 = alloca i1, i1 0
- %nop6788 = alloca i1, i1 0
- %nop6789 = alloca i1, i1 0
- %nop6790 = alloca i1, i1 0
- %nop6791 = alloca i1, i1 0
- %nop6792 = alloca i1, i1 0
- %nop6793 = alloca i1, i1 0
- %nop6794 = alloca i1, i1 0
- %nop6795 = alloca i1, i1 0
- %nop6796 = alloca i1, i1 0
- %nop6797 = alloca i1, i1 0
- %nop6798 = alloca i1, i1 0
- %nop6799 = alloca i1, i1 0
- %nop6800 = alloca i1, i1 0
- %nop6801 = alloca i1, i1 0
- %nop6802 = alloca i1, i1 0
- %nop6803 = alloca i1, i1 0
- %nop6804 = alloca i1, i1 0
- %nop6805 = alloca i1, i1 0
- %nop6806 = alloca i1, i1 0
- %nop6807 = alloca i1, i1 0
- %nop6808 = alloca i1, i1 0
- %nop6809 = alloca i1, i1 0
- %nop6810 = alloca i1, i1 0
- %nop6811 = alloca i1, i1 0
- %nop6812 = alloca i1, i1 0
- %nop6813 = alloca i1, i1 0
- %nop6814 = alloca i1, i1 0
- %nop6815 = alloca i1, i1 0
- %nop6816 = alloca i1, i1 0
- %nop6817 = alloca i1, i1 0
- %nop6818 = alloca i1, i1 0
- %nop6819 = alloca i1, i1 0
- %nop6820 = alloca i1, i1 0
- %nop6821 = alloca i1, i1 0
- %nop6822 = alloca i1, i1 0
- %nop6823 = alloca i1, i1 0
- %nop6824 = alloca i1, i1 0
- %nop6825 = alloca i1, i1 0
- %nop6826 = alloca i1, i1 0
- %nop6827 = alloca i1, i1 0
- %nop6828 = alloca i1, i1 0
- %nop6829 = alloca i1, i1 0
- %nop6830 = alloca i1, i1 0
- %nop6831 = alloca i1, i1 0
- %nop6832 = alloca i1, i1 0
- %nop6833 = alloca i1, i1 0
- %nop6834 = alloca i1, i1 0
- %nop6835 = alloca i1, i1 0
- %nop6836 = alloca i1, i1 0
- %nop6837 = alloca i1, i1 0
- %nop6838 = alloca i1, i1 0
- %nop6839 = alloca i1, i1 0
- %nop6840 = alloca i1, i1 0
- %nop6841 = alloca i1, i1 0
- %nop6842 = alloca i1, i1 0
- %nop6843 = alloca i1, i1 0
- %nop6844 = alloca i1, i1 0
- %nop6845 = alloca i1, i1 0
- %nop6846 = alloca i1, i1 0
- %nop6847 = alloca i1, i1 0
- %nop6848 = alloca i1, i1 0
- %nop6849 = alloca i1, i1 0
- %nop6850 = alloca i1, i1 0
- %nop6851 = alloca i1, i1 0
- %nop6852 = alloca i1, i1 0
- %nop6853 = alloca i1, i1 0
- %nop6854 = alloca i1, i1 0
- %nop6855 = alloca i1, i1 0
- %nop6856 = alloca i1, i1 0
- %nop6857 = alloca i1, i1 0
- %nop6858 = alloca i1, i1 0
- %nop6859 = alloca i1, i1 0
- %nop6860 = alloca i1, i1 0
- %nop6861 = alloca i1, i1 0
- %nop6862 = alloca i1, i1 0
- %nop6863 = alloca i1, i1 0
- %nop6864 = alloca i1, i1 0
- %nop6865 = alloca i1, i1 0
- %nop6866 = alloca i1, i1 0
- %nop6867 = alloca i1, i1 0
- %nop6868 = alloca i1, i1 0
- %nop6869 = alloca i1, i1 0
- %nop6870 = alloca i1, i1 0
- %nop6871 = alloca i1, i1 0
- %nop6872 = alloca i1, i1 0
- %nop6873 = alloca i1, i1 0
- %nop6874 = alloca i1, i1 0
- %nop6875 = alloca i1, i1 0
- %nop6876 = alloca i1, i1 0
- %nop6877 = alloca i1, i1 0
- %nop6878 = alloca i1, i1 0
- %nop6879 = alloca i1, i1 0
- %nop6880 = alloca i1, i1 0
- %nop6881 = alloca i1, i1 0
- %nop6882 = alloca i1, i1 0
- %nop6883 = alloca i1, i1 0
- %nop6884 = alloca i1, i1 0
- %nop6885 = alloca i1, i1 0
- %nop6886 = alloca i1, i1 0
- %nop6887 = alloca i1, i1 0
- %nop6888 = alloca i1, i1 0
- %nop6889 = alloca i1, i1 0
- %nop6890 = alloca i1, i1 0
- %nop6891 = alloca i1, i1 0
- %nop6892 = alloca i1, i1 0
- %nop6893 = alloca i1, i1 0
- %nop6894 = alloca i1, i1 0
- %nop6895 = alloca i1, i1 0
- %nop6896 = alloca i1, i1 0
- %nop6897 = alloca i1, i1 0
- %nop6898 = alloca i1, i1 0
- %nop6899 = alloca i1, i1 0
- %nop6900 = alloca i1, i1 0
- %nop6901 = alloca i1, i1 0
- %nop6902 = alloca i1, i1 0
- %nop6903 = alloca i1, i1 0
- %nop6904 = alloca i1, i1 0
- %nop6905 = alloca i1, i1 0
- %nop6906 = alloca i1, i1 0
- %nop6907 = alloca i1, i1 0
- %nop6908 = alloca i1, i1 0
- %nop6909 = alloca i1, i1 0
- %nop6910 = alloca i1, i1 0
- %nop6911 = alloca i1, i1 0
- %nop6912 = alloca i1, i1 0
- %nop6913 = alloca i1, i1 0
- %nop6914 = alloca i1, i1 0
- %nop6915 = alloca i1, i1 0
- %nop6916 = alloca i1, i1 0
- %nop6917 = alloca i1, i1 0
- %nop6918 = alloca i1, i1 0
- %nop6919 = alloca i1, i1 0
- %nop6920 = alloca i1, i1 0
- %nop6921 = alloca i1, i1 0
- %nop6922 = alloca i1, i1 0
- %nop6923 = alloca i1, i1 0
- %nop6924 = alloca i1, i1 0
- %nop6925 = alloca i1, i1 0
- %nop6926 = alloca i1, i1 0
- %nop6927 = alloca i1, i1 0
- %nop6928 = alloca i1, i1 0
- %nop6929 = alloca i1, i1 0
- %nop6930 = alloca i1, i1 0
- %nop6931 = alloca i1, i1 0
- %nop6932 = alloca i1, i1 0
- %nop6933 = alloca i1, i1 0
- %nop6934 = alloca i1, i1 0
- %nop6935 = alloca i1, i1 0
- %nop6936 = alloca i1, i1 0
- %nop6937 = alloca i1, i1 0
- %nop6938 = alloca i1, i1 0
- %nop6939 = alloca i1, i1 0
- %nop6940 = alloca i1, i1 0
- %nop6941 = alloca i1, i1 0
- %nop6942 = alloca i1, i1 0
- %nop6943 = alloca i1, i1 0
- %nop6944 = alloca i1, i1 0
- %nop6945 = alloca i1, i1 0
- %nop6946 = alloca i1, i1 0
- %nop6947 = alloca i1, i1 0
- %nop6948 = alloca i1, i1 0
- %nop6949 = alloca i1, i1 0
- %nop6950 = alloca i1, i1 0
- %nop6951 = alloca i1, i1 0
- %nop6952 = alloca i1, i1 0
- %nop6953 = alloca i1, i1 0
- %nop6954 = alloca i1, i1 0
- %nop6955 = alloca i1, i1 0
- %nop6956 = alloca i1, i1 0
- %nop6957 = alloca i1, i1 0
- %nop6958 = alloca i1, i1 0
- %nop6959 = alloca i1, i1 0
- %nop6960 = alloca i1, i1 0
- %nop6961 = alloca i1, i1 0
- %nop6962 = alloca i1, i1 0
- %nop6963 = alloca i1, i1 0
- %nop6964 = alloca i1, i1 0
- %nop6965 = alloca i1, i1 0
- %nop6966 = alloca i1, i1 0
- %nop6967 = alloca i1, i1 0
- %nop6968 = alloca i1, i1 0
- %nop6969 = alloca i1, i1 0
- %nop6970 = alloca i1, i1 0
- %nop6971 = alloca i1, i1 0
- %nop6972 = alloca i1, i1 0
- %nop6973 = alloca i1, i1 0
- %nop6974 = alloca i1, i1 0
- %nop6975 = alloca i1, i1 0
- %nop6976 = alloca i1, i1 0
- %nop6977 = alloca i1, i1 0
- %nop6978 = alloca i1, i1 0
- %nop6979 = alloca i1, i1 0
- %nop6980 = alloca i1, i1 0
- %nop6981 = alloca i1, i1 0
- %nop6982 = alloca i1, i1 0
- %nop6983 = alloca i1, i1 0
- %nop6984 = alloca i1, i1 0
- %nop6985 = alloca i1, i1 0
- %nop6986 = alloca i1, i1 0
- %nop6987 = alloca i1, i1 0
- %nop6988 = alloca i1, i1 0
- %nop6989 = alloca i1, i1 0
- %nop6990 = alloca i1, i1 0
- %nop6991 = alloca i1, i1 0
- %nop6992 = alloca i1, i1 0
- %nop6993 = alloca i1, i1 0
- %nop6994 = alloca i1, i1 0
- %nop6995 = alloca i1, i1 0
- %nop6996 = alloca i1, i1 0
- %nop6997 = alloca i1, i1 0
- %nop6998 = alloca i1, i1 0
- %nop6999 = alloca i1, i1 0
- %nop7000 = alloca i1, i1 0
- %nop7001 = alloca i1, i1 0
- %nop7002 = alloca i1, i1 0
- %nop7003 = alloca i1, i1 0
- %nop7004 = alloca i1, i1 0
- %nop7005 = alloca i1, i1 0
- %nop7006 = alloca i1, i1 0
- %nop7007 = alloca i1, i1 0
- %nop7008 = alloca i1, i1 0
- %nop7009 = alloca i1, i1 0
- %nop7010 = alloca i1, i1 0
- %nop7011 = alloca i1, i1 0
- %nop7012 = alloca i1, i1 0
- %nop7013 = alloca i1, i1 0
- %nop7014 = alloca i1, i1 0
- %nop7015 = alloca i1, i1 0
- %nop7016 = alloca i1, i1 0
- %nop7017 = alloca i1, i1 0
- %nop7018 = alloca i1, i1 0
- %nop7019 = alloca i1, i1 0
- %nop7020 = alloca i1, i1 0
- %nop7021 = alloca i1, i1 0
- %nop7022 = alloca i1, i1 0
- %nop7023 = alloca i1, i1 0
- %nop7024 = alloca i1, i1 0
- %nop7025 = alloca i1, i1 0
- %nop7026 = alloca i1, i1 0
- %nop7027 = alloca i1, i1 0
- %nop7028 = alloca i1, i1 0
- %nop7029 = alloca i1, i1 0
- %nop7030 = alloca i1, i1 0
- %nop7031 = alloca i1, i1 0
- %nop7032 = alloca i1, i1 0
- %nop7033 = alloca i1, i1 0
- %nop7034 = alloca i1, i1 0
- %nop7035 = alloca i1, i1 0
- %nop7036 = alloca i1, i1 0
- %nop7037 = alloca i1, i1 0
- %nop7038 = alloca i1, i1 0
- %nop7039 = alloca i1, i1 0
- %nop7040 = alloca i1, i1 0
- %nop7041 = alloca i1, i1 0
- %nop7042 = alloca i1, i1 0
- %nop7043 = alloca i1, i1 0
- %nop7044 = alloca i1, i1 0
- %nop7045 = alloca i1, i1 0
- %nop7046 = alloca i1, i1 0
- %nop7047 = alloca i1, i1 0
- %nop7048 = alloca i1, i1 0
- %nop7049 = alloca i1, i1 0
- %nop7050 = alloca i1, i1 0
- %nop7051 = alloca i1, i1 0
- %nop7052 = alloca i1, i1 0
- %nop7053 = alloca i1, i1 0
- %nop7054 = alloca i1, i1 0
- %nop7055 = alloca i1, i1 0
- %nop7056 = alloca i1, i1 0
- %nop7057 = alloca i1, i1 0
- %nop7058 = alloca i1, i1 0
- %nop7059 = alloca i1, i1 0
- %nop7060 = alloca i1, i1 0
- %nop7061 = alloca i1, i1 0
- %nop7062 = alloca i1, i1 0
- %nop7063 = alloca i1, i1 0
- %nop7064 = alloca i1, i1 0
- %nop7065 = alloca i1, i1 0
- %nop7066 = alloca i1, i1 0
- %nop7067 = alloca i1, i1 0
- %nop7068 = alloca i1, i1 0
- %nop7069 = alloca i1, i1 0
- %nop7070 = alloca i1, i1 0
- %nop7071 = alloca i1, i1 0
- %nop7072 = alloca i1, i1 0
- %nop7073 = alloca i1, i1 0
- %nop7074 = alloca i1, i1 0
- %nop7075 = alloca i1, i1 0
- %nop7076 = alloca i1, i1 0
- %nop7077 = alloca i1, i1 0
- %nop7078 = alloca i1, i1 0
- %nop7079 = alloca i1, i1 0
- %nop7080 = alloca i1, i1 0
- %nop7081 = alloca i1, i1 0
- %nop7082 = alloca i1, i1 0
- %nop7083 = alloca i1, i1 0
- %nop7084 = alloca i1, i1 0
- %nop7085 = alloca i1, i1 0
- %nop7086 = alloca i1, i1 0
- %nop7087 = alloca i1, i1 0
- %nop7088 = alloca i1, i1 0
- %nop7089 = alloca i1, i1 0
- %nop7090 = alloca i1, i1 0
- %nop7091 = alloca i1, i1 0
- %nop7092 = alloca i1, i1 0
- %nop7093 = alloca i1, i1 0
- %nop7094 = alloca i1, i1 0
- %nop7095 = alloca i1, i1 0
- %nop7096 = alloca i1, i1 0
- %nop7097 = alloca i1, i1 0
- %nop7098 = alloca i1, i1 0
- %nop7099 = alloca i1, i1 0
- %nop7100 = alloca i1, i1 0
- %nop7101 = alloca i1, i1 0
- %nop7102 = alloca i1, i1 0
- %nop7103 = alloca i1, i1 0
- %nop7104 = alloca i1, i1 0
- %nop7105 = alloca i1, i1 0
- %nop7106 = alloca i1, i1 0
- %nop7107 = alloca i1, i1 0
- %nop7108 = alloca i1, i1 0
- %nop7109 = alloca i1, i1 0
- %nop7110 = alloca i1, i1 0
- %nop7111 = alloca i1, i1 0
- %nop7112 = alloca i1, i1 0
- %nop7113 = alloca i1, i1 0
- %nop7114 = alloca i1, i1 0
- %nop7115 = alloca i1, i1 0
- %nop7116 = alloca i1, i1 0
- %nop7117 = alloca i1, i1 0
- %nop7118 = alloca i1, i1 0
- %nop7119 = alloca i1, i1 0
- %nop7120 = alloca i1, i1 0
- %nop7121 = alloca i1, i1 0
- %nop7122 = alloca i1, i1 0
- %nop7123 = alloca i1, i1 0
- %nop7124 = alloca i1, i1 0
- %nop7125 = alloca i1, i1 0
- %nop7126 = alloca i1, i1 0
- %nop7127 = alloca i1, i1 0
- %nop7128 = alloca i1, i1 0
- %nop7129 = alloca i1, i1 0
- %nop7130 = alloca i1, i1 0
- %nop7131 = alloca i1, i1 0
- %nop7132 = alloca i1, i1 0
- %nop7133 = alloca i1, i1 0
- %nop7134 = alloca i1, i1 0
- %nop7135 = alloca i1, i1 0
- %nop7136 = alloca i1, i1 0
- %nop7137 = alloca i1, i1 0
- %nop7138 = alloca i1, i1 0
- %nop7139 = alloca i1, i1 0
- %nop7140 = alloca i1, i1 0
- %nop7141 = alloca i1, i1 0
- %nop7142 = alloca i1, i1 0
- %nop7143 = alloca i1, i1 0
- %nop7144 = alloca i1, i1 0
- %nop7145 = alloca i1, i1 0
- %nop7146 = alloca i1, i1 0
- %nop7147 = alloca i1, i1 0
- %nop7148 = alloca i1, i1 0
- %nop7149 = alloca i1, i1 0
- %nop7150 = alloca i1, i1 0
- %nop7151 = alloca i1, i1 0
- %nop7152 = alloca i1, i1 0
- %nop7153 = alloca i1, i1 0
- %nop7154 = alloca i1, i1 0
- %nop7155 = alloca i1, i1 0
- %nop7156 = alloca i1, i1 0
- %nop7157 = alloca i1, i1 0
- %nop7158 = alloca i1, i1 0
- %nop7159 = alloca i1, i1 0
- %nop7160 = alloca i1, i1 0
- %nop7161 = alloca i1, i1 0
- %nop7162 = alloca i1, i1 0
- %nop7163 = alloca i1, i1 0
- %nop7164 = alloca i1, i1 0
- %nop7165 = alloca i1, i1 0
- %nop7166 = alloca i1, i1 0
- %nop7167 = alloca i1, i1 0
- %nop7168 = alloca i1, i1 0
- %nop7169 = alloca i1, i1 0
- %nop7170 = alloca i1, i1 0
- %nop7171 = alloca i1, i1 0
- %nop7172 = alloca i1, i1 0
- %nop7173 = alloca i1, i1 0
- %nop7174 = alloca i1, i1 0
- %nop7175 = alloca i1, i1 0
- %nop7176 = alloca i1, i1 0
- %nop7177 = alloca i1, i1 0
- %nop7178 = alloca i1, i1 0
- %nop7179 = alloca i1, i1 0
- %nop7180 = alloca i1, i1 0
- %nop7181 = alloca i1, i1 0
- %nop7182 = alloca i1, i1 0
- %nop7183 = alloca i1, i1 0
- %nop7184 = alloca i1, i1 0
- %nop7185 = alloca i1, i1 0
- %nop7186 = alloca i1, i1 0
- %nop7187 = alloca i1, i1 0
- %nop7188 = alloca i1, i1 0
- %nop7189 = alloca i1, i1 0
- %nop7190 = alloca i1, i1 0
- %nop7191 = alloca i1, i1 0
- %nop7192 = alloca i1, i1 0
- %nop7193 = alloca i1, i1 0
- %nop7194 = alloca i1, i1 0
- %nop7195 = alloca i1, i1 0
- %nop7196 = alloca i1, i1 0
- %nop7197 = alloca i1, i1 0
- %nop7198 = alloca i1, i1 0
- %nop7199 = alloca i1, i1 0
- %nop7200 = alloca i1, i1 0
- %nop7201 = alloca i1, i1 0
- %nop7202 = alloca i1, i1 0
- %nop7203 = alloca i1, i1 0
- %nop7204 = alloca i1, i1 0
- %nop7205 = alloca i1, i1 0
- %nop7206 = alloca i1, i1 0
- %nop7207 = alloca i1, i1 0
- %nop7208 = alloca i1, i1 0
- %nop7209 = alloca i1, i1 0
- %nop7210 = alloca i1, i1 0
- %nop7211 = alloca i1, i1 0
- %nop7212 = alloca i1, i1 0
- %nop7213 = alloca i1, i1 0
- %nop7214 = alloca i1, i1 0
- %nop7215 = alloca i1, i1 0
- %nop7216 = alloca i1, i1 0
- %nop7217 = alloca i1, i1 0
- %nop7218 = alloca i1, i1 0
- %nop7219 = alloca i1, i1 0
- %nop7220 = alloca i1, i1 0
- %nop7221 = alloca i1, i1 0
- %nop7222 = alloca i1, i1 0
- %nop7223 = alloca i1, i1 0
- %nop7224 = alloca i1, i1 0
- %nop7225 = alloca i1, i1 0
- %nop7226 = alloca i1, i1 0
- %nop7227 = alloca i1, i1 0
- %nop7228 = alloca i1, i1 0
- %nop7229 = alloca i1, i1 0
- %nop7230 = alloca i1, i1 0
- %nop7231 = alloca i1, i1 0
- %nop7232 = alloca i1, i1 0
- %nop7233 = alloca i1, i1 0
- %nop7234 = alloca i1, i1 0
- %nop7235 = alloca i1, i1 0
- %nop7236 = alloca i1, i1 0
- %nop7237 = alloca i1, i1 0
- %nop7238 = alloca i1, i1 0
- %nop7239 = alloca i1, i1 0
- %nop7240 = alloca i1, i1 0
- %nop7241 = alloca i1, i1 0
- %nop7242 = alloca i1, i1 0
- %nop7243 = alloca i1, i1 0
- %nop7244 = alloca i1, i1 0
- %nop7245 = alloca i1, i1 0
- %nop7246 = alloca i1, i1 0
- %nop7247 = alloca i1, i1 0
- %nop7248 = alloca i1, i1 0
- %nop7249 = alloca i1, i1 0
- %nop7250 = alloca i1, i1 0
- %nop7251 = alloca i1, i1 0
- %nop7252 = alloca i1, i1 0
- %nop7253 = alloca i1, i1 0
- %nop7254 = alloca i1, i1 0
- %nop7255 = alloca i1, i1 0
- %nop7256 = alloca i1, i1 0
- %nop7257 = alloca i1, i1 0
- %nop7258 = alloca i1, i1 0
- %nop7259 = alloca i1, i1 0
- %nop7260 = alloca i1, i1 0
- %nop7261 = alloca i1, i1 0
- %nop7262 = alloca i1, i1 0
- %nop7263 = alloca i1, i1 0
- %nop7264 = alloca i1, i1 0
- %nop7265 = alloca i1, i1 0
- %nop7266 = alloca i1, i1 0
- %nop7267 = alloca i1, i1 0
- %nop7268 = alloca i1, i1 0
- %nop7269 = alloca i1, i1 0
- %nop7270 = alloca i1, i1 0
- %nop7271 = alloca i1, i1 0
- %nop7272 = alloca i1, i1 0
- %nop7273 = alloca i1, i1 0
- %nop7274 = alloca i1, i1 0
- %nop7275 = alloca i1, i1 0
- %nop7276 = alloca i1, i1 0
- %nop7277 = alloca i1, i1 0
- %nop7278 = alloca i1, i1 0
- %nop7279 = alloca i1, i1 0
- %nop7280 = alloca i1, i1 0
- %nop7281 = alloca i1, i1 0
- %nop7282 = alloca i1, i1 0
- %nop7283 = alloca i1, i1 0
- %nop7284 = alloca i1, i1 0
- %nop7285 = alloca i1, i1 0
- %nop7286 = alloca i1, i1 0
- %nop7287 = alloca i1, i1 0
- %nop7288 = alloca i1, i1 0
- %nop7289 = alloca i1, i1 0
- %nop7290 = alloca i1, i1 0
- %nop7291 = alloca i1, i1 0
- %nop7292 = alloca i1, i1 0
- %nop7293 = alloca i1, i1 0
- %nop7294 = alloca i1, i1 0
- %nop7295 = alloca i1, i1 0
- %nop7296 = alloca i1, i1 0
- %nop7297 = alloca i1, i1 0
- %nop7298 = alloca i1, i1 0
- %nop7299 = alloca i1, i1 0
- %nop7300 = alloca i1, i1 0
- %nop7301 = alloca i1, i1 0
- %nop7302 = alloca i1, i1 0
- %nop7303 = alloca i1, i1 0
- %nop7304 = alloca i1, i1 0
- %nop7305 = alloca i1, i1 0
- %nop7306 = alloca i1, i1 0
- %nop7307 = alloca i1, i1 0
- %nop7308 = alloca i1, i1 0
- %nop7309 = alloca i1, i1 0
- %nop7310 = alloca i1, i1 0
- %nop7311 = alloca i1, i1 0
- %nop7312 = alloca i1, i1 0
- %nop7313 = alloca i1, i1 0
- %nop7314 = alloca i1, i1 0
- %nop7315 = alloca i1, i1 0
- %nop7316 = alloca i1, i1 0
- %nop7317 = alloca i1, i1 0
- %nop7318 = alloca i1, i1 0
- %nop7319 = alloca i1, i1 0
- %nop7320 = alloca i1, i1 0
- %nop7321 = alloca i1, i1 0
- %nop7322 = alloca i1, i1 0
- %nop7323 = alloca i1, i1 0
- %nop7324 = alloca i1, i1 0
- %nop7325 = alloca i1, i1 0
- %nop7326 = alloca i1, i1 0
- %nop7327 = alloca i1, i1 0
- %nop7328 = alloca i1, i1 0
- %nop7329 = alloca i1, i1 0
- %nop7330 = alloca i1, i1 0
- %nop7331 = alloca i1, i1 0
- %nop7332 = alloca i1, i1 0
- %nop7333 = alloca i1, i1 0
- %nop7334 = alloca i1, i1 0
- %nop7335 = alloca i1, i1 0
- %nop7336 = alloca i1, i1 0
- %nop7337 = alloca i1, i1 0
- %nop7338 = alloca i1, i1 0
- %nop7339 = alloca i1, i1 0
- %nop7340 = alloca i1, i1 0
- %nop7341 = alloca i1, i1 0
- %nop7342 = alloca i1, i1 0
- %nop7343 = alloca i1, i1 0
- %nop7344 = alloca i1, i1 0
- %nop7345 = alloca i1, i1 0
- %nop7346 = alloca i1, i1 0
- %nop7347 = alloca i1, i1 0
- %nop7348 = alloca i1, i1 0
- %nop7349 = alloca i1, i1 0
- %nop7350 = alloca i1, i1 0
- %nop7351 = alloca i1, i1 0
- %nop7352 = alloca i1, i1 0
- %nop7353 = alloca i1, i1 0
- %nop7354 = alloca i1, i1 0
- %nop7355 = alloca i1, i1 0
- %nop7356 = alloca i1, i1 0
- %nop7357 = alloca i1, i1 0
- %nop7358 = alloca i1, i1 0
- %nop7359 = alloca i1, i1 0
- %nop7360 = alloca i1, i1 0
- %nop7361 = alloca i1, i1 0
- %nop7362 = alloca i1, i1 0
- %nop7363 = alloca i1, i1 0
- %nop7364 = alloca i1, i1 0
- %nop7365 = alloca i1, i1 0
- %nop7366 = alloca i1, i1 0
- %nop7367 = alloca i1, i1 0
- %nop7368 = alloca i1, i1 0
- %nop7369 = alloca i1, i1 0
- %nop7370 = alloca i1, i1 0
- %nop7371 = alloca i1, i1 0
- %nop7372 = alloca i1, i1 0
- %nop7373 = alloca i1, i1 0
- %nop7374 = alloca i1, i1 0
- %nop7375 = alloca i1, i1 0
- %nop7376 = alloca i1, i1 0
- %nop7377 = alloca i1, i1 0
- %nop7378 = alloca i1, i1 0
- %nop7379 = alloca i1, i1 0
- %nop7380 = alloca i1, i1 0
- %nop7381 = alloca i1, i1 0
- %nop7382 = alloca i1, i1 0
- %nop7383 = alloca i1, i1 0
- %nop7384 = alloca i1, i1 0
- %nop7385 = alloca i1, i1 0
- %nop7386 = alloca i1, i1 0
- %nop7387 = alloca i1, i1 0
- %nop7388 = alloca i1, i1 0
- %nop7389 = alloca i1, i1 0
- %nop7390 = alloca i1, i1 0
- %nop7391 = alloca i1, i1 0
- %nop7392 = alloca i1, i1 0
- %nop7393 = alloca i1, i1 0
- %nop7394 = alloca i1, i1 0
- %nop7395 = alloca i1, i1 0
- %nop7396 = alloca i1, i1 0
- %nop7397 = alloca i1, i1 0
- %nop7398 = alloca i1, i1 0
- %nop7399 = alloca i1, i1 0
- %nop7400 = alloca i1, i1 0
- %nop7401 = alloca i1, i1 0
- %nop7402 = alloca i1, i1 0
- %nop7403 = alloca i1, i1 0
- %nop7404 = alloca i1, i1 0
- %nop7405 = alloca i1, i1 0
- %nop7406 = alloca i1, i1 0
- %nop7407 = alloca i1, i1 0
- %nop7408 = alloca i1, i1 0
- %nop7409 = alloca i1, i1 0
- %nop7410 = alloca i1, i1 0
- %nop7411 = alloca i1, i1 0
- %nop7412 = alloca i1, i1 0
- %nop7413 = alloca i1, i1 0
- %nop7414 = alloca i1, i1 0
- %nop7415 = alloca i1, i1 0
- %nop7416 = alloca i1, i1 0
- %nop7417 = alloca i1, i1 0
- %nop7418 = alloca i1, i1 0
- %nop7419 = alloca i1, i1 0
- %nop7420 = alloca i1, i1 0
- %nop7421 = alloca i1, i1 0
- %nop7422 = alloca i1, i1 0
- %nop7423 = alloca i1, i1 0
- %nop7424 = alloca i1, i1 0
- %nop7425 = alloca i1, i1 0
- %nop7426 = alloca i1, i1 0
- %nop7427 = alloca i1, i1 0
- %nop7428 = alloca i1, i1 0
- %nop7429 = alloca i1, i1 0
- %nop7430 = alloca i1, i1 0
- %nop7431 = alloca i1, i1 0
- %nop7432 = alloca i1, i1 0
- %nop7433 = alloca i1, i1 0
- %nop7434 = alloca i1, i1 0
- %nop7435 = alloca i1, i1 0
- %nop7436 = alloca i1, i1 0
- %nop7437 = alloca i1, i1 0
- %nop7438 = alloca i1, i1 0
- %nop7439 = alloca i1, i1 0
- %nop7440 = alloca i1, i1 0
- %nop7441 = alloca i1, i1 0
- %nop7442 = alloca i1, i1 0
- %nop7443 = alloca i1, i1 0
- %nop7444 = alloca i1, i1 0
- %nop7445 = alloca i1, i1 0
- %nop7446 = alloca i1, i1 0
- %nop7447 = alloca i1, i1 0
- %nop7448 = alloca i1, i1 0
- %nop7449 = alloca i1, i1 0
- %nop7450 = alloca i1, i1 0
- %nop7451 = alloca i1, i1 0
- %nop7452 = alloca i1, i1 0
- %nop7453 = alloca i1, i1 0
- %nop7454 = alloca i1, i1 0
- %nop7455 = alloca i1, i1 0
- %nop7456 = alloca i1, i1 0
- %nop7457 = alloca i1, i1 0
- %nop7458 = alloca i1, i1 0
- %nop7459 = alloca i1, i1 0
- %nop7460 = alloca i1, i1 0
- %nop7461 = alloca i1, i1 0
- %nop7462 = alloca i1, i1 0
- %nop7463 = alloca i1, i1 0
- %nop7464 = alloca i1, i1 0
- %nop7465 = alloca i1, i1 0
- %nop7466 = alloca i1, i1 0
- %nop7467 = alloca i1, i1 0
- %nop7468 = alloca i1, i1 0
- %nop7469 = alloca i1, i1 0
- %nop7470 = alloca i1, i1 0
- %nop7471 = alloca i1, i1 0
- %nop7472 = alloca i1, i1 0
- %nop7473 = alloca i1, i1 0
- %nop7474 = alloca i1, i1 0
- %nop7475 = alloca i1, i1 0
- %nop7476 = alloca i1, i1 0
- %nop7477 = alloca i1, i1 0
- %nop7478 = alloca i1, i1 0
- %nop7479 = alloca i1, i1 0
- %nop7480 = alloca i1, i1 0
- %nop7481 = alloca i1, i1 0
- %nop7482 = alloca i1, i1 0
- %nop7483 = alloca i1, i1 0
- %nop7484 = alloca i1, i1 0
- %nop7485 = alloca i1, i1 0
- %nop7486 = alloca i1, i1 0
- %nop7487 = alloca i1, i1 0
- %nop7488 = alloca i1, i1 0
- %nop7489 = alloca i1, i1 0
- %nop7490 = alloca i1, i1 0
- %nop7491 = alloca i1, i1 0
- %nop7492 = alloca i1, i1 0
- %nop7493 = alloca i1, i1 0
- %nop7494 = alloca i1, i1 0
- %nop7495 = alloca i1, i1 0
- %nop7496 = alloca i1, i1 0
- %nop7497 = alloca i1, i1 0
- %nop7498 = alloca i1, i1 0
- %nop7499 = alloca i1, i1 0
- %nop7500 = alloca i1, i1 0
- %nop7501 = alloca i1, i1 0
- %nop7502 = alloca i1, i1 0
- %nop7503 = alloca i1, i1 0
- %nop7504 = alloca i1, i1 0
- %nop7505 = alloca i1, i1 0
- %nop7506 = alloca i1, i1 0
- %nop7507 = alloca i1, i1 0
- %nop7508 = alloca i1, i1 0
- %nop7509 = alloca i1, i1 0
- %nop7510 = alloca i1, i1 0
- %nop7511 = alloca i1, i1 0
- %nop7512 = alloca i1, i1 0
- %nop7513 = alloca i1, i1 0
- %nop7514 = alloca i1, i1 0
- %nop7515 = alloca i1, i1 0
- %nop7516 = alloca i1, i1 0
- %nop7517 = alloca i1, i1 0
- %nop7518 = alloca i1, i1 0
- %nop7519 = alloca i1, i1 0
- %nop7520 = alloca i1, i1 0
- %nop7521 = alloca i1, i1 0
- %nop7522 = alloca i1, i1 0
- %nop7523 = alloca i1, i1 0
- %nop7524 = alloca i1, i1 0
- %nop7525 = alloca i1, i1 0
- %nop7526 = alloca i1, i1 0
- %nop7527 = alloca i1, i1 0
- %nop7528 = alloca i1, i1 0
- %nop7529 = alloca i1, i1 0
- %nop7530 = alloca i1, i1 0
- %nop7531 = alloca i1, i1 0
- %nop7532 = alloca i1, i1 0
- %nop7533 = alloca i1, i1 0
- %nop7534 = alloca i1, i1 0
- %nop7535 = alloca i1, i1 0
- %nop7536 = alloca i1, i1 0
- %nop7537 = alloca i1, i1 0
- %nop7538 = alloca i1, i1 0
- %nop7539 = alloca i1, i1 0
- %nop7540 = alloca i1, i1 0
- %nop7541 = alloca i1, i1 0
- %nop7542 = alloca i1, i1 0
- %nop7543 = alloca i1, i1 0
- %nop7544 = alloca i1, i1 0
- %nop7545 = alloca i1, i1 0
- %nop7546 = alloca i1, i1 0
- %nop7547 = alloca i1, i1 0
- %nop7548 = alloca i1, i1 0
- %nop7549 = alloca i1, i1 0
- %nop7550 = alloca i1, i1 0
- %nop7551 = alloca i1, i1 0
- %nop7552 = alloca i1, i1 0
- %nop7553 = alloca i1, i1 0
- %nop7554 = alloca i1, i1 0
- %nop7555 = alloca i1, i1 0
- %nop7556 = alloca i1, i1 0
- %nop7557 = alloca i1, i1 0
- %nop7558 = alloca i1, i1 0
- %nop7559 = alloca i1, i1 0
- %nop7560 = alloca i1, i1 0
- %nop7561 = alloca i1, i1 0
- %nop7562 = alloca i1, i1 0
- %nop7563 = alloca i1, i1 0
- %nop7564 = alloca i1, i1 0
- %nop7565 = alloca i1, i1 0
- %nop7566 = alloca i1, i1 0
- %nop7567 = alloca i1, i1 0
- %nop7568 = alloca i1, i1 0
- %nop7569 = alloca i1, i1 0
- %nop7570 = alloca i1, i1 0
- %nop7571 = alloca i1, i1 0
- %nop7572 = alloca i1, i1 0
- %nop7573 = alloca i1, i1 0
- %nop7574 = alloca i1, i1 0
- %nop7575 = alloca i1, i1 0
- %nop7576 = alloca i1, i1 0
- %nop7577 = alloca i1, i1 0
- %nop7578 = alloca i1, i1 0
- %nop7579 = alloca i1, i1 0
- %nop7580 = alloca i1, i1 0
- %nop7581 = alloca i1, i1 0
- %nop7582 = alloca i1, i1 0
- %nop7583 = alloca i1, i1 0
- %nop7584 = alloca i1, i1 0
- %nop7585 = alloca i1, i1 0
- %nop7586 = alloca i1, i1 0
- %nop7587 = alloca i1, i1 0
- %nop7588 = alloca i1, i1 0
- %nop7589 = alloca i1, i1 0
- %nop7590 = alloca i1, i1 0
- %nop7591 = alloca i1, i1 0
- %nop7592 = alloca i1, i1 0
- %nop7593 = alloca i1, i1 0
- %nop7594 = alloca i1, i1 0
- %nop7595 = alloca i1, i1 0
- %nop7596 = alloca i1, i1 0
- %nop7597 = alloca i1, i1 0
- %nop7598 = alloca i1, i1 0
- %nop7599 = alloca i1, i1 0
- %nop7600 = alloca i1, i1 0
- %nop7601 = alloca i1, i1 0
- %nop7602 = alloca i1, i1 0
- %nop7603 = alloca i1, i1 0
- %nop7604 = alloca i1, i1 0
- %nop7605 = alloca i1, i1 0
- %nop7606 = alloca i1, i1 0
- %nop7607 = alloca i1, i1 0
- %nop7608 = alloca i1, i1 0
- %nop7609 = alloca i1, i1 0
- %nop7610 = alloca i1, i1 0
- %nop7611 = alloca i1, i1 0
- %nop7612 = alloca i1, i1 0
- %nop7613 = alloca i1, i1 0
- %nop7614 = alloca i1, i1 0
- %nop7615 = alloca i1, i1 0
- %nop7616 = alloca i1, i1 0
- %nop7617 = alloca i1, i1 0
- %nop7618 = alloca i1, i1 0
- %nop7619 = alloca i1, i1 0
- %nop7620 = alloca i1, i1 0
- %nop7621 = alloca i1, i1 0
- %nop7622 = alloca i1, i1 0
- %nop7623 = alloca i1, i1 0
- %nop7624 = alloca i1, i1 0
- %nop7625 = alloca i1, i1 0
- %nop7626 = alloca i1, i1 0
- %nop7627 = alloca i1, i1 0
- %nop7628 = alloca i1, i1 0
- %nop7629 = alloca i1, i1 0
- %nop7630 = alloca i1, i1 0
- %nop7631 = alloca i1, i1 0
- %nop7632 = alloca i1, i1 0
- %nop7633 = alloca i1, i1 0
- %nop7634 = alloca i1, i1 0
- %nop7635 = alloca i1, i1 0
- %nop7636 = alloca i1, i1 0
- %nop7637 = alloca i1, i1 0
- %nop7638 = alloca i1, i1 0
- %nop7639 = alloca i1, i1 0
- %nop7640 = alloca i1, i1 0
- %nop7641 = alloca i1, i1 0
- %nop7642 = alloca i1, i1 0
- %nop7643 = alloca i1, i1 0
- %nop7644 = alloca i1, i1 0
- %nop7645 = alloca i1, i1 0
- %nop7646 = alloca i1, i1 0
- %nop7647 = alloca i1, i1 0
- %nop7648 = alloca i1, i1 0
- %nop7649 = alloca i1, i1 0
- %nop7650 = alloca i1, i1 0
- %nop7651 = alloca i1, i1 0
- %nop7652 = alloca i1, i1 0
- %nop7653 = alloca i1, i1 0
- %nop7654 = alloca i1, i1 0
- %nop7655 = alloca i1, i1 0
- %nop7656 = alloca i1, i1 0
- %nop7657 = alloca i1, i1 0
- %nop7658 = alloca i1, i1 0
- %nop7659 = alloca i1, i1 0
- %nop7660 = alloca i1, i1 0
- %nop7661 = alloca i1, i1 0
- %nop7662 = alloca i1, i1 0
- %nop7663 = alloca i1, i1 0
- %nop7664 = alloca i1, i1 0
- %nop7665 = alloca i1, i1 0
- %nop7666 = alloca i1, i1 0
- %nop7667 = alloca i1, i1 0
- %nop7668 = alloca i1, i1 0
- %nop7669 = alloca i1, i1 0
- %nop7670 = alloca i1, i1 0
- %nop7671 = alloca i1, i1 0
- %nop7672 = alloca i1, i1 0
- %nop7673 = alloca i1, i1 0
- %nop7674 = alloca i1, i1 0
- %nop7675 = alloca i1, i1 0
- %nop7676 = alloca i1, i1 0
- %nop7677 = alloca i1, i1 0
- %nop7678 = alloca i1, i1 0
- %nop7679 = alloca i1, i1 0
- %nop7680 = alloca i1, i1 0
- %nop7681 = alloca i1, i1 0
- %nop7682 = alloca i1, i1 0
- %nop7683 = alloca i1, i1 0
- %nop7684 = alloca i1, i1 0
- %nop7685 = alloca i1, i1 0
- %nop7686 = alloca i1, i1 0
- %nop7687 = alloca i1, i1 0
- %nop7688 = alloca i1, i1 0
- %nop7689 = alloca i1, i1 0
- %nop7690 = alloca i1, i1 0
- %nop7691 = alloca i1, i1 0
- %nop7692 = alloca i1, i1 0
- %nop7693 = alloca i1, i1 0
- %nop7694 = alloca i1, i1 0
- %nop7695 = alloca i1, i1 0
- %nop7696 = alloca i1, i1 0
- %nop7697 = alloca i1, i1 0
- %nop7698 = alloca i1, i1 0
- %nop7699 = alloca i1, i1 0
- %nop7700 = alloca i1, i1 0
- %nop7701 = alloca i1, i1 0
- %nop7702 = alloca i1, i1 0
- %nop7703 = alloca i1, i1 0
- %nop7704 = alloca i1, i1 0
- %nop7705 = alloca i1, i1 0
- %nop7706 = alloca i1, i1 0
- %nop7707 = alloca i1, i1 0
- %nop7708 = alloca i1, i1 0
- %nop7709 = alloca i1, i1 0
- %nop7710 = alloca i1, i1 0
- %nop7711 = alloca i1, i1 0
- %nop7712 = alloca i1, i1 0
- %nop7713 = alloca i1, i1 0
- %nop7714 = alloca i1, i1 0
- %nop7715 = alloca i1, i1 0
- %nop7716 = alloca i1, i1 0
- %nop7717 = alloca i1, i1 0
- %nop7718 = alloca i1, i1 0
- %nop7719 = alloca i1, i1 0
- %nop7720 = alloca i1, i1 0
- %nop7721 = alloca i1, i1 0
- %nop7722 = alloca i1, i1 0
- %nop7723 = alloca i1, i1 0
- %nop7724 = alloca i1, i1 0
- %nop7725 = alloca i1, i1 0
- %nop7726 = alloca i1, i1 0
- %nop7727 = alloca i1, i1 0
- %nop7728 = alloca i1, i1 0
- %nop7729 = alloca i1, i1 0
- %nop7730 = alloca i1, i1 0
- %nop7731 = alloca i1, i1 0
- %nop7732 = alloca i1, i1 0
- %nop7733 = alloca i1, i1 0
- %nop7734 = alloca i1, i1 0
- %nop7735 = alloca i1, i1 0
- %nop7736 = alloca i1, i1 0
- %nop7737 = alloca i1, i1 0
- %nop7738 = alloca i1, i1 0
- %nop7739 = alloca i1, i1 0
- %nop7740 = alloca i1, i1 0
- %nop7741 = alloca i1, i1 0
- %nop7742 = alloca i1, i1 0
- %nop7743 = alloca i1, i1 0
- %nop7744 = alloca i1, i1 0
- %nop7745 = alloca i1, i1 0
- %nop7746 = alloca i1, i1 0
- %nop7747 = alloca i1, i1 0
- %nop7748 = alloca i1, i1 0
- %nop7749 = alloca i1, i1 0
- %nop7750 = alloca i1, i1 0
- %nop7751 = alloca i1, i1 0
- %nop7752 = alloca i1, i1 0
- %nop7753 = alloca i1, i1 0
- %nop7754 = alloca i1, i1 0
- %nop7755 = alloca i1, i1 0
- %nop7756 = alloca i1, i1 0
- %nop7757 = alloca i1, i1 0
- %nop7758 = alloca i1, i1 0
- %nop7759 = alloca i1, i1 0
- %nop7760 = alloca i1, i1 0
- %nop7761 = alloca i1, i1 0
- %nop7762 = alloca i1, i1 0
- %nop7763 = alloca i1, i1 0
- %nop7764 = alloca i1, i1 0
- %nop7765 = alloca i1, i1 0
- %nop7766 = alloca i1, i1 0
- %nop7767 = alloca i1, i1 0
- %nop7768 = alloca i1, i1 0
- %nop7769 = alloca i1, i1 0
- %nop7770 = alloca i1, i1 0
- %nop7771 = alloca i1, i1 0
- %nop7772 = alloca i1, i1 0
- %nop7773 = alloca i1, i1 0
- %nop7774 = alloca i1, i1 0
- %nop7775 = alloca i1, i1 0
- %nop7776 = alloca i1, i1 0
- %nop7777 = alloca i1, i1 0
- %nop7778 = alloca i1, i1 0
- %nop7779 = alloca i1, i1 0
- %nop7780 = alloca i1, i1 0
- %nop7781 = alloca i1, i1 0
- %nop7782 = alloca i1, i1 0
- %nop7783 = alloca i1, i1 0
- %nop7784 = alloca i1, i1 0
- %nop7785 = alloca i1, i1 0
- %nop7786 = alloca i1, i1 0
- %nop7787 = alloca i1, i1 0
- %nop7788 = alloca i1, i1 0
- %nop7789 = alloca i1, i1 0
- %nop7790 = alloca i1, i1 0
- %nop7791 = alloca i1, i1 0
- %nop7792 = alloca i1, i1 0
- %nop7793 = alloca i1, i1 0
- %nop7794 = alloca i1, i1 0
- %nop7795 = alloca i1, i1 0
- %nop7796 = alloca i1, i1 0
- %nop7797 = alloca i1, i1 0
- %nop7798 = alloca i1, i1 0
- %nop7799 = alloca i1, i1 0
- %nop7800 = alloca i1, i1 0
- %nop7801 = alloca i1, i1 0
- %nop7802 = alloca i1, i1 0
- %nop7803 = alloca i1, i1 0
- %nop7804 = alloca i1, i1 0
- %nop7805 = alloca i1, i1 0
- %nop7806 = alloca i1, i1 0
- %nop7807 = alloca i1, i1 0
- %nop7808 = alloca i1, i1 0
- %nop7809 = alloca i1, i1 0
- %nop7810 = alloca i1, i1 0
- %nop7811 = alloca i1, i1 0
- %nop7812 = alloca i1, i1 0
- %nop7813 = alloca i1, i1 0
- %nop7814 = alloca i1, i1 0
- %nop7815 = alloca i1, i1 0
- %nop7816 = alloca i1, i1 0
- %nop7817 = alloca i1, i1 0
- %nop7818 = alloca i1, i1 0
- %nop7819 = alloca i1, i1 0
- %nop7820 = alloca i1, i1 0
- %nop7821 = alloca i1, i1 0
- %nop7822 = alloca i1, i1 0
- %nop7823 = alloca i1, i1 0
- %nop7824 = alloca i1, i1 0
- %nop7825 = alloca i1, i1 0
- %nop7826 = alloca i1, i1 0
- %nop7827 = alloca i1, i1 0
- %nop7828 = alloca i1, i1 0
- %nop7829 = alloca i1, i1 0
- %nop7830 = alloca i1, i1 0
- %nop7831 = alloca i1, i1 0
- %nop7832 = alloca i1, i1 0
- %nop7833 = alloca i1, i1 0
- %nop7834 = alloca i1, i1 0
- %nop7835 = alloca i1, i1 0
- %nop7836 = alloca i1, i1 0
- %nop7837 = alloca i1, i1 0
- %nop7838 = alloca i1, i1 0
- %nop7839 = alloca i1, i1 0
- %nop7840 = alloca i1, i1 0
- %nop7841 = alloca i1, i1 0
- %nop7842 = alloca i1, i1 0
- %nop7843 = alloca i1, i1 0
- %nop7844 = alloca i1, i1 0
- %nop7845 = alloca i1, i1 0
- %nop7846 = alloca i1, i1 0
- %nop7847 = alloca i1, i1 0
- %nop7848 = alloca i1, i1 0
- %nop7849 = alloca i1, i1 0
- %nop7850 = alloca i1, i1 0
- %nop7851 = alloca i1, i1 0
- %nop7852 = alloca i1, i1 0
- %nop7853 = alloca i1, i1 0
- %nop7854 = alloca i1, i1 0
- %nop7855 = alloca i1, i1 0
- %nop7856 = alloca i1, i1 0
- %nop7857 = alloca i1, i1 0
- %nop7858 = alloca i1, i1 0
- %nop7859 = alloca i1, i1 0
- %nop7860 = alloca i1, i1 0
- %nop7861 = alloca i1, i1 0
- %nop7862 = alloca i1, i1 0
- %nop7863 = alloca i1, i1 0
- %nop7864 = alloca i1, i1 0
- %nop7865 = alloca i1, i1 0
- %nop7866 = alloca i1, i1 0
- %nop7867 = alloca i1, i1 0
- %nop7868 = alloca i1, i1 0
- %nop7869 = alloca i1, i1 0
- %nop7870 = alloca i1, i1 0
- %nop7871 = alloca i1, i1 0
- %nop7872 = alloca i1, i1 0
- %nop7873 = alloca i1, i1 0
- %nop7874 = alloca i1, i1 0
- %nop7875 = alloca i1, i1 0
- %nop7876 = alloca i1, i1 0
- %nop7877 = alloca i1, i1 0
- %nop7878 = alloca i1, i1 0
- %nop7879 = alloca i1, i1 0
- %nop7880 = alloca i1, i1 0
- %nop7881 = alloca i1, i1 0
- %nop7882 = alloca i1, i1 0
- %nop7883 = alloca i1, i1 0
- %nop7884 = alloca i1, i1 0
- %nop7885 = alloca i1, i1 0
- %nop7886 = alloca i1, i1 0
- %nop7887 = alloca i1, i1 0
- %nop7888 = alloca i1, i1 0
- %nop7889 = alloca i1, i1 0
- %nop7890 = alloca i1, i1 0
- %nop7891 = alloca i1, i1 0
- %nop7892 = alloca i1, i1 0
- %nop7893 = alloca i1, i1 0
- %nop7894 = alloca i1, i1 0
- %nop7895 = alloca i1, i1 0
- %nop7896 = alloca i1, i1 0
- %nop7897 = alloca i1, i1 0
- %nop7898 = alloca i1, i1 0
- %nop7899 = alloca i1, i1 0
- %nop7900 = alloca i1, i1 0
- %nop7901 = alloca i1, i1 0
- %nop7902 = alloca i1, i1 0
- %nop7903 = alloca i1, i1 0
- %nop7904 = alloca i1, i1 0
- %nop7905 = alloca i1, i1 0
- %nop7906 = alloca i1, i1 0
- %nop7907 = alloca i1, i1 0
- %nop7908 = alloca i1, i1 0
- %nop7909 = alloca i1, i1 0
- %nop7910 = alloca i1, i1 0
- %nop7911 = alloca i1, i1 0
- %nop7912 = alloca i1, i1 0
- %nop7913 = alloca i1, i1 0
- %nop7914 = alloca i1, i1 0
- %nop7915 = alloca i1, i1 0
- %nop7916 = alloca i1, i1 0
- %nop7917 = alloca i1, i1 0
- %nop7918 = alloca i1, i1 0
- %nop7919 = alloca i1, i1 0
- %nop7920 = alloca i1, i1 0
- %nop7921 = alloca i1, i1 0
- %nop7922 = alloca i1, i1 0
- %nop7923 = alloca i1, i1 0
- %nop7924 = alloca i1, i1 0
- %nop7925 = alloca i1, i1 0
- %nop7926 = alloca i1, i1 0
- %nop7927 = alloca i1, i1 0
- %nop7928 = alloca i1, i1 0
- %nop7929 = alloca i1, i1 0
- %nop7930 = alloca i1, i1 0
- %nop7931 = alloca i1, i1 0
- %nop7932 = alloca i1, i1 0
- %nop7933 = alloca i1, i1 0
- %nop7934 = alloca i1, i1 0
- %nop7935 = alloca i1, i1 0
- %nop7936 = alloca i1, i1 0
- %nop7937 = alloca i1, i1 0
- %nop7938 = alloca i1, i1 0
- %nop7939 = alloca i1, i1 0
- %nop7940 = alloca i1, i1 0
- %nop7941 = alloca i1, i1 0
- %nop7942 = alloca i1, i1 0
- %nop7943 = alloca i1, i1 0
- %nop7944 = alloca i1, i1 0
- %nop7945 = alloca i1, i1 0
- %nop7946 = alloca i1, i1 0
- %nop7947 = alloca i1, i1 0
- %nop7948 = alloca i1, i1 0
- %nop7949 = alloca i1, i1 0
- %nop7950 = alloca i1, i1 0
- %nop7951 = alloca i1, i1 0
- %nop7952 = alloca i1, i1 0
- %nop7953 = alloca i1, i1 0
- %nop7954 = alloca i1, i1 0
- %nop7955 = alloca i1, i1 0
- %nop7956 = alloca i1, i1 0
- %nop7957 = alloca i1, i1 0
- %nop7958 = alloca i1, i1 0
- %nop7959 = alloca i1, i1 0
- %nop7960 = alloca i1, i1 0
- %nop7961 = alloca i1, i1 0
- %nop7962 = alloca i1, i1 0
- %nop7963 = alloca i1, i1 0
- %nop7964 = alloca i1, i1 0
- %nop7965 = alloca i1, i1 0
- %nop7966 = alloca i1, i1 0
- %nop7967 = alloca i1, i1 0
- %nop7968 = alloca i1, i1 0
- %nop7969 = alloca i1, i1 0
- %nop7970 = alloca i1, i1 0
- %nop7971 = alloca i1, i1 0
- %nop7972 = alloca i1, i1 0
- %nop7973 = alloca i1, i1 0
- %nop7974 = alloca i1, i1 0
- %nop7975 = alloca i1, i1 0
- %nop7976 = alloca i1, i1 0
- %nop7977 = alloca i1, i1 0
- %nop7978 = alloca i1, i1 0
- %nop7979 = alloca i1, i1 0
- %nop7980 = alloca i1, i1 0
- %nop7981 = alloca i1, i1 0
- %nop7982 = alloca i1, i1 0
- %nop7983 = alloca i1, i1 0
- %nop7984 = alloca i1, i1 0
- %nop7985 = alloca i1, i1 0
- %nop7986 = alloca i1, i1 0
- %nop7987 = alloca i1, i1 0
- %nop7988 = alloca i1, i1 0
- %nop7989 = alloca i1, i1 0
- %nop7990 = alloca i1, i1 0
- %nop7991 = alloca i1, i1 0
- %nop7992 = alloca i1, i1 0
- %nop7993 = alloca i1, i1 0
- %nop7994 = alloca i1, i1 0
- %nop7995 = alloca i1, i1 0
- %nop7996 = alloca i1, i1 0
- %nop7997 = alloca i1, i1 0
- %nop7998 = alloca i1, i1 0
- %nop7999 = alloca i1, i1 0
- %nop8000 = alloca i1, i1 0
- %nop8001 = alloca i1, i1 0
- %nop8002 = alloca i1, i1 0
- %nop8003 = alloca i1, i1 0
- %nop8004 = alloca i1, i1 0
- %nop8005 = alloca i1, i1 0
- %nop8006 = alloca i1, i1 0
- %nop8007 = alloca i1, i1 0
- %nop8008 = alloca i1, i1 0
- %nop8009 = alloca i1, i1 0
- %nop8010 = alloca i1, i1 0
- %nop8011 = alloca i1, i1 0
- %nop8012 = alloca i1, i1 0
- %nop8013 = alloca i1, i1 0
- %nop8014 = alloca i1, i1 0
- %nop8015 = alloca i1, i1 0
- %nop8016 = alloca i1, i1 0
- %nop8017 = alloca i1, i1 0
- %nop8018 = alloca i1, i1 0
- %nop8019 = alloca i1, i1 0
- %nop8020 = alloca i1, i1 0
- %nop8021 = alloca i1, i1 0
- %nop8022 = alloca i1, i1 0
- %nop8023 = alloca i1, i1 0
- %nop8024 = alloca i1, i1 0
- %nop8025 = alloca i1, i1 0
- %nop8026 = alloca i1, i1 0
- %nop8027 = alloca i1, i1 0
- %nop8028 = alloca i1, i1 0
- %nop8029 = alloca i1, i1 0
- %nop8030 = alloca i1, i1 0
- %nop8031 = alloca i1, i1 0
- %nop8032 = alloca i1, i1 0
- %nop8033 = alloca i1, i1 0
- %nop8034 = alloca i1, i1 0
- %nop8035 = alloca i1, i1 0
- %nop8036 = alloca i1, i1 0
- %nop8037 = alloca i1, i1 0
- %nop8038 = alloca i1, i1 0
- %nop8039 = alloca i1, i1 0
- %nop8040 = alloca i1, i1 0
- %nop8041 = alloca i1, i1 0
- %nop8042 = alloca i1, i1 0
- %nop8043 = alloca i1, i1 0
- %nop8044 = alloca i1, i1 0
- %nop8045 = alloca i1, i1 0
- %nop8046 = alloca i1, i1 0
- %nop8047 = alloca i1, i1 0
- %nop8048 = alloca i1, i1 0
- %nop8049 = alloca i1, i1 0
- %nop8050 = alloca i1, i1 0
- %nop8051 = alloca i1, i1 0
- %nop8052 = alloca i1, i1 0
- %nop8053 = alloca i1, i1 0
- %nop8054 = alloca i1, i1 0
- %nop8055 = alloca i1, i1 0
- %nop8056 = alloca i1, i1 0
- %nop8057 = alloca i1, i1 0
- %nop8058 = alloca i1, i1 0
- %nop8059 = alloca i1, i1 0
- %nop8060 = alloca i1, i1 0
- %nop8061 = alloca i1, i1 0
- %nop8062 = alloca i1, i1 0
- %nop8063 = alloca i1, i1 0
- %nop8064 = alloca i1, i1 0
- %nop8065 = alloca i1, i1 0
- %nop8066 = alloca i1, i1 0
- %nop8067 = alloca i1, i1 0
- %nop8068 = alloca i1, i1 0
- %nop8069 = alloca i1, i1 0
- %nop8070 = alloca i1, i1 0
- %nop8071 = alloca i1, i1 0
- %nop8072 = alloca i1, i1 0
- %nop8073 = alloca i1, i1 0
- %nop8074 = alloca i1, i1 0
- %nop8075 = alloca i1, i1 0
- %nop8076 = alloca i1, i1 0
- %nop8077 = alloca i1, i1 0
- %nop8078 = alloca i1, i1 0
- %nop8079 = alloca i1, i1 0
- %nop8080 = alloca i1, i1 0
- %nop8081 = alloca i1, i1 0
- %nop8082 = alloca i1, i1 0
- %nop8083 = alloca i1, i1 0
- %nop8084 = alloca i1, i1 0
- %nop8085 = alloca i1, i1 0
- %nop8086 = alloca i1, i1 0
- %nop8087 = alloca i1, i1 0
- %nop8088 = alloca i1, i1 0
- %nop8089 = alloca i1, i1 0
- %nop8090 = alloca i1, i1 0
- %nop8091 = alloca i1, i1 0
- %nop8092 = alloca i1, i1 0
- %nop8093 = alloca i1, i1 0
- %nop8094 = alloca i1, i1 0
- %nop8095 = alloca i1, i1 0
- %nop8096 = alloca i1, i1 0
- %nop8097 = alloca i1, i1 0
- %nop8098 = alloca i1, i1 0
- %nop8099 = alloca i1, i1 0
- %nop8100 = alloca i1, i1 0
- %nop8101 = alloca i1, i1 0
- %nop8102 = alloca i1, i1 0
- %nop8103 = alloca i1, i1 0
- %nop8104 = alloca i1, i1 0
- %nop8105 = alloca i1, i1 0
- %nop8106 = alloca i1, i1 0
- %nop8107 = alloca i1, i1 0
- %nop8108 = alloca i1, i1 0
- %nop8109 = alloca i1, i1 0
- %nop8110 = alloca i1, i1 0
- %nop8111 = alloca i1, i1 0
- %nop8112 = alloca i1, i1 0
- %nop8113 = alloca i1, i1 0
- %nop8114 = alloca i1, i1 0
- %nop8115 = alloca i1, i1 0
- %nop8116 = alloca i1, i1 0
- %nop8117 = alloca i1, i1 0
- %nop8118 = alloca i1, i1 0
- %nop8119 = alloca i1, i1 0
- %nop8120 = alloca i1, i1 0
- %nop8121 = alloca i1, i1 0
- %nop8122 = alloca i1, i1 0
- %nop8123 = alloca i1, i1 0
- %nop8124 = alloca i1, i1 0
- %nop8125 = alloca i1, i1 0
- %nop8126 = alloca i1, i1 0
- %nop8127 = alloca i1, i1 0
- %nop8128 = alloca i1, i1 0
- %nop8129 = alloca i1, i1 0
- %nop8130 = alloca i1, i1 0
- %nop8131 = alloca i1, i1 0
- %nop8132 = alloca i1, i1 0
- %nop8133 = alloca i1, i1 0
- %nop8134 = alloca i1, i1 0
- %nop8135 = alloca i1, i1 0
- %nop8136 = alloca i1, i1 0
- %nop8137 = alloca i1, i1 0
- %nop8138 = alloca i1, i1 0
- %nop8139 = alloca i1, i1 0
- %nop8140 = alloca i1, i1 0
- %nop8141 = alloca i1, i1 0
- %nop8142 = alloca i1, i1 0
- %nop8143 = alloca i1, i1 0
- %nop8144 = alloca i1, i1 0
- %nop8145 = alloca i1, i1 0
- %nop8146 = alloca i1, i1 0
- %nop8147 = alloca i1, i1 0
- %nop8148 = alloca i1, i1 0
- %nop8149 = alloca i1, i1 0
- %nop8150 = alloca i1, i1 0
- %nop8151 = alloca i1, i1 0
- %nop8152 = alloca i1, i1 0
- %nop8153 = alloca i1, i1 0
- %nop8154 = alloca i1, i1 0
- %nop8155 = alloca i1, i1 0
- %nop8156 = alloca i1, i1 0
- %nop8157 = alloca i1, i1 0
- %nop8158 = alloca i1, i1 0
- %nop8159 = alloca i1, i1 0
- %nop8160 = alloca i1, i1 0
- %nop8161 = alloca i1, i1 0
- %nop8162 = alloca i1, i1 0
- %nop8163 = alloca i1, i1 0
- %nop8164 = alloca i1, i1 0
- %nop8165 = alloca i1, i1 0
- %nop8166 = alloca i1, i1 0
- %nop8167 = alloca i1, i1 0
- %nop8168 = alloca i1, i1 0
- %nop8169 = alloca i1, i1 0
- %nop8170 = alloca i1, i1 0
- %nop8171 = alloca i1, i1 0
- %nop8172 = alloca i1, i1 0
- %nop8173 = alloca i1, i1 0
- %nop8174 = alloca i1, i1 0
- %nop8175 = alloca i1, i1 0
- %nop8176 = alloca i1, i1 0
- %nop8177 = alloca i1, i1 0
- %nop8178 = alloca i1, i1 0
- %nop8179 = alloca i1, i1 0
- %nop8180 = alloca i1, i1 0
- %nop8181 = alloca i1, i1 0
- %nop8182 = alloca i1, i1 0
- %nop8183 = alloca i1, i1 0
- %nop8184 = alloca i1, i1 0
- %nop8185 = alloca i1, i1 0
- %nop8186 = alloca i1, i1 0
- %nop8187 = alloca i1, i1 0
- %nop8188 = alloca i1, i1 0
- %nop8189 = alloca i1, i1 0
- %nop8190 = alloca i1, i1 0
- %nop8191 = alloca i1, i1 0
- %nop8192 = alloca i1, i1 0
- %nop8193 = alloca i1, i1 0
- %nop8194 = alloca i1, i1 0
- %nop8195 = alloca i1, i1 0
- %nop8196 = alloca i1, i1 0
- %nop8197 = alloca i1, i1 0
- %nop8198 = alloca i1, i1 0
- %nop8199 = alloca i1, i1 0
- %nop8200 = alloca i1, i1 0
- %nop8201 = alloca i1, i1 0
- %nop8202 = alloca i1, i1 0
- %nop8203 = alloca i1, i1 0
- %nop8204 = alloca i1, i1 0
- %nop8205 = alloca i1, i1 0
- %nop8206 = alloca i1, i1 0
- %nop8207 = alloca i1, i1 0
- %nop8208 = alloca i1, i1 0
- %nop8209 = alloca i1, i1 0
- %nop8210 = alloca i1, i1 0
- %nop8211 = alloca i1, i1 0
- %nop8212 = alloca i1, i1 0
- %nop8213 = alloca i1, i1 0
- %nop8214 = alloca i1, i1 0
- %nop8215 = alloca i1, i1 0
- %nop8216 = alloca i1, i1 0
- %nop8217 = alloca i1, i1 0
- %nop8218 = alloca i1, i1 0
- %nop8219 = alloca i1, i1 0
- %nop8220 = alloca i1, i1 0
- %nop8221 = alloca i1, i1 0
- %nop8222 = alloca i1, i1 0
- %nop8223 = alloca i1, i1 0
- %nop8224 = alloca i1, i1 0
- %nop8225 = alloca i1, i1 0
- %nop8226 = alloca i1, i1 0
- %nop8227 = alloca i1, i1 0
- %nop8228 = alloca i1, i1 0
- %nop8229 = alloca i1, i1 0
- %nop8230 = alloca i1, i1 0
- %nop8231 = alloca i1, i1 0
- %nop8232 = alloca i1, i1 0
- %nop8233 = alloca i1, i1 0
- %nop8234 = alloca i1, i1 0
- %nop8235 = alloca i1, i1 0
- %nop8236 = alloca i1, i1 0
- %nop8237 = alloca i1, i1 0
- %nop8238 = alloca i1, i1 0
- %nop8239 = alloca i1, i1 0
- %nop8240 = alloca i1, i1 0
- %nop8241 = alloca i1, i1 0
- %nop8242 = alloca i1, i1 0
- %nop8243 = alloca i1, i1 0
- %nop8244 = alloca i1, i1 0
- %nop8245 = alloca i1, i1 0
- %nop8246 = alloca i1, i1 0
- %nop8247 = alloca i1, i1 0
- %nop8248 = alloca i1, i1 0
- %nop8249 = alloca i1, i1 0
- %nop8250 = alloca i1, i1 0
- %nop8251 = alloca i1, i1 0
- %nop8252 = alloca i1, i1 0
- %nop8253 = alloca i1, i1 0
- %nop8254 = alloca i1, i1 0
- %nop8255 = alloca i1, i1 0
- %nop8256 = alloca i1, i1 0
- %nop8257 = alloca i1, i1 0
- %nop8258 = alloca i1, i1 0
- %nop8259 = alloca i1, i1 0
- %nop8260 = alloca i1, i1 0
- %nop8261 = alloca i1, i1 0
- %nop8262 = alloca i1, i1 0
- %nop8263 = alloca i1, i1 0
- %nop8264 = alloca i1, i1 0
- %nop8265 = alloca i1, i1 0
- %nop8266 = alloca i1, i1 0
- %nop8267 = alloca i1, i1 0
- %nop8268 = alloca i1, i1 0
- %nop8269 = alloca i1, i1 0
- %nop8270 = alloca i1, i1 0
- %nop8271 = alloca i1, i1 0
- %nop8272 = alloca i1, i1 0
- %nop8273 = alloca i1, i1 0
- %nop8274 = alloca i1, i1 0
- %nop8275 = alloca i1, i1 0
- %nop8276 = alloca i1, i1 0
- %nop8277 = alloca i1, i1 0
- %nop8278 = alloca i1, i1 0
- %nop8279 = alloca i1, i1 0
- %nop8280 = alloca i1, i1 0
- %nop8281 = alloca i1, i1 0
- %nop8282 = alloca i1, i1 0
- %nop8283 = alloca i1, i1 0
- %nop8284 = alloca i1, i1 0
- %nop8285 = alloca i1, i1 0
- %nop8286 = alloca i1, i1 0
- %nop8287 = alloca i1, i1 0
- %nop8288 = alloca i1, i1 0
- %nop8289 = alloca i1, i1 0
- %nop8290 = alloca i1, i1 0
- %nop8291 = alloca i1, i1 0
- %nop8292 = alloca i1, i1 0
- %nop8293 = alloca i1, i1 0
- %nop8294 = alloca i1, i1 0
- %nop8295 = alloca i1, i1 0
- %nop8296 = alloca i1, i1 0
- %nop8297 = alloca i1, i1 0
- %nop8298 = alloca i1, i1 0
- %nop8299 = alloca i1, i1 0
- %nop8300 = alloca i1, i1 0
- %nop8301 = alloca i1, i1 0
- %nop8302 = alloca i1, i1 0
- %nop8303 = alloca i1, i1 0
- %nop8304 = alloca i1, i1 0
- %nop8305 = alloca i1, i1 0
- %nop8306 = alloca i1, i1 0
- %nop8307 = alloca i1, i1 0
- %nop8308 = alloca i1, i1 0
- %nop8309 = alloca i1, i1 0
- %nop8310 = alloca i1, i1 0
- %nop8311 = alloca i1, i1 0
- %nop8312 = alloca i1, i1 0
- %nop8313 = alloca i1, i1 0
- %nop8314 = alloca i1, i1 0
- %nop8315 = alloca i1, i1 0
- %nop8316 = alloca i1, i1 0
- %nop8317 = alloca i1, i1 0
- %nop8318 = alloca i1, i1 0
- %nop8319 = alloca i1, i1 0
- %nop8320 = alloca i1, i1 0
- %nop8321 = alloca i1, i1 0
- %nop8322 = alloca i1, i1 0
- %nop8323 = alloca i1, i1 0
- %nop8324 = alloca i1, i1 0
- %nop8325 = alloca i1, i1 0
- %nop8326 = alloca i1, i1 0
- %nop8327 = alloca i1, i1 0
- %nop8328 = alloca i1, i1 0
- %nop8329 = alloca i1, i1 0
- %nop8330 = alloca i1, i1 0
- %nop8331 = alloca i1, i1 0
- %nop8332 = alloca i1, i1 0
- %nop8333 = alloca i1, i1 0
- %nop8334 = alloca i1, i1 0
- %nop8335 = alloca i1, i1 0
- %nop8336 = alloca i1, i1 0
- %nop8337 = alloca i1, i1 0
- %nop8338 = alloca i1, i1 0
- %nop8339 = alloca i1, i1 0
- %nop8340 = alloca i1, i1 0
- %nop8341 = alloca i1, i1 0
- %nop8342 = alloca i1, i1 0
- %nop8343 = alloca i1, i1 0
- %nop8344 = alloca i1, i1 0
- %nop8345 = alloca i1, i1 0
- %nop8346 = alloca i1, i1 0
- %nop8347 = alloca i1, i1 0
- %nop8348 = alloca i1, i1 0
- %nop8349 = alloca i1, i1 0
- %nop8350 = alloca i1, i1 0
- %nop8351 = alloca i1, i1 0
- %nop8352 = alloca i1, i1 0
- %nop8353 = alloca i1, i1 0
- %nop8354 = alloca i1, i1 0
- %nop8355 = alloca i1, i1 0
- %nop8356 = alloca i1, i1 0
- %nop8357 = alloca i1, i1 0
- %nop8358 = alloca i1, i1 0
- %nop8359 = alloca i1, i1 0
- %nop8360 = alloca i1, i1 0
- %nop8361 = alloca i1, i1 0
- %nop8362 = alloca i1, i1 0
- %nop8363 = alloca i1, i1 0
- %nop8364 = alloca i1, i1 0
- %nop8365 = alloca i1, i1 0
- %nop8366 = alloca i1, i1 0
- %nop8367 = alloca i1, i1 0
- %nop8368 = alloca i1, i1 0
- %nop8369 = alloca i1, i1 0
- %nop8370 = alloca i1, i1 0
- %nop8371 = alloca i1, i1 0
- %nop8372 = alloca i1, i1 0
- %nop8373 = alloca i1, i1 0
- %nop8374 = alloca i1, i1 0
- %nop8375 = alloca i1, i1 0
- %nop8376 = alloca i1, i1 0
- %nop8377 = alloca i1, i1 0
- %nop8378 = alloca i1, i1 0
- %nop8379 = alloca i1, i1 0
- %nop8380 = alloca i1, i1 0
- %nop8381 = alloca i1, i1 0
- %nop8382 = alloca i1, i1 0
- %nop8383 = alloca i1, i1 0
- %nop8384 = alloca i1, i1 0
- %nop8385 = alloca i1, i1 0
- %nop8386 = alloca i1, i1 0
- %nop8387 = alloca i1, i1 0
- %nop8388 = alloca i1, i1 0
- %nop8389 = alloca i1, i1 0
- %nop8390 = alloca i1, i1 0
- %nop8391 = alloca i1, i1 0
- %nop8392 = alloca i1, i1 0
- %nop8393 = alloca i1, i1 0
- %nop8394 = alloca i1, i1 0
- %nop8395 = alloca i1, i1 0
- %nop8396 = alloca i1, i1 0
- %nop8397 = alloca i1, i1 0
- %nop8398 = alloca i1, i1 0
- %nop8399 = alloca i1, i1 0
- %nop8400 = alloca i1, i1 0
- %nop8401 = alloca i1, i1 0
- %nop8402 = alloca i1, i1 0
- %nop8403 = alloca i1, i1 0
- %nop8404 = alloca i1, i1 0
- %nop8405 = alloca i1, i1 0
- %nop8406 = alloca i1, i1 0
- %nop8407 = alloca i1, i1 0
- %nop8408 = alloca i1, i1 0
- %nop8409 = alloca i1, i1 0
- %nop8410 = alloca i1, i1 0
- %nop8411 = alloca i1, i1 0
- %nop8412 = alloca i1, i1 0
- %nop8413 = alloca i1, i1 0
- %nop8414 = alloca i1, i1 0
- %nop8415 = alloca i1, i1 0
- %nop8416 = alloca i1, i1 0
- %nop8417 = alloca i1, i1 0
- %nop8418 = alloca i1, i1 0
- %nop8419 = alloca i1, i1 0
- %nop8420 = alloca i1, i1 0
- %nop8421 = alloca i1, i1 0
- %nop8422 = alloca i1, i1 0
- %nop8423 = alloca i1, i1 0
- %nop8424 = alloca i1, i1 0
- %nop8425 = alloca i1, i1 0
- %nop8426 = alloca i1, i1 0
- %nop8427 = alloca i1, i1 0
- %nop8428 = alloca i1, i1 0
- %nop8429 = alloca i1, i1 0
- %nop8430 = alloca i1, i1 0
- %nop8431 = alloca i1, i1 0
- %nop8432 = alloca i1, i1 0
- %nop8433 = alloca i1, i1 0
- %nop8434 = alloca i1, i1 0
- %nop8435 = alloca i1, i1 0
- %nop8436 = alloca i1, i1 0
- %nop8437 = alloca i1, i1 0
- %nop8438 = alloca i1, i1 0
- %nop8439 = alloca i1, i1 0
- %nop8440 = alloca i1, i1 0
- %nop8441 = alloca i1, i1 0
- %nop8442 = alloca i1, i1 0
- %nop8443 = alloca i1, i1 0
- %nop8444 = alloca i1, i1 0
- %nop8445 = alloca i1, i1 0
- %nop8446 = alloca i1, i1 0
- %nop8447 = alloca i1, i1 0
- %nop8448 = alloca i1, i1 0
- %nop8449 = alloca i1, i1 0
- %nop8450 = alloca i1, i1 0
- %nop8451 = alloca i1, i1 0
- %nop8452 = alloca i1, i1 0
- %nop8453 = alloca i1, i1 0
- %nop8454 = alloca i1, i1 0
- %nop8455 = alloca i1, i1 0
- %nop8456 = alloca i1, i1 0
- %nop8457 = alloca i1, i1 0
- %nop8458 = alloca i1, i1 0
- %nop8459 = alloca i1, i1 0
- %nop8460 = alloca i1, i1 0
- %nop8461 = alloca i1, i1 0
- %nop8462 = alloca i1, i1 0
- %nop8463 = alloca i1, i1 0
- %nop8464 = alloca i1, i1 0
- %nop8465 = alloca i1, i1 0
- %nop8466 = alloca i1, i1 0
- %nop8467 = alloca i1, i1 0
- %nop8468 = alloca i1, i1 0
- %nop8469 = alloca i1, i1 0
- %nop8470 = alloca i1, i1 0
- %nop8471 = alloca i1, i1 0
- %nop8472 = alloca i1, i1 0
- %nop8473 = alloca i1, i1 0
- %nop8474 = alloca i1, i1 0
- %nop8475 = alloca i1, i1 0
- %nop8476 = alloca i1, i1 0
- %nop8477 = alloca i1, i1 0
- %nop8478 = alloca i1, i1 0
- %nop8479 = alloca i1, i1 0
- %nop8480 = alloca i1, i1 0
- %nop8481 = alloca i1, i1 0
- %nop8482 = alloca i1, i1 0
- %nop8483 = alloca i1, i1 0
- %nop8484 = alloca i1, i1 0
- %nop8485 = alloca i1, i1 0
- %nop8486 = alloca i1, i1 0
- %nop8487 = alloca i1, i1 0
- %nop8488 = alloca i1, i1 0
- %nop8489 = alloca i1, i1 0
- %nop8490 = alloca i1, i1 0
- %nop8491 = alloca i1, i1 0
- %nop8492 = alloca i1, i1 0
- %nop8493 = alloca i1, i1 0
- %nop8494 = alloca i1, i1 0
- %nop8495 = alloca i1, i1 0
- %nop8496 = alloca i1, i1 0
- %nop8497 = alloca i1, i1 0
- %nop8498 = alloca i1, i1 0
- %nop8499 = alloca i1, i1 0
- %nop8500 = alloca i1, i1 0
- %nop8501 = alloca i1, i1 0
- %nop8502 = alloca i1, i1 0
- %nop8503 = alloca i1, i1 0
- %nop8504 = alloca i1, i1 0
- %nop8505 = alloca i1, i1 0
- %nop8506 = alloca i1, i1 0
- %nop8507 = alloca i1, i1 0
- %nop8508 = alloca i1, i1 0
- %nop8509 = alloca i1, i1 0
- %nop8510 = alloca i1, i1 0
- %nop8511 = alloca i1, i1 0
- %nop8512 = alloca i1, i1 0
- %nop8513 = alloca i1, i1 0
- %nop8514 = alloca i1, i1 0
- %nop8515 = alloca i1, i1 0
- %nop8516 = alloca i1, i1 0
- %nop8517 = alloca i1, i1 0
- %nop8518 = alloca i1, i1 0
- %nop8519 = alloca i1, i1 0
- %nop8520 = alloca i1, i1 0
- %nop8521 = alloca i1, i1 0
- %nop8522 = alloca i1, i1 0
- %nop8523 = alloca i1, i1 0
- %nop8524 = alloca i1, i1 0
- %nop8525 = alloca i1, i1 0
- %nop8526 = alloca i1, i1 0
- %nop8527 = alloca i1, i1 0
- %nop8528 = alloca i1, i1 0
- %nop8529 = alloca i1, i1 0
- %nop8530 = alloca i1, i1 0
- %nop8531 = alloca i1, i1 0
- %nop8532 = alloca i1, i1 0
- %nop8533 = alloca i1, i1 0
- %nop8534 = alloca i1, i1 0
- %nop8535 = alloca i1, i1 0
- %nop8536 = alloca i1, i1 0
- %nop8537 = alloca i1, i1 0
- %nop8538 = alloca i1, i1 0
- %nop8539 = alloca i1, i1 0
- %nop8540 = alloca i1, i1 0
- %nop8541 = alloca i1, i1 0
- %nop8542 = alloca i1, i1 0
- %nop8543 = alloca i1, i1 0
- %nop8544 = alloca i1, i1 0
- %nop8545 = alloca i1, i1 0
- %nop8546 = alloca i1, i1 0
- %nop8547 = alloca i1, i1 0
- %nop8548 = alloca i1, i1 0
- %nop8549 = alloca i1, i1 0
- %nop8550 = alloca i1, i1 0
- %nop8551 = alloca i1, i1 0
- %nop8552 = alloca i1, i1 0
- %nop8553 = alloca i1, i1 0
- %nop8554 = alloca i1, i1 0
- %nop8555 = alloca i1, i1 0
- %nop8556 = alloca i1, i1 0
- %nop8557 = alloca i1, i1 0
- %nop8558 = alloca i1, i1 0
- %nop8559 = alloca i1, i1 0
- %nop8560 = alloca i1, i1 0
- %nop8561 = alloca i1, i1 0
- %nop8562 = alloca i1, i1 0
- %nop8563 = alloca i1, i1 0
- %nop8564 = alloca i1, i1 0
- %nop8565 = alloca i1, i1 0
- %nop8566 = alloca i1, i1 0
- %nop8567 = alloca i1, i1 0
- %nop8568 = alloca i1, i1 0
- %nop8569 = alloca i1, i1 0
- %nop8570 = alloca i1, i1 0
- %nop8571 = alloca i1, i1 0
- %nop8572 = alloca i1, i1 0
- %nop8573 = alloca i1, i1 0
- %nop8574 = alloca i1, i1 0
- %nop8575 = alloca i1, i1 0
- %nop8576 = alloca i1, i1 0
- %nop8577 = alloca i1, i1 0
- %nop8578 = alloca i1, i1 0
- %nop8579 = alloca i1, i1 0
- %nop8580 = alloca i1, i1 0
- %nop8581 = alloca i1, i1 0
- %nop8582 = alloca i1, i1 0
- %nop8583 = alloca i1, i1 0
- %nop8584 = alloca i1, i1 0
- %nop8585 = alloca i1, i1 0
- %nop8586 = alloca i1, i1 0
- %nop8587 = alloca i1, i1 0
- %nop8588 = alloca i1, i1 0
- %nop8589 = alloca i1, i1 0
- %nop8590 = alloca i1, i1 0
- %nop8591 = alloca i1, i1 0
- %nop8592 = alloca i1, i1 0
- %nop8593 = alloca i1, i1 0
- %nop8594 = alloca i1, i1 0
- %nop8595 = alloca i1, i1 0
- %nop8596 = alloca i1, i1 0
- %nop8597 = alloca i1, i1 0
- %nop8598 = alloca i1, i1 0
- %nop8599 = alloca i1, i1 0
- %nop8600 = alloca i1, i1 0
- %nop8601 = alloca i1, i1 0
- %nop8602 = alloca i1, i1 0
- %nop8603 = alloca i1, i1 0
- %nop8604 = alloca i1, i1 0
- %nop8605 = alloca i1, i1 0
- %nop8606 = alloca i1, i1 0
- %nop8607 = alloca i1, i1 0
- %nop8608 = alloca i1, i1 0
- %nop8609 = alloca i1, i1 0
- %nop8610 = alloca i1, i1 0
- %nop8611 = alloca i1, i1 0
- %nop8612 = alloca i1, i1 0
- %nop8613 = alloca i1, i1 0
- %nop8614 = alloca i1, i1 0
- %nop8615 = alloca i1, i1 0
- %nop8616 = alloca i1, i1 0
- %nop8617 = alloca i1, i1 0
- %nop8618 = alloca i1, i1 0
- %nop8619 = alloca i1, i1 0
- %nop8620 = alloca i1, i1 0
- %nop8621 = alloca i1, i1 0
- %nop8622 = alloca i1, i1 0
- %nop8623 = alloca i1, i1 0
- %nop8624 = alloca i1, i1 0
- %nop8625 = alloca i1, i1 0
- %nop8626 = alloca i1, i1 0
- %nop8627 = alloca i1, i1 0
- %nop8628 = alloca i1, i1 0
- %nop8629 = alloca i1, i1 0
- %nop8630 = alloca i1, i1 0
- %nop8631 = alloca i1, i1 0
- %nop8632 = alloca i1, i1 0
- %nop8633 = alloca i1, i1 0
- %nop8634 = alloca i1, i1 0
- %nop8635 = alloca i1, i1 0
- %nop8636 = alloca i1, i1 0
- %nop8637 = alloca i1, i1 0
- %nop8638 = alloca i1, i1 0
- %nop8639 = alloca i1, i1 0
- %nop8640 = alloca i1, i1 0
- %nop8641 = alloca i1, i1 0
- %nop8642 = alloca i1, i1 0
- %nop8643 = alloca i1, i1 0
- %nop8644 = alloca i1, i1 0
- %nop8645 = alloca i1, i1 0
- %nop8646 = alloca i1, i1 0
- %nop8647 = alloca i1, i1 0
- %nop8648 = alloca i1, i1 0
- %nop8649 = alloca i1, i1 0
- %nop8650 = alloca i1, i1 0
- %nop8651 = alloca i1, i1 0
- %nop8652 = alloca i1, i1 0
- %nop8653 = alloca i1, i1 0
- %nop8654 = alloca i1, i1 0
- %nop8655 = alloca i1, i1 0
- %nop8656 = alloca i1, i1 0
- %nop8657 = alloca i1, i1 0
- %nop8658 = alloca i1, i1 0
- %nop8659 = alloca i1, i1 0
- %nop8660 = alloca i1, i1 0
- %nop8661 = alloca i1, i1 0
- %nop8662 = alloca i1, i1 0
- %nop8663 = alloca i1, i1 0
- %nop8664 = alloca i1, i1 0
- %nop8665 = alloca i1, i1 0
- %nop8666 = alloca i1, i1 0
- %nop8667 = alloca i1, i1 0
- %nop8668 = alloca i1, i1 0
- %nop8669 = alloca i1, i1 0
- %nop8670 = alloca i1, i1 0
- %nop8671 = alloca i1, i1 0
- %nop8672 = alloca i1, i1 0
- %nop8673 = alloca i1, i1 0
- %nop8674 = alloca i1, i1 0
- %nop8675 = alloca i1, i1 0
- %nop8676 = alloca i1, i1 0
- %nop8677 = alloca i1, i1 0
- %nop8678 = alloca i1, i1 0
- %nop8679 = alloca i1, i1 0
- %nop8680 = alloca i1, i1 0
- %nop8681 = alloca i1, i1 0
- %nop8682 = alloca i1, i1 0
- %nop8683 = alloca i1, i1 0
- %nop8684 = alloca i1, i1 0
- %nop8685 = alloca i1, i1 0
- %nop8686 = alloca i1, i1 0
- %nop8687 = alloca i1, i1 0
- %nop8688 = alloca i1, i1 0
- %nop8689 = alloca i1, i1 0
- %nop8690 = alloca i1, i1 0
- %nop8691 = alloca i1, i1 0
- %nop8692 = alloca i1, i1 0
- %nop8693 = alloca i1, i1 0
- %nop8694 = alloca i1, i1 0
- %nop8695 = alloca i1, i1 0
- %nop8696 = alloca i1, i1 0
- %nop8697 = alloca i1, i1 0
- %nop8698 = alloca i1, i1 0
- %nop8699 = alloca i1, i1 0
- %nop8700 = alloca i1, i1 0
- %nop8701 = alloca i1, i1 0
- %nop8702 = alloca i1, i1 0
- %nop8703 = alloca i1, i1 0
- %nop8704 = alloca i1, i1 0
- %nop8705 = alloca i1, i1 0
- %nop8706 = alloca i1, i1 0
- %nop8707 = alloca i1, i1 0
- %nop8708 = alloca i1, i1 0
- %nop8709 = alloca i1, i1 0
- %nop8710 = alloca i1, i1 0
- %nop8711 = alloca i1, i1 0
- %nop8712 = alloca i1, i1 0
- %nop8713 = alloca i1, i1 0
- %nop8714 = alloca i1, i1 0
- %nop8715 = alloca i1, i1 0
- %nop8716 = alloca i1, i1 0
- %nop8717 = alloca i1, i1 0
- %nop8718 = alloca i1, i1 0
- %nop8719 = alloca i1, i1 0
- %nop8720 = alloca i1, i1 0
- %nop8721 = alloca i1, i1 0
- %nop8722 = alloca i1, i1 0
- %nop8723 = alloca i1, i1 0
- %nop8724 = alloca i1, i1 0
- %nop8725 = alloca i1, i1 0
- %nop8726 = alloca i1, i1 0
- %nop8727 = alloca i1, i1 0
- %nop8728 = alloca i1, i1 0
- %nop8729 = alloca i1, i1 0
- %nop8730 = alloca i1, i1 0
- %nop8731 = alloca i1, i1 0
- %nop8732 = alloca i1, i1 0
- %nop8733 = alloca i1, i1 0
- %nop8734 = alloca i1, i1 0
- %nop8735 = alloca i1, i1 0
- %nop8736 = alloca i1, i1 0
- %nop8737 = alloca i1, i1 0
- %nop8738 = alloca i1, i1 0
- %nop8739 = alloca i1, i1 0
- %nop8740 = alloca i1, i1 0
- %nop8741 = alloca i1, i1 0
- %nop8742 = alloca i1, i1 0
- %nop8743 = alloca i1, i1 0
- %nop8744 = alloca i1, i1 0
- %nop8745 = alloca i1, i1 0
- %nop8746 = alloca i1, i1 0
- %nop8747 = alloca i1, i1 0
- %nop8748 = alloca i1, i1 0
- %nop8749 = alloca i1, i1 0
- %nop8750 = alloca i1, i1 0
- %nop8751 = alloca i1, i1 0
- %nop8752 = alloca i1, i1 0
- %nop8753 = alloca i1, i1 0
- %nop8754 = alloca i1, i1 0
- %nop8755 = alloca i1, i1 0
- %nop8756 = alloca i1, i1 0
- %nop8757 = alloca i1, i1 0
- %nop8758 = alloca i1, i1 0
- %nop8759 = alloca i1, i1 0
- %nop8760 = alloca i1, i1 0
- %nop8761 = alloca i1, i1 0
- %nop8762 = alloca i1, i1 0
- %nop8763 = alloca i1, i1 0
- %nop8764 = alloca i1, i1 0
- %nop8765 = alloca i1, i1 0
- %nop8766 = alloca i1, i1 0
- %nop8767 = alloca i1, i1 0
- %nop8768 = alloca i1, i1 0
- %nop8769 = alloca i1, i1 0
- %nop8770 = alloca i1, i1 0
- %nop8771 = alloca i1, i1 0
- %nop8772 = alloca i1, i1 0
- %nop8773 = alloca i1, i1 0
- %nop8774 = alloca i1, i1 0
- %nop8775 = alloca i1, i1 0
- %nop8776 = alloca i1, i1 0
- %nop8777 = alloca i1, i1 0
- %nop8778 = alloca i1, i1 0
- %nop8779 = alloca i1, i1 0
- %nop8780 = alloca i1, i1 0
- %nop8781 = alloca i1, i1 0
- %nop8782 = alloca i1, i1 0
- %nop8783 = alloca i1, i1 0
- %nop8784 = alloca i1, i1 0
- %nop8785 = alloca i1, i1 0
- %nop8786 = alloca i1, i1 0
- %nop8787 = alloca i1, i1 0
- %nop8788 = alloca i1, i1 0
- %nop8789 = alloca i1, i1 0
- %nop8790 = alloca i1, i1 0
- %nop8791 = alloca i1, i1 0
- %nop8792 = alloca i1, i1 0
- %nop8793 = alloca i1, i1 0
- %nop8794 = alloca i1, i1 0
- %nop8795 = alloca i1, i1 0
- %nop8796 = alloca i1, i1 0
- %nop8797 = alloca i1, i1 0
- %nop8798 = alloca i1, i1 0
- %nop8799 = alloca i1, i1 0
- %nop8800 = alloca i1, i1 0
- %nop8801 = alloca i1, i1 0
- %nop8802 = alloca i1, i1 0
- %nop8803 = alloca i1, i1 0
- %nop8804 = alloca i1, i1 0
- %nop8805 = alloca i1, i1 0
- %nop8806 = alloca i1, i1 0
- %nop8807 = alloca i1, i1 0
- %nop8808 = alloca i1, i1 0
- %nop8809 = alloca i1, i1 0
- %nop8810 = alloca i1, i1 0
- %nop8811 = alloca i1, i1 0
- %nop8812 = alloca i1, i1 0
- %nop8813 = alloca i1, i1 0
- %nop8814 = alloca i1, i1 0
- %nop8815 = alloca i1, i1 0
- %nop8816 = alloca i1, i1 0
- %nop8817 = alloca i1, i1 0
- %nop8818 = alloca i1, i1 0
- %nop8819 = alloca i1, i1 0
- %nop8820 = alloca i1, i1 0
- %nop8821 = alloca i1, i1 0
- %nop8822 = alloca i1, i1 0
- %nop8823 = alloca i1, i1 0
- %nop8824 = alloca i1, i1 0
- %nop8825 = alloca i1, i1 0
- %nop8826 = alloca i1, i1 0
- %nop8827 = alloca i1, i1 0
- %nop8828 = alloca i1, i1 0
- %nop8829 = alloca i1, i1 0
- %nop8830 = alloca i1, i1 0
- %nop8831 = alloca i1, i1 0
- %nop8832 = alloca i1, i1 0
- %nop8833 = alloca i1, i1 0
- %nop8834 = alloca i1, i1 0
- %nop8835 = alloca i1, i1 0
- %nop8836 = alloca i1, i1 0
- %nop8837 = alloca i1, i1 0
- %nop8838 = alloca i1, i1 0
- %nop8839 = alloca i1, i1 0
- %nop8840 = alloca i1, i1 0
- %nop8841 = alloca i1, i1 0
- %nop8842 = alloca i1, i1 0
- %nop8843 = alloca i1, i1 0
- %nop8844 = alloca i1, i1 0
- %nop8845 = alloca i1, i1 0
- %nop8846 = alloca i1, i1 0
- %nop8847 = alloca i1, i1 0
- %nop8848 = alloca i1, i1 0
- %nop8849 = alloca i1, i1 0
- %nop8850 = alloca i1, i1 0
- %nop8851 = alloca i1, i1 0
- %nop8852 = alloca i1, i1 0
- %nop8853 = alloca i1, i1 0
- %nop8854 = alloca i1, i1 0
- %nop8855 = alloca i1, i1 0
- %nop8856 = alloca i1, i1 0
- %nop8857 = alloca i1, i1 0
- %nop8858 = alloca i1, i1 0
- %nop8859 = alloca i1, i1 0
- %nop8860 = alloca i1, i1 0
- %nop8861 = alloca i1, i1 0
- %nop8862 = alloca i1, i1 0
- %nop8863 = alloca i1, i1 0
- %nop8864 = alloca i1, i1 0
- %nop8865 = alloca i1, i1 0
- %nop8866 = alloca i1, i1 0
- %nop8867 = alloca i1, i1 0
- %nop8868 = alloca i1, i1 0
- %nop8869 = alloca i1, i1 0
- %nop8870 = alloca i1, i1 0
- %nop8871 = alloca i1, i1 0
- %nop8872 = alloca i1, i1 0
- %nop8873 = alloca i1, i1 0
- %nop8874 = alloca i1, i1 0
- %nop8875 = alloca i1, i1 0
- %nop8876 = alloca i1, i1 0
- %nop8877 = alloca i1, i1 0
- %nop8878 = alloca i1, i1 0
- %nop8879 = alloca i1, i1 0
- %nop8880 = alloca i1, i1 0
- %nop8881 = alloca i1, i1 0
- %nop8882 = alloca i1, i1 0
- %nop8883 = alloca i1, i1 0
- %nop8884 = alloca i1, i1 0
- %nop8885 = alloca i1, i1 0
- %nop8886 = alloca i1, i1 0
- %nop8887 = alloca i1, i1 0
- %nop8888 = alloca i1, i1 0
- %nop8889 = alloca i1, i1 0
- %nop8890 = alloca i1, i1 0
- %nop8891 = alloca i1, i1 0
- %nop8892 = alloca i1, i1 0
- %nop8893 = alloca i1, i1 0
- %nop8894 = alloca i1, i1 0
- %nop8895 = alloca i1, i1 0
- %nop8896 = alloca i1, i1 0
- %nop8897 = alloca i1, i1 0
- %nop8898 = alloca i1, i1 0
- %nop8899 = alloca i1, i1 0
- %nop8900 = alloca i1, i1 0
- %nop8901 = alloca i1, i1 0
- %nop8902 = alloca i1, i1 0
- %nop8903 = alloca i1, i1 0
- %nop8904 = alloca i1, i1 0
- %nop8905 = alloca i1, i1 0
- %nop8906 = alloca i1, i1 0
- %nop8907 = alloca i1, i1 0
- %nop8908 = alloca i1, i1 0
- %nop8909 = alloca i1, i1 0
- %nop8910 = alloca i1, i1 0
- %nop8911 = alloca i1, i1 0
- %nop8912 = alloca i1, i1 0
- %nop8913 = alloca i1, i1 0
- %nop8914 = alloca i1, i1 0
- %nop8915 = alloca i1, i1 0
- %nop8916 = alloca i1, i1 0
- %nop8917 = alloca i1, i1 0
- %nop8918 = alloca i1, i1 0
- %nop8919 = alloca i1, i1 0
- %nop8920 = alloca i1, i1 0
- %nop8921 = alloca i1, i1 0
- %nop8922 = alloca i1, i1 0
- %nop8923 = alloca i1, i1 0
- %nop8924 = alloca i1, i1 0
- %nop8925 = alloca i1, i1 0
- %nop8926 = alloca i1, i1 0
- %nop8927 = alloca i1, i1 0
- %nop8928 = alloca i1, i1 0
- %nop8929 = alloca i1, i1 0
- %nop8930 = alloca i1, i1 0
- %nop8931 = alloca i1, i1 0
- %nop8932 = alloca i1, i1 0
- %nop8933 = alloca i1, i1 0
- %nop8934 = alloca i1, i1 0
- %nop8935 = alloca i1, i1 0
- %nop8936 = alloca i1, i1 0
- %nop8937 = alloca i1, i1 0
- %nop8938 = alloca i1, i1 0
- %nop8939 = alloca i1, i1 0
- %nop8940 = alloca i1, i1 0
- %nop8941 = alloca i1, i1 0
- %nop8942 = alloca i1, i1 0
- %nop8943 = alloca i1, i1 0
- %nop8944 = alloca i1, i1 0
- %nop8945 = alloca i1, i1 0
- %nop8946 = alloca i1, i1 0
- %nop8947 = alloca i1, i1 0
- %nop8948 = alloca i1, i1 0
- %nop8949 = alloca i1, i1 0
- %nop8950 = alloca i1, i1 0
- %nop8951 = alloca i1, i1 0
- %nop8952 = alloca i1, i1 0
- %nop8953 = alloca i1, i1 0
- %nop8954 = alloca i1, i1 0
- %nop8955 = alloca i1, i1 0
- %nop8956 = alloca i1, i1 0
- %nop8957 = alloca i1, i1 0
- %nop8958 = alloca i1, i1 0
- %nop8959 = alloca i1, i1 0
- %nop8960 = alloca i1, i1 0
- %nop8961 = alloca i1, i1 0
- %nop8962 = alloca i1, i1 0
- %nop8963 = alloca i1, i1 0
- %nop8964 = alloca i1, i1 0
- %nop8965 = alloca i1, i1 0
- %nop8966 = alloca i1, i1 0
- %nop8967 = alloca i1, i1 0
- %nop8968 = alloca i1, i1 0
- %nop8969 = alloca i1, i1 0
- %nop8970 = alloca i1, i1 0
- %nop8971 = alloca i1, i1 0
- %nop8972 = alloca i1, i1 0
- %nop8973 = alloca i1, i1 0
- %nop8974 = alloca i1, i1 0
- %nop8975 = alloca i1, i1 0
- %nop8976 = alloca i1, i1 0
- %nop8977 = alloca i1, i1 0
- %nop8978 = alloca i1, i1 0
- %nop8979 = alloca i1, i1 0
- %nop8980 = alloca i1, i1 0
- %nop8981 = alloca i1, i1 0
- %nop8982 = alloca i1, i1 0
- %nop8983 = alloca i1, i1 0
- %nop8984 = alloca i1, i1 0
- %nop8985 = alloca i1, i1 0
- %nop8986 = alloca i1, i1 0
- %nop8987 = alloca i1, i1 0
- %nop8988 = alloca i1, i1 0
- %nop8989 = alloca i1, i1 0
- %nop8990 = alloca i1, i1 0
- %nop8991 = alloca i1, i1 0
- %nop8992 = alloca i1, i1 0
- %nop8993 = alloca i1, i1 0
- %nop8994 = alloca i1, i1 0
- %nop8995 = alloca i1, i1 0
- %nop8996 = alloca i1, i1 0
- %nop8997 = alloca i1, i1 0
- %nop8998 = alloca i1, i1 0
- %nop8999 = alloca i1, i1 0
- %nop9000 = alloca i1, i1 0
- %nop9001 = alloca i1, i1 0
- %nop9002 = alloca i1, i1 0
- %nop9003 = alloca i1, i1 0
- %nop9004 = alloca i1, i1 0
- %nop9005 = alloca i1, i1 0
- %nop9006 = alloca i1, i1 0
- %nop9007 = alloca i1, i1 0
- %nop9008 = alloca i1, i1 0
- %nop9009 = alloca i1, i1 0
- %nop9010 = alloca i1, i1 0
- %nop9011 = alloca i1, i1 0
- %nop9012 = alloca i1, i1 0
- %nop9013 = alloca i1, i1 0
- %nop9014 = alloca i1, i1 0
- %nop9015 = alloca i1, i1 0
- %nop9016 = alloca i1, i1 0
- %nop9017 = alloca i1, i1 0
- %nop9018 = alloca i1, i1 0
- %nop9019 = alloca i1, i1 0
- %nop9020 = alloca i1, i1 0
- %nop9021 = alloca i1, i1 0
- %nop9022 = alloca i1, i1 0
- %nop9023 = alloca i1, i1 0
- %nop9024 = alloca i1, i1 0
- %nop9025 = alloca i1, i1 0
- %nop9026 = alloca i1, i1 0
- %nop9027 = alloca i1, i1 0
- %nop9028 = alloca i1, i1 0
- %nop9029 = alloca i1, i1 0
- %nop9030 = alloca i1, i1 0
- %nop9031 = alloca i1, i1 0
- %nop9032 = alloca i1, i1 0
- %nop9033 = alloca i1, i1 0
- %nop9034 = alloca i1, i1 0
- %nop9035 = alloca i1, i1 0
- %nop9036 = alloca i1, i1 0
- %nop9037 = alloca i1, i1 0
- %nop9038 = alloca i1, i1 0
- %nop9039 = alloca i1, i1 0
- %nop9040 = alloca i1, i1 0
- %nop9041 = alloca i1, i1 0
- %nop9042 = alloca i1, i1 0
- %nop9043 = alloca i1, i1 0
- %nop9044 = alloca i1, i1 0
- %nop9045 = alloca i1, i1 0
- %nop9046 = alloca i1, i1 0
- %nop9047 = alloca i1, i1 0
- %nop9048 = alloca i1, i1 0
- %nop9049 = alloca i1, i1 0
- %nop9050 = alloca i1, i1 0
- %nop9051 = alloca i1, i1 0
- %nop9052 = alloca i1, i1 0
- %nop9053 = alloca i1, i1 0
- %nop9054 = alloca i1, i1 0
- %nop9055 = alloca i1, i1 0
- %nop9056 = alloca i1, i1 0
- %nop9057 = alloca i1, i1 0
- %nop9058 = alloca i1, i1 0
- %nop9059 = alloca i1, i1 0
- %nop9060 = alloca i1, i1 0
- %nop9061 = alloca i1, i1 0
- %nop9062 = alloca i1, i1 0
- %nop9063 = alloca i1, i1 0
- %nop9064 = alloca i1, i1 0
- %nop9065 = alloca i1, i1 0
- %nop9066 = alloca i1, i1 0
- %nop9067 = alloca i1, i1 0
- %nop9068 = alloca i1, i1 0
- %nop9069 = alloca i1, i1 0
- %nop9070 = alloca i1, i1 0
- %nop9071 = alloca i1, i1 0
- %nop9072 = alloca i1, i1 0
- %nop9073 = alloca i1, i1 0
- %nop9074 = alloca i1, i1 0
- %nop9075 = alloca i1, i1 0
- %nop9076 = alloca i1, i1 0
- %nop9077 = alloca i1, i1 0
- %nop9078 = alloca i1, i1 0
- %nop9079 = alloca i1, i1 0
- %nop9080 = alloca i1, i1 0
- %nop9081 = alloca i1, i1 0
- %nop9082 = alloca i1, i1 0
- %nop9083 = alloca i1, i1 0
- %nop9084 = alloca i1, i1 0
- %nop9085 = alloca i1, i1 0
- %nop9086 = alloca i1, i1 0
- %nop9087 = alloca i1, i1 0
- %nop9088 = alloca i1, i1 0
- %nop9089 = alloca i1, i1 0
- %nop9090 = alloca i1, i1 0
- %nop9091 = alloca i1, i1 0
- %nop9092 = alloca i1, i1 0
- %nop9093 = alloca i1, i1 0
- %nop9094 = alloca i1, i1 0
- %nop9095 = alloca i1, i1 0
- %nop9096 = alloca i1, i1 0
- %nop9097 = alloca i1, i1 0
- %nop9098 = alloca i1, i1 0
- %nop9099 = alloca i1, i1 0
- %nop9100 = alloca i1, i1 0
- %nop9101 = alloca i1, i1 0
- %nop9102 = alloca i1, i1 0
- %nop9103 = alloca i1, i1 0
- %nop9104 = alloca i1, i1 0
- %nop9105 = alloca i1, i1 0
- %nop9106 = alloca i1, i1 0
- %nop9107 = alloca i1, i1 0
- %nop9108 = alloca i1, i1 0
- %nop9109 = alloca i1, i1 0
- %nop9110 = alloca i1, i1 0
- %nop9111 = alloca i1, i1 0
- %nop9112 = alloca i1, i1 0
- %nop9113 = alloca i1, i1 0
- %nop9114 = alloca i1, i1 0
- %nop9115 = alloca i1, i1 0
- %nop9116 = alloca i1, i1 0
- %nop9117 = alloca i1, i1 0
- %nop9118 = alloca i1, i1 0
- %nop9119 = alloca i1, i1 0
- %nop9120 = alloca i1, i1 0
- %nop9121 = alloca i1, i1 0
- %nop9122 = alloca i1, i1 0
- %nop9123 = alloca i1, i1 0
- %nop9124 = alloca i1, i1 0
- %nop9125 = alloca i1, i1 0
- %nop9126 = alloca i1, i1 0
- %nop9127 = alloca i1, i1 0
- %nop9128 = alloca i1, i1 0
- %nop9129 = alloca i1, i1 0
- %nop9130 = alloca i1, i1 0
- %nop9131 = alloca i1, i1 0
- %nop9132 = alloca i1, i1 0
- %nop9133 = alloca i1, i1 0
- %nop9134 = alloca i1, i1 0
- %nop9135 = alloca i1, i1 0
- %nop9136 = alloca i1, i1 0
- %nop9137 = alloca i1, i1 0
- %nop9138 = alloca i1, i1 0
- %nop9139 = alloca i1, i1 0
- %nop9140 = alloca i1, i1 0
- %nop9141 = alloca i1, i1 0
- %nop9142 = alloca i1, i1 0
- %nop9143 = alloca i1, i1 0
- %nop9144 = alloca i1, i1 0
- %nop9145 = alloca i1, i1 0
- %nop9146 = alloca i1, i1 0
- %nop9147 = alloca i1, i1 0
- %nop9148 = alloca i1, i1 0
- %nop9149 = alloca i1, i1 0
- %nop9150 = alloca i1, i1 0
- %nop9151 = alloca i1, i1 0
- %nop9152 = alloca i1, i1 0
- %nop9153 = alloca i1, i1 0
- %nop9154 = alloca i1, i1 0
- %nop9155 = alloca i1, i1 0
- %nop9156 = alloca i1, i1 0
- %nop9157 = alloca i1, i1 0
- %nop9158 = alloca i1, i1 0
- %nop9159 = alloca i1, i1 0
- %nop9160 = alloca i1, i1 0
- %nop9161 = alloca i1, i1 0
- %nop9162 = alloca i1, i1 0
- %nop9163 = alloca i1, i1 0
- %nop9164 = alloca i1, i1 0
- %nop9165 = alloca i1, i1 0
- %nop9166 = alloca i1, i1 0
- %nop9167 = alloca i1, i1 0
- %nop9168 = alloca i1, i1 0
- %nop9169 = alloca i1, i1 0
- %nop9170 = alloca i1, i1 0
- %nop9171 = alloca i1, i1 0
- %nop9172 = alloca i1, i1 0
- %nop9173 = alloca i1, i1 0
- %nop9174 = alloca i1, i1 0
- %nop9175 = alloca i1, i1 0
- %nop9176 = alloca i1, i1 0
- %nop9177 = alloca i1, i1 0
- %nop9178 = alloca i1, i1 0
- %nop9179 = alloca i1, i1 0
- %nop9180 = alloca i1, i1 0
- %nop9181 = alloca i1, i1 0
- %nop9182 = alloca i1, i1 0
- %nop9183 = alloca i1, i1 0
- %nop9184 = alloca i1, i1 0
- %nop9185 = alloca i1, i1 0
- %nop9186 = alloca i1, i1 0
- %nop9187 = alloca i1, i1 0
- %nop9188 = alloca i1, i1 0
- %nop9189 = alloca i1, i1 0
- %nop9190 = alloca i1, i1 0
- %nop9191 = alloca i1, i1 0
- %nop9192 = alloca i1, i1 0
- %nop9193 = alloca i1, i1 0
- %nop9194 = alloca i1, i1 0
- %nop9195 = alloca i1, i1 0
- %nop9196 = alloca i1, i1 0
- %nop9197 = alloca i1, i1 0
- %nop9198 = alloca i1, i1 0
- %nop9199 = alloca i1, i1 0
- %nop9200 = alloca i1, i1 0
- %nop9201 = alloca i1, i1 0
- %nop9202 = alloca i1, i1 0
- %nop9203 = alloca i1, i1 0
- %nop9204 = alloca i1, i1 0
- %nop9205 = alloca i1, i1 0
- %nop9206 = alloca i1, i1 0
- %nop9207 = alloca i1, i1 0
- %nop9208 = alloca i1, i1 0
- %nop9209 = alloca i1, i1 0
- %nop9210 = alloca i1, i1 0
- %nop9211 = alloca i1, i1 0
- %nop9212 = alloca i1, i1 0
- %nop9213 = alloca i1, i1 0
- %nop9214 = alloca i1, i1 0
- %nop9215 = alloca i1, i1 0
- %nop9216 = alloca i1, i1 0
- %nop9217 = alloca i1, i1 0
- %nop9218 = alloca i1, i1 0
- %nop9219 = alloca i1, i1 0
- %nop9220 = alloca i1, i1 0
- %nop9221 = alloca i1, i1 0
- %nop9222 = alloca i1, i1 0
- %nop9223 = alloca i1, i1 0
- %nop9224 = alloca i1, i1 0
- %nop9225 = alloca i1, i1 0
- %nop9226 = alloca i1, i1 0
- %nop9227 = alloca i1, i1 0
- %nop9228 = alloca i1, i1 0
- %nop9229 = alloca i1, i1 0
- %nop9230 = alloca i1, i1 0
- %nop9231 = alloca i1, i1 0
- %nop9232 = alloca i1, i1 0
- %nop9233 = alloca i1, i1 0
- %nop9234 = alloca i1, i1 0
- %nop9235 = alloca i1, i1 0
- %nop9236 = alloca i1, i1 0
- %nop9237 = alloca i1, i1 0
- %nop9238 = alloca i1, i1 0
- %nop9239 = alloca i1, i1 0
- %nop9240 = alloca i1, i1 0
- %nop9241 = alloca i1, i1 0
- %nop9242 = alloca i1, i1 0
- %nop9243 = alloca i1, i1 0
- %nop9244 = alloca i1, i1 0
- %nop9245 = alloca i1, i1 0
- %nop9246 = alloca i1, i1 0
- %nop9247 = alloca i1, i1 0
- %nop9248 = alloca i1, i1 0
- %nop9249 = alloca i1, i1 0
- %nop9250 = alloca i1, i1 0
- %nop9251 = alloca i1, i1 0
- %nop9252 = alloca i1, i1 0
- %nop9253 = alloca i1, i1 0
- %nop9254 = alloca i1, i1 0
- %nop9255 = alloca i1, i1 0
- %nop9256 = alloca i1, i1 0
- %nop9257 = alloca i1, i1 0
- %nop9258 = alloca i1, i1 0
- %nop9259 = alloca i1, i1 0
- %nop9260 = alloca i1, i1 0
- %nop9261 = alloca i1, i1 0
- %nop9262 = alloca i1, i1 0
- %nop9263 = alloca i1, i1 0
- %nop9264 = alloca i1, i1 0
- %nop9265 = alloca i1, i1 0
- %nop9266 = alloca i1, i1 0
- %nop9267 = alloca i1, i1 0
- %nop9268 = alloca i1, i1 0
- %nop9269 = alloca i1, i1 0
- %nop9270 = alloca i1, i1 0
- %nop9271 = alloca i1, i1 0
- %nop9272 = alloca i1, i1 0
- %nop9273 = alloca i1, i1 0
- %nop9274 = alloca i1, i1 0
- %nop9275 = alloca i1, i1 0
- %nop9276 = alloca i1, i1 0
- %nop9277 = alloca i1, i1 0
- %nop9278 = alloca i1, i1 0
- %nop9279 = alloca i1, i1 0
- %nop9280 = alloca i1, i1 0
- %nop9281 = alloca i1, i1 0
- %nop9282 = alloca i1, i1 0
- %nop9283 = alloca i1, i1 0
- %nop9284 = alloca i1, i1 0
- %nop9285 = alloca i1, i1 0
- %nop9286 = alloca i1, i1 0
- %nop9287 = alloca i1, i1 0
- %nop9288 = alloca i1, i1 0
- %nop9289 = alloca i1, i1 0
- %nop9290 = alloca i1, i1 0
- %nop9291 = alloca i1, i1 0
- %nop9292 = alloca i1, i1 0
- %nop9293 = alloca i1, i1 0
- %nop9294 = alloca i1, i1 0
- %nop9295 = alloca i1, i1 0
- %nop9296 = alloca i1, i1 0
- %nop9297 = alloca i1, i1 0
- %nop9298 = alloca i1, i1 0
- %nop9299 = alloca i1, i1 0
- %nop9300 = alloca i1, i1 0
- %nop9301 = alloca i1, i1 0
- %nop9302 = alloca i1, i1 0
- %nop9303 = alloca i1, i1 0
- %nop9304 = alloca i1, i1 0
- %nop9305 = alloca i1, i1 0
- %nop9306 = alloca i1, i1 0
- %nop9307 = alloca i1, i1 0
- %nop9308 = alloca i1, i1 0
- %nop9309 = alloca i1, i1 0
- %nop9310 = alloca i1, i1 0
- %nop9311 = alloca i1, i1 0
- %nop9312 = alloca i1, i1 0
- %nop9313 = alloca i1, i1 0
- %nop9314 = alloca i1, i1 0
- %nop9315 = alloca i1, i1 0
- %nop9316 = alloca i1, i1 0
- %nop9317 = alloca i1, i1 0
- %nop9318 = alloca i1, i1 0
- %nop9319 = alloca i1, i1 0
- %nop9320 = alloca i1, i1 0
- %nop9321 = alloca i1, i1 0
- %nop9322 = alloca i1, i1 0
- %nop9323 = alloca i1, i1 0
- %nop9324 = alloca i1, i1 0
- %nop9325 = alloca i1, i1 0
- %nop9326 = alloca i1, i1 0
- %nop9327 = alloca i1, i1 0
- %nop9328 = alloca i1, i1 0
- %nop9329 = alloca i1, i1 0
- %nop9330 = alloca i1, i1 0
- %nop9331 = alloca i1, i1 0
- %nop9332 = alloca i1, i1 0
- %nop9333 = alloca i1, i1 0
- %nop9334 = alloca i1, i1 0
- %nop9335 = alloca i1, i1 0
- %nop9336 = alloca i1, i1 0
- %nop9337 = alloca i1, i1 0
- %nop9338 = alloca i1, i1 0
- %nop9339 = alloca i1, i1 0
- %nop9340 = alloca i1, i1 0
- %nop9341 = alloca i1, i1 0
- %nop9342 = alloca i1, i1 0
- %nop9343 = alloca i1, i1 0
- %nop9344 = alloca i1, i1 0
- %nop9345 = alloca i1, i1 0
- %nop9346 = alloca i1, i1 0
- %nop9347 = alloca i1, i1 0
- %nop9348 = alloca i1, i1 0
- %nop9349 = alloca i1, i1 0
- %nop9350 = alloca i1, i1 0
- %nop9351 = alloca i1, i1 0
- %nop9352 = alloca i1, i1 0
- %nop9353 = alloca i1, i1 0
- %nop9354 = alloca i1, i1 0
- %nop9355 = alloca i1, i1 0
- %nop9356 = alloca i1, i1 0
- %nop9357 = alloca i1, i1 0
- %nop9358 = alloca i1, i1 0
- %nop9359 = alloca i1, i1 0
- %nop9360 = alloca i1, i1 0
- %nop9361 = alloca i1, i1 0
- %nop9362 = alloca i1, i1 0
- %nop9363 = alloca i1, i1 0
- %nop9364 = alloca i1, i1 0
- %nop9365 = alloca i1, i1 0
- %nop9366 = alloca i1, i1 0
- %nop9367 = alloca i1, i1 0
- %nop9368 = alloca i1, i1 0
- %nop9369 = alloca i1, i1 0
- %nop9370 = alloca i1, i1 0
- %nop9371 = alloca i1, i1 0
- %nop9372 = alloca i1, i1 0
- %nop9373 = alloca i1, i1 0
- %nop9374 = alloca i1, i1 0
- %nop9375 = alloca i1, i1 0
- %nop9376 = alloca i1, i1 0
- %nop9377 = alloca i1, i1 0
- %nop9378 = alloca i1, i1 0
- %nop9379 = alloca i1, i1 0
- %nop9380 = alloca i1, i1 0
- %nop9381 = alloca i1, i1 0
- %nop9382 = alloca i1, i1 0
- %nop9383 = alloca i1, i1 0
- %nop9384 = alloca i1, i1 0
- %nop9385 = alloca i1, i1 0
- %nop9386 = alloca i1, i1 0
- %nop9387 = alloca i1, i1 0
- %nop9388 = alloca i1, i1 0
- %nop9389 = alloca i1, i1 0
- %nop9390 = alloca i1, i1 0
- %nop9391 = alloca i1, i1 0
- %nop9392 = alloca i1, i1 0
- %nop9393 = alloca i1, i1 0
- %nop9394 = alloca i1, i1 0
- %nop9395 = alloca i1, i1 0
- %nop9396 = alloca i1, i1 0
- %nop9397 = alloca i1, i1 0
- %nop9398 = alloca i1, i1 0
- %nop9399 = alloca i1, i1 0
- %nop9400 = alloca i1, i1 0
- %nop9401 = alloca i1, i1 0
- %nop9402 = alloca i1, i1 0
- %nop9403 = alloca i1, i1 0
- %nop9404 = alloca i1, i1 0
- %nop9405 = alloca i1, i1 0
- %nop9406 = alloca i1, i1 0
- %nop9407 = alloca i1, i1 0
- %nop9408 = alloca i1, i1 0
- %nop9409 = alloca i1, i1 0
- %nop9410 = alloca i1, i1 0
- %nop9411 = alloca i1, i1 0
- %nop9412 = alloca i1, i1 0
- %nop9413 = alloca i1, i1 0
- %nop9414 = alloca i1, i1 0
- %nop9415 = alloca i1, i1 0
- %nop9416 = alloca i1, i1 0
- %nop9417 = alloca i1, i1 0
- %nop9418 = alloca i1, i1 0
- %nop9419 = alloca i1, i1 0
- %nop9420 = alloca i1, i1 0
- %nop9421 = alloca i1, i1 0
- %nop9422 = alloca i1, i1 0
- %nop9423 = alloca i1, i1 0
- %nop9424 = alloca i1, i1 0
- %nop9425 = alloca i1, i1 0
- %nop9426 = alloca i1, i1 0
- %nop9427 = alloca i1, i1 0
- %nop9428 = alloca i1, i1 0
- %nop9429 = alloca i1, i1 0
- %nop9430 = alloca i1, i1 0
- %nop9431 = alloca i1, i1 0
- %nop9432 = alloca i1, i1 0
- %nop9433 = alloca i1, i1 0
- %nop9434 = alloca i1, i1 0
- %nop9435 = alloca i1, i1 0
- %nop9436 = alloca i1, i1 0
- %nop9437 = alloca i1, i1 0
- %nop9438 = alloca i1, i1 0
- %nop9439 = alloca i1, i1 0
- %nop9440 = alloca i1, i1 0
- %nop9441 = alloca i1, i1 0
- %nop9442 = alloca i1, i1 0
- %nop9443 = alloca i1, i1 0
- %nop9444 = alloca i1, i1 0
- %nop9445 = alloca i1, i1 0
- %nop9446 = alloca i1, i1 0
- %nop9447 = alloca i1, i1 0
- %nop9448 = alloca i1, i1 0
- %nop9449 = alloca i1, i1 0
- %nop9450 = alloca i1, i1 0
- %nop9451 = alloca i1, i1 0
- %nop9452 = alloca i1, i1 0
- %nop9453 = alloca i1, i1 0
- %nop9454 = alloca i1, i1 0
- %nop9455 = alloca i1, i1 0
- %nop9456 = alloca i1, i1 0
- %nop9457 = alloca i1, i1 0
- %nop9458 = alloca i1, i1 0
- %nop9459 = alloca i1, i1 0
- %nop9460 = alloca i1, i1 0
- %nop9461 = alloca i1, i1 0
- %nop9462 = alloca i1, i1 0
- %nop9463 = alloca i1, i1 0
- %nop9464 = alloca i1, i1 0
- %nop9465 = alloca i1, i1 0
- %nop9466 = alloca i1, i1 0
- %nop9467 = alloca i1, i1 0
- %nop9468 = alloca i1, i1 0
- %nop9469 = alloca i1, i1 0
- %nop9470 = alloca i1, i1 0
- %nop9471 = alloca i1, i1 0
- %nop9472 = alloca i1, i1 0
- %nop9473 = alloca i1, i1 0
- %nop9474 = alloca i1, i1 0
- %nop9475 = alloca i1, i1 0
- %nop9476 = alloca i1, i1 0
- %nop9477 = alloca i1, i1 0
- %nop9478 = alloca i1, i1 0
- %nop9479 = alloca i1, i1 0
- %nop9480 = alloca i1, i1 0
- %nop9481 = alloca i1, i1 0
- %nop9482 = alloca i1, i1 0
- %nop9483 = alloca i1, i1 0
- %nop9484 = alloca i1, i1 0
- %nop9485 = alloca i1, i1 0
- %nop9486 = alloca i1, i1 0
- %nop9487 = alloca i1, i1 0
- %nop9488 = alloca i1, i1 0
- %nop9489 = alloca i1, i1 0
- %nop9490 = alloca i1, i1 0
- %nop9491 = alloca i1, i1 0
- %nop9492 = alloca i1, i1 0
- %nop9493 = alloca i1, i1 0
- %nop9494 = alloca i1, i1 0
- %nop9495 = alloca i1, i1 0
- %nop9496 = alloca i1, i1 0
- %nop9497 = alloca i1, i1 0
- %nop9498 = alloca i1, i1 0
- %nop9499 = alloca i1, i1 0
- %nop9500 = alloca i1, i1 0
- %nop9501 = alloca i1, i1 0
- %nop9502 = alloca i1, i1 0
- %nop9503 = alloca i1, i1 0
- %nop9504 = alloca i1, i1 0
- %nop9505 = alloca i1, i1 0
- %nop9506 = alloca i1, i1 0
- %nop9507 = alloca i1, i1 0
- %nop9508 = alloca i1, i1 0
- %nop9509 = alloca i1, i1 0
- %nop9510 = alloca i1, i1 0
- %nop9511 = alloca i1, i1 0
- %nop9512 = alloca i1, i1 0
- %nop9513 = alloca i1, i1 0
- %nop9514 = alloca i1, i1 0
- %nop9515 = alloca i1, i1 0
- %nop9516 = alloca i1, i1 0
- %nop9517 = alloca i1, i1 0
- %nop9518 = alloca i1, i1 0
- %nop9519 = alloca i1, i1 0
- %nop9520 = alloca i1, i1 0
- %nop9521 = alloca i1, i1 0
- %nop9522 = alloca i1, i1 0
- %nop9523 = alloca i1, i1 0
- %nop9524 = alloca i1, i1 0
- %nop9525 = alloca i1, i1 0
- %nop9526 = alloca i1, i1 0
- %nop9527 = alloca i1, i1 0
- %nop9528 = alloca i1, i1 0
- %nop9529 = alloca i1, i1 0
- %nop9530 = alloca i1, i1 0
- %nop9531 = alloca i1, i1 0
- %nop9532 = alloca i1, i1 0
- %nop9533 = alloca i1, i1 0
- %nop9534 = alloca i1, i1 0
- %nop9535 = alloca i1, i1 0
- %nop9536 = alloca i1, i1 0
- %nop9537 = alloca i1, i1 0
- %nop9538 = alloca i1, i1 0
- %nop9539 = alloca i1, i1 0
- %nop9540 = alloca i1, i1 0
- %nop9541 = alloca i1, i1 0
- %nop9542 = alloca i1, i1 0
- %nop9543 = alloca i1, i1 0
- %nop9544 = alloca i1, i1 0
- %nop9545 = alloca i1, i1 0
- %nop9546 = alloca i1, i1 0
- %nop9547 = alloca i1, i1 0
- %nop9548 = alloca i1, i1 0
- %nop9549 = alloca i1, i1 0
- %nop9550 = alloca i1, i1 0
- %nop9551 = alloca i1, i1 0
- %nop9552 = alloca i1, i1 0
- %nop9553 = alloca i1, i1 0
- %nop9554 = alloca i1, i1 0
- %nop9555 = alloca i1, i1 0
- %nop9556 = alloca i1, i1 0
- %nop9557 = alloca i1, i1 0
- %nop9558 = alloca i1, i1 0
- %nop9559 = alloca i1, i1 0
- %nop9560 = alloca i1, i1 0
- %nop9561 = alloca i1, i1 0
- %nop9562 = alloca i1, i1 0
- %nop9563 = alloca i1, i1 0
- %nop9564 = alloca i1, i1 0
- %nop9565 = alloca i1, i1 0
- %nop9566 = alloca i1, i1 0
- %nop9567 = alloca i1, i1 0
- %nop9568 = alloca i1, i1 0
- %nop9569 = alloca i1, i1 0
- %nop9570 = alloca i1, i1 0
- %nop9571 = alloca i1, i1 0
- %nop9572 = alloca i1, i1 0
- %nop9573 = alloca i1, i1 0
- %nop9574 = alloca i1, i1 0
- %nop9575 = alloca i1, i1 0
- %nop9576 = alloca i1, i1 0
- %nop9577 = alloca i1, i1 0
- %nop9578 = alloca i1, i1 0
- %nop9579 = alloca i1, i1 0
- %nop9580 = alloca i1, i1 0
- %nop9581 = alloca i1, i1 0
- %nop9582 = alloca i1, i1 0
- %nop9583 = alloca i1, i1 0
- %nop9584 = alloca i1, i1 0
- %nop9585 = alloca i1, i1 0
- %nop9586 = alloca i1, i1 0
- %nop9587 = alloca i1, i1 0
- %nop9588 = alloca i1, i1 0
- %nop9589 = alloca i1, i1 0
- %nop9590 = alloca i1, i1 0
- %nop9591 = alloca i1, i1 0
- %nop9592 = alloca i1, i1 0
- %nop9593 = alloca i1, i1 0
- %nop9594 = alloca i1, i1 0
- %nop9595 = alloca i1, i1 0
- %nop9596 = alloca i1, i1 0
- %nop9597 = alloca i1, i1 0
- %nop9598 = alloca i1, i1 0
- %nop9599 = alloca i1, i1 0
- %nop9600 = alloca i1, i1 0
- %nop9601 = alloca i1, i1 0
- %nop9602 = alloca i1, i1 0
- %nop9603 = alloca i1, i1 0
- %nop9604 = alloca i1, i1 0
- %nop9605 = alloca i1, i1 0
- %nop9606 = alloca i1, i1 0
- %nop9607 = alloca i1, i1 0
- %nop9608 = alloca i1, i1 0
- %nop9609 = alloca i1, i1 0
- %nop9610 = alloca i1, i1 0
- %nop9611 = alloca i1, i1 0
- %nop9612 = alloca i1, i1 0
- %nop9613 = alloca i1, i1 0
- %nop9614 = alloca i1, i1 0
- %nop9615 = alloca i1, i1 0
- %nop9616 = alloca i1, i1 0
- %nop9617 = alloca i1, i1 0
- %nop9618 = alloca i1, i1 0
- %nop9619 = alloca i1, i1 0
- %nop9620 = alloca i1, i1 0
- %nop9621 = alloca i1, i1 0
- %nop9622 = alloca i1, i1 0
- %nop9623 = alloca i1, i1 0
- %nop9624 = alloca i1, i1 0
- %nop9625 = alloca i1, i1 0
- %nop9626 = alloca i1, i1 0
- %nop9627 = alloca i1, i1 0
- %nop9628 = alloca i1, i1 0
- %nop9629 = alloca i1, i1 0
- %nop9630 = alloca i1, i1 0
- %nop9631 = alloca i1, i1 0
- %nop9632 = alloca i1, i1 0
- %nop9633 = alloca i1, i1 0
- %nop9634 = alloca i1, i1 0
- %nop9635 = alloca i1, i1 0
- %nop9636 = alloca i1, i1 0
- %nop9637 = alloca i1, i1 0
- %nop9638 = alloca i1, i1 0
- %nop9639 = alloca i1, i1 0
- %nop9640 = alloca i1, i1 0
- %nop9641 = alloca i1, i1 0
- %nop9642 = alloca i1, i1 0
- %nop9643 = alloca i1, i1 0
- %nop9644 = alloca i1, i1 0
- %nop9645 = alloca i1, i1 0
- %nop9646 = alloca i1, i1 0
- %nop9647 = alloca i1, i1 0
- %nop9648 = alloca i1, i1 0
- %nop9649 = alloca i1, i1 0
- %nop9650 = alloca i1, i1 0
- %nop9651 = alloca i1, i1 0
- %nop9652 = alloca i1, i1 0
- %nop9653 = alloca i1, i1 0
- %nop9654 = alloca i1, i1 0
- %nop9655 = alloca i1, i1 0
- %nop9656 = alloca i1, i1 0
- %nop9657 = alloca i1, i1 0
- %nop9658 = alloca i1, i1 0
- %nop9659 = alloca i1, i1 0
- %nop9660 = alloca i1, i1 0
- %nop9661 = alloca i1, i1 0
- %nop9662 = alloca i1, i1 0
- %nop9663 = alloca i1, i1 0
- %nop9664 = alloca i1, i1 0
- %nop9665 = alloca i1, i1 0
- %nop9666 = alloca i1, i1 0
- %nop9667 = alloca i1, i1 0
- %nop9668 = alloca i1, i1 0
- %nop9669 = alloca i1, i1 0
- %nop9670 = alloca i1, i1 0
- %nop9671 = alloca i1, i1 0
- %nop9672 = alloca i1, i1 0
- %nop9673 = alloca i1, i1 0
- %nop9674 = alloca i1, i1 0
- %nop9675 = alloca i1, i1 0
- %nop9676 = alloca i1, i1 0
- %nop9677 = alloca i1, i1 0
- %nop9678 = alloca i1, i1 0
- %nop9679 = alloca i1, i1 0
- %nop9680 = alloca i1, i1 0
- %nop9681 = alloca i1, i1 0
- %nop9682 = alloca i1, i1 0
- %nop9683 = alloca i1, i1 0
- %nop9684 = alloca i1, i1 0
- %nop9685 = alloca i1, i1 0
- %nop9686 = alloca i1, i1 0
- %nop9687 = alloca i1, i1 0
- %nop9688 = alloca i1, i1 0
- %nop9689 = alloca i1, i1 0
- %nop9690 = alloca i1, i1 0
- %nop9691 = alloca i1, i1 0
- %nop9692 = alloca i1, i1 0
- %nop9693 = alloca i1, i1 0
- %nop9694 = alloca i1, i1 0
- %nop9695 = alloca i1, i1 0
- %nop9696 = alloca i1, i1 0
- %nop9697 = alloca i1, i1 0
- %nop9698 = alloca i1, i1 0
- %nop9699 = alloca i1, i1 0
- %nop9700 = alloca i1, i1 0
- %nop9701 = alloca i1, i1 0
- %nop9702 = alloca i1, i1 0
- %nop9703 = alloca i1, i1 0
- %nop9704 = alloca i1, i1 0
- %nop9705 = alloca i1, i1 0
- %nop9706 = alloca i1, i1 0
- %nop9707 = alloca i1, i1 0
- %nop9708 = alloca i1, i1 0
- %nop9709 = alloca i1, i1 0
- %nop9710 = alloca i1, i1 0
- %nop9711 = alloca i1, i1 0
- %nop9712 = alloca i1, i1 0
- %nop9713 = alloca i1, i1 0
- %nop9714 = alloca i1, i1 0
- %nop9715 = alloca i1, i1 0
- %nop9716 = alloca i1, i1 0
- %nop9717 = alloca i1, i1 0
- %nop9718 = alloca i1, i1 0
- %nop9719 = alloca i1, i1 0
- %nop9720 = alloca i1, i1 0
- %nop9721 = alloca i1, i1 0
- %nop9722 = alloca i1, i1 0
- %nop9723 = alloca i1, i1 0
- %nop9724 = alloca i1, i1 0
- %nop9725 = alloca i1, i1 0
- %nop9726 = alloca i1, i1 0
- %nop9727 = alloca i1, i1 0
- %nop9728 = alloca i1, i1 0
- %nop9729 = alloca i1, i1 0
- %nop9730 = alloca i1, i1 0
- %nop9731 = alloca i1, i1 0
- %nop9732 = alloca i1, i1 0
- %nop9733 = alloca i1, i1 0
- %nop9734 = alloca i1, i1 0
- %nop9735 = alloca i1, i1 0
- %nop9736 = alloca i1, i1 0
- %nop9737 = alloca i1, i1 0
- %nop9738 = alloca i1, i1 0
- %nop9739 = alloca i1, i1 0
- %nop9740 = alloca i1, i1 0
- %nop9741 = alloca i1, i1 0
- %nop9742 = alloca i1, i1 0
- %nop9743 = alloca i1, i1 0
- %nop9744 = alloca i1, i1 0
- %nop9745 = alloca i1, i1 0
- %nop9746 = alloca i1, i1 0
- %nop9747 = alloca i1, i1 0
- %nop9748 = alloca i1, i1 0
- %nop9749 = alloca i1, i1 0
- %nop9750 = alloca i1, i1 0
- %nop9751 = alloca i1, i1 0
- %nop9752 = alloca i1, i1 0
- %nop9753 = alloca i1, i1 0
- %nop9754 = alloca i1, i1 0
- %nop9755 = alloca i1, i1 0
- %nop9756 = alloca i1, i1 0
- %nop9757 = alloca i1, i1 0
- %nop9758 = alloca i1, i1 0
- %nop9759 = alloca i1, i1 0
- %nop9760 = alloca i1, i1 0
- %nop9761 = alloca i1, i1 0
- %nop9762 = alloca i1, i1 0
- %nop9763 = alloca i1, i1 0
- %nop9764 = alloca i1, i1 0
- %nop9765 = alloca i1, i1 0
- %nop9766 = alloca i1, i1 0
- %nop9767 = alloca i1, i1 0
- %nop9768 = alloca i1, i1 0
- %nop9769 = alloca i1, i1 0
- %nop9770 = alloca i1, i1 0
- %nop9771 = alloca i1, i1 0
- %nop9772 = alloca i1, i1 0
- %nop9773 = alloca i1, i1 0
- %nop9774 = alloca i1, i1 0
- %nop9775 = alloca i1, i1 0
- %nop9776 = alloca i1, i1 0
- %nop9777 = alloca i1, i1 0
- %nop9778 = alloca i1, i1 0
- %nop9779 = alloca i1, i1 0
- %nop9780 = alloca i1, i1 0
- %nop9781 = alloca i1, i1 0
- %nop9782 = alloca i1, i1 0
- %nop9783 = alloca i1, i1 0
- %nop9784 = alloca i1, i1 0
- %nop9785 = alloca i1, i1 0
- %nop9786 = alloca i1, i1 0
- %nop9787 = alloca i1, i1 0
- %nop9788 = alloca i1, i1 0
- %nop9789 = alloca i1, i1 0
- %nop9790 = alloca i1, i1 0
- %nop9791 = alloca i1, i1 0
- %nop9792 = alloca i1, i1 0
- %nop9793 = alloca i1, i1 0
- %nop9794 = alloca i1, i1 0
- %nop9795 = alloca i1, i1 0
- %nop9796 = alloca i1, i1 0
- %nop9797 = alloca i1, i1 0
- %nop9798 = alloca i1, i1 0
- %nop9799 = alloca i1, i1 0
- %nop9800 = alloca i1, i1 0
- %nop9801 = alloca i1, i1 0
- %nop9802 = alloca i1, i1 0
- %nop9803 = alloca i1, i1 0
- %nop9804 = alloca i1, i1 0
- %nop9805 = alloca i1, i1 0
- %nop9806 = alloca i1, i1 0
- %nop9807 = alloca i1, i1 0
- %nop9808 = alloca i1, i1 0
- %nop9809 = alloca i1, i1 0
- %nop9810 = alloca i1, i1 0
- %nop9811 = alloca i1, i1 0
- %nop9812 = alloca i1, i1 0
- %nop9813 = alloca i1, i1 0
- %nop9814 = alloca i1, i1 0
- %nop9815 = alloca i1, i1 0
- %nop9816 = alloca i1, i1 0
- %nop9817 = alloca i1, i1 0
- %nop9818 = alloca i1, i1 0
- %nop9819 = alloca i1, i1 0
- %nop9820 = alloca i1, i1 0
- %nop9821 = alloca i1, i1 0
- %nop9822 = alloca i1, i1 0
- %nop9823 = alloca i1, i1 0
- %nop9824 = alloca i1, i1 0
- %nop9825 = alloca i1, i1 0
- %nop9826 = alloca i1, i1 0
- %nop9827 = alloca i1, i1 0
- %nop9828 = alloca i1, i1 0
- %nop9829 = alloca i1, i1 0
- %nop9830 = alloca i1, i1 0
- %nop9831 = alloca i1, i1 0
- %nop9832 = alloca i1, i1 0
- %nop9833 = alloca i1, i1 0
- %nop9834 = alloca i1, i1 0
- %nop9835 = alloca i1, i1 0
- %nop9836 = alloca i1, i1 0
- %nop9837 = alloca i1, i1 0
- %nop9838 = alloca i1, i1 0
- %nop9839 = alloca i1, i1 0
- %nop9840 = alloca i1, i1 0
- %nop9841 = alloca i1, i1 0
- %nop9842 = alloca i1, i1 0
- %nop9843 = alloca i1, i1 0
- %nop9844 = alloca i1, i1 0
- %nop9845 = alloca i1, i1 0
- %nop9846 = alloca i1, i1 0
- %nop9847 = alloca i1, i1 0
- %nop9848 = alloca i1, i1 0
- %nop9849 = alloca i1, i1 0
- %nop9850 = alloca i1, i1 0
- %nop9851 = alloca i1, i1 0
- %nop9852 = alloca i1, i1 0
- %nop9853 = alloca i1, i1 0
- %nop9854 = alloca i1, i1 0
- %nop9855 = alloca i1, i1 0
- %nop9856 = alloca i1, i1 0
- %nop9857 = alloca i1, i1 0
- %nop9858 = alloca i1, i1 0
- %nop9859 = alloca i1, i1 0
- %nop9860 = alloca i1, i1 0
- %nop9861 = alloca i1, i1 0
- %nop9862 = alloca i1, i1 0
- %nop9863 = alloca i1, i1 0
- %nop9864 = alloca i1, i1 0
- %nop9865 = alloca i1, i1 0
- %nop9866 = alloca i1, i1 0
- %nop9867 = alloca i1, i1 0
- %nop9868 = alloca i1, i1 0
- %nop9869 = alloca i1, i1 0
- %nop9870 = alloca i1, i1 0
- %nop9871 = alloca i1, i1 0
- %nop9872 = alloca i1, i1 0
- %nop9873 = alloca i1, i1 0
- %nop9874 = alloca i1, i1 0
- %nop9875 = alloca i1, i1 0
- %nop9876 = alloca i1, i1 0
- %nop9877 = alloca i1, i1 0
- %nop9878 = alloca i1, i1 0
- %nop9879 = alloca i1, i1 0
- %nop9880 = alloca i1, i1 0
- %nop9881 = alloca i1, i1 0
- %nop9882 = alloca i1, i1 0
- %nop9883 = alloca i1, i1 0
- %nop9884 = alloca i1, i1 0
- %nop9885 = alloca i1, i1 0
- %nop9886 = alloca i1, i1 0
- %nop9887 = alloca i1, i1 0
- %nop9888 = alloca i1, i1 0
- %nop9889 = alloca i1, i1 0
- %nop9890 = alloca i1, i1 0
- %nop9891 = alloca i1, i1 0
- %nop9892 = alloca i1, i1 0
- %nop9893 = alloca i1, i1 0
- %nop9894 = alloca i1, i1 0
- %nop9895 = alloca i1, i1 0
- %nop9896 = alloca i1, i1 0
- %nop9897 = alloca i1, i1 0
- %nop9898 = alloca i1, i1 0
- %nop9899 = alloca i1, i1 0
- %nop9900 = alloca i1, i1 0
- %nop9901 = alloca i1, i1 0
- %nop9902 = alloca i1, i1 0
- %nop9903 = alloca i1, i1 0
- %nop9904 = alloca i1, i1 0
- %nop9905 = alloca i1, i1 0
- %nop9906 = alloca i1, i1 0
- %nop9907 = alloca i1, i1 0
- %nop9908 = alloca i1, i1 0
- %nop9909 = alloca i1, i1 0
- %nop9910 = alloca i1, i1 0
- %nop9911 = alloca i1, i1 0
- %nop9912 = alloca i1, i1 0
- %nop9913 = alloca i1, i1 0
- %nop9914 = alloca i1, i1 0
- %nop9915 = alloca i1, i1 0
- %nop9916 = alloca i1, i1 0
- %nop9917 = alloca i1, i1 0
- %nop9918 = alloca i1, i1 0
- %nop9919 = alloca i1, i1 0
- %nop9920 = alloca i1, i1 0
- %nop9921 = alloca i1, i1 0
- %nop9922 = alloca i1, i1 0
- %nop9923 = alloca i1, i1 0
- %nop9924 = alloca i1, i1 0
- %nop9925 = alloca i1, i1 0
- %nop9926 = alloca i1, i1 0
- %nop9927 = alloca i1, i1 0
- %nop9928 = alloca i1, i1 0
- %nop9929 = alloca i1, i1 0
- %nop9930 = alloca i1, i1 0
- %nop9931 = alloca i1, i1 0
- %nop9932 = alloca i1, i1 0
- %nop9933 = alloca i1, i1 0
- %nop9934 = alloca i1, i1 0
- %nop9935 = alloca i1, i1 0
- %nop9936 = alloca i1, i1 0
- %nop9937 = alloca i1, i1 0
- %nop9938 = alloca i1, i1 0
- %nop9939 = alloca i1, i1 0
- %nop9940 = alloca i1, i1 0
- %nop9941 = alloca i1, i1 0
- %nop9942 = alloca i1, i1 0
- %nop9943 = alloca i1, i1 0
- %nop9944 = alloca i1, i1 0
- %nop9945 = alloca i1, i1 0
- %nop9946 = alloca i1, i1 0
- %nop9947 = alloca i1, i1 0
- %nop9948 = alloca i1, i1 0
- %nop9949 = alloca i1, i1 0
- %nop9950 = alloca i1, i1 0
- %nop9951 = alloca i1, i1 0
- %nop9952 = alloca i1, i1 0
- %nop9953 = alloca i1, i1 0
- %nop9954 = alloca i1, i1 0
- %nop9955 = alloca i1, i1 0
- %nop9956 = alloca i1, i1 0
- %nop9957 = alloca i1, i1 0
- %nop9958 = alloca i1, i1 0
- %nop9959 = alloca i1, i1 0
- %nop9960 = alloca i1, i1 0
- %nop9961 = alloca i1, i1 0
- %nop9962 = alloca i1, i1 0
- %nop9963 = alloca i1, i1 0
- %nop9964 = alloca i1, i1 0
- %nop9965 = alloca i1, i1 0
- %nop9966 = alloca i1, i1 0
- %nop9967 = alloca i1, i1 0
- %nop9968 = alloca i1, i1 0
- %nop9969 = alloca i1, i1 0
- %nop9970 = alloca i1, i1 0
- %nop9971 = alloca i1, i1 0
- %nop9972 = alloca i1, i1 0
- %nop9973 = alloca i1, i1 0
- %nop9974 = alloca i1, i1 0
- %nop9975 = alloca i1, i1 0
- %nop9976 = alloca i1, i1 0
- %nop9977 = alloca i1, i1 0
- %nop9978 = alloca i1, i1 0
- %nop9979 = alloca i1, i1 0
- %nop9980 = alloca i1, i1 0
- %nop9981 = alloca i1, i1 0
- %nop9982 = alloca i1, i1 0
- %nop9983 = alloca i1, i1 0
- %nop9984 = alloca i1, i1 0
- %nop9985 = alloca i1, i1 0
- %nop9986 = alloca i1, i1 0
- %nop9987 = alloca i1, i1 0
- %nop9988 = alloca i1, i1 0
- %nop9989 = alloca i1, i1 0
- %nop9990 = alloca i1, i1 0
- %nop9991 = alloca i1, i1 0
- %nop9992 = alloca i1, i1 0
- %nop9993 = alloca i1, i1 0
- %nop9994 = alloca i1, i1 0
- %nop9995 = alloca i1, i1 0
- %nop9996 = alloca i1, i1 0
- %nop9997 = alloca i1, i1 0
- %nop9998 = alloca i1, i1 0
- %nop9999 = alloca i1, i1 0
- %nop10000 = alloca i1, i1 0
- %nop10001 = alloca i1, i1 0
- %nop10002 = alloca i1, i1 0
- %nop10003 = alloca i1, i1 0
- %nop10004 = alloca i1, i1 0
- %nop10005 = alloca i1, i1 0
- %nop10006 = alloca i1, i1 0
- %nop10007 = alloca i1, i1 0
- %nop10008 = alloca i1, i1 0
- %nop10009 = alloca i1, i1 0
- %nop10010 = alloca i1, i1 0
- %nop10011 = alloca i1, i1 0
- %nop10012 = alloca i1, i1 0
- %nop10013 = alloca i1, i1 0
- %nop10014 = alloca i1, i1 0
- %nop10015 = alloca i1, i1 0
- %nop10016 = alloca i1, i1 0
- %nop10017 = alloca i1, i1 0
- %nop10018 = alloca i1, i1 0
- %nop10019 = alloca i1, i1 0
- %nop10020 = alloca i1, i1 0
- %nop10021 = alloca i1, i1 0
- %nop10022 = alloca i1, i1 0
- %nop10023 = alloca i1, i1 0
- %nop10024 = alloca i1, i1 0
- %nop10025 = alloca i1, i1 0
- %nop10026 = alloca i1, i1 0
- %nop10027 = alloca i1, i1 0
- %nop10028 = alloca i1, i1 0
- %nop10029 = alloca i1, i1 0
- %nop10030 = alloca i1, i1 0
- %nop10031 = alloca i1, i1 0
- %nop10032 = alloca i1, i1 0
- %nop10033 = alloca i1, i1 0
- %nop10034 = alloca i1, i1 0
- %nop10035 = alloca i1, i1 0
- %nop10036 = alloca i1, i1 0
- %nop10037 = alloca i1, i1 0
- %nop10038 = alloca i1, i1 0
- %nop10039 = alloca i1, i1 0
- %nop10040 = alloca i1, i1 0
- %nop10041 = alloca i1, i1 0
- %nop10042 = alloca i1, i1 0
- %nop10043 = alloca i1, i1 0
- %nop10044 = alloca i1, i1 0
- %nop10045 = alloca i1, i1 0
- %nop10046 = alloca i1, i1 0
- %nop10047 = alloca i1, i1 0
- %nop10048 = alloca i1, i1 0
- %nop10049 = alloca i1, i1 0
- %nop10050 = alloca i1, i1 0
- %nop10051 = alloca i1, i1 0
- %nop10052 = alloca i1, i1 0
- %nop10053 = alloca i1, i1 0
- %nop10054 = alloca i1, i1 0
- %nop10055 = alloca i1, i1 0
- %nop10056 = alloca i1, i1 0
- %nop10057 = alloca i1, i1 0
- %nop10058 = alloca i1, i1 0
- %nop10059 = alloca i1, i1 0
- %nop10060 = alloca i1, i1 0
- %nop10061 = alloca i1, i1 0
- %nop10062 = alloca i1, i1 0
- %nop10063 = alloca i1, i1 0
- %nop10064 = alloca i1, i1 0
- %nop10065 = alloca i1, i1 0
- %nop10066 = alloca i1, i1 0
- %nop10067 = alloca i1, i1 0
- %nop10068 = alloca i1, i1 0
- %nop10069 = alloca i1, i1 0
- %nop10070 = alloca i1, i1 0
- %nop10071 = alloca i1, i1 0
- %nop10072 = alloca i1, i1 0
- %nop10073 = alloca i1, i1 0
- %nop10074 = alloca i1, i1 0
- %nop10075 = alloca i1, i1 0
- %nop10076 = alloca i1, i1 0
- %nop10077 = alloca i1, i1 0
- %nop10078 = alloca i1, i1 0
- %nop10079 = alloca i1, i1 0
- %nop10080 = alloca i1, i1 0
- %nop10081 = alloca i1, i1 0
- %nop10082 = alloca i1, i1 0
- %nop10083 = alloca i1, i1 0
- %nop10084 = alloca i1, i1 0
- %nop10085 = alloca i1, i1 0
- %nop10086 = alloca i1, i1 0
- %nop10087 = alloca i1, i1 0
- %nop10088 = alloca i1, i1 0
- %nop10089 = alloca i1, i1 0
- %nop10090 = alloca i1, i1 0
- %nop10091 = alloca i1, i1 0
- %nop10092 = alloca i1, i1 0
- %nop10093 = alloca i1, i1 0
- %nop10094 = alloca i1, i1 0
- %nop10095 = alloca i1, i1 0
- %nop10096 = alloca i1, i1 0
- %nop10097 = alloca i1, i1 0
- %nop10098 = alloca i1, i1 0
- %nop10099 = alloca i1, i1 0
- %nop10100 = alloca i1, i1 0
- %nop10101 = alloca i1, i1 0
- %nop10102 = alloca i1, i1 0
- %nop10103 = alloca i1, i1 0
- %nop10104 = alloca i1, i1 0
- %nop10105 = alloca i1, i1 0
- %nop10106 = alloca i1, i1 0
- %nop10107 = alloca i1, i1 0
- %nop10108 = alloca i1, i1 0
- %nop10109 = alloca i1, i1 0
- %nop10110 = alloca i1, i1 0
- %nop10111 = alloca i1, i1 0
- %nop10112 = alloca i1, i1 0
- %nop10113 = alloca i1, i1 0
- %nop10114 = alloca i1, i1 0
- %nop10115 = alloca i1, i1 0
- %nop10116 = alloca i1, i1 0
- %nop10117 = alloca i1, i1 0
- %nop10118 = alloca i1, i1 0
- %nop10119 = alloca i1, i1 0
- %nop10120 = alloca i1, i1 0
- %nop10121 = alloca i1, i1 0
- %nop10122 = alloca i1, i1 0
- %nop10123 = alloca i1, i1 0
- %nop10124 = alloca i1, i1 0
- %nop10125 = alloca i1, i1 0
- %nop10126 = alloca i1, i1 0
- %nop10127 = alloca i1, i1 0
- %nop10128 = alloca i1, i1 0
- %nop10129 = alloca i1, i1 0
- %nop10130 = alloca i1, i1 0
- %nop10131 = alloca i1, i1 0
- %nop10132 = alloca i1, i1 0
- %nop10133 = alloca i1, i1 0
- %nop10134 = alloca i1, i1 0
- %nop10135 = alloca i1, i1 0
- %nop10136 = alloca i1, i1 0
- %nop10137 = alloca i1, i1 0
- %nop10138 = alloca i1, i1 0
- %nop10139 = alloca i1, i1 0
- %nop10140 = alloca i1, i1 0
- %nop10141 = alloca i1, i1 0
- %nop10142 = alloca i1, i1 0
- %nop10143 = alloca i1, i1 0
- %nop10144 = alloca i1, i1 0
- %nop10145 = alloca i1, i1 0
- %nop10146 = alloca i1, i1 0
- %nop10147 = alloca i1, i1 0
- %nop10148 = alloca i1, i1 0
- %nop10149 = alloca i1, i1 0
- %nop10150 = alloca i1, i1 0
- %nop10151 = alloca i1, i1 0
- %nop10152 = alloca i1, i1 0
- %nop10153 = alloca i1, i1 0
- %nop10154 = alloca i1, i1 0
- %nop10155 = alloca i1, i1 0
- %nop10156 = alloca i1, i1 0
- %nop10157 = alloca i1, i1 0
- %nop10158 = alloca i1, i1 0
- %nop10159 = alloca i1, i1 0
- %nop10160 = alloca i1, i1 0
- %nop10161 = alloca i1, i1 0
- %nop10162 = alloca i1, i1 0
- %nop10163 = alloca i1, i1 0
- %nop10164 = alloca i1, i1 0
- %nop10165 = alloca i1, i1 0
- %nop10166 = alloca i1, i1 0
- %nop10167 = alloca i1, i1 0
- %nop10168 = alloca i1, i1 0
- %nop10169 = alloca i1, i1 0
- %nop10170 = alloca i1, i1 0
- %nop10171 = alloca i1, i1 0
- %nop10172 = alloca i1, i1 0
- %nop10173 = alloca i1, i1 0
- %nop10174 = alloca i1, i1 0
- %nop10175 = alloca i1, i1 0
- %nop10176 = alloca i1, i1 0
- %nop10177 = alloca i1, i1 0
- %nop10178 = alloca i1, i1 0
- %nop10179 = alloca i1, i1 0
- %nop10180 = alloca i1, i1 0
- %nop10181 = alloca i1, i1 0
- %nop10182 = alloca i1, i1 0
- %nop10183 = alloca i1, i1 0
- %nop10184 = alloca i1, i1 0
- %nop10185 = alloca i1, i1 0
- %nop10186 = alloca i1, i1 0
- %nop10187 = alloca i1, i1 0
- %nop10188 = alloca i1, i1 0
- %nop10189 = alloca i1, i1 0
- %nop10190 = alloca i1, i1 0
- %nop10191 = alloca i1, i1 0
- %nop10192 = alloca i1, i1 0
- %nop10193 = alloca i1, i1 0
- %nop10194 = alloca i1, i1 0
- %nop10195 = alloca i1, i1 0
- %nop10196 = alloca i1, i1 0
- %nop10197 = alloca i1, i1 0
- %nop10198 = alloca i1, i1 0
- %nop10199 = alloca i1, i1 0
- %nop10200 = alloca i1, i1 0
- %nop10201 = alloca i1, i1 0
- %nop10202 = alloca i1, i1 0
- %nop10203 = alloca i1, i1 0
- %nop10204 = alloca i1, i1 0
- %nop10205 = alloca i1, i1 0
- %nop10206 = alloca i1, i1 0
- %nop10207 = alloca i1, i1 0
- %nop10208 = alloca i1, i1 0
- %nop10209 = alloca i1, i1 0
- %nop10210 = alloca i1, i1 0
- %nop10211 = alloca i1, i1 0
- %nop10212 = alloca i1, i1 0
- %nop10213 = alloca i1, i1 0
- %nop10214 = alloca i1, i1 0
- %nop10215 = alloca i1, i1 0
- %nop10216 = alloca i1, i1 0
- %nop10217 = alloca i1, i1 0
- %nop10218 = alloca i1, i1 0
- %nop10219 = alloca i1, i1 0
- %nop10220 = alloca i1, i1 0
- %nop10221 = alloca i1, i1 0
- %nop10222 = alloca i1, i1 0
- %nop10223 = alloca i1, i1 0
- %nop10224 = alloca i1, i1 0
- %nop10225 = alloca i1, i1 0
- %nop10226 = alloca i1, i1 0
- %nop10227 = alloca i1, i1 0
- %nop10228 = alloca i1, i1 0
- %nop10229 = alloca i1, i1 0
- %nop10230 = alloca i1, i1 0
- %nop10231 = alloca i1, i1 0
- %nop10232 = alloca i1, i1 0
- %nop10233 = alloca i1, i1 0
- %nop10234 = alloca i1, i1 0
- %nop10235 = alloca i1, i1 0
- %nop10236 = alloca i1, i1 0
- %nop10237 = alloca i1, i1 0
- %nop10238 = alloca i1, i1 0
- %nop10239 = alloca i1, i1 0
- %nop10240 = alloca i1, i1 0
- %nop10241 = alloca i1, i1 0
- %nop10242 = alloca i1, i1 0
- %nop10243 = alloca i1, i1 0
- %nop10244 = alloca i1, i1 0
- %nop10245 = alloca i1, i1 0
- %nop10246 = alloca i1, i1 0
- %nop10247 = alloca i1, i1 0
- %nop10248 = alloca i1, i1 0
- %nop10249 = alloca i1, i1 0
- %nop10250 = alloca i1, i1 0
- %nop10251 = alloca i1, i1 0
- %nop10252 = alloca i1, i1 0
- %nop10253 = alloca i1, i1 0
- %nop10254 = alloca i1, i1 0
- %nop10255 = alloca i1, i1 0
- %nop10256 = alloca i1, i1 0
- %nop10257 = alloca i1, i1 0
- %nop10258 = alloca i1, i1 0
- %nop10259 = alloca i1, i1 0
- %nop10260 = alloca i1, i1 0
- %nop10261 = alloca i1, i1 0
- %nop10262 = alloca i1, i1 0
- %nop10263 = alloca i1, i1 0
- %nop10264 = alloca i1, i1 0
- %nop10265 = alloca i1, i1 0
- %nop10266 = alloca i1, i1 0
- %nop10267 = alloca i1, i1 0
- %nop10268 = alloca i1, i1 0
- %nop10269 = alloca i1, i1 0
- %nop10270 = alloca i1, i1 0
- %nop10271 = alloca i1, i1 0
- %nop10272 = alloca i1, i1 0
- %nop10273 = alloca i1, i1 0
- %nop10274 = alloca i1, i1 0
- %nop10275 = alloca i1, i1 0
- %nop10276 = alloca i1, i1 0
- %nop10277 = alloca i1, i1 0
- %nop10278 = alloca i1, i1 0
- %nop10279 = alloca i1, i1 0
- %nop10280 = alloca i1, i1 0
- %nop10281 = alloca i1, i1 0
- %nop10282 = alloca i1, i1 0
- %nop10283 = alloca i1, i1 0
- %nop10284 = alloca i1, i1 0
- %nop10285 = alloca i1, i1 0
- %nop10286 = alloca i1, i1 0
- %nop10287 = alloca i1, i1 0
- %nop10288 = alloca i1, i1 0
- %nop10289 = alloca i1, i1 0
- %nop10290 = alloca i1, i1 0
- %nop10291 = alloca i1, i1 0
- %nop10292 = alloca i1, i1 0
- %nop10293 = alloca i1, i1 0
- %nop10294 = alloca i1, i1 0
- %nop10295 = alloca i1, i1 0
- %nop10296 = alloca i1, i1 0
- %nop10297 = alloca i1, i1 0
- %nop10298 = alloca i1, i1 0
- %nop10299 = alloca i1, i1 0
- %nop10300 = alloca i1, i1 0
- %nop10301 = alloca i1, i1 0
- %nop10302 = alloca i1, i1 0
- %nop10303 = alloca i1, i1 0
- %nop10304 = alloca i1, i1 0
- %nop10305 = alloca i1, i1 0
- %nop10306 = alloca i1, i1 0
- %nop10307 = alloca i1, i1 0
- %nop10308 = alloca i1, i1 0
- %nop10309 = alloca i1, i1 0
- %nop10310 = alloca i1, i1 0
- %nop10311 = alloca i1, i1 0
- %nop10312 = alloca i1, i1 0
- %nop10313 = alloca i1, i1 0
- %nop10314 = alloca i1, i1 0
- %nop10315 = alloca i1, i1 0
- %nop10316 = alloca i1, i1 0
- %nop10317 = alloca i1, i1 0
- %nop10318 = alloca i1, i1 0
- %nop10319 = alloca i1, i1 0
- %nop10320 = alloca i1, i1 0
- %nop10321 = alloca i1, i1 0
- %nop10322 = alloca i1, i1 0
- %nop10323 = alloca i1, i1 0
- %nop10324 = alloca i1, i1 0
- %nop10325 = alloca i1, i1 0
- %nop10326 = alloca i1, i1 0
- %nop10327 = alloca i1, i1 0
- %nop10328 = alloca i1, i1 0
- %nop10329 = alloca i1, i1 0
- %nop10330 = alloca i1, i1 0
- %nop10331 = alloca i1, i1 0
- %nop10332 = alloca i1, i1 0
- %nop10333 = alloca i1, i1 0
- %nop10334 = alloca i1, i1 0
- %nop10335 = alloca i1, i1 0
- %nop10336 = alloca i1, i1 0
- %nop10337 = alloca i1, i1 0
- %nop10338 = alloca i1, i1 0
- %nop10339 = alloca i1, i1 0
- %nop10340 = alloca i1, i1 0
- %nop10341 = alloca i1, i1 0
- %nop10342 = alloca i1, i1 0
- %nop10343 = alloca i1, i1 0
- %nop10344 = alloca i1, i1 0
- %nop10345 = alloca i1, i1 0
- %nop10346 = alloca i1, i1 0
- %nop10347 = alloca i1, i1 0
- %nop10348 = alloca i1, i1 0
- %nop10349 = alloca i1, i1 0
- %nop10350 = alloca i1, i1 0
- %nop10351 = alloca i1, i1 0
- %nop10352 = alloca i1, i1 0
- %nop10353 = alloca i1, i1 0
- %nop10354 = alloca i1, i1 0
- %nop10355 = alloca i1, i1 0
- %nop10356 = alloca i1, i1 0
- %nop10357 = alloca i1, i1 0
- %nop10358 = alloca i1, i1 0
- %nop10359 = alloca i1, i1 0
- %nop10360 = alloca i1, i1 0
- %nop10361 = alloca i1, i1 0
- %nop10362 = alloca i1, i1 0
- %nop10363 = alloca i1, i1 0
- %nop10364 = alloca i1, i1 0
- %nop10365 = alloca i1, i1 0
- %nop10366 = alloca i1, i1 0
- %nop10367 = alloca i1, i1 0
- %nop10368 = alloca i1, i1 0
- %nop10369 = alloca i1, i1 0
- %nop10370 = alloca i1, i1 0
- %nop10371 = alloca i1, i1 0
- %nop10372 = alloca i1, i1 0
- %nop10373 = alloca i1, i1 0
- %nop10374 = alloca i1, i1 0
- %nop10375 = alloca i1, i1 0
- %nop10376 = alloca i1, i1 0
- %nop10377 = alloca i1, i1 0
- %nop10378 = alloca i1, i1 0
- %nop10379 = alloca i1, i1 0
- %nop10380 = alloca i1, i1 0
- %nop10381 = alloca i1, i1 0
- %nop10382 = alloca i1, i1 0
- %nop10383 = alloca i1, i1 0
- %nop10384 = alloca i1, i1 0
- %nop10385 = alloca i1, i1 0
- %nop10386 = alloca i1, i1 0
- %nop10387 = alloca i1, i1 0
- %nop10388 = alloca i1, i1 0
- %nop10389 = alloca i1, i1 0
- %nop10390 = alloca i1, i1 0
- %nop10391 = alloca i1, i1 0
- %nop10392 = alloca i1, i1 0
- %nop10393 = alloca i1, i1 0
- %nop10394 = alloca i1, i1 0
- %nop10395 = alloca i1, i1 0
- %nop10396 = alloca i1, i1 0
- %nop10397 = alloca i1, i1 0
- %nop10398 = alloca i1, i1 0
- %nop10399 = alloca i1, i1 0
- %nop10400 = alloca i1, i1 0
- %nop10401 = alloca i1, i1 0
- %nop10402 = alloca i1, i1 0
- %nop10403 = alloca i1, i1 0
- %nop10404 = alloca i1, i1 0
- %nop10405 = alloca i1, i1 0
- %nop10406 = alloca i1, i1 0
- %nop10407 = alloca i1, i1 0
- %nop10408 = alloca i1, i1 0
- %nop10409 = alloca i1, i1 0
- %nop10410 = alloca i1, i1 0
- %nop10411 = alloca i1, i1 0
- %nop10412 = alloca i1, i1 0
- %nop10413 = alloca i1, i1 0
- %nop10414 = alloca i1, i1 0
- %nop10415 = alloca i1, i1 0
- %nop10416 = alloca i1, i1 0
- %nop10417 = alloca i1, i1 0
- %nop10418 = alloca i1, i1 0
- %nop10419 = alloca i1, i1 0
- %nop10420 = alloca i1, i1 0
- %nop10421 = alloca i1, i1 0
- %nop10422 = alloca i1, i1 0
- %nop10423 = alloca i1, i1 0
- %nop10424 = alloca i1, i1 0
- %nop10425 = alloca i1, i1 0
- %nop10426 = alloca i1, i1 0
- %nop10427 = alloca i1, i1 0
- %nop10428 = alloca i1, i1 0
- %nop10429 = alloca i1, i1 0
- %nop10430 = alloca i1, i1 0
- %nop10431 = alloca i1, i1 0
- %nop10432 = alloca i1, i1 0
- %nop10433 = alloca i1, i1 0
- %nop10434 = alloca i1, i1 0
- %nop10435 = alloca i1, i1 0
- %nop10436 = alloca i1, i1 0
- %nop10437 = alloca i1, i1 0
- %nop10438 = alloca i1, i1 0
- %nop10439 = alloca i1, i1 0
- %nop10440 = alloca i1, i1 0
- %nop10441 = alloca i1, i1 0
- %nop10442 = alloca i1, i1 0
- %nop10443 = alloca i1, i1 0
- %nop10444 = alloca i1, i1 0
- %nop10445 = alloca i1, i1 0
- %nop10446 = alloca i1, i1 0
- %nop10447 = alloca i1, i1 0
- %nop10448 = alloca i1, i1 0
- %nop10449 = alloca i1, i1 0
- %nop10450 = alloca i1, i1 0
- %nop10451 = alloca i1, i1 0
- %nop10452 = alloca i1, i1 0
- %nop10453 = alloca i1, i1 0
- %nop10454 = alloca i1, i1 0
- %nop10455 = alloca i1, i1 0
- %nop10456 = alloca i1, i1 0
- %nop10457 = alloca i1, i1 0
- %nop10458 = alloca i1, i1 0
- %nop10459 = alloca i1, i1 0
- %nop10460 = alloca i1, i1 0
- %nop10461 = alloca i1, i1 0
- %nop10462 = alloca i1, i1 0
- %nop10463 = alloca i1, i1 0
- %nop10464 = alloca i1, i1 0
- %nop10465 = alloca i1, i1 0
- %nop10466 = alloca i1, i1 0
- %nop10467 = alloca i1, i1 0
- %nop10468 = alloca i1, i1 0
- %nop10469 = alloca i1, i1 0
- %nop10470 = alloca i1, i1 0
- %nop10471 = alloca i1, i1 0
- %nop10472 = alloca i1, i1 0
- %nop10473 = alloca i1, i1 0
- %nop10474 = alloca i1, i1 0
- %nop10475 = alloca i1, i1 0
- %nop10476 = alloca i1, i1 0
- %nop10477 = alloca i1, i1 0
- %nop10478 = alloca i1, i1 0
- %nop10479 = alloca i1, i1 0
- %nop10480 = alloca i1, i1 0
- %nop10481 = alloca i1, i1 0
- %nop10482 = alloca i1, i1 0
- %nop10483 = alloca i1, i1 0
- %nop10484 = alloca i1, i1 0
- %nop10485 = alloca i1, i1 0
- %nop10486 = alloca i1, i1 0
- %nop10487 = alloca i1, i1 0
- %nop10488 = alloca i1, i1 0
- %nop10489 = alloca i1, i1 0
- %nop10490 = alloca i1, i1 0
- %nop10491 = alloca i1, i1 0
- %nop10492 = alloca i1, i1 0
- %nop10493 = alloca i1, i1 0
- %nop10494 = alloca i1, i1 0
- %nop10495 = alloca i1, i1 0
- %nop10496 = alloca i1, i1 0
- %nop10497 = alloca i1, i1 0
- %nop10498 = alloca i1, i1 0
- %nop10499 = alloca i1, i1 0
- %nop10500 = alloca i1, i1 0
- %nop10501 = alloca i1, i1 0
- %nop10502 = alloca i1, i1 0
- %nop10503 = alloca i1, i1 0
- %nop10504 = alloca i1, i1 0
- %nop10505 = alloca i1, i1 0
- %nop10506 = alloca i1, i1 0
- %nop10507 = alloca i1, i1 0
- %nop10508 = alloca i1, i1 0
- %nop10509 = alloca i1, i1 0
- %nop10510 = alloca i1, i1 0
- %nop10511 = alloca i1, i1 0
- %nop10512 = alloca i1, i1 0
- %nop10513 = alloca i1, i1 0
- %nop10514 = alloca i1, i1 0
- %nop10515 = alloca i1, i1 0
- %nop10516 = alloca i1, i1 0
- %nop10517 = alloca i1, i1 0
- %nop10518 = alloca i1, i1 0
- %nop10519 = alloca i1, i1 0
- %nop10520 = alloca i1, i1 0
- %nop10521 = alloca i1, i1 0
- %nop10522 = alloca i1, i1 0
- %nop10523 = alloca i1, i1 0
- %nop10524 = alloca i1, i1 0
- %nop10525 = alloca i1, i1 0
- %nop10526 = alloca i1, i1 0
- %nop10527 = alloca i1, i1 0
- %nop10528 = alloca i1, i1 0
- %nop10529 = alloca i1, i1 0
- %nop10530 = alloca i1, i1 0
- %nop10531 = alloca i1, i1 0
- %nop10532 = alloca i1, i1 0
- %nop10533 = alloca i1, i1 0
- %nop10534 = alloca i1, i1 0
- %nop10535 = alloca i1, i1 0
- %nop10536 = alloca i1, i1 0
- %nop10537 = alloca i1, i1 0
- %nop10538 = alloca i1, i1 0
- %nop10539 = alloca i1, i1 0
- %nop10540 = alloca i1, i1 0
- %nop10541 = alloca i1, i1 0
- %nop10542 = alloca i1, i1 0
- %nop10543 = alloca i1, i1 0
- %nop10544 = alloca i1, i1 0
- %nop10545 = alloca i1, i1 0
- %nop10546 = alloca i1, i1 0
- %nop10547 = alloca i1, i1 0
- %nop10548 = alloca i1, i1 0
- %nop10549 = alloca i1, i1 0
- %nop10550 = alloca i1, i1 0
- %nop10551 = alloca i1, i1 0
- %nop10552 = alloca i1, i1 0
- %nop10553 = alloca i1, i1 0
- %nop10554 = alloca i1, i1 0
- %nop10555 = alloca i1, i1 0
- %nop10556 = alloca i1, i1 0
- %nop10557 = alloca i1, i1 0
- %nop10558 = alloca i1, i1 0
- %nop10559 = alloca i1, i1 0
- %nop10560 = alloca i1, i1 0
- %nop10561 = alloca i1, i1 0
- %nop10562 = alloca i1, i1 0
- %nop10563 = alloca i1, i1 0
- %nop10564 = alloca i1, i1 0
- %nop10565 = alloca i1, i1 0
- %nop10566 = alloca i1, i1 0
- %nop10567 = alloca i1, i1 0
- %nop10568 = alloca i1, i1 0
- %nop10569 = alloca i1, i1 0
- %nop10570 = alloca i1, i1 0
- %nop10571 = alloca i1, i1 0
- %nop10572 = alloca i1, i1 0
- %nop10573 = alloca i1, i1 0
- %nop10574 = alloca i1, i1 0
- %nop10575 = alloca i1, i1 0
- %nop10576 = alloca i1, i1 0
- %nop10577 = alloca i1, i1 0
- %nop10578 = alloca i1, i1 0
- %nop10579 = alloca i1, i1 0
- %nop10580 = alloca i1, i1 0
- %nop10581 = alloca i1, i1 0
- %nop10582 = alloca i1, i1 0
- %nop10583 = alloca i1, i1 0
- %nop10584 = alloca i1, i1 0
- %nop10585 = alloca i1, i1 0
- %nop10586 = alloca i1, i1 0
- %nop10587 = alloca i1, i1 0
- %nop10588 = alloca i1, i1 0
- %nop10589 = alloca i1, i1 0
- %nop10590 = alloca i1, i1 0
- %nop10591 = alloca i1, i1 0
- %nop10592 = alloca i1, i1 0
- %nop10593 = alloca i1, i1 0
- %nop10594 = alloca i1, i1 0
- %nop10595 = alloca i1, i1 0
- %nop10596 = alloca i1, i1 0
- %nop10597 = alloca i1, i1 0
- %nop10598 = alloca i1, i1 0
- %nop10599 = alloca i1, i1 0
- %nop10600 = alloca i1, i1 0
- %nop10601 = alloca i1, i1 0
- %nop10602 = alloca i1, i1 0
- %nop10603 = alloca i1, i1 0
- %nop10604 = alloca i1, i1 0
- %nop10605 = alloca i1, i1 0
- %nop10606 = alloca i1, i1 0
- %nop10607 = alloca i1, i1 0
- %nop10608 = alloca i1, i1 0
- %nop10609 = alloca i1, i1 0
- %nop10610 = alloca i1, i1 0
- %nop10611 = alloca i1, i1 0
- %nop10612 = alloca i1, i1 0
- %nop10613 = alloca i1, i1 0
- %nop10614 = alloca i1, i1 0
- %nop10615 = alloca i1, i1 0
- %nop10616 = alloca i1, i1 0
- %nop10617 = alloca i1, i1 0
- %nop10618 = alloca i1, i1 0
- %nop10619 = alloca i1, i1 0
- %nop10620 = alloca i1, i1 0
- %nop10621 = alloca i1, i1 0
- %nop10622 = alloca i1, i1 0
- %nop10623 = alloca i1, i1 0
- %nop10624 = alloca i1, i1 0
- %nop10625 = alloca i1, i1 0
- %nop10626 = alloca i1, i1 0
- %nop10627 = alloca i1, i1 0
- %nop10628 = alloca i1, i1 0
- %nop10629 = alloca i1, i1 0
- %nop10630 = alloca i1, i1 0
- %nop10631 = alloca i1, i1 0
- %nop10632 = alloca i1, i1 0
- %nop10633 = alloca i1, i1 0
- %nop10634 = alloca i1, i1 0
- %nop10635 = alloca i1, i1 0
- %nop10636 = alloca i1, i1 0
- %nop10637 = alloca i1, i1 0
- %nop10638 = alloca i1, i1 0
- %nop10639 = alloca i1, i1 0
- %nop10640 = alloca i1, i1 0
- %nop10641 = alloca i1, i1 0
- %nop10642 = alloca i1, i1 0
- %nop10643 = alloca i1, i1 0
- %nop10644 = alloca i1, i1 0
- %nop10645 = alloca i1, i1 0
- %nop10646 = alloca i1, i1 0
- %nop10647 = alloca i1, i1 0
- %nop10648 = alloca i1, i1 0
- %nop10649 = alloca i1, i1 0
- %nop10650 = alloca i1, i1 0
- %nop10651 = alloca i1, i1 0
- %nop10652 = alloca i1, i1 0
- %nop10653 = alloca i1, i1 0
- %nop10654 = alloca i1, i1 0
- %nop10655 = alloca i1, i1 0
- %nop10656 = alloca i1, i1 0
- %nop10657 = alloca i1, i1 0
- %nop10658 = alloca i1, i1 0
- %nop10659 = alloca i1, i1 0
- %nop10660 = alloca i1, i1 0
- %nop10661 = alloca i1, i1 0
- %nop10662 = alloca i1, i1 0
- %nop10663 = alloca i1, i1 0
- %nop10664 = alloca i1, i1 0
- %nop10665 = alloca i1, i1 0
- %nop10666 = alloca i1, i1 0
- %nop10667 = alloca i1, i1 0
- %nop10668 = alloca i1, i1 0
- %nop10669 = alloca i1, i1 0
- %nop10670 = alloca i1, i1 0
- %nop10671 = alloca i1, i1 0
- %nop10672 = alloca i1, i1 0
- %nop10673 = alloca i1, i1 0
- %nop10674 = alloca i1, i1 0
- %nop10675 = alloca i1, i1 0
- %nop10676 = alloca i1, i1 0
- %nop10677 = alloca i1, i1 0
- %nop10678 = alloca i1, i1 0
- %nop10679 = alloca i1, i1 0
- %nop10680 = alloca i1, i1 0
- %nop10681 = alloca i1, i1 0
- %nop10682 = alloca i1, i1 0
- %nop10683 = alloca i1, i1 0
- %nop10684 = alloca i1, i1 0
- %nop10685 = alloca i1, i1 0
- %nop10686 = alloca i1, i1 0
- %nop10687 = alloca i1, i1 0
- %nop10688 = alloca i1, i1 0
- %nop10689 = alloca i1, i1 0
- %nop10690 = alloca i1, i1 0
- %nop10691 = alloca i1, i1 0
- %nop10692 = alloca i1, i1 0
- %nop10693 = alloca i1, i1 0
- %nop10694 = alloca i1, i1 0
- %nop10695 = alloca i1, i1 0
- %nop10696 = alloca i1, i1 0
- %nop10697 = alloca i1, i1 0
- %nop10698 = alloca i1, i1 0
- %nop10699 = alloca i1, i1 0
- %nop10700 = alloca i1, i1 0
- %nop10701 = alloca i1, i1 0
- %nop10702 = alloca i1, i1 0
- %nop10703 = alloca i1, i1 0
- %nop10704 = alloca i1, i1 0
- %nop10705 = alloca i1, i1 0
- %nop10706 = alloca i1, i1 0
- %nop10707 = alloca i1, i1 0
- %nop10708 = alloca i1, i1 0
- %nop10709 = alloca i1, i1 0
- %nop10710 = alloca i1, i1 0
- %nop10711 = alloca i1, i1 0
- %nop10712 = alloca i1, i1 0
- %nop10713 = alloca i1, i1 0
- %nop10714 = alloca i1, i1 0
- %nop10715 = alloca i1, i1 0
- %nop10716 = alloca i1, i1 0
- %nop10717 = alloca i1, i1 0
- %nop10718 = alloca i1, i1 0
- %nop10719 = alloca i1, i1 0
- %nop10720 = alloca i1, i1 0
- %nop10721 = alloca i1, i1 0
- %nop10722 = alloca i1, i1 0
- %nop10723 = alloca i1, i1 0
- %nop10724 = alloca i1, i1 0
- %nop10725 = alloca i1, i1 0
- %nop10726 = alloca i1, i1 0
- %nop10727 = alloca i1, i1 0
- %nop10728 = alloca i1, i1 0
- %nop10729 = alloca i1, i1 0
- %nop10730 = alloca i1, i1 0
- %nop10731 = alloca i1, i1 0
- %nop10732 = alloca i1, i1 0
- %nop10733 = alloca i1, i1 0
- %nop10734 = alloca i1, i1 0
- %nop10735 = alloca i1, i1 0
- %nop10736 = alloca i1, i1 0
- %nop10737 = alloca i1, i1 0
- %nop10738 = alloca i1, i1 0
- %nop10739 = alloca i1, i1 0
- %nop10740 = alloca i1, i1 0
- %nop10741 = alloca i1, i1 0
- %nop10742 = alloca i1, i1 0
- %nop10743 = alloca i1, i1 0
- %nop10744 = alloca i1, i1 0
- %nop10745 = alloca i1, i1 0
- %nop10746 = alloca i1, i1 0
- %nop10747 = alloca i1, i1 0
- %nop10748 = alloca i1, i1 0
- %nop10749 = alloca i1, i1 0
- %nop10750 = alloca i1, i1 0
- %nop10751 = alloca i1, i1 0
- %nop10752 = alloca i1, i1 0
- %nop10753 = alloca i1, i1 0
- %nop10754 = alloca i1, i1 0
- %nop10755 = alloca i1, i1 0
- %nop10756 = alloca i1, i1 0
- %nop10757 = alloca i1, i1 0
- %nop10758 = alloca i1, i1 0
- %nop10759 = alloca i1, i1 0
- %nop10760 = alloca i1, i1 0
- %nop10761 = alloca i1, i1 0
- %nop10762 = alloca i1, i1 0
- %nop10763 = alloca i1, i1 0
- %nop10764 = alloca i1, i1 0
- %nop10765 = alloca i1, i1 0
- %nop10766 = alloca i1, i1 0
- %nop10767 = alloca i1, i1 0
- %nop10768 = alloca i1, i1 0
- %nop10769 = alloca i1, i1 0
- %nop10770 = alloca i1, i1 0
- %nop10771 = alloca i1, i1 0
- %nop10772 = alloca i1, i1 0
- %nop10773 = alloca i1, i1 0
- %nop10774 = alloca i1, i1 0
- %nop10775 = alloca i1, i1 0
- %nop10776 = alloca i1, i1 0
- %nop10777 = alloca i1, i1 0
- %nop10778 = alloca i1, i1 0
- %nop10779 = alloca i1, i1 0
- %nop10780 = alloca i1, i1 0
- %nop10781 = alloca i1, i1 0
- %nop10782 = alloca i1, i1 0
- %nop10783 = alloca i1, i1 0
- %nop10784 = alloca i1, i1 0
- %nop10785 = alloca i1, i1 0
- %nop10786 = alloca i1, i1 0
- %nop10787 = alloca i1, i1 0
- %nop10788 = alloca i1, i1 0
- %nop10789 = alloca i1, i1 0
- %nop10790 = alloca i1, i1 0
- %nop10791 = alloca i1, i1 0
- %nop10792 = alloca i1, i1 0
- %nop10793 = alloca i1, i1 0
- %nop10794 = alloca i1, i1 0
- %nop10795 = alloca i1, i1 0
- %nop10796 = alloca i1, i1 0
- %nop10797 = alloca i1, i1 0
- %nop10798 = alloca i1, i1 0
- %nop10799 = alloca i1, i1 0
- %nop10800 = alloca i1, i1 0
- %nop10801 = alloca i1, i1 0
- %nop10802 = alloca i1, i1 0
- %nop10803 = alloca i1, i1 0
- %nop10804 = alloca i1, i1 0
- %nop10805 = alloca i1, i1 0
- %nop10806 = alloca i1, i1 0
- %nop10807 = alloca i1, i1 0
- %nop10808 = alloca i1, i1 0
- %nop10809 = alloca i1, i1 0
- %nop10810 = alloca i1, i1 0
- %nop10811 = alloca i1, i1 0
- %nop10812 = alloca i1, i1 0
- %nop10813 = alloca i1, i1 0
- %nop10814 = alloca i1, i1 0
- %nop10815 = alloca i1, i1 0
- %nop10816 = alloca i1, i1 0
- %nop10817 = alloca i1, i1 0
- %nop10818 = alloca i1, i1 0
- %nop10819 = alloca i1, i1 0
- %nop10820 = alloca i1, i1 0
- %nop10821 = alloca i1, i1 0
- %nop10822 = alloca i1, i1 0
- %nop10823 = alloca i1, i1 0
- %nop10824 = alloca i1, i1 0
- %nop10825 = alloca i1, i1 0
- %nop10826 = alloca i1, i1 0
- %nop10827 = alloca i1, i1 0
- %nop10828 = alloca i1, i1 0
- %nop10829 = alloca i1, i1 0
- %nop10830 = alloca i1, i1 0
- %nop10831 = alloca i1, i1 0
- %nop10832 = alloca i1, i1 0
- %nop10833 = alloca i1, i1 0
- %nop10834 = alloca i1, i1 0
- %nop10835 = alloca i1, i1 0
- %nop10836 = alloca i1, i1 0
- %nop10837 = alloca i1, i1 0
- %nop10838 = alloca i1, i1 0
- %nop10839 = alloca i1, i1 0
- %nop10840 = alloca i1, i1 0
- %nop10841 = alloca i1, i1 0
- %nop10842 = alloca i1, i1 0
- %nop10843 = alloca i1, i1 0
- %nop10844 = alloca i1, i1 0
- %nop10845 = alloca i1, i1 0
- %nop10846 = alloca i1, i1 0
- %nop10847 = alloca i1, i1 0
- %nop10848 = alloca i1, i1 0
- %nop10849 = alloca i1, i1 0
- %nop10850 = alloca i1, i1 0
- %nop10851 = alloca i1, i1 0
- %nop10852 = alloca i1, i1 0
- %nop10853 = alloca i1, i1 0
- %nop10854 = alloca i1, i1 0
- %nop10855 = alloca i1, i1 0
- %nop10856 = alloca i1, i1 0
- %nop10857 = alloca i1, i1 0
- %nop10858 = alloca i1, i1 0
- %nop10859 = alloca i1, i1 0
- %nop10860 = alloca i1, i1 0
- %nop10861 = alloca i1, i1 0
- %nop10862 = alloca i1, i1 0
- %nop10863 = alloca i1, i1 0
- %nop10864 = alloca i1, i1 0
- %nop10865 = alloca i1, i1 0
- %nop10866 = alloca i1, i1 0
- %nop10867 = alloca i1, i1 0
- %nop10868 = alloca i1, i1 0
- %nop10869 = alloca i1, i1 0
- %nop10870 = alloca i1, i1 0
- %nop10871 = alloca i1, i1 0
- %nop10872 = alloca i1, i1 0
- %nop10873 = alloca i1, i1 0
- %nop10874 = alloca i1, i1 0
- %nop10875 = alloca i1, i1 0
- %nop10876 = alloca i1, i1 0
- %nop10877 = alloca i1, i1 0
- %nop10878 = alloca i1, i1 0
- %nop10879 = alloca i1, i1 0
- %nop10880 = alloca i1, i1 0
- %nop10881 = alloca i1, i1 0
- %nop10882 = alloca i1, i1 0
- %nop10883 = alloca i1, i1 0
- %nop10884 = alloca i1, i1 0
- %nop10885 = alloca i1, i1 0
- %nop10886 = alloca i1, i1 0
- %nop10887 = alloca i1, i1 0
- %nop10888 = alloca i1, i1 0
- %nop10889 = alloca i1, i1 0
- %nop10890 = alloca i1, i1 0
- %nop10891 = alloca i1, i1 0
- %nop10892 = alloca i1, i1 0
- %nop10893 = alloca i1, i1 0
- %nop10894 = alloca i1, i1 0
- %nop10895 = alloca i1, i1 0
- %nop10896 = alloca i1, i1 0
- %nop10897 = alloca i1, i1 0
- %nop10898 = alloca i1, i1 0
- %nop10899 = alloca i1, i1 0
- %nop10900 = alloca i1, i1 0
- %nop10901 = alloca i1, i1 0
- %nop10902 = alloca i1, i1 0
- %nop10903 = alloca i1, i1 0
- %nop10904 = alloca i1, i1 0
- %nop10905 = alloca i1, i1 0
- %nop10906 = alloca i1, i1 0
- %nop10907 = alloca i1, i1 0
- %nop10908 = alloca i1, i1 0
- %nop10909 = alloca i1, i1 0
- %nop10910 = alloca i1, i1 0
- %nop10911 = alloca i1, i1 0
- %nop10912 = alloca i1, i1 0
- %nop10913 = alloca i1, i1 0
- %nop10914 = alloca i1, i1 0
- %nop10915 = alloca i1, i1 0
- %nop10916 = alloca i1, i1 0
- %nop10917 = alloca i1, i1 0
- %nop10918 = alloca i1, i1 0
- %nop10919 = alloca i1, i1 0
- %nop10920 = alloca i1, i1 0
- %nop10921 = alloca i1, i1 0
- %nop10922 = alloca i1, i1 0
- %nop10923 = alloca i1, i1 0
- %nop10924 = alloca i1, i1 0
- %nop10925 = alloca i1, i1 0
- %nop10926 = alloca i1, i1 0
- %nop10927 = alloca i1, i1 0
- %nop10928 = alloca i1, i1 0
- %nop10929 = alloca i1, i1 0
- %nop10930 = alloca i1, i1 0
- %nop10931 = alloca i1, i1 0
- %nop10932 = alloca i1, i1 0
- %nop10933 = alloca i1, i1 0
- %nop10934 = alloca i1, i1 0
- %nop10935 = alloca i1, i1 0
- %nop10936 = alloca i1, i1 0
- %nop10937 = alloca i1, i1 0
- %nop10938 = alloca i1, i1 0
- %nop10939 = alloca i1, i1 0
- %nop10940 = alloca i1, i1 0
- %nop10941 = alloca i1, i1 0
- %nop10942 = alloca i1, i1 0
- %nop10943 = alloca i1, i1 0
- %nop10944 = alloca i1, i1 0
- %nop10945 = alloca i1, i1 0
- %nop10946 = alloca i1, i1 0
- %nop10947 = alloca i1, i1 0
- %nop10948 = alloca i1, i1 0
- %nop10949 = alloca i1, i1 0
- %nop10950 = alloca i1, i1 0
- %nop10951 = alloca i1, i1 0
- %nop10952 = alloca i1, i1 0
- %nop10953 = alloca i1, i1 0
- %nop10954 = alloca i1, i1 0
- %nop10955 = alloca i1, i1 0
- %nop10956 = alloca i1, i1 0
- %nop10957 = alloca i1, i1 0
- %nop10958 = alloca i1, i1 0
- %nop10959 = alloca i1, i1 0
- %nop10960 = alloca i1, i1 0
- %nop10961 = alloca i1, i1 0
- %nop10962 = alloca i1, i1 0
- %nop10963 = alloca i1, i1 0
- %nop10964 = alloca i1, i1 0
- %nop10965 = alloca i1, i1 0
- %nop10966 = alloca i1, i1 0
- %nop10967 = alloca i1, i1 0
- %nop10968 = alloca i1, i1 0
- %nop10969 = alloca i1, i1 0
- %nop10970 = alloca i1, i1 0
- %nop10971 = alloca i1, i1 0
- %nop10972 = alloca i1, i1 0
- %nop10973 = alloca i1, i1 0
- %nop10974 = alloca i1, i1 0
- %nop10975 = alloca i1, i1 0
- %nop10976 = alloca i1, i1 0
- %nop10977 = alloca i1, i1 0
- %nop10978 = alloca i1, i1 0
- %nop10979 = alloca i1, i1 0
- %nop10980 = alloca i1, i1 0
- %nop10981 = alloca i1, i1 0
- %nop10982 = alloca i1, i1 0
- %nop10983 = alloca i1, i1 0
- %nop10984 = alloca i1, i1 0
- %nop10985 = alloca i1, i1 0
- %nop10986 = alloca i1, i1 0
- %nop10987 = alloca i1, i1 0
- %nop10988 = alloca i1, i1 0
- %nop10989 = alloca i1, i1 0
- %nop10990 = alloca i1, i1 0
- %nop10991 = alloca i1, i1 0
- %nop10992 = alloca i1, i1 0
- %nop10993 = alloca i1, i1 0
- %nop10994 = alloca i1, i1 0
- %nop10995 = alloca i1, i1 0
- %nop10996 = alloca i1, i1 0
- %nop10997 = alloca i1, i1 0
- %nop10998 = alloca i1, i1 0
- %nop10999 = alloca i1, i1 0
- %nop11000 = alloca i1, i1 0
- %nop11001 = alloca i1, i1 0
- %nop11002 = alloca i1, i1 0
- %nop11003 = alloca i1, i1 0
- %nop11004 = alloca i1, i1 0
- %nop11005 = alloca i1, i1 0
- %nop11006 = alloca i1, i1 0
- %nop11007 = alloca i1, i1 0
- %nop11008 = alloca i1, i1 0
- %nop11009 = alloca i1, i1 0
- %nop11010 = alloca i1, i1 0
- %nop11011 = alloca i1, i1 0
- %nop11012 = alloca i1, i1 0
- %nop11013 = alloca i1, i1 0
- %nop11014 = alloca i1, i1 0
- %nop11015 = alloca i1, i1 0
- %nop11016 = alloca i1, i1 0
- %nop11017 = alloca i1, i1 0
- %nop11018 = alloca i1, i1 0
- %nop11019 = alloca i1, i1 0
- %nop11020 = alloca i1, i1 0
- %nop11021 = alloca i1, i1 0
- %nop11022 = alloca i1, i1 0
- %nop11023 = alloca i1, i1 0
- %nop11024 = alloca i1, i1 0
- %nop11025 = alloca i1, i1 0
- %nop11026 = alloca i1, i1 0
- %nop11027 = alloca i1, i1 0
- %nop11028 = alloca i1, i1 0
- %nop11029 = alloca i1, i1 0
- %nop11030 = alloca i1, i1 0
- %nop11031 = alloca i1, i1 0
- %nop11032 = alloca i1, i1 0
- %nop11033 = alloca i1, i1 0
- %nop11034 = alloca i1, i1 0
- %nop11035 = alloca i1, i1 0
- %nop11036 = alloca i1, i1 0
- %nop11037 = alloca i1, i1 0
- %nop11038 = alloca i1, i1 0
- %nop11039 = alloca i1, i1 0
- %nop11040 = alloca i1, i1 0
- %nop11041 = alloca i1, i1 0
- %nop11042 = alloca i1, i1 0
- %nop11043 = alloca i1, i1 0
- %nop11044 = alloca i1, i1 0
- %nop11045 = alloca i1, i1 0
- %nop11046 = alloca i1, i1 0
- %nop11047 = alloca i1, i1 0
- %nop11048 = alloca i1, i1 0
- %nop11049 = alloca i1, i1 0
- %nop11050 = alloca i1, i1 0
- %nop11051 = alloca i1, i1 0
- %nop11052 = alloca i1, i1 0
- %nop11053 = alloca i1, i1 0
- %nop11054 = alloca i1, i1 0
- %nop11055 = alloca i1, i1 0
- %nop11056 = alloca i1, i1 0
- %nop11057 = alloca i1, i1 0
- %nop11058 = alloca i1, i1 0
- %nop11059 = alloca i1, i1 0
- %nop11060 = alloca i1, i1 0
- %nop11061 = alloca i1, i1 0
- %nop11062 = alloca i1, i1 0
- %nop11063 = alloca i1, i1 0
- %nop11064 = alloca i1, i1 0
- %nop11065 = alloca i1, i1 0
- %nop11066 = alloca i1, i1 0
- %nop11067 = alloca i1, i1 0
- %nop11068 = alloca i1, i1 0
- %nop11069 = alloca i1, i1 0
- %nop11070 = alloca i1, i1 0
- %nop11071 = alloca i1, i1 0
- %nop11072 = alloca i1, i1 0
- %nop11073 = alloca i1, i1 0
- %nop11074 = alloca i1, i1 0
- %nop11075 = alloca i1, i1 0
- %nop11076 = alloca i1, i1 0
- %nop11077 = alloca i1, i1 0
- %nop11078 = alloca i1, i1 0
- %nop11079 = alloca i1, i1 0
- %nop11080 = alloca i1, i1 0
- %nop11081 = alloca i1, i1 0
- %nop11082 = alloca i1, i1 0
- %nop11083 = alloca i1, i1 0
- %nop11084 = alloca i1, i1 0
- %nop11085 = alloca i1, i1 0
- %nop11086 = alloca i1, i1 0
- %nop11087 = alloca i1, i1 0
- %nop11088 = alloca i1, i1 0
- %nop11089 = alloca i1, i1 0
- %nop11090 = alloca i1, i1 0
- %nop11091 = alloca i1, i1 0
- %nop11092 = alloca i1, i1 0
- %nop11093 = alloca i1, i1 0
- %nop11094 = alloca i1, i1 0
- %nop11095 = alloca i1, i1 0
- %nop11096 = alloca i1, i1 0
- %nop11097 = alloca i1, i1 0
- %nop11098 = alloca i1, i1 0
- %nop11099 = alloca i1, i1 0
- %nop11100 = alloca i1, i1 0
- %nop11101 = alloca i1, i1 0
- %nop11102 = alloca i1, i1 0
- %nop11103 = alloca i1, i1 0
- %nop11104 = alloca i1, i1 0
- %nop11105 = alloca i1, i1 0
- %nop11106 = alloca i1, i1 0
- %nop11107 = alloca i1, i1 0
- %nop11108 = alloca i1, i1 0
- %nop11109 = alloca i1, i1 0
- %nop11110 = alloca i1, i1 0
- %nop11111 = alloca i1, i1 0
- %nop11112 = alloca i1, i1 0
- %nop11113 = alloca i1, i1 0
- %nop11114 = alloca i1, i1 0
- %nop11115 = alloca i1, i1 0
- %nop11116 = alloca i1, i1 0
- %nop11117 = alloca i1, i1 0
- %nop11118 = alloca i1, i1 0
- %nop11119 = alloca i1, i1 0
- %nop11120 = alloca i1, i1 0
- %nop11121 = alloca i1, i1 0
- %nop11122 = alloca i1, i1 0
- %nop11123 = alloca i1, i1 0
- %nop11124 = alloca i1, i1 0
- %nop11125 = alloca i1, i1 0
- %nop11126 = alloca i1, i1 0
- %nop11127 = alloca i1, i1 0
- %nop11128 = alloca i1, i1 0
- %nop11129 = alloca i1, i1 0
- %nop11130 = alloca i1, i1 0
- %nop11131 = alloca i1, i1 0
- %nop11132 = alloca i1, i1 0
- %nop11133 = alloca i1, i1 0
- %nop11134 = alloca i1, i1 0
- %nop11135 = alloca i1, i1 0
- %nop11136 = alloca i1, i1 0
- %nop11137 = alloca i1, i1 0
- %nop11138 = alloca i1, i1 0
- %nop11139 = alloca i1, i1 0
- %nop11140 = alloca i1, i1 0
- %nop11141 = alloca i1, i1 0
- %nop11142 = alloca i1, i1 0
- %nop11143 = alloca i1, i1 0
- %nop11144 = alloca i1, i1 0
- %nop11145 = alloca i1, i1 0
- %nop11146 = alloca i1, i1 0
- %nop11147 = alloca i1, i1 0
- %nop11148 = alloca i1, i1 0
- %nop11149 = alloca i1, i1 0
- %nop11150 = alloca i1, i1 0
- %nop11151 = alloca i1, i1 0
- %nop11152 = alloca i1, i1 0
- %nop11153 = alloca i1, i1 0
- %nop11154 = alloca i1, i1 0
- %nop11155 = alloca i1, i1 0
- %nop11156 = alloca i1, i1 0
- %nop11157 = alloca i1, i1 0
- %nop11158 = alloca i1, i1 0
- %nop11159 = alloca i1, i1 0
- %nop11160 = alloca i1, i1 0
- %nop11161 = alloca i1, i1 0
- %nop11162 = alloca i1, i1 0
- %nop11163 = alloca i1, i1 0
- %nop11164 = alloca i1, i1 0
- %nop11165 = alloca i1, i1 0
- %nop11166 = alloca i1, i1 0
- %nop11167 = alloca i1, i1 0
- %nop11168 = alloca i1, i1 0
- %nop11169 = alloca i1, i1 0
- %nop11170 = alloca i1, i1 0
- %nop11171 = alloca i1, i1 0
- %nop11172 = alloca i1, i1 0
- %nop11173 = alloca i1, i1 0
- %nop11174 = alloca i1, i1 0
- %nop11175 = alloca i1, i1 0
- %nop11176 = alloca i1, i1 0
- %nop11177 = alloca i1, i1 0
- %nop11178 = alloca i1, i1 0
- %nop11179 = alloca i1, i1 0
- %nop11180 = alloca i1, i1 0
- %nop11181 = alloca i1, i1 0
- %nop11182 = alloca i1, i1 0
- %nop11183 = alloca i1, i1 0
- %nop11184 = alloca i1, i1 0
- %nop11185 = alloca i1, i1 0
- %nop11186 = alloca i1, i1 0
- %nop11187 = alloca i1, i1 0
- %nop11188 = alloca i1, i1 0
- %nop11189 = alloca i1, i1 0
- %nop11190 = alloca i1, i1 0
- %nop11191 = alloca i1, i1 0
- %nop11192 = alloca i1, i1 0
- %nop11193 = alloca i1, i1 0
- %nop11194 = alloca i1, i1 0
- %nop11195 = alloca i1, i1 0
- %nop11196 = alloca i1, i1 0
- %nop11197 = alloca i1, i1 0
- %nop11198 = alloca i1, i1 0
- %nop11199 = alloca i1, i1 0
- %nop11200 = alloca i1, i1 0
- %nop11201 = alloca i1, i1 0
- %nop11202 = alloca i1, i1 0
- %nop11203 = alloca i1, i1 0
- %nop11204 = alloca i1, i1 0
- %nop11205 = alloca i1, i1 0
- %nop11206 = alloca i1, i1 0
- %nop11207 = alloca i1, i1 0
- %nop11208 = alloca i1, i1 0
- %nop11209 = alloca i1, i1 0
- %nop11210 = alloca i1, i1 0
- %nop11211 = alloca i1, i1 0
- %nop11212 = alloca i1, i1 0
- %nop11213 = alloca i1, i1 0
- %nop11214 = alloca i1, i1 0
- %nop11215 = alloca i1, i1 0
- %nop11216 = alloca i1, i1 0
- %nop11217 = alloca i1, i1 0
- %nop11218 = alloca i1, i1 0
- %nop11219 = alloca i1, i1 0
- %nop11220 = alloca i1, i1 0
- %nop11221 = alloca i1, i1 0
- %nop11222 = alloca i1, i1 0
- %nop11223 = alloca i1, i1 0
- %nop11224 = alloca i1, i1 0
- %nop11225 = alloca i1, i1 0
- %nop11226 = alloca i1, i1 0
- %nop11227 = alloca i1, i1 0
- %nop11228 = alloca i1, i1 0
- %nop11229 = alloca i1, i1 0
- %nop11230 = alloca i1, i1 0
- %nop11231 = alloca i1, i1 0
- %nop11232 = alloca i1, i1 0
- %nop11233 = alloca i1, i1 0
- %nop11234 = alloca i1, i1 0
- %nop11235 = alloca i1, i1 0
- %nop11236 = alloca i1, i1 0
- %nop11237 = alloca i1, i1 0
- %nop11238 = alloca i1, i1 0
- %nop11239 = alloca i1, i1 0
- %nop11240 = alloca i1, i1 0
- %nop11241 = alloca i1, i1 0
- %nop11242 = alloca i1, i1 0
- %nop11243 = alloca i1, i1 0
- %nop11244 = alloca i1, i1 0
- %nop11245 = alloca i1, i1 0
- %nop11246 = alloca i1, i1 0
- %nop11247 = alloca i1, i1 0
- %nop11248 = alloca i1, i1 0
- %nop11249 = alloca i1, i1 0
- %nop11250 = alloca i1, i1 0
- %nop11251 = alloca i1, i1 0
- %nop11252 = alloca i1, i1 0
- %nop11253 = alloca i1, i1 0
- %nop11254 = alloca i1, i1 0
- %nop11255 = alloca i1, i1 0
- %nop11256 = alloca i1, i1 0
- %nop11257 = alloca i1, i1 0
- %nop11258 = alloca i1, i1 0
- %nop11259 = alloca i1, i1 0
- %nop11260 = alloca i1, i1 0
- %nop11261 = alloca i1, i1 0
- %nop11262 = alloca i1, i1 0
- %nop11263 = alloca i1, i1 0
- %nop11264 = alloca i1, i1 0
- %nop11265 = alloca i1, i1 0
- %nop11266 = alloca i1, i1 0
- %nop11267 = alloca i1, i1 0
- %nop11268 = alloca i1, i1 0
- %nop11269 = alloca i1, i1 0
- %nop11270 = alloca i1, i1 0
- %nop11271 = alloca i1, i1 0
- %nop11272 = alloca i1, i1 0
- %nop11273 = alloca i1, i1 0
- %nop11274 = alloca i1, i1 0
- %nop11275 = alloca i1, i1 0
- %nop11276 = alloca i1, i1 0
- %nop11277 = alloca i1, i1 0
- %nop11278 = alloca i1, i1 0
- %nop11279 = alloca i1, i1 0
- %nop11280 = alloca i1, i1 0
- %nop11281 = alloca i1, i1 0
- %nop11282 = alloca i1, i1 0
- %nop11283 = alloca i1, i1 0
- %nop11284 = alloca i1, i1 0
- %nop11285 = alloca i1, i1 0
- %nop11286 = alloca i1, i1 0
- %nop11287 = alloca i1, i1 0
- %nop11288 = alloca i1, i1 0
- %nop11289 = alloca i1, i1 0
- %nop11290 = alloca i1, i1 0
- %nop11291 = alloca i1, i1 0
- %nop11292 = alloca i1, i1 0
- %nop11293 = alloca i1, i1 0
- %nop11294 = alloca i1, i1 0
- %nop11295 = alloca i1, i1 0
- %nop11296 = alloca i1, i1 0
- %nop11297 = alloca i1, i1 0
- %nop11298 = alloca i1, i1 0
- %nop11299 = alloca i1, i1 0
- %nop11300 = alloca i1, i1 0
- %nop11301 = alloca i1, i1 0
- %nop11302 = alloca i1, i1 0
- %nop11303 = alloca i1, i1 0
- %nop11304 = alloca i1, i1 0
- %nop11305 = alloca i1, i1 0
- %nop11306 = alloca i1, i1 0
- %nop11307 = alloca i1, i1 0
- %nop11308 = alloca i1, i1 0
- %nop11309 = alloca i1, i1 0
- %nop11310 = alloca i1, i1 0
- %nop11311 = alloca i1, i1 0
- %nop11312 = alloca i1, i1 0
- %nop11313 = alloca i1, i1 0
- %nop11314 = alloca i1, i1 0
- %nop11315 = alloca i1, i1 0
- %nop11316 = alloca i1, i1 0
- %nop11317 = alloca i1, i1 0
- %nop11318 = alloca i1, i1 0
- %nop11319 = alloca i1, i1 0
- %nop11320 = alloca i1, i1 0
- %nop11321 = alloca i1, i1 0
- %nop11322 = alloca i1, i1 0
- %nop11323 = alloca i1, i1 0
- %nop11324 = alloca i1, i1 0
- %nop11325 = alloca i1, i1 0
- %nop11326 = alloca i1, i1 0
- %nop11327 = alloca i1, i1 0
- %nop11328 = alloca i1, i1 0
- %nop11329 = alloca i1, i1 0
- %nop11330 = alloca i1, i1 0
- %nop11331 = alloca i1, i1 0
- %nop11332 = alloca i1, i1 0
- %nop11333 = alloca i1, i1 0
- %nop11334 = alloca i1, i1 0
- %nop11335 = alloca i1, i1 0
- %nop11336 = alloca i1, i1 0
- %nop11337 = alloca i1, i1 0
- %nop11338 = alloca i1, i1 0
- %nop11339 = alloca i1, i1 0
- %nop11340 = alloca i1, i1 0
- %nop11341 = alloca i1, i1 0
- %nop11342 = alloca i1, i1 0
- %nop11343 = alloca i1, i1 0
- %nop11344 = alloca i1, i1 0
- %nop11345 = alloca i1, i1 0
- %nop11346 = alloca i1, i1 0
- %nop11347 = alloca i1, i1 0
- %nop11348 = alloca i1, i1 0
- %nop11349 = alloca i1, i1 0
- %nop11350 = alloca i1, i1 0
- %nop11351 = alloca i1, i1 0
- %nop11352 = alloca i1, i1 0
- %nop11353 = alloca i1, i1 0
- %nop11354 = alloca i1, i1 0
- %nop11355 = alloca i1, i1 0
- %nop11356 = alloca i1, i1 0
- %nop11357 = alloca i1, i1 0
- %nop11358 = alloca i1, i1 0
- %nop11359 = alloca i1, i1 0
- %nop11360 = alloca i1, i1 0
- %nop11361 = alloca i1, i1 0
- %nop11362 = alloca i1, i1 0
- %nop11363 = alloca i1, i1 0
- %nop11364 = alloca i1, i1 0
- %nop11365 = alloca i1, i1 0
- %nop11366 = alloca i1, i1 0
- %nop11367 = alloca i1, i1 0
- %nop11368 = alloca i1, i1 0
- %nop11369 = alloca i1, i1 0
- %nop11370 = alloca i1, i1 0
- %nop11371 = alloca i1, i1 0
- %nop11372 = alloca i1, i1 0
- %nop11373 = alloca i1, i1 0
- %nop11374 = alloca i1, i1 0
- %nop11375 = alloca i1, i1 0
- %nop11376 = alloca i1, i1 0
- %nop11377 = alloca i1, i1 0
- %nop11378 = alloca i1, i1 0
- %nop11379 = alloca i1, i1 0
- %nop11380 = alloca i1, i1 0
- %nop11381 = alloca i1, i1 0
- %nop11382 = alloca i1, i1 0
- %nop11383 = alloca i1, i1 0
- %nop11384 = alloca i1, i1 0
- %nop11385 = alloca i1, i1 0
- %nop11386 = alloca i1, i1 0
- %nop11387 = alloca i1, i1 0
- %nop11388 = alloca i1, i1 0
- %nop11389 = alloca i1, i1 0
- %nop11390 = alloca i1, i1 0
- %nop11391 = alloca i1, i1 0
- %nop11392 = alloca i1, i1 0
- %nop11393 = alloca i1, i1 0
- %nop11394 = alloca i1, i1 0
- %nop11395 = alloca i1, i1 0
- %nop11396 = alloca i1, i1 0
- %nop11397 = alloca i1, i1 0
- %nop11398 = alloca i1, i1 0
- %nop11399 = alloca i1, i1 0
- %nop11400 = alloca i1, i1 0
- %nop11401 = alloca i1, i1 0
- %nop11402 = alloca i1, i1 0
- %nop11403 = alloca i1, i1 0
- %nop11404 = alloca i1, i1 0
- %nop11405 = alloca i1, i1 0
- %nop11406 = alloca i1, i1 0
- %nop11407 = alloca i1, i1 0
- %nop11408 = alloca i1, i1 0
- %nop11409 = alloca i1, i1 0
- %nop11410 = alloca i1, i1 0
- %nop11411 = alloca i1, i1 0
- %nop11412 = alloca i1, i1 0
- %nop11413 = alloca i1, i1 0
- %nop11414 = alloca i1, i1 0
- %nop11415 = alloca i1, i1 0
- %nop11416 = alloca i1, i1 0
- %nop11417 = alloca i1, i1 0
- %nop11418 = alloca i1, i1 0
- %nop11419 = alloca i1, i1 0
- %nop11420 = alloca i1, i1 0
- %nop11421 = alloca i1, i1 0
- %nop11422 = alloca i1, i1 0
- %nop11423 = alloca i1, i1 0
- %nop11424 = alloca i1, i1 0
- %nop11425 = alloca i1, i1 0
- %nop11426 = alloca i1, i1 0
- %nop11427 = alloca i1, i1 0
- %nop11428 = alloca i1, i1 0
- %nop11429 = alloca i1, i1 0
- %nop11430 = alloca i1, i1 0
- %nop11431 = alloca i1, i1 0
- %nop11432 = alloca i1, i1 0
- %nop11433 = alloca i1, i1 0
- %nop11434 = alloca i1, i1 0
- %nop11435 = alloca i1, i1 0
- %nop11436 = alloca i1, i1 0
- %nop11437 = alloca i1, i1 0
- %nop11438 = alloca i1, i1 0
- %nop11439 = alloca i1, i1 0
- %nop11440 = alloca i1, i1 0
- %nop11441 = alloca i1, i1 0
- %nop11442 = alloca i1, i1 0
- %nop11443 = alloca i1, i1 0
- %nop11444 = alloca i1, i1 0
- %nop11445 = alloca i1, i1 0
- %nop11446 = alloca i1, i1 0
- %nop11447 = alloca i1, i1 0
- %nop11448 = alloca i1, i1 0
- %nop11449 = alloca i1, i1 0
- %nop11450 = alloca i1, i1 0
- %nop11451 = alloca i1, i1 0
- %nop11452 = alloca i1, i1 0
- %nop11453 = alloca i1, i1 0
- %nop11454 = alloca i1, i1 0
- %nop11455 = alloca i1, i1 0
- %nop11456 = alloca i1, i1 0
- %nop11457 = alloca i1, i1 0
- %nop11458 = alloca i1, i1 0
- %nop11459 = alloca i1, i1 0
- %nop11460 = alloca i1, i1 0
- %nop11461 = alloca i1, i1 0
- %nop11462 = alloca i1, i1 0
- %nop11463 = alloca i1, i1 0
- %nop11464 = alloca i1, i1 0
- %nop11465 = alloca i1, i1 0
- %nop11466 = alloca i1, i1 0
- %nop11467 = alloca i1, i1 0
- %nop11468 = alloca i1, i1 0
- %nop11469 = alloca i1, i1 0
- %nop11470 = alloca i1, i1 0
- %nop11471 = alloca i1, i1 0
- %nop11472 = alloca i1, i1 0
- %nop11473 = alloca i1, i1 0
- %nop11474 = alloca i1, i1 0
- %nop11475 = alloca i1, i1 0
- %nop11476 = alloca i1, i1 0
- %nop11477 = alloca i1, i1 0
- %nop11478 = alloca i1, i1 0
- %nop11479 = alloca i1, i1 0
- %nop11480 = alloca i1, i1 0
- %nop11481 = alloca i1, i1 0
- %nop11482 = alloca i1, i1 0
- %nop11483 = alloca i1, i1 0
- %nop11484 = alloca i1, i1 0
- %nop11485 = alloca i1, i1 0
- %nop11486 = alloca i1, i1 0
- %nop11487 = alloca i1, i1 0
- %nop11488 = alloca i1, i1 0
- %nop11489 = alloca i1, i1 0
- %nop11490 = alloca i1, i1 0
- %nop11491 = alloca i1, i1 0
- %nop11492 = alloca i1, i1 0
- %nop11493 = alloca i1, i1 0
- %nop11494 = alloca i1, i1 0
- %nop11495 = alloca i1, i1 0
- %nop11496 = alloca i1, i1 0
- %nop11497 = alloca i1, i1 0
- %nop11498 = alloca i1, i1 0
- %nop11499 = alloca i1, i1 0
- %nop11500 = alloca i1, i1 0
- %nop11501 = alloca i1, i1 0
- %nop11502 = alloca i1, i1 0
- %nop11503 = alloca i1, i1 0
- %nop11504 = alloca i1, i1 0
- %nop11505 = alloca i1, i1 0
- %nop11506 = alloca i1, i1 0
- %nop11507 = alloca i1, i1 0
- %nop11508 = alloca i1, i1 0
- %nop11509 = alloca i1, i1 0
- %nop11510 = alloca i1, i1 0
- %nop11511 = alloca i1, i1 0
- %nop11512 = alloca i1, i1 0
- %nop11513 = alloca i1, i1 0
- %nop11514 = alloca i1, i1 0
- %nop11515 = alloca i1, i1 0
- %nop11516 = alloca i1, i1 0
- %nop11517 = alloca i1, i1 0
- %nop11518 = alloca i1, i1 0
- %nop11519 = alloca i1, i1 0
- %nop11520 = alloca i1, i1 0
- %nop11521 = alloca i1, i1 0
- %nop11522 = alloca i1, i1 0
- %nop11523 = alloca i1, i1 0
- %nop11524 = alloca i1, i1 0
- %nop11525 = alloca i1, i1 0
- %nop11526 = alloca i1, i1 0
- %nop11527 = alloca i1, i1 0
- %nop11528 = alloca i1, i1 0
- %nop11529 = alloca i1, i1 0
- %nop11530 = alloca i1, i1 0
- %nop11531 = alloca i1, i1 0
- %nop11532 = alloca i1, i1 0
- %nop11533 = alloca i1, i1 0
- %nop11534 = alloca i1, i1 0
- %nop11535 = alloca i1, i1 0
- %nop11536 = alloca i1, i1 0
- %nop11537 = alloca i1, i1 0
- %nop11538 = alloca i1, i1 0
- %nop11539 = alloca i1, i1 0
- %nop11540 = alloca i1, i1 0
- %nop11541 = alloca i1, i1 0
- %nop11542 = alloca i1, i1 0
- %nop11543 = alloca i1, i1 0
- %nop11544 = alloca i1, i1 0
- %nop11545 = alloca i1, i1 0
- %nop11546 = alloca i1, i1 0
- %nop11547 = alloca i1, i1 0
- %nop11548 = alloca i1, i1 0
- %nop11549 = alloca i1, i1 0
- %nop11550 = alloca i1, i1 0
- %nop11551 = alloca i1, i1 0
- %nop11552 = alloca i1, i1 0
- %nop11553 = alloca i1, i1 0
- %nop11554 = alloca i1, i1 0
- %nop11555 = alloca i1, i1 0
- %nop11556 = alloca i1, i1 0
- %nop11557 = alloca i1, i1 0
- %nop11558 = alloca i1, i1 0
- %nop11559 = alloca i1, i1 0
- %nop11560 = alloca i1, i1 0
- %nop11561 = alloca i1, i1 0
- %nop11562 = alloca i1, i1 0
- %nop11563 = alloca i1, i1 0
- %nop11564 = alloca i1, i1 0
- %nop11565 = alloca i1, i1 0
- %nop11566 = alloca i1, i1 0
- %nop11567 = alloca i1, i1 0
- %nop11568 = alloca i1, i1 0
- %nop11569 = alloca i1, i1 0
- %nop11570 = alloca i1, i1 0
- %nop11571 = alloca i1, i1 0
- %nop11572 = alloca i1, i1 0
- %nop11573 = alloca i1, i1 0
- %nop11574 = alloca i1, i1 0
- %nop11575 = alloca i1, i1 0
- %nop11576 = alloca i1, i1 0
- %nop11577 = alloca i1, i1 0
- %nop11578 = alloca i1, i1 0
- %nop11579 = alloca i1, i1 0
- %nop11580 = alloca i1, i1 0
- %nop11581 = alloca i1, i1 0
- %nop11582 = alloca i1, i1 0
- %nop11583 = alloca i1, i1 0
- %nop11584 = alloca i1, i1 0
- %nop11585 = alloca i1, i1 0
- %nop11586 = alloca i1, i1 0
- %nop11587 = alloca i1, i1 0
- %nop11588 = alloca i1, i1 0
- %nop11589 = alloca i1, i1 0
- %nop11590 = alloca i1, i1 0
- %nop11591 = alloca i1, i1 0
- %nop11592 = alloca i1, i1 0
- %nop11593 = alloca i1, i1 0
- %nop11594 = alloca i1, i1 0
- %nop11595 = alloca i1, i1 0
- %nop11596 = alloca i1, i1 0
- %nop11597 = alloca i1, i1 0
- %nop11598 = alloca i1, i1 0
- %nop11599 = alloca i1, i1 0
- %nop11600 = alloca i1, i1 0
- %nop11601 = alloca i1, i1 0
- %nop11602 = alloca i1, i1 0
- %nop11603 = alloca i1, i1 0
- %nop11604 = alloca i1, i1 0
- %nop11605 = alloca i1, i1 0
- %nop11606 = alloca i1, i1 0
- %nop11607 = alloca i1, i1 0
- %nop11608 = alloca i1, i1 0
- %nop11609 = alloca i1, i1 0
- %nop11610 = alloca i1, i1 0
- %nop11611 = alloca i1, i1 0
- %nop11612 = alloca i1, i1 0
- %nop11613 = alloca i1, i1 0
- %nop11614 = alloca i1, i1 0
- %nop11615 = alloca i1, i1 0
- %nop11616 = alloca i1, i1 0
- %nop11617 = alloca i1, i1 0
- %nop11618 = alloca i1, i1 0
- %nop11619 = alloca i1, i1 0
- %nop11620 = alloca i1, i1 0
- %nop11621 = alloca i1, i1 0
- %nop11622 = alloca i1, i1 0
- %nop11623 = alloca i1, i1 0
- %nop11624 = alloca i1, i1 0
- %nop11625 = alloca i1, i1 0
- %nop11626 = alloca i1, i1 0
- %nop11627 = alloca i1, i1 0
- %nop11628 = alloca i1, i1 0
- %nop11629 = alloca i1, i1 0
- %nop11630 = alloca i1, i1 0
- %nop11631 = alloca i1, i1 0
- %nop11632 = alloca i1, i1 0
- %nop11633 = alloca i1, i1 0
- %nop11634 = alloca i1, i1 0
- %nop11635 = alloca i1, i1 0
- %nop11636 = alloca i1, i1 0
- %nop11637 = alloca i1, i1 0
- %nop11638 = alloca i1, i1 0
- %nop11639 = alloca i1, i1 0
- %nop11640 = alloca i1, i1 0
- %nop11641 = alloca i1, i1 0
- %nop11642 = alloca i1, i1 0
- %nop11643 = alloca i1, i1 0
- %nop11644 = alloca i1, i1 0
- %nop11645 = alloca i1, i1 0
- %nop11646 = alloca i1, i1 0
- %nop11647 = alloca i1, i1 0
- %nop11648 = alloca i1, i1 0
- %nop11649 = alloca i1, i1 0
- %nop11650 = alloca i1, i1 0
- %nop11651 = alloca i1, i1 0
- %nop11652 = alloca i1, i1 0
- %nop11653 = alloca i1, i1 0
- %nop11654 = alloca i1, i1 0
- %nop11655 = alloca i1, i1 0
- %nop11656 = alloca i1, i1 0
- %nop11657 = alloca i1, i1 0
- %nop11658 = alloca i1, i1 0
- %nop11659 = alloca i1, i1 0
- %nop11660 = alloca i1, i1 0
- %nop11661 = alloca i1, i1 0
- %nop11662 = alloca i1, i1 0
- %nop11663 = alloca i1, i1 0
- %nop11664 = alloca i1, i1 0
- %nop11665 = alloca i1, i1 0
- %nop11666 = alloca i1, i1 0
- %nop11667 = alloca i1, i1 0
- %nop11668 = alloca i1, i1 0
- %nop11669 = alloca i1, i1 0
- %nop11670 = alloca i1, i1 0
- %nop11671 = alloca i1, i1 0
- %nop11672 = alloca i1, i1 0
- %nop11673 = alloca i1, i1 0
- %nop11674 = alloca i1, i1 0
- %nop11675 = alloca i1, i1 0
- %nop11676 = alloca i1, i1 0
- %nop11677 = alloca i1, i1 0
- %nop11678 = alloca i1, i1 0
- %nop11679 = alloca i1, i1 0
- %nop11680 = alloca i1, i1 0
- %nop11681 = alloca i1, i1 0
- %nop11682 = alloca i1, i1 0
- %nop11683 = alloca i1, i1 0
- %nop11684 = alloca i1, i1 0
- %nop11685 = alloca i1, i1 0
- %nop11686 = alloca i1, i1 0
- %nop11687 = alloca i1, i1 0
- %nop11688 = alloca i1, i1 0
- %nop11689 = alloca i1, i1 0
- %nop11690 = alloca i1, i1 0
- %nop11691 = alloca i1, i1 0
- %nop11692 = alloca i1, i1 0
- %nop11693 = alloca i1, i1 0
- %nop11694 = alloca i1, i1 0
- %nop11695 = alloca i1, i1 0
- %nop11696 = alloca i1, i1 0
- %nop11697 = alloca i1, i1 0
- %nop11698 = alloca i1, i1 0
- %nop11699 = alloca i1, i1 0
- %nop11700 = alloca i1, i1 0
- %nop11701 = alloca i1, i1 0
- %nop11702 = alloca i1, i1 0
- %nop11703 = alloca i1, i1 0
- %nop11704 = alloca i1, i1 0
- %nop11705 = alloca i1, i1 0
- %nop11706 = alloca i1, i1 0
- %nop11707 = alloca i1, i1 0
- %nop11708 = alloca i1, i1 0
- %nop11709 = alloca i1, i1 0
- %nop11710 = alloca i1, i1 0
- %nop11711 = alloca i1, i1 0
- %nop11712 = alloca i1, i1 0
- %nop11713 = alloca i1, i1 0
- %nop11714 = alloca i1, i1 0
- %nop11715 = alloca i1, i1 0
- %nop11716 = alloca i1, i1 0
- %nop11717 = alloca i1, i1 0
- %nop11718 = alloca i1, i1 0
- %nop11719 = alloca i1, i1 0
- %nop11720 = alloca i1, i1 0
- %nop11721 = alloca i1, i1 0
- %nop11722 = alloca i1, i1 0
- %nop11723 = alloca i1, i1 0
- %nop11724 = alloca i1, i1 0
- %nop11725 = alloca i1, i1 0
- %nop11726 = alloca i1, i1 0
- %nop11727 = alloca i1, i1 0
- %nop11728 = alloca i1, i1 0
- %nop11729 = alloca i1, i1 0
- %nop11730 = alloca i1, i1 0
- %nop11731 = alloca i1, i1 0
- %nop11732 = alloca i1, i1 0
- %nop11733 = alloca i1, i1 0
- %nop11734 = alloca i1, i1 0
- %nop11735 = alloca i1, i1 0
- %nop11736 = alloca i1, i1 0
- %nop11737 = alloca i1, i1 0
- %nop11738 = alloca i1, i1 0
- %nop11739 = alloca i1, i1 0
- %nop11740 = alloca i1, i1 0
- %nop11741 = alloca i1, i1 0
- %nop11742 = alloca i1, i1 0
- %nop11743 = alloca i1, i1 0
- %nop11744 = alloca i1, i1 0
- %nop11745 = alloca i1, i1 0
- %nop11746 = alloca i1, i1 0
- %nop11747 = alloca i1, i1 0
- %nop11748 = alloca i1, i1 0
- %nop11749 = alloca i1, i1 0
- %nop11750 = alloca i1, i1 0
- %nop11751 = alloca i1, i1 0
- %nop11752 = alloca i1, i1 0
- %nop11753 = alloca i1, i1 0
- %nop11754 = alloca i1, i1 0
- %nop11755 = alloca i1, i1 0
- %nop11756 = alloca i1, i1 0
- %nop11757 = alloca i1, i1 0
- %nop11758 = alloca i1, i1 0
- %nop11759 = alloca i1, i1 0
- %nop11760 = alloca i1, i1 0
- %nop11761 = alloca i1, i1 0
- %nop11762 = alloca i1, i1 0
- %nop11763 = alloca i1, i1 0
- %nop11764 = alloca i1, i1 0
- %nop11765 = alloca i1, i1 0
- %nop11766 = alloca i1, i1 0
- %nop11767 = alloca i1, i1 0
- %nop11768 = alloca i1, i1 0
- %nop11769 = alloca i1, i1 0
- %nop11770 = alloca i1, i1 0
- %nop11771 = alloca i1, i1 0
- %nop11772 = alloca i1, i1 0
- %nop11773 = alloca i1, i1 0
- %nop11774 = alloca i1, i1 0
- %nop11775 = alloca i1, i1 0
- %nop11776 = alloca i1, i1 0
- %nop11777 = alloca i1, i1 0
- %nop11778 = alloca i1, i1 0
- %nop11779 = alloca i1, i1 0
- %nop11780 = alloca i1, i1 0
- %nop11781 = alloca i1, i1 0
- %nop11782 = alloca i1, i1 0
- %nop11783 = alloca i1, i1 0
- %nop11784 = alloca i1, i1 0
- %nop11785 = alloca i1, i1 0
- %nop11786 = alloca i1, i1 0
- %nop11787 = alloca i1, i1 0
- %nop11788 = alloca i1, i1 0
- %nop11789 = alloca i1, i1 0
- %nop11790 = alloca i1, i1 0
- %nop11791 = alloca i1, i1 0
- %nop11792 = alloca i1, i1 0
- %nop11793 = alloca i1, i1 0
- %nop11794 = alloca i1, i1 0
- %nop11795 = alloca i1, i1 0
- %nop11796 = alloca i1, i1 0
- %nop11797 = alloca i1, i1 0
- %nop11798 = alloca i1, i1 0
- %nop11799 = alloca i1, i1 0
- %nop11800 = alloca i1, i1 0
- %nop11801 = alloca i1, i1 0
- %nop11802 = alloca i1, i1 0
- %nop11803 = alloca i1, i1 0
- %nop11804 = alloca i1, i1 0
- %nop11805 = alloca i1, i1 0
- %nop11806 = alloca i1, i1 0
- %nop11807 = alloca i1, i1 0
- %nop11808 = alloca i1, i1 0
- %nop11809 = alloca i1, i1 0
- %nop11810 = alloca i1, i1 0
- %nop11811 = alloca i1, i1 0
- %nop11812 = alloca i1, i1 0
- %nop11813 = alloca i1, i1 0
- %nop11814 = alloca i1, i1 0
- %nop11815 = alloca i1, i1 0
- %nop11816 = alloca i1, i1 0
- %nop11817 = alloca i1, i1 0
- %nop11818 = alloca i1, i1 0
- %nop11819 = alloca i1, i1 0
- %nop11820 = alloca i1, i1 0
- %nop11821 = alloca i1, i1 0
- %nop11822 = alloca i1, i1 0
- %nop11823 = alloca i1, i1 0
- %nop11824 = alloca i1, i1 0
- %nop11825 = alloca i1, i1 0
- %nop11826 = alloca i1, i1 0
- %nop11827 = alloca i1, i1 0
- %nop11828 = alloca i1, i1 0
- %nop11829 = alloca i1, i1 0
- %nop11830 = alloca i1, i1 0
- %nop11831 = alloca i1, i1 0
- %nop11832 = alloca i1, i1 0
- %nop11833 = alloca i1, i1 0
- %nop11834 = alloca i1, i1 0
- %nop11835 = alloca i1, i1 0
- %nop11836 = alloca i1, i1 0
- %nop11837 = alloca i1, i1 0
- %nop11838 = alloca i1, i1 0
- %nop11839 = alloca i1, i1 0
- %nop11840 = alloca i1, i1 0
- %nop11841 = alloca i1, i1 0
- %nop11842 = alloca i1, i1 0
- %nop11843 = alloca i1, i1 0
- %nop11844 = alloca i1, i1 0
- %nop11845 = alloca i1, i1 0
- %nop11846 = alloca i1, i1 0
- %nop11847 = alloca i1, i1 0
- %nop11848 = alloca i1, i1 0
- %nop11849 = alloca i1, i1 0
- %nop11850 = alloca i1, i1 0
- %nop11851 = alloca i1, i1 0
- %nop11852 = alloca i1, i1 0
- %nop11853 = alloca i1, i1 0
- %nop11854 = alloca i1, i1 0
- %nop11855 = alloca i1, i1 0
- %nop11856 = alloca i1, i1 0
- %nop11857 = alloca i1, i1 0
- %nop11858 = alloca i1, i1 0
- %nop11859 = alloca i1, i1 0
- %nop11860 = alloca i1, i1 0
- %nop11861 = alloca i1, i1 0
- %nop11862 = alloca i1, i1 0
- %nop11863 = alloca i1, i1 0
- %nop11864 = alloca i1, i1 0
- %nop11865 = alloca i1, i1 0
- %nop11866 = alloca i1, i1 0
- %nop11867 = alloca i1, i1 0
- %nop11868 = alloca i1, i1 0
- %nop11869 = alloca i1, i1 0
- %nop11870 = alloca i1, i1 0
- %nop11871 = alloca i1, i1 0
- %nop11872 = alloca i1, i1 0
- %nop11873 = alloca i1, i1 0
- %nop11874 = alloca i1, i1 0
- %nop11875 = alloca i1, i1 0
- %nop11876 = alloca i1, i1 0
- %nop11877 = alloca i1, i1 0
- %nop11878 = alloca i1, i1 0
- %nop11879 = alloca i1, i1 0
- %nop11880 = alloca i1, i1 0
- %nop11881 = alloca i1, i1 0
- %nop11882 = alloca i1, i1 0
- %nop11883 = alloca i1, i1 0
- %nop11884 = alloca i1, i1 0
- %nop11885 = alloca i1, i1 0
- %nop11886 = alloca i1, i1 0
- %nop11887 = alloca i1, i1 0
- %nop11888 = alloca i1, i1 0
- %nop11889 = alloca i1, i1 0
- %nop11890 = alloca i1, i1 0
- %nop11891 = alloca i1, i1 0
- %nop11892 = alloca i1, i1 0
- %nop11893 = alloca i1, i1 0
- %nop11894 = alloca i1, i1 0
- %nop11895 = alloca i1, i1 0
- %nop11896 = alloca i1, i1 0
- %nop11897 = alloca i1, i1 0
- %nop11898 = alloca i1, i1 0
- %nop11899 = alloca i1, i1 0
- %nop11900 = alloca i1, i1 0
- %nop11901 = alloca i1, i1 0
- %nop11902 = alloca i1, i1 0
- %nop11903 = alloca i1, i1 0
- %nop11904 = alloca i1, i1 0
- %nop11905 = alloca i1, i1 0
- %nop11906 = alloca i1, i1 0
- %nop11907 = alloca i1, i1 0
- %nop11908 = alloca i1, i1 0
- %nop11909 = alloca i1, i1 0
- %nop11910 = alloca i1, i1 0
- %nop11911 = alloca i1, i1 0
- %nop11912 = alloca i1, i1 0
- %nop11913 = alloca i1, i1 0
- %nop11914 = alloca i1, i1 0
- %nop11915 = alloca i1, i1 0
- %nop11916 = alloca i1, i1 0
- %nop11917 = alloca i1, i1 0
- %nop11918 = alloca i1, i1 0
- %nop11919 = alloca i1, i1 0
- %nop11920 = alloca i1, i1 0
- %nop11921 = alloca i1, i1 0
- %nop11922 = alloca i1, i1 0
- %nop11923 = alloca i1, i1 0
- %nop11924 = alloca i1, i1 0
- %nop11925 = alloca i1, i1 0
- %nop11926 = alloca i1, i1 0
- %nop11927 = alloca i1, i1 0
- %nop11928 = alloca i1, i1 0
- %nop11929 = alloca i1, i1 0
- %nop11930 = alloca i1, i1 0
- %nop11931 = alloca i1, i1 0
- %nop11932 = alloca i1, i1 0
- %nop11933 = alloca i1, i1 0
- %nop11934 = alloca i1, i1 0
- %nop11935 = alloca i1, i1 0
- %nop11936 = alloca i1, i1 0
- %nop11937 = alloca i1, i1 0
- %nop11938 = alloca i1, i1 0
- %nop11939 = alloca i1, i1 0
- %nop11940 = alloca i1, i1 0
- %nop11941 = alloca i1, i1 0
- %nop11942 = alloca i1, i1 0
- %nop11943 = alloca i1, i1 0
- %nop11944 = alloca i1, i1 0
- %nop11945 = alloca i1, i1 0
- %nop11946 = alloca i1, i1 0
- %nop11947 = alloca i1, i1 0
- %nop11948 = alloca i1, i1 0
- %nop11949 = alloca i1, i1 0
- %nop11950 = alloca i1, i1 0
- %nop11951 = alloca i1, i1 0
- %nop11952 = alloca i1, i1 0
- %nop11953 = alloca i1, i1 0
- %nop11954 = alloca i1, i1 0
- %nop11955 = alloca i1, i1 0
- %nop11956 = alloca i1, i1 0
- %nop11957 = alloca i1, i1 0
- %nop11958 = alloca i1, i1 0
- %nop11959 = alloca i1, i1 0
- %nop11960 = alloca i1, i1 0
- %nop11961 = alloca i1, i1 0
- %nop11962 = alloca i1, i1 0
- %nop11963 = alloca i1, i1 0
- %nop11964 = alloca i1, i1 0
- %nop11965 = alloca i1, i1 0
- %nop11966 = alloca i1, i1 0
- %nop11967 = alloca i1, i1 0
- %nop11968 = alloca i1, i1 0
- %nop11969 = alloca i1, i1 0
- %nop11970 = alloca i1, i1 0
- %nop11971 = alloca i1, i1 0
- %nop11972 = alloca i1, i1 0
- %nop11973 = alloca i1, i1 0
- %nop11974 = alloca i1, i1 0
- %nop11975 = alloca i1, i1 0
- %nop11976 = alloca i1, i1 0
- %nop11977 = alloca i1, i1 0
- %nop11978 = alloca i1, i1 0
- %nop11979 = alloca i1, i1 0
- %nop11980 = alloca i1, i1 0
- %nop11981 = alloca i1, i1 0
- %nop11982 = alloca i1, i1 0
- %nop11983 = alloca i1, i1 0
- %nop11984 = alloca i1, i1 0
- %nop11985 = alloca i1, i1 0
- %nop11986 = alloca i1, i1 0
- %nop11987 = alloca i1, i1 0
- %nop11988 = alloca i1, i1 0
- %nop11989 = alloca i1, i1 0
- %nop11990 = alloca i1, i1 0
- %nop11991 = alloca i1, i1 0
- %nop11992 = alloca i1, i1 0
- %nop11993 = alloca i1, i1 0
- %nop11994 = alloca i1, i1 0
- %nop11995 = alloca i1, i1 0
- %nop11996 = alloca i1, i1 0
- %nop11997 = alloca i1, i1 0
- %nop11998 = alloca i1, i1 0
- %nop11999 = alloca i1, i1 0
- %nop12000 = alloca i1, i1 0
- %nop12001 = alloca i1, i1 0
- %nop12002 = alloca i1, i1 0
- %nop12003 = alloca i1, i1 0
- %nop12004 = alloca i1, i1 0
- %nop12005 = alloca i1, i1 0
- %nop12006 = alloca i1, i1 0
- %nop12007 = alloca i1, i1 0
- %nop12008 = alloca i1, i1 0
- %nop12009 = alloca i1, i1 0
- %nop12010 = alloca i1, i1 0
- %nop12011 = alloca i1, i1 0
- %nop12012 = alloca i1, i1 0
- %nop12013 = alloca i1, i1 0
- %nop12014 = alloca i1, i1 0
- %nop12015 = alloca i1, i1 0
- %nop12016 = alloca i1, i1 0
- %nop12017 = alloca i1, i1 0
- %nop12018 = alloca i1, i1 0
- %nop12019 = alloca i1, i1 0
- %nop12020 = alloca i1, i1 0
- %nop12021 = alloca i1, i1 0
- %nop12022 = alloca i1, i1 0
- %nop12023 = alloca i1, i1 0
- %nop12024 = alloca i1, i1 0
- %nop12025 = alloca i1, i1 0
- %nop12026 = alloca i1, i1 0
- %nop12027 = alloca i1, i1 0
- %nop12028 = alloca i1, i1 0
- %nop12029 = alloca i1, i1 0
- %nop12030 = alloca i1, i1 0
- %nop12031 = alloca i1, i1 0
- %nop12032 = alloca i1, i1 0
- %nop12033 = alloca i1, i1 0
- %nop12034 = alloca i1, i1 0
- %nop12035 = alloca i1, i1 0
- %nop12036 = alloca i1, i1 0
- %nop12037 = alloca i1, i1 0
- %nop12038 = alloca i1, i1 0
- %nop12039 = alloca i1, i1 0
- %nop12040 = alloca i1, i1 0
- %nop12041 = alloca i1, i1 0
- %nop12042 = alloca i1, i1 0
- %nop12043 = alloca i1, i1 0
- %nop12044 = alloca i1, i1 0
- %nop12045 = alloca i1, i1 0
- %nop12046 = alloca i1, i1 0
- %nop12047 = alloca i1, i1 0
- %nop12048 = alloca i1, i1 0
- %nop12049 = alloca i1, i1 0
- %nop12050 = alloca i1, i1 0
- %nop12051 = alloca i1, i1 0
- %nop12052 = alloca i1, i1 0
- %nop12053 = alloca i1, i1 0
- %nop12054 = alloca i1, i1 0
- %nop12055 = alloca i1, i1 0
- %nop12056 = alloca i1, i1 0
- %nop12057 = alloca i1, i1 0
- %nop12058 = alloca i1, i1 0
- %nop12059 = alloca i1, i1 0
- %nop12060 = alloca i1, i1 0
- %nop12061 = alloca i1, i1 0
- %nop12062 = alloca i1, i1 0
- %nop12063 = alloca i1, i1 0
- %nop12064 = alloca i1, i1 0
- %nop12065 = alloca i1, i1 0
- %nop12066 = alloca i1, i1 0
- %nop12067 = alloca i1, i1 0
- %nop12068 = alloca i1, i1 0
- %nop12069 = alloca i1, i1 0
- %nop12070 = alloca i1, i1 0
- %nop12071 = alloca i1, i1 0
- %nop12072 = alloca i1, i1 0
- %nop12073 = alloca i1, i1 0
- %nop12074 = alloca i1, i1 0
- %nop12075 = alloca i1, i1 0
- %nop12076 = alloca i1, i1 0
- %nop12077 = alloca i1, i1 0
- %nop12078 = alloca i1, i1 0
- %nop12079 = alloca i1, i1 0
- %nop12080 = alloca i1, i1 0
- %nop12081 = alloca i1, i1 0
- %nop12082 = alloca i1, i1 0
- %nop12083 = alloca i1, i1 0
- %nop12084 = alloca i1, i1 0
- %nop12085 = alloca i1, i1 0
- %nop12086 = alloca i1, i1 0
- %nop12087 = alloca i1, i1 0
- %nop12088 = alloca i1, i1 0
- %nop12089 = alloca i1, i1 0
- %nop12090 = alloca i1, i1 0
- %nop12091 = alloca i1, i1 0
- %nop12092 = alloca i1, i1 0
- %nop12093 = alloca i1, i1 0
- %nop12094 = alloca i1, i1 0
- %nop12095 = alloca i1, i1 0
- %nop12096 = alloca i1, i1 0
- %nop12097 = alloca i1, i1 0
- %nop12098 = alloca i1, i1 0
- %nop12099 = alloca i1, i1 0
- %nop12100 = alloca i1, i1 0
- %nop12101 = alloca i1, i1 0
- %nop12102 = alloca i1, i1 0
- %nop12103 = alloca i1, i1 0
- %nop12104 = alloca i1, i1 0
- %nop12105 = alloca i1, i1 0
- %nop12106 = alloca i1, i1 0
- %nop12107 = alloca i1, i1 0
- %nop12108 = alloca i1, i1 0
- %nop12109 = alloca i1, i1 0
- %nop12110 = alloca i1, i1 0
- %nop12111 = alloca i1, i1 0
- %nop12112 = alloca i1, i1 0
- %nop12113 = alloca i1, i1 0
- %nop12114 = alloca i1, i1 0
- %nop12115 = alloca i1, i1 0
- %nop12116 = alloca i1, i1 0
- %nop12117 = alloca i1, i1 0
- %nop12118 = alloca i1, i1 0
- %nop12119 = alloca i1, i1 0
- %nop12120 = alloca i1, i1 0
- %nop12121 = alloca i1, i1 0
- %nop12122 = alloca i1, i1 0
- %nop12123 = alloca i1, i1 0
- %nop12124 = alloca i1, i1 0
- %nop12125 = alloca i1, i1 0
- %nop12126 = alloca i1, i1 0
- %nop12127 = alloca i1, i1 0
- %nop12128 = alloca i1, i1 0
- %nop12129 = alloca i1, i1 0
- %nop12130 = alloca i1, i1 0
- %nop12131 = alloca i1, i1 0
- %nop12132 = alloca i1, i1 0
- %nop12133 = alloca i1, i1 0
- %nop12134 = alloca i1, i1 0
- %nop12135 = alloca i1, i1 0
- %nop12136 = alloca i1, i1 0
- %nop12137 = alloca i1, i1 0
- %nop12138 = alloca i1, i1 0
- %nop12139 = alloca i1, i1 0
- %nop12140 = alloca i1, i1 0
- %nop12141 = alloca i1, i1 0
- %nop12142 = alloca i1, i1 0
- %nop12143 = alloca i1, i1 0
- %nop12144 = alloca i1, i1 0
- %nop12145 = alloca i1, i1 0
- %nop12146 = alloca i1, i1 0
- %nop12147 = alloca i1, i1 0
- %nop12148 = alloca i1, i1 0
- %nop12149 = alloca i1, i1 0
- %nop12150 = alloca i1, i1 0
- %nop12151 = alloca i1, i1 0
- %nop12152 = alloca i1, i1 0
- %nop12153 = alloca i1, i1 0
- %nop12154 = alloca i1, i1 0
- %nop12155 = alloca i1, i1 0
- %nop12156 = alloca i1, i1 0
- %nop12157 = alloca i1, i1 0
- %nop12158 = alloca i1, i1 0
- %nop12159 = alloca i1, i1 0
- %nop12160 = alloca i1, i1 0
- %nop12161 = alloca i1, i1 0
- %nop12162 = alloca i1, i1 0
- %nop12163 = alloca i1, i1 0
- %nop12164 = alloca i1, i1 0
- %nop12165 = alloca i1, i1 0
- %nop12166 = alloca i1, i1 0
- %nop12167 = alloca i1, i1 0
- %nop12168 = alloca i1, i1 0
- %nop12169 = alloca i1, i1 0
- %nop12170 = alloca i1, i1 0
- %nop12171 = alloca i1, i1 0
- %nop12172 = alloca i1, i1 0
- %nop12173 = alloca i1, i1 0
- %nop12174 = alloca i1, i1 0
- %nop12175 = alloca i1, i1 0
- %nop12176 = alloca i1, i1 0
- %nop12177 = alloca i1, i1 0
- %nop12178 = alloca i1, i1 0
- %nop12179 = alloca i1, i1 0
- %nop12180 = alloca i1, i1 0
- %nop12181 = alloca i1, i1 0
- %nop12182 = alloca i1, i1 0
- %nop12183 = alloca i1, i1 0
- %nop12184 = alloca i1, i1 0
- %nop12185 = alloca i1, i1 0
- %nop12186 = alloca i1, i1 0
- %nop12187 = alloca i1, i1 0
- %nop12188 = alloca i1, i1 0
- %nop12189 = alloca i1, i1 0
- %nop12190 = alloca i1, i1 0
- %nop12191 = alloca i1, i1 0
- %nop12192 = alloca i1, i1 0
- %nop12193 = alloca i1, i1 0
- %nop12194 = alloca i1, i1 0
- %nop12195 = alloca i1, i1 0
- %nop12196 = alloca i1, i1 0
- %nop12197 = alloca i1, i1 0
- %nop12198 = alloca i1, i1 0
- %nop12199 = alloca i1, i1 0
- %nop12200 = alloca i1, i1 0
- %nop12201 = alloca i1, i1 0
- %nop12202 = alloca i1, i1 0
- %nop12203 = alloca i1, i1 0
- %nop12204 = alloca i1, i1 0
- %nop12205 = alloca i1, i1 0
- %nop12206 = alloca i1, i1 0
- %nop12207 = alloca i1, i1 0
- %nop12208 = alloca i1, i1 0
- %nop12209 = alloca i1, i1 0
- %nop12210 = alloca i1, i1 0
- %nop12211 = alloca i1, i1 0
- %nop12212 = alloca i1, i1 0
- %nop12213 = alloca i1, i1 0
- %nop12214 = alloca i1, i1 0
- %nop12215 = alloca i1, i1 0
- %nop12216 = alloca i1, i1 0
- %nop12217 = alloca i1, i1 0
- %nop12218 = alloca i1, i1 0
- %nop12219 = alloca i1, i1 0
- %nop12220 = alloca i1, i1 0
- %nop12221 = alloca i1, i1 0
- %nop12222 = alloca i1, i1 0
- %nop12223 = alloca i1, i1 0
- %nop12224 = alloca i1, i1 0
- %nop12225 = alloca i1, i1 0
- %nop12226 = alloca i1, i1 0
- %nop12227 = alloca i1, i1 0
- %nop12228 = alloca i1, i1 0
- %nop12229 = alloca i1, i1 0
- %nop12230 = alloca i1, i1 0
- %nop12231 = alloca i1, i1 0
- %nop12232 = alloca i1, i1 0
- %nop12233 = alloca i1, i1 0
- %nop12234 = alloca i1, i1 0
- %nop12235 = alloca i1, i1 0
- %nop12236 = alloca i1, i1 0
- %nop12237 = alloca i1, i1 0
- %nop12238 = alloca i1, i1 0
- %nop12239 = alloca i1, i1 0
- %nop12240 = alloca i1, i1 0
- %nop12241 = alloca i1, i1 0
- %nop12242 = alloca i1, i1 0
- %nop12243 = alloca i1, i1 0
- %nop12244 = alloca i1, i1 0
- %nop12245 = alloca i1, i1 0
- %nop12246 = alloca i1, i1 0
- %nop12247 = alloca i1, i1 0
- %nop12248 = alloca i1, i1 0
- %nop12249 = alloca i1, i1 0
- %nop12250 = alloca i1, i1 0
- %nop12251 = alloca i1, i1 0
- %nop12252 = alloca i1, i1 0
- %nop12253 = alloca i1, i1 0
- %nop12254 = alloca i1, i1 0
- %nop12255 = alloca i1, i1 0
- %nop12256 = alloca i1, i1 0
- %nop12257 = alloca i1, i1 0
- %nop12258 = alloca i1, i1 0
- %nop12259 = alloca i1, i1 0
- %nop12260 = alloca i1, i1 0
- %nop12261 = alloca i1, i1 0
- %nop12262 = alloca i1, i1 0
- %nop12263 = alloca i1, i1 0
- %nop12264 = alloca i1, i1 0
- %nop12265 = alloca i1, i1 0
- %nop12266 = alloca i1, i1 0
- %nop12267 = alloca i1, i1 0
- %nop12268 = alloca i1, i1 0
- %nop12269 = alloca i1, i1 0
- %nop12270 = alloca i1, i1 0
- %nop12271 = alloca i1, i1 0
- %nop12272 = alloca i1, i1 0
- %nop12273 = alloca i1, i1 0
- %nop12274 = alloca i1, i1 0
- %nop12275 = alloca i1, i1 0
- %nop12276 = alloca i1, i1 0
- %nop12277 = alloca i1, i1 0
- %nop12278 = alloca i1, i1 0
- %nop12279 = alloca i1, i1 0
- %nop12280 = alloca i1, i1 0
- %nop12281 = alloca i1, i1 0
- %nop12282 = alloca i1, i1 0
- %nop12283 = alloca i1, i1 0
- %nop12284 = alloca i1, i1 0
- %nop12285 = alloca i1, i1 0
- %nop12286 = alloca i1, i1 0
- %nop12287 = alloca i1, i1 0
- %nop12288 = alloca i1, i1 0
- %nop12289 = alloca i1, i1 0
- %nop12290 = alloca i1, i1 0
- %nop12291 = alloca i1, i1 0
- %nop12292 = alloca i1, i1 0
- %nop12293 = alloca i1, i1 0
- %nop12294 = alloca i1, i1 0
- %nop12295 = alloca i1, i1 0
- %nop12296 = alloca i1, i1 0
- %nop12297 = alloca i1, i1 0
- %nop12298 = alloca i1, i1 0
- %nop12299 = alloca i1, i1 0
- %nop12300 = alloca i1, i1 0
- %nop12301 = alloca i1, i1 0
- %nop12302 = alloca i1, i1 0
- %nop12303 = alloca i1, i1 0
- %nop12304 = alloca i1, i1 0
- %nop12305 = alloca i1, i1 0
- %nop12306 = alloca i1, i1 0
- %nop12307 = alloca i1, i1 0
- %nop12308 = alloca i1, i1 0
- %nop12309 = alloca i1, i1 0
- %nop12310 = alloca i1, i1 0
- %nop12311 = alloca i1, i1 0
- %nop12312 = alloca i1, i1 0
- %nop12313 = alloca i1, i1 0
- %nop12314 = alloca i1, i1 0
- %nop12315 = alloca i1, i1 0
- %nop12316 = alloca i1, i1 0
- %nop12317 = alloca i1, i1 0
- %nop12318 = alloca i1, i1 0
- %nop12319 = alloca i1, i1 0
- %nop12320 = alloca i1, i1 0
- %nop12321 = alloca i1, i1 0
- %nop12322 = alloca i1, i1 0
- %nop12323 = alloca i1, i1 0
- %nop12324 = alloca i1, i1 0
- %nop12325 = alloca i1, i1 0
- %nop12326 = alloca i1, i1 0
- %nop12327 = alloca i1, i1 0
- %nop12328 = alloca i1, i1 0
- %nop12329 = alloca i1, i1 0
- %nop12330 = alloca i1, i1 0
- %nop12331 = alloca i1, i1 0
- %nop12332 = alloca i1, i1 0
- %nop12333 = alloca i1, i1 0
- %nop12334 = alloca i1, i1 0
- %nop12335 = alloca i1, i1 0
- %nop12336 = alloca i1, i1 0
- %nop12337 = alloca i1, i1 0
- %nop12338 = alloca i1, i1 0
- %nop12339 = alloca i1, i1 0
- %nop12340 = alloca i1, i1 0
- %nop12341 = alloca i1, i1 0
- %nop12342 = alloca i1, i1 0
- %nop12343 = alloca i1, i1 0
- %nop12344 = alloca i1, i1 0
- %nop12345 = alloca i1, i1 0
- %nop12346 = alloca i1, i1 0
- %nop12347 = alloca i1, i1 0
- %nop12348 = alloca i1, i1 0
- %nop12349 = alloca i1, i1 0
- %nop12350 = alloca i1, i1 0
- %nop12351 = alloca i1, i1 0
- %nop12352 = alloca i1, i1 0
- %nop12353 = alloca i1, i1 0
- %nop12354 = alloca i1, i1 0
- %nop12355 = alloca i1, i1 0
- %nop12356 = alloca i1, i1 0
- %nop12357 = alloca i1, i1 0
- %nop12358 = alloca i1, i1 0
- %nop12359 = alloca i1, i1 0
- %nop12360 = alloca i1, i1 0
- %nop12361 = alloca i1, i1 0
- %nop12362 = alloca i1, i1 0
- %nop12363 = alloca i1, i1 0
- %nop12364 = alloca i1, i1 0
- %nop12365 = alloca i1, i1 0
- %nop12366 = alloca i1, i1 0
- %nop12367 = alloca i1, i1 0
- %nop12368 = alloca i1, i1 0
- %nop12369 = alloca i1, i1 0
- %nop12370 = alloca i1, i1 0
- %nop12371 = alloca i1, i1 0
- %nop12372 = alloca i1, i1 0
- %nop12373 = alloca i1, i1 0
- %nop12374 = alloca i1, i1 0
- %nop12375 = alloca i1, i1 0
- %nop12376 = alloca i1, i1 0
- %nop12377 = alloca i1, i1 0
- %nop12378 = alloca i1, i1 0
- %nop12379 = alloca i1, i1 0
- %nop12380 = alloca i1, i1 0
- %nop12381 = alloca i1, i1 0
- %nop12382 = alloca i1, i1 0
- %nop12383 = alloca i1, i1 0
- %nop12384 = alloca i1, i1 0
- %nop12385 = alloca i1, i1 0
- %nop12386 = alloca i1, i1 0
- %nop12387 = alloca i1, i1 0
- %nop12388 = alloca i1, i1 0
- %nop12389 = alloca i1, i1 0
- %nop12390 = alloca i1, i1 0
- %nop12391 = alloca i1, i1 0
- %nop12392 = alloca i1, i1 0
- %nop12393 = alloca i1, i1 0
- %nop12394 = alloca i1, i1 0
- %nop12395 = alloca i1, i1 0
- %nop12396 = alloca i1, i1 0
- %nop12397 = alloca i1, i1 0
- %nop12398 = alloca i1, i1 0
- %nop12399 = alloca i1, i1 0
- %nop12400 = alloca i1, i1 0
- %nop12401 = alloca i1, i1 0
- %nop12402 = alloca i1, i1 0
- %nop12403 = alloca i1, i1 0
- %nop12404 = alloca i1, i1 0
- %nop12405 = alloca i1, i1 0
- %nop12406 = alloca i1, i1 0
- %nop12407 = alloca i1, i1 0
- %nop12408 = alloca i1, i1 0
- %nop12409 = alloca i1, i1 0
- %nop12410 = alloca i1, i1 0
- %nop12411 = alloca i1, i1 0
- %nop12412 = alloca i1, i1 0
- %nop12413 = alloca i1, i1 0
- %nop12414 = alloca i1, i1 0
- %nop12415 = alloca i1, i1 0
- %nop12416 = alloca i1, i1 0
- %nop12417 = alloca i1, i1 0
- %nop12418 = alloca i1, i1 0
- %nop12419 = alloca i1, i1 0
- %nop12420 = alloca i1, i1 0
- %nop12421 = alloca i1, i1 0
- %nop12422 = alloca i1, i1 0
- %nop12423 = alloca i1, i1 0
- %nop12424 = alloca i1, i1 0
- %nop12425 = alloca i1, i1 0
- %nop12426 = alloca i1, i1 0
- %nop12427 = alloca i1, i1 0
- %nop12428 = alloca i1, i1 0
- %nop12429 = alloca i1, i1 0
- %nop12430 = alloca i1, i1 0
- %nop12431 = alloca i1, i1 0
- %nop12432 = alloca i1, i1 0
- %nop12433 = alloca i1, i1 0
- %nop12434 = alloca i1, i1 0
- %nop12435 = alloca i1, i1 0
- %nop12436 = alloca i1, i1 0
- %nop12437 = alloca i1, i1 0
- %nop12438 = alloca i1, i1 0
- %nop12439 = alloca i1, i1 0
- %nop12440 = alloca i1, i1 0
- %nop12441 = alloca i1, i1 0
- %nop12442 = alloca i1, i1 0
- %nop12443 = alloca i1, i1 0
- %nop12444 = alloca i1, i1 0
- %nop12445 = alloca i1, i1 0
- %nop12446 = alloca i1, i1 0
- %nop12447 = alloca i1, i1 0
- %nop12448 = alloca i1, i1 0
- %nop12449 = alloca i1, i1 0
- %nop12450 = alloca i1, i1 0
- %nop12451 = alloca i1, i1 0
- %nop12452 = alloca i1, i1 0
- %nop12453 = alloca i1, i1 0
- %nop12454 = alloca i1, i1 0
- %nop12455 = alloca i1, i1 0
- %nop12456 = alloca i1, i1 0
- %nop12457 = alloca i1, i1 0
- %nop12458 = alloca i1, i1 0
- %nop12459 = alloca i1, i1 0
- %nop12460 = alloca i1, i1 0
- %nop12461 = alloca i1, i1 0
- %nop12462 = alloca i1, i1 0
- %nop12463 = alloca i1, i1 0
- %nop12464 = alloca i1, i1 0
- %nop12465 = alloca i1, i1 0
- %nop12466 = alloca i1, i1 0
- %nop12467 = alloca i1, i1 0
- %nop12468 = alloca i1, i1 0
- %nop12469 = alloca i1, i1 0
- %nop12470 = alloca i1, i1 0
- %nop12471 = alloca i1, i1 0
- %nop12472 = alloca i1, i1 0
- %nop12473 = alloca i1, i1 0
- %nop12474 = alloca i1, i1 0
- %nop12475 = alloca i1, i1 0
- %nop12476 = alloca i1, i1 0
- %nop12477 = alloca i1, i1 0
- %nop12478 = alloca i1, i1 0
- %nop12479 = alloca i1, i1 0
- %nop12480 = alloca i1, i1 0
- %nop12481 = alloca i1, i1 0
- %nop12482 = alloca i1, i1 0
- %nop12483 = alloca i1, i1 0
- %nop12484 = alloca i1, i1 0
- %nop12485 = alloca i1, i1 0
- %nop12486 = alloca i1, i1 0
- %nop12487 = alloca i1, i1 0
- %nop12488 = alloca i1, i1 0
- %nop12489 = alloca i1, i1 0
- %nop12490 = alloca i1, i1 0
- %nop12491 = alloca i1, i1 0
- %nop12492 = alloca i1, i1 0
- %nop12493 = alloca i1, i1 0
- %nop12494 = alloca i1, i1 0
- %nop12495 = alloca i1, i1 0
- %nop12496 = alloca i1, i1 0
- %nop12497 = alloca i1, i1 0
- %nop12498 = alloca i1, i1 0
- %nop12499 = alloca i1, i1 0
- %nop12500 = alloca i1, i1 0
- %nop12501 = alloca i1, i1 0
- %nop12502 = alloca i1, i1 0
- %nop12503 = alloca i1, i1 0
- %nop12504 = alloca i1, i1 0
- %nop12505 = alloca i1, i1 0
- %nop12506 = alloca i1, i1 0
- %nop12507 = alloca i1, i1 0
- %nop12508 = alloca i1, i1 0
- %nop12509 = alloca i1, i1 0
- %nop12510 = alloca i1, i1 0
- %nop12511 = alloca i1, i1 0
- %nop12512 = alloca i1, i1 0
- %nop12513 = alloca i1, i1 0
- %nop12514 = alloca i1, i1 0
- %nop12515 = alloca i1, i1 0
- %nop12516 = alloca i1, i1 0
- %nop12517 = alloca i1, i1 0
- %nop12518 = alloca i1, i1 0
- %nop12519 = alloca i1, i1 0
- %nop12520 = alloca i1, i1 0
- %nop12521 = alloca i1, i1 0
- %nop12522 = alloca i1, i1 0
- %nop12523 = alloca i1, i1 0
- %nop12524 = alloca i1, i1 0
- %nop12525 = alloca i1, i1 0
- %nop12526 = alloca i1, i1 0
- %nop12527 = alloca i1, i1 0
- %nop12528 = alloca i1, i1 0
- %nop12529 = alloca i1, i1 0
- %nop12530 = alloca i1, i1 0
- %nop12531 = alloca i1, i1 0
- %nop12532 = alloca i1, i1 0
- %nop12533 = alloca i1, i1 0
- %nop12534 = alloca i1, i1 0
- %nop12535 = alloca i1, i1 0
- %nop12536 = alloca i1, i1 0
- %nop12537 = alloca i1, i1 0
- %nop12538 = alloca i1, i1 0
- %nop12539 = alloca i1, i1 0
- %nop12540 = alloca i1, i1 0
- %nop12541 = alloca i1, i1 0
- %nop12542 = alloca i1, i1 0
- %nop12543 = alloca i1, i1 0
- %nop12544 = alloca i1, i1 0
- %nop12545 = alloca i1, i1 0
- %nop12546 = alloca i1, i1 0
- %nop12547 = alloca i1, i1 0
- %nop12548 = alloca i1, i1 0
- %nop12549 = alloca i1, i1 0
- %nop12550 = alloca i1, i1 0
- %nop12551 = alloca i1, i1 0
- %nop12552 = alloca i1, i1 0
- %nop12553 = alloca i1, i1 0
- %nop12554 = alloca i1, i1 0
- %nop12555 = alloca i1, i1 0
- %nop12556 = alloca i1, i1 0
- %nop12557 = alloca i1, i1 0
- %nop12558 = alloca i1, i1 0
- %nop12559 = alloca i1, i1 0
- %nop12560 = alloca i1, i1 0
- %nop12561 = alloca i1, i1 0
- %nop12562 = alloca i1, i1 0
- %nop12563 = alloca i1, i1 0
- %nop12564 = alloca i1, i1 0
- %nop12565 = alloca i1, i1 0
- %nop12566 = alloca i1, i1 0
- %nop12567 = alloca i1, i1 0
- %nop12568 = alloca i1, i1 0
- %nop12569 = alloca i1, i1 0
- %nop12570 = alloca i1, i1 0
- %nop12571 = alloca i1, i1 0
- %nop12572 = alloca i1, i1 0
- %nop12573 = alloca i1, i1 0
- %nop12574 = alloca i1, i1 0
- %nop12575 = alloca i1, i1 0
- %nop12576 = alloca i1, i1 0
- %nop12577 = alloca i1, i1 0
- %nop12578 = alloca i1, i1 0
- %nop12579 = alloca i1, i1 0
- %nop12580 = alloca i1, i1 0
- %nop12581 = alloca i1, i1 0
- %nop12582 = alloca i1, i1 0
- %nop12583 = alloca i1, i1 0
- %nop12584 = alloca i1, i1 0
- %nop12585 = alloca i1, i1 0
- %nop12586 = alloca i1, i1 0
- %nop12587 = alloca i1, i1 0
- %nop12588 = alloca i1, i1 0
- %nop12589 = alloca i1, i1 0
- %nop12590 = alloca i1, i1 0
- %nop12591 = alloca i1, i1 0
- %nop12592 = alloca i1, i1 0
- %nop12593 = alloca i1, i1 0
- %nop12594 = alloca i1, i1 0
- %nop12595 = alloca i1, i1 0
- %nop12596 = alloca i1, i1 0
- %nop12597 = alloca i1, i1 0
- %nop12598 = alloca i1, i1 0
- %nop12599 = alloca i1, i1 0
- %nop12600 = alloca i1, i1 0
- %nop12601 = alloca i1, i1 0
- %nop12602 = alloca i1, i1 0
- %nop12603 = alloca i1, i1 0
- %nop12604 = alloca i1, i1 0
- %nop12605 = alloca i1, i1 0
- %nop12606 = alloca i1, i1 0
- %nop12607 = alloca i1, i1 0
- %nop12608 = alloca i1, i1 0
- %nop12609 = alloca i1, i1 0
- %nop12610 = alloca i1, i1 0
- %nop12611 = alloca i1, i1 0
- %nop12612 = alloca i1, i1 0
- %nop12613 = alloca i1, i1 0
- %nop12614 = alloca i1, i1 0
- %nop12615 = alloca i1, i1 0
- %nop12616 = alloca i1, i1 0
- %nop12617 = alloca i1, i1 0
- %nop12618 = alloca i1, i1 0
- %nop12619 = alloca i1, i1 0
- %nop12620 = alloca i1, i1 0
- %nop12621 = alloca i1, i1 0
- %nop12622 = alloca i1, i1 0
- %nop12623 = alloca i1, i1 0
- %nop12624 = alloca i1, i1 0
- %nop12625 = alloca i1, i1 0
- %nop12626 = alloca i1, i1 0
- %nop12627 = alloca i1, i1 0
- %nop12628 = alloca i1, i1 0
- %nop12629 = alloca i1, i1 0
- %nop12630 = alloca i1, i1 0
- %nop12631 = alloca i1, i1 0
- %nop12632 = alloca i1, i1 0
- %nop12633 = alloca i1, i1 0
- %nop12634 = alloca i1, i1 0
- %nop12635 = alloca i1, i1 0
- %nop12636 = alloca i1, i1 0
- %nop12637 = alloca i1, i1 0
- %nop12638 = alloca i1, i1 0
- %nop12639 = alloca i1, i1 0
- %nop12640 = alloca i1, i1 0
- %nop12641 = alloca i1, i1 0
- %nop12642 = alloca i1, i1 0
- %nop12643 = alloca i1, i1 0
- %nop12644 = alloca i1, i1 0
- %nop12645 = alloca i1, i1 0
- %nop12646 = alloca i1, i1 0
- %nop12647 = alloca i1, i1 0
- %nop12648 = alloca i1, i1 0
- %nop12649 = alloca i1, i1 0
- %nop12650 = alloca i1, i1 0
- %nop12651 = alloca i1, i1 0
- %nop12652 = alloca i1, i1 0
- %nop12653 = alloca i1, i1 0
- %nop12654 = alloca i1, i1 0
- %nop12655 = alloca i1, i1 0
- %nop12656 = alloca i1, i1 0
- %nop12657 = alloca i1, i1 0
- %nop12658 = alloca i1, i1 0
- %nop12659 = alloca i1, i1 0
- %nop12660 = alloca i1, i1 0
- %nop12661 = alloca i1, i1 0
- %nop12662 = alloca i1, i1 0
- %nop12663 = alloca i1, i1 0
- %nop12664 = alloca i1, i1 0
- %nop12665 = alloca i1, i1 0
- %nop12666 = alloca i1, i1 0
- %nop12667 = alloca i1, i1 0
- %nop12668 = alloca i1, i1 0
- %nop12669 = alloca i1, i1 0
- %nop12670 = alloca i1, i1 0
- %nop12671 = alloca i1, i1 0
- %nop12672 = alloca i1, i1 0
- %nop12673 = alloca i1, i1 0
- %nop12674 = alloca i1, i1 0
- %nop12675 = alloca i1, i1 0
- %nop12676 = alloca i1, i1 0
- %nop12677 = alloca i1, i1 0
- %nop12678 = alloca i1, i1 0
- %nop12679 = alloca i1, i1 0
- %nop12680 = alloca i1, i1 0
- %nop12681 = alloca i1, i1 0
- %nop12682 = alloca i1, i1 0
- %nop12683 = alloca i1, i1 0
- %nop12684 = alloca i1, i1 0
- %nop12685 = alloca i1, i1 0
- %nop12686 = alloca i1, i1 0
- %nop12687 = alloca i1, i1 0
- %nop12688 = alloca i1, i1 0
- %nop12689 = alloca i1, i1 0
- %nop12690 = alloca i1, i1 0
- %nop12691 = alloca i1, i1 0
- %nop12692 = alloca i1, i1 0
- %nop12693 = alloca i1, i1 0
- %nop12694 = alloca i1, i1 0
- %nop12695 = alloca i1, i1 0
- %nop12696 = alloca i1, i1 0
- %nop12697 = alloca i1, i1 0
- %nop12698 = alloca i1, i1 0
- %nop12699 = alloca i1, i1 0
- %nop12700 = alloca i1, i1 0
- %nop12701 = alloca i1, i1 0
- %nop12702 = alloca i1, i1 0
- %nop12703 = alloca i1, i1 0
- %nop12704 = alloca i1, i1 0
- %nop12705 = alloca i1, i1 0
- %nop12706 = alloca i1, i1 0
- %nop12707 = alloca i1, i1 0
- %nop12708 = alloca i1, i1 0
- %nop12709 = alloca i1, i1 0
- %nop12710 = alloca i1, i1 0
- %nop12711 = alloca i1, i1 0
- %nop12712 = alloca i1, i1 0
- %nop12713 = alloca i1, i1 0
- %nop12714 = alloca i1, i1 0
- %nop12715 = alloca i1, i1 0
- %nop12716 = alloca i1, i1 0
- %nop12717 = alloca i1, i1 0
- %nop12718 = alloca i1, i1 0
- %nop12719 = alloca i1, i1 0
- %nop12720 = alloca i1, i1 0
- %nop12721 = alloca i1, i1 0
- %nop12722 = alloca i1, i1 0
- %nop12723 = alloca i1, i1 0
- %nop12724 = alloca i1, i1 0
- %nop12725 = alloca i1, i1 0
- %nop12726 = alloca i1, i1 0
- %nop12727 = alloca i1, i1 0
- %nop12728 = alloca i1, i1 0
- %nop12729 = alloca i1, i1 0
- %nop12730 = alloca i1, i1 0
- %nop12731 = alloca i1, i1 0
- %nop12732 = alloca i1, i1 0
- %nop12733 = alloca i1, i1 0
- %nop12734 = alloca i1, i1 0
- %nop12735 = alloca i1, i1 0
- %nop12736 = alloca i1, i1 0
- %nop12737 = alloca i1, i1 0
- %nop12738 = alloca i1, i1 0
- %nop12739 = alloca i1, i1 0
- %nop12740 = alloca i1, i1 0
- %nop12741 = alloca i1, i1 0
- %nop12742 = alloca i1, i1 0
- %nop12743 = alloca i1, i1 0
- %nop12744 = alloca i1, i1 0
- %nop12745 = alloca i1, i1 0
- %nop12746 = alloca i1, i1 0
- %nop12747 = alloca i1, i1 0
- %nop12748 = alloca i1, i1 0
- %nop12749 = alloca i1, i1 0
- %nop12750 = alloca i1, i1 0
- %nop12751 = alloca i1, i1 0
- %nop12752 = alloca i1, i1 0
- %nop12753 = alloca i1, i1 0
- %nop12754 = alloca i1, i1 0
- %nop12755 = alloca i1, i1 0
- %nop12756 = alloca i1, i1 0
- %nop12757 = alloca i1, i1 0
- %nop12758 = alloca i1, i1 0
- %nop12759 = alloca i1, i1 0
- %nop12760 = alloca i1, i1 0
- %nop12761 = alloca i1, i1 0
- %nop12762 = alloca i1, i1 0
- %nop12763 = alloca i1, i1 0
- %nop12764 = alloca i1, i1 0
- %nop12765 = alloca i1, i1 0
- %nop12766 = alloca i1, i1 0
- %nop12767 = alloca i1, i1 0
- %nop12768 = alloca i1, i1 0
- %nop12769 = alloca i1, i1 0
- %nop12770 = alloca i1, i1 0
- %nop12771 = alloca i1, i1 0
- %nop12772 = alloca i1, i1 0
- %nop12773 = alloca i1, i1 0
- %nop12774 = alloca i1, i1 0
- %nop12775 = alloca i1, i1 0
- %nop12776 = alloca i1, i1 0
- %nop12777 = alloca i1, i1 0
- %nop12778 = alloca i1, i1 0
- %nop12779 = alloca i1, i1 0
- %nop12780 = alloca i1, i1 0
- %nop12781 = alloca i1, i1 0
- %nop12782 = alloca i1, i1 0
- %nop12783 = alloca i1, i1 0
- %nop12784 = alloca i1, i1 0
- %nop12785 = alloca i1, i1 0
- %nop12786 = alloca i1, i1 0
- %nop12787 = alloca i1, i1 0
- %nop12788 = alloca i1, i1 0
- %nop12789 = alloca i1, i1 0
- %nop12790 = alloca i1, i1 0
- %nop12791 = alloca i1, i1 0
- %nop12792 = alloca i1, i1 0
- %nop12793 = alloca i1, i1 0
- %nop12794 = alloca i1, i1 0
- %nop12795 = alloca i1, i1 0
- %nop12796 = alloca i1, i1 0
- %nop12797 = alloca i1, i1 0
- %nop12798 = alloca i1, i1 0
- %nop12799 = alloca i1, i1 0
- %nop12800 = alloca i1, i1 0
- %nop12801 = alloca i1, i1 0
- %nop12802 = alloca i1, i1 0
- %nop12803 = alloca i1, i1 0
- %nop12804 = alloca i1, i1 0
- %nop12805 = alloca i1, i1 0
- %nop12806 = alloca i1, i1 0
- %nop12807 = alloca i1, i1 0
- %nop12808 = alloca i1, i1 0
- %nop12809 = alloca i1, i1 0
- %nop12810 = alloca i1, i1 0
- %nop12811 = alloca i1, i1 0
- %nop12812 = alloca i1, i1 0
- %nop12813 = alloca i1, i1 0
- %nop12814 = alloca i1, i1 0
- %nop12815 = alloca i1, i1 0
- %nop12816 = alloca i1, i1 0
- %nop12817 = alloca i1, i1 0
- %nop12818 = alloca i1, i1 0
- %nop12819 = alloca i1, i1 0
- %nop12820 = alloca i1, i1 0
- %nop12821 = alloca i1, i1 0
- %nop12822 = alloca i1, i1 0
- %nop12823 = alloca i1, i1 0
- %nop12824 = alloca i1, i1 0
- %nop12825 = alloca i1, i1 0
- %nop12826 = alloca i1, i1 0
- %nop12827 = alloca i1, i1 0
- %nop12828 = alloca i1, i1 0
- %nop12829 = alloca i1, i1 0
- %nop12830 = alloca i1, i1 0
- %nop12831 = alloca i1, i1 0
- %nop12832 = alloca i1, i1 0
- %nop12833 = alloca i1, i1 0
- %nop12834 = alloca i1, i1 0
- %nop12835 = alloca i1, i1 0
- %nop12836 = alloca i1, i1 0
- %nop12837 = alloca i1, i1 0
- %nop12838 = alloca i1, i1 0
- %nop12839 = alloca i1, i1 0
- %nop12840 = alloca i1, i1 0
- %nop12841 = alloca i1, i1 0
- %nop12842 = alloca i1, i1 0
- %nop12843 = alloca i1, i1 0
- %nop12844 = alloca i1, i1 0
- %nop12845 = alloca i1, i1 0
- %nop12846 = alloca i1, i1 0
- %nop12847 = alloca i1, i1 0
- %nop12848 = alloca i1, i1 0
- %nop12849 = alloca i1, i1 0
- %nop12850 = alloca i1, i1 0
- %nop12851 = alloca i1, i1 0
- %nop12852 = alloca i1, i1 0
- %nop12853 = alloca i1, i1 0
- %nop12854 = alloca i1, i1 0
- %nop12855 = alloca i1, i1 0
- %nop12856 = alloca i1, i1 0
- %nop12857 = alloca i1, i1 0
- %nop12858 = alloca i1, i1 0
- %nop12859 = alloca i1, i1 0
- %nop12860 = alloca i1, i1 0
- %nop12861 = alloca i1, i1 0
- %nop12862 = alloca i1, i1 0
- %nop12863 = alloca i1, i1 0
- %nop12864 = alloca i1, i1 0
- %nop12865 = alloca i1, i1 0
- %nop12866 = alloca i1, i1 0
- %nop12867 = alloca i1, i1 0
- %nop12868 = alloca i1, i1 0
- %nop12869 = alloca i1, i1 0
- %nop12870 = alloca i1, i1 0
- %nop12871 = alloca i1, i1 0
- %nop12872 = alloca i1, i1 0
- %nop12873 = alloca i1, i1 0
- %nop12874 = alloca i1, i1 0
- %nop12875 = alloca i1, i1 0
- %nop12876 = alloca i1, i1 0
- %nop12877 = alloca i1, i1 0
- %nop12878 = alloca i1, i1 0
- %nop12879 = alloca i1, i1 0
- %nop12880 = alloca i1, i1 0
- %nop12881 = alloca i1, i1 0
- %nop12882 = alloca i1, i1 0
- %nop12883 = alloca i1, i1 0
- %nop12884 = alloca i1, i1 0
- %nop12885 = alloca i1, i1 0
- %nop12886 = alloca i1, i1 0
- %nop12887 = alloca i1, i1 0
- %nop12888 = alloca i1, i1 0
- %nop12889 = alloca i1, i1 0
- %nop12890 = alloca i1, i1 0
- %nop12891 = alloca i1, i1 0
- %nop12892 = alloca i1, i1 0
- %nop12893 = alloca i1, i1 0
- %nop12894 = alloca i1, i1 0
- %nop12895 = alloca i1, i1 0
- %nop12896 = alloca i1, i1 0
- %nop12897 = alloca i1, i1 0
- %nop12898 = alloca i1, i1 0
- %nop12899 = alloca i1, i1 0
- %nop12900 = alloca i1, i1 0
- %nop12901 = alloca i1, i1 0
- %nop12902 = alloca i1, i1 0
- %nop12903 = alloca i1, i1 0
- %nop12904 = alloca i1, i1 0
- %nop12905 = alloca i1, i1 0
- %nop12906 = alloca i1, i1 0
- %nop12907 = alloca i1, i1 0
- %nop12908 = alloca i1, i1 0
- %nop12909 = alloca i1, i1 0
- %nop12910 = alloca i1, i1 0
- %nop12911 = alloca i1, i1 0
- %nop12912 = alloca i1, i1 0
- %nop12913 = alloca i1, i1 0
- %nop12914 = alloca i1, i1 0
- %nop12915 = alloca i1, i1 0
- %nop12916 = alloca i1, i1 0
- %nop12917 = alloca i1, i1 0
- %nop12918 = alloca i1, i1 0
- %nop12919 = alloca i1, i1 0
- %nop12920 = alloca i1, i1 0
- %nop12921 = alloca i1, i1 0
- %nop12922 = alloca i1, i1 0
- %nop12923 = alloca i1, i1 0
- %nop12924 = alloca i1, i1 0
- %nop12925 = alloca i1, i1 0
- %nop12926 = alloca i1, i1 0
- %nop12927 = alloca i1, i1 0
- %nop12928 = alloca i1, i1 0
- %nop12929 = alloca i1, i1 0
- %nop12930 = alloca i1, i1 0
- %nop12931 = alloca i1, i1 0
- %nop12932 = alloca i1, i1 0
- %nop12933 = alloca i1, i1 0
- %nop12934 = alloca i1, i1 0
- %nop12935 = alloca i1, i1 0
- %nop12936 = alloca i1, i1 0
- %nop12937 = alloca i1, i1 0
- %nop12938 = alloca i1, i1 0
- %nop12939 = alloca i1, i1 0
- %nop12940 = alloca i1, i1 0
- %nop12941 = alloca i1, i1 0
- %nop12942 = alloca i1, i1 0
- %nop12943 = alloca i1, i1 0
- %nop12944 = alloca i1, i1 0
- %nop12945 = alloca i1, i1 0
- %nop12946 = alloca i1, i1 0
- %nop12947 = alloca i1, i1 0
- %nop12948 = alloca i1, i1 0
- %nop12949 = alloca i1, i1 0
- %nop12950 = alloca i1, i1 0
- %nop12951 = alloca i1, i1 0
- %nop12952 = alloca i1, i1 0
- %nop12953 = alloca i1, i1 0
- %nop12954 = alloca i1, i1 0
- %nop12955 = alloca i1, i1 0
- %nop12956 = alloca i1, i1 0
- %nop12957 = alloca i1, i1 0
- %nop12958 = alloca i1, i1 0
- %nop12959 = alloca i1, i1 0
- %nop12960 = alloca i1, i1 0
- %nop12961 = alloca i1, i1 0
- %nop12962 = alloca i1, i1 0
- %nop12963 = alloca i1, i1 0
- %nop12964 = alloca i1, i1 0
- %nop12965 = alloca i1, i1 0
- %nop12966 = alloca i1, i1 0
- %nop12967 = alloca i1, i1 0
- %nop12968 = alloca i1, i1 0
- %nop12969 = alloca i1, i1 0
- %nop12970 = alloca i1, i1 0
- %nop12971 = alloca i1, i1 0
- %nop12972 = alloca i1, i1 0
- %nop12973 = alloca i1, i1 0
- %nop12974 = alloca i1, i1 0
- %nop12975 = alloca i1, i1 0
- %nop12976 = alloca i1, i1 0
- %nop12977 = alloca i1, i1 0
- %nop12978 = alloca i1, i1 0
- %nop12979 = alloca i1, i1 0
- %nop12980 = alloca i1, i1 0
- %nop12981 = alloca i1, i1 0
- %nop12982 = alloca i1, i1 0
- %nop12983 = alloca i1, i1 0
- %nop12984 = alloca i1, i1 0
- %nop12985 = alloca i1, i1 0
- %nop12986 = alloca i1, i1 0
- %nop12987 = alloca i1, i1 0
- %nop12988 = alloca i1, i1 0
- %nop12989 = alloca i1, i1 0
- %nop12990 = alloca i1, i1 0
- %nop12991 = alloca i1, i1 0
- %nop12992 = alloca i1, i1 0
- %nop12993 = alloca i1, i1 0
- %nop12994 = alloca i1, i1 0
- %nop12995 = alloca i1, i1 0
- %nop12996 = alloca i1, i1 0
- %nop12997 = alloca i1, i1 0
- %nop12998 = alloca i1, i1 0
- %nop12999 = alloca i1, i1 0
- %nop13000 = alloca i1, i1 0
- %nop13001 = alloca i1, i1 0
- %nop13002 = alloca i1, i1 0
- %nop13003 = alloca i1, i1 0
- %nop13004 = alloca i1, i1 0
- %nop13005 = alloca i1, i1 0
- %nop13006 = alloca i1, i1 0
- %nop13007 = alloca i1, i1 0
- %nop13008 = alloca i1, i1 0
- %nop13009 = alloca i1, i1 0
- %nop13010 = alloca i1, i1 0
- %nop13011 = alloca i1, i1 0
- %nop13012 = alloca i1, i1 0
- %nop13013 = alloca i1, i1 0
- %nop13014 = alloca i1, i1 0
- %nop13015 = alloca i1, i1 0
- %nop13016 = alloca i1, i1 0
- %nop13017 = alloca i1, i1 0
- %nop13018 = alloca i1, i1 0
- %nop13019 = alloca i1, i1 0
- %nop13020 = alloca i1, i1 0
- %nop13021 = alloca i1, i1 0
- %nop13022 = alloca i1, i1 0
- %nop13023 = alloca i1, i1 0
- %nop13024 = alloca i1, i1 0
- %nop13025 = alloca i1, i1 0
- %nop13026 = alloca i1, i1 0
- %nop13027 = alloca i1, i1 0
- %nop13028 = alloca i1, i1 0
- %nop13029 = alloca i1, i1 0
- %nop13030 = alloca i1, i1 0
- %nop13031 = alloca i1, i1 0
- %nop13032 = alloca i1, i1 0
- %nop13033 = alloca i1, i1 0
- %nop13034 = alloca i1, i1 0
- %nop13035 = alloca i1, i1 0
- %nop13036 = alloca i1, i1 0
- %nop13037 = alloca i1, i1 0
- %nop13038 = alloca i1, i1 0
- %nop13039 = alloca i1, i1 0
- %nop13040 = alloca i1, i1 0
- %nop13041 = alloca i1, i1 0
- %nop13042 = alloca i1, i1 0
- %nop13043 = alloca i1, i1 0
- %nop13044 = alloca i1, i1 0
- %nop13045 = alloca i1, i1 0
- %nop13046 = alloca i1, i1 0
- %nop13047 = alloca i1, i1 0
- %nop13048 = alloca i1, i1 0
- %nop13049 = alloca i1, i1 0
- %nop13050 = alloca i1, i1 0
- %nop13051 = alloca i1, i1 0
- %nop13052 = alloca i1, i1 0
- %nop13053 = alloca i1, i1 0
- %nop13054 = alloca i1, i1 0
- %nop13055 = alloca i1, i1 0
- %nop13056 = alloca i1, i1 0
- %nop13057 = alloca i1, i1 0
- %nop13058 = alloca i1, i1 0
- %nop13059 = alloca i1, i1 0
- %nop13060 = alloca i1, i1 0
- %nop13061 = alloca i1, i1 0
- %nop13062 = alloca i1, i1 0
- %nop13063 = alloca i1, i1 0
- %nop13064 = alloca i1, i1 0
- %nop13065 = alloca i1, i1 0
- %nop13066 = alloca i1, i1 0
- %nop13067 = alloca i1, i1 0
- %nop13068 = alloca i1, i1 0
- %nop13069 = alloca i1, i1 0
- %nop13070 = alloca i1, i1 0
- %nop13071 = alloca i1, i1 0
- %nop13072 = alloca i1, i1 0
- %nop13073 = alloca i1, i1 0
- %nop13074 = alloca i1, i1 0
- %nop13075 = alloca i1, i1 0
- %nop13076 = alloca i1, i1 0
- %nop13077 = alloca i1, i1 0
- %nop13078 = alloca i1, i1 0
- %nop13079 = alloca i1, i1 0
- %nop13080 = alloca i1, i1 0
- %nop13081 = alloca i1, i1 0
- %nop13082 = alloca i1, i1 0
- %nop13083 = alloca i1, i1 0
- %nop13084 = alloca i1, i1 0
- %nop13085 = alloca i1, i1 0
- %nop13086 = alloca i1, i1 0
- %nop13087 = alloca i1, i1 0
- %nop13088 = alloca i1, i1 0
- %nop13089 = alloca i1, i1 0
- %nop13090 = alloca i1, i1 0
- %nop13091 = alloca i1, i1 0
- %nop13092 = alloca i1, i1 0
- %nop13093 = alloca i1, i1 0
- %nop13094 = alloca i1, i1 0
- %nop13095 = alloca i1, i1 0
- %nop13096 = alloca i1, i1 0
- %nop13097 = alloca i1, i1 0
- %nop13098 = alloca i1, i1 0
- %nop13099 = alloca i1, i1 0
- %nop13100 = alloca i1, i1 0
- %nop13101 = alloca i1, i1 0
- %nop13102 = alloca i1, i1 0
- %nop13103 = alloca i1, i1 0
- %nop13104 = alloca i1, i1 0
- %nop13105 = alloca i1, i1 0
- %nop13106 = alloca i1, i1 0
- %nop13107 = alloca i1, i1 0
- %nop13108 = alloca i1, i1 0
- %nop13109 = alloca i1, i1 0
- %nop13110 = alloca i1, i1 0
- %nop13111 = alloca i1, i1 0
- %nop13112 = alloca i1, i1 0
- %nop13113 = alloca i1, i1 0
- %nop13114 = alloca i1, i1 0
- %nop13115 = alloca i1, i1 0
- %nop13116 = alloca i1, i1 0
- %nop13117 = alloca i1, i1 0
- %nop13118 = alloca i1, i1 0
- %nop13119 = alloca i1, i1 0
- %nop13120 = alloca i1, i1 0
- %nop13121 = alloca i1, i1 0
- %nop13122 = alloca i1, i1 0
- %nop13123 = alloca i1, i1 0
- %nop13124 = alloca i1, i1 0
- %nop13125 = alloca i1, i1 0
- %nop13126 = alloca i1, i1 0
- %nop13127 = alloca i1, i1 0
- %nop13128 = alloca i1, i1 0
- %nop13129 = alloca i1, i1 0
- %nop13130 = alloca i1, i1 0
- %nop13131 = alloca i1, i1 0
- %nop13132 = alloca i1, i1 0
- %nop13133 = alloca i1, i1 0
- %nop13134 = alloca i1, i1 0
- %nop13135 = alloca i1, i1 0
- %nop13136 = alloca i1, i1 0
- %nop13137 = alloca i1, i1 0
- %nop13138 = alloca i1, i1 0
- %nop13139 = alloca i1, i1 0
- %nop13140 = alloca i1, i1 0
- %nop13141 = alloca i1, i1 0
- %nop13142 = alloca i1, i1 0
- %nop13143 = alloca i1, i1 0
- %nop13144 = alloca i1, i1 0
- %nop13145 = alloca i1, i1 0
- %nop13146 = alloca i1, i1 0
- %nop13147 = alloca i1, i1 0
- %nop13148 = alloca i1, i1 0
- %nop13149 = alloca i1, i1 0
- %nop13150 = alloca i1, i1 0
- %nop13151 = alloca i1, i1 0
- %nop13152 = alloca i1, i1 0
- %nop13153 = alloca i1, i1 0
- %nop13154 = alloca i1, i1 0
- %nop13155 = alloca i1, i1 0
- %nop13156 = alloca i1, i1 0
- %nop13157 = alloca i1, i1 0
- %nop13158 = alloca i1, i1 0
- %nop13159 = alloca i1, i1 0
- %nop13160 = alloca i1, i1 0
- %nop13161 = alloca i1, i1 0
- %nop13162 = alloca i1, i1 0
- %nop13163 = alloca i1, i1 0
- %nop13164 = alloca i1, i1 0
- %nop13165 = alloca i1, i1 0
- %nop13166 = alloca i1, i1 0
- %nop13167 = alloca i1, i1 0
- %nop13168 = alloca i1, i1 0
- %nop13169 = alloca i1, i1 0
- %nop13170 = alloca i1, i1 0
- %nop13171 = alloca i1, i1 0
- %nop13172 = alloca i1, i1 0
- %nop13173 = alloca i1, i1 0
- %nop13174 = alloca i1, i1 0
- %nop13175 = alloca i1, i1 0
- %nop13176 = alloca i1, i1 0
- %nop13177 = alloca i1, i1 0
- %nop13178 = alloca i1, i1 0
- %nop13179 = alloca i1, i1 0
- %nop13180 = alloca i1, i1 0
- %nop13181 = alloca i1, i1 0
- %nop13182 = alloca i1, i1 0
- %nop13183 = alloca i1, i1 0
- %nop13184 = alloca i1, i1 0
- %nop13185 = alloca i1, i1 0
- %nop13186 = alloca i1, i1 0
- %nop13187 = alloca i1, i1 0
- %nop13188 = alloca i1, i1 0
- %nop13189 = alloca i1, i1 0
- %nop13190 = alloca i1, i1 0
- %nop13191 = alloca i1, i1 0
- %nop13192 = alloca i1, i1 0
- %nop13193 = alloca i1, i1 0
- %nop13194 = alloca i1, i1 0
- %nop13195 = alloca i1, i1 0
- %nop13196 = alloca i1, i1 0
- %nop13197 = alloca i1, i1 0
- %nop13198 = alloca i1, i1 0
- %nop13199 = alloca i1, i1 0
- %nop13200 = alloca i1, i1 0
- %nop13201 = alloca i1, i1 0
- %nop13202 = alloca i1, i1 0
- %nop13203 = alloca i1, i1 0
- %nop13204 = alloca i1, i1 0
- %nop13205 = alloca i1, i1 0
- %nop13206 = alloca i1, i1 0
- %nop13207 = alloca i1, i1 0
- %nop13208 = alloca i1, i1 0
- %nop13209 = alloca i1, i1 0
- %nop13210 = alloca i1, i1 0
- %nop13211 = alloca i1, i1 0
- %nop13212 = alloca i1, i1 0
- %nop13213 = alloca i1, i1 0
- %nop13214 = alloca i1, i1 0
- %nop13215 = alloca i1, i1 0
- %nop13216 = alloca i1, i1 0
- %nop13217 = alloca i1, i1 0
- %nop13218 = alloca i1, i1 0
- %nop13219 = alloca i1, i1 0
- %nop13220 = alloca i1, i1 0
- %nop13221 = alloca i1, i1 0
- %nop13222 = alloca i1, i1 0
- %nop13223 = alloca i1, i1 0
- %nop13224 = alloca i1, i1 0
- %nop13225 = alloca i1, i1 0
- %nop13226 = alloca i1, i1 0
- %nop13227 = alloca i1, i1 0
- %nop13228 = alloca i1, i1 0
- %nop13229 = alloca i1, i1 0
- %nop13230 = alloca i1, i1 0
- %nop13231 = alloca i1, i1 0
- %nop13232 = alloca i1, i1 0
- %nop13233 = alloca i1, i1 0
- %nop13234 = alloca i1, i1 0
- %nop13235 = alloca i1, i1 0
- %nop13236 = alloca i1, i1 0
- %nop13237 = alloca i1, i1 0
- %nop13238 = alloca i1, i1 0
- %nop13239 = alloca i1, i1 0
- %nop13240 = alloca i1, i1 0
- %nop13241 = alloca i1, i1 0
- %nop13242 = alloca i1, i1 0
- %nop13243 = alloca i1, i1 0
- %nop13244 = alloca i1, i1 0
- %nop13245 = alloca i1, i1 0
- %nop13246 = alloca i1, i1 0
- %nop13247 = alloca i1, i1 0
- %nop13248 = alloca i1, i1 0
- %nop13249 = alloca i1, i1 0
- %nop13250 = alloca i1, i1 0
- %nop13251 = alloca i1, i1 0
- %nop13252 = alloca i1, i1 0
- %nop13253 = alloca i1, i1 0
- %nop13254 = alloca i1, i1 0
- %nop13255 = alloca i1, i1 0
- %nop13256 = alloca i1, i1 0
- %nop13257 = alloca i1, i1 0
- %nop13258 = alloca i1, i1 0
- %nop13259 = alloca i1, i1 0
- %nop13260 = alloca i1, i1 0
- %nop13261 = alloca i1, i1 0
- %nop13262 = alloca i1, i1 0
- %nop13263 = alloca i1, i1 0
- %nop13264 = alloca i1, i1 0
- %nop13265 = alloca i1, i1 0
- %nop13266 = alloca i1, i1 0
- %nop13267 = alloca i1, i1 0
- %nop13268 = alloca i1, i1 0
- %nop13269 = alloca i1, i1 0
- %nop13270 = alloca i1, i1 0
- %nop13271 = alloca i1, i1 0
- %nop13272 = alloca i1, i1 0
- %nop13273 = alloca i1, i1 0
- %nop13274 = alloca i1, i1 0
- %nop13275 = alloca i1, i1 0
- %nop13276 = alloca i1, i1 0
- %nop13277 = alloca i1, i1 0
- %nop13278 = alloca i1, i1 0
- %nop13279 = alloca i1, i1 0
- %nop13280 = alloca i1, i1 0
- %nop13281 = alloca i1, i1 0
- %nop13282 = alloca i1, i1 0
- %nop13283 = alloca i1, i1 0
- %nop13284 = alloca i1, i1 0
- %nop13285 = alloca i1, i1 0
- %nop13286 = alloca i1, i1 0
- %nop13287 = alloca i1, i1 0
- %nop13288 = alloca i1, i1 0
- %nop13289 = alloca i1, i1 0
- %nop13290 = alloca i1, i1 0
- %nop13291 = alloca i1, i1 0
- %nop13292 = alloca i1, i1 0
- %nop13293 = alloca i1, i1 0
- %nop13294 = alloca i1, i1 0
- %nop13295 = alloca i1, i1 0
- %nop13296 = alloca i1, i1 0
- %nop13297 = alloca i1, i1 0
- %nop13298 = alloca i1, i1 0
- %nop13299 = alloca i1, i1 0
- %nop13300 = alloca i1, i1 0
- %nop13301 = alloca i1, i1 0
- %nop13302 = alloca i1, i1 0
- %nop13303 = alloca i1, i1 0
- %nop13304 = alloca i1, i1 0
- %nop13305 = alloca i1, i1 0
- %nop13306 = alloca i1, i1 0
- %nop13307 = alloca i1, i1 0
- %nop13308 = alloca i1, i1 0
- %nop13309 = alloca i1, i1 0
- %nop13310 = alloca i1, i1 0
- %nop13311 = alloca i1, i1 0
- %nop13312 = alloca i1, i1 0
- %nop13313 = alloca i1, i1 0
- %nop13314 = alloca i1, i1 0
- %nop13315 = alloca i1, i1 0
- %nop13316 = alloca i1, i1 0
- %nop13317 = alloca i1, i1 0
- %nop13318 = alloca i1, i1 0
- %nop13319 = alloca i1, i1 0
- %nop13320 = alloca i1, i1 0
- %nop13321 = alloca i1, i1 0
- %nop13322 = alloca i1, i1 0
- %nop13323 = alloca i1, i1 0
- %nop13324 = alloca i1, i1 0
- %nop13325 = alloca i1, i1 0
- %nop13326 = alloca i1, i1 0
- %nop13327 = alloca i1, i1 0
- %nop13328 = alloca i1, i1 0
- %nop13329 = alloca i1, i1 0
- %nop13330 = alloca i1, i1 0
- %nop13331 = alloca i1, i1 0
- %nop13332 = alloca i1, i1 0
- %nop13333 = alloca i1, i1 0
- %nop13334 = alloca i1, i1 0
- %nop13335 = alloca i1, i1 0
- %nop13336 = alloca i1, i1 0
- %nop13337 = alloca i1, i1 0
- %nop13338 = alloca i1, i1 0
- %nop13339 = alloca i1, i1 0
- %nop13340 = alloca i1, i1 0
- %nop13341 = alloca i1, i1 0
- %nop13342 = alloca i1, i1 0
- %nop13343 = alloca i1, i1 0
- %nop13344 = alloca i1, i1 0
- %nop13345 = alloca i1, i1 0
- %nop13346 = alloca i1, i1 0
- %nop13347 = alloca i1, i1 0
- %nop13348 = alloca i1, i1 0
- %nop13349 = alloca i1, i1 0
- %nop13350 = alloca i1, i1 0
- %nop13351 = alloca i1, i1 0
- %nop13352 = alloca i1, i1 0
- %nop13353 = alloca i1, i1 0
- %nop13354 = alloca i1, i1 0
- %nop13355 = alloca i1, i1 0
- %nop13356 = alloca i1, i1 0
- %nop13357 = alloca i1, i1 0
- %nop13358 = alloca i1, i1 0
- %nop13359 = alloca i1, i1 0
- %nop13360 = alloca i1, i1 0
- %nop13361 = alloca i1, i1 0
- %nop13362 = alloca i1, i1 0
- %nop13363 = alloca i1, i1 0
- %nop13364 = alloca i1, i1 0
- %nop13365 = alloca i1, i1 0
- %nop13366 = alloca i1, i1 0
- %nop13367 = alloca i1, i1 0
- %nop13368 = alloca i1, i1 0
- %nop13369 = alloca i1, i1 0
- %nop13370 = alloca i1, i1 0
- %nop13371 = alloca i1, i1 0
- %nop13372 = alloca i1, i1 0
- %nop13373 = alloca i1, i1 0
- %nop13374 = alloca i1, i1 0
- %nop13375 = alloca i1, i1 0
- %nop13376 = alloca i1, i1 0
- %nop13377 = alloca i1, i1 0
- %nop13378 = alloca i1, i1 0
- %nop13379 = alloca i1, i1 0
- %nop13380 = alloca i1, i1 0
- %nop13381 = alloca i1, i1 0
- %nop13382 = alloca i1, i1 0
[... 2,133 further removed lines elided: the hunk continues unchanged as "- %nopN = alloca i1, i1 0" for every N from 13383 through 15515 ...]
- %nop15516 = alloca i1, i1 0
- %nop15517 = alloca i1, i1 0
- %nop15518 = alloca i1, i1 0
- %nop15519 = alloca i1, i1 0
- %nop15520 = alloca i1, i1 0
- %nop15521 = alloca i1, i1 0
- %nop15522 = alloca i1, i1 0
- %nop15523 = alloca i1, i1 0
- %nop15524 = alloca i1, i1 0
- %nop15525 = alloca i1, i1 0
- %nop15526 = alloca i1, i1 0
- %nop15527 = alloca i1, i1 0
- %nop15528 = alloca i1, i1 0
- %nop15529 = alloca i1, i1 0
- %nop15530 = alloca i1, i1 0
- %nop15531 = alloca i1, i1 0
- %nop15532 = alloca i1, i1 0
- %nop15533 = alloca i1, i1 0
- %nop15534 = alloca i1, i1 0
- %nop15535 = alloca i1, i1 0
- %nop15536 = alloca i1, i1 0
- %nop15537 = alloca i1, i1 0
- %nop15538 = alloca i1, i1 0
- %nop15539 = alloca i1, i1 0
- %nop15540 = alloca i1, i1 0
- %nop15541 = alloca i1, i1 0
- %nop15542 = alloca i1, i1 0
- %nop15543 = alloca i1, i1 0
- %nop15544 = alloca i1, i1 0
- %nop15545 = alloca i1, i1 0
- %nop15546 = alloca i1, i1 0
- %nop15547 = alloca i1, i1 0
- %nop15548 = alloca i1, i1 0
- %nop15549 = alloca i1, i1 0
- %nop15550 = alloca i1, i1 0
- %nop15551 = alloca i1, i1 0
- %nop15552 = alloca i1, i1 0
- %nop15553 = alloca i1, i1 0
- %nop15554 = alloca i1, i1 0
- %nop15555 = alloca i1, i1 0
- %nop15556 = alloca i1, i1 0
- %nop15557 = alloca i1, i1 0
- %nop15558 = alloca i1, i1 0
- %nop15559 = alloca i1, i1 0
- %nop15560 = alloca i1, i1 0
- %nop15561 = alloca i1, i1 0
- %nop15562 = alloca i1, i1 0
- %nop15563 = alloca i1, i1 0
- %nop15564 = alloca i1, i1 0
- %nop15565 = alloca i1, i1 0
- %nop15566 = alloca i1, i1 0
- %nop15567 = alloca i1, i1 0
- %nop15568 = alloca i1, i1 0
- %nop15569 = alloca i1, i1 0
- %nop15570 = alloca i1, i1 0
- %nop15571 = alloca i1, i1 0
- %nop15572 = alloca i1, i1 0
- %nop15573 = alloca i1, i1 0
- %nop15574 = alloca i1, i1 0
- %nop15575 = alloca i1, i1 0
- %nop15576 = alloca i1, i1 0
- %nop15577 = alloca i1, i1 0
- %nop15578 = alloca i1, i1 0
- %nop15579 = alloca i1, i1 0
- %nop15580 = alloca i1, i1 0
- %nop15581 = alloca i1, i1 0
- %nop15582 = alloca i1, i1 0
- %nop15583 = alloca i1, i1 0
- %nop15584 = alloca i1, i1 0
- %nop15585 = alloca i1, i1 0
- %nop15586 = alloca i1, i1 0
- %nop15587 = alloca i1, i1 0
- %nop15588 = alloca i1, i1 0
- %nop15589 = alloca i1, i1 0
- %nop15590 = alloca i1, i1 0
- %nop15591 = alloca i1, i1 0
- %nop15592 = alloca i1, i1 0
- %nop15593 = alloca i1, i1 0
- %nop15594 = alloca i1, i1 0
- %nop15595 = alloca i1, i1 0
- %nop15596 = alloca i1, i1 0
- %nop15597 = alloca i1, i1 0
- %nop15598 = alloca i1, i1 0
- %nop15599 = alloca i1, i1 0
- %nop15600 = alloca i1, i1 0
- %nop15601 = alloca i1, i1 0
- %nop15602 = alloca i1, i1 0
- %nop15603 = alloca i1, i1 0
- %nop15604 = alloca i1, i1 0
- %nop15605 = alloca i1, i1 0
- %nop15606 = alloca i1, i1 0
- %nop15607 = alloca i1, i1 0
- %nop15608 = alloca i1, i1 0
- %nop15609 = alloca i1, i1 0
- %nop15610 = alloca i1, i1 0
- %nop15611 = alloca i1, i1 0
- %nop15612 = alloca i1, i1 0
- %nop15613 = alloca i1, i1 0
- %nop15614 = alloca i1, i1 0
- %nop15615 = alloca i1, i1 0
- %nop15616 = alloca i1, i1 0
- %nop15617 = alloca i1, i1 0
- %nop15618 = alloca i1, i1 0
- %nop15619 = alloca i1, i1 0
- %nop15620 = alloca i1, i1 0
- %nop15621 = alloca i1, i1 0
- %nop15622 = alloca i1, i1 0
- %nop15623 = alloca i1, i1 0
- %nop15624 = alloca i1, i1 0
- %nop15625 = alloca i1, i1 0
- %nop15626 = alloca i1, i1 0
- %nop15627 = alloca i1, i1 0
- %nop15628 = alloca i1, i1 0
- %nop15629 = alloca i1, i1 0
- %nop15630 = alloca i1, i1 0
- %nop15631 = alloca i1, i1 0
- %nop15632 = alloca i1, i1 0
- %nop15633 = alloca i1, i1 0
- %nop15634 = alloca i1, i1 0
- %nop15635 = alloca i1, i1 0
- %nop15636 = alloca i1, i1 0
- %nop15637 = alloca i1, i1 0
- %nop15638 = alloca i1, i1 0
- %nop15639 = alloca i1, i1 0
- %nop15640 = alloca i1, i1 0
- %nop15641 = alloca i1, i1 0
- %nop15642 = alloca i1, i1 0
- %nop15643 = alloca i1, i1 0
- %nop15644 = alloca i1, i1 0
- %nop15645 = alloca i1, i1 0
- %nop15646 = alloca i1, i1 0
- %nop15647 = alloca i1, i1 0
- %nop15648 = alloca i1, i1 0
- %nop15649 = alloca i1, i1 0
- %nop15650 = alloca i1, i1 0
- %nop15651 = alloca i1, i1 0
- %nop15652 = alloca i1, i1 0
- %nop15653 = alloca i1, i1 0
- %nop15654 = alloca i1, i1 0
- %nop15655 = alloca i1, i1 0
- %nop15656 = alloca i1, i1 0
- %nop15657 = alloca i1, i1 0
- %nop15658 = alloca i1, i1 0
- %nop15659 = alloca i1, i1 0
- %nop15660 = alloca i1, i1 0
- %nop15661 = alloca i1, i1 0
- %nop15662 = alloca i1, i1 0
- %nop15663 = alloca i1, i1 0
- %nop15664 = alloca i1, i1 0
- %nop15665 = alloca i1, i1 0
- %nop15666 = alloca i1, i1 0
- %nop15667 = alloca i1, i1 0
- %nop15668 = alloca i1, i1 0
- %nop15669 = alloca i1, i1 0
- %nop15670 = alloca i1, i1 0
- %nop15671 = alloca i1, i1 0
- %nop15672 = alloca i1, i1 0
- %nop15673 = alloca i1, i1 0
- %nop15674 = alloca i1, i1 0
- %nop15675 = alloca i1, i1 0
- %nop15676 = alloca i1, i1 0
- %nop15677 = alloca i1, i1 0
- %nop15678 = alloca i1, i1 0
- %nop15679 = alloca i1, i1 0
- %nop15680 = alloca i1, i1 0
- %nop15681 = alloca i1, i1 0
- %nop15682 = alloca i1, i1 0
- %nop15683 = alloca i1, i1 0
- %nop15684 = alloca i1, i1 0
- %nop15685 = alloca i1, i1 0
- %nop15686 = alloca i1, i1 0
- %nop15687 = alloca i1, i1 0
- %nop15688 = alloca i1, i1 0
- %nop15689 = alloca i1, i1 0
- %nop15690 = alloca i1, i1 0
- %nop15691 = alloca i1, i1 0
- %nop15692 = alloca i1, i1 0
- %nop15693 = alloca i1, i1 0
- %nop15694 = alloca i1, i1 0
- %nop15695 = alloca i1, i1 0
- %nop15696 = alloca i1, i1 0
- %nop15697 = alloca i1, i1 0
- %nop15698 = alloca i1, i1 0
- %nop15699 = alloca i1, i1 0
- %nop15700 = alloca i1, i1 0
- %nop15701 = alloca i1, i1 0
- %nop15702 = alloca i1, i1 0
- %nop15703 = alloca i1, i1 0
- %nop15704 = alloca i1, i1 0
- %nop15705 = alloca i1, i1 0
- %nop15706 = alloca i1, i1 0
- %nop15707 = alloca i1, i1 0
- %nop15708 = alloca i1, i1 0
- %nop15709 = alloca i1, i1 0
- %nop15710 = alloca i1, i1 0
- %nop15711 = alloca i1, i1 0
- %nop15712 = alloca i1, i1 0
- %nop15713 = alloca i1, i1 0
- %nop15714 = alloca i1, i1 0
- %nop15715 = alloca i1, i1 0
- %nop15716 = alloca i1, i1 0
- %nop15717 = alloca i1, i1 0
- %nop15718 = alloca i1, i1 0
- %nop15719 = alloca i1, i1 0
- %nop15720 = alloca i1, i1 0
- %nop15721 = alloca i1, i1 0
- %nop15722 = alloca i1, i1 0
- %nop15723 = alloca i1, i1 0
- %nop15724 = alloca i1, i1 0
- %nop15725 = alloca i1, i1 0
- %nop15726 = alloca i1, i1 0
- %nop15727 = alloca i1, i1 0
- %nop15728 = alloca i1, i1 0
- %nop15729 = alloca i1, i1 0
- %nop15730 = alloca i1, i1 0
- %nop15731 = alloca i1, i1 0
- %nop15732 = alloca i1, i1 0
- %nop15733 = alloca i1, i1 0
- %nop15734 = alloca i1, i1 0
- %nop15735 = alloca i1, i1 0
- %nop15736 = alloca i1, i1 0
- %nop15737 = alloca i1, i1 0
- %nop15738 = alloca i1, i1 0
- %nop15739 = alloca i1, i1 0
- %nop15740 = alloca i1, i1 0
- %nop15741 = alloca i1, i1 0
- %nop15742 = alloca i1, i1 0
- %nop15743 = alloca i1, i1 0
- %nop15744 = alloca i1, i1 0
- %nop15745 = alloca i1, i1 0
- %nop15746 = alloca i1, i1 0
- %nop15747 = alloca i1, i1 0
- %nop15748 = alloca i1, i1 0
- %nop15749 = alloca i1, i1 0
- %nop15750 = alloca i1, i1 0
- %nop15751 = alloca i1, i1 0
- %nop15752 = alloca i1, i1 0
- %nop15753 = alloca i1, i1 0
- %nop15754 = alloca i1, i1 0
- %nop15755 = alloca i1, i1 0
- %nop15756 = alloca i1, i1 0
- %nop15757 = alloca i1, i1 0
- %nop15758 = alloca i1, i1 0
- %nop15759 = alloca i1, i1 0
- %nop15760 = alloca i1, i1 0
- %nop15761 = alloca i1, i1 0
- %nop15762 = alloca i1, i1 0
- %nop15763 = alloca i1, i1 0
- %nop15764 = alloca i1, i1 0
- %nop15765 = alloca i1, i1 0
- %nop15766 = alloca i1, i1 0
- %nop15767 = alloca i1, i1 0
- %nop15768 = alloca i1, i1 0
- %nop15769 = alloca i1, i1 0
- %nop15770 = alloca i1, i1 0
- %nop15771 = alloca i1, i1 0
- %nop15772 = alloca i1, i1 0
- %nop15773 = alloca i1, i1 0
- %nop15774 = alloca i1, i1 0
- %nop15775 = alloca i1, i1 0
- %nop15776 = alloca i1, i1 0
- %nop15777 = alloca i1, i1 0
- %nop15778 = alloca i1, i1 0
- %nop15779 = alloca i1, i1 0
- %nop15780 = alloca i1, i1 0
- %nop15781 = alloca i1, i1 0
- %nop15782 = alloca i1, i1 0
- %nop15783 = alloca i1, i1 0
- %nop15784 = alloca i1, i1 0
- %nop15785 = alloca i1, i1 0
- %nop15786 = alloca i1, i1 0
- %nop15787 = alloca i1, i1 0
- %nop15788 = alloca i1, i1 0
- %nop15789 = alloca i1, i1 0
- %nop15790 = alloca i1, i1 0
- %nop15791 = alloca i1, i1 0
- %nop15792 = alloca i1, i1 0
- %nop15793 = alloca i1, i1 0
- %nop15794 = alloca i1, i1 0
- %nop15795 = alloca i1, i1 0
- %nop15796 = alloca i1, i1 0
- %nop15797 = alloca i1, i1 0
- %nop15798 = alloca i1, i1 0
- %nop15799 = alloca i1, i1 0
- %nop15800 = alloca i1, i1 0
- %nop15801 = alloca i1, i1 0
- %nop15802 = alloca i1, i1 0
- %nop15803 = alloca i1, i1 0
- %nop15804 = alloca i1, i1 0
- %nop15805 = alloca i1, i1 0
- %nop15806 = alloca i1, i1 0
- %nop15807 = alloca i1, i1 0
- %nop15808 = alloca i1, i1 0
- %nop15809 = alloca i1, i1 0
- %nop15810 = alloca i1, i1 0
- %nop15811 = alloca i1, i1 0
- %nop15812 = alloca i1, i1 0
- %nop15813 = alloca i1, i1 0
- %nop15814 = alloca i1, i1 0
- %nop15815 = alloca i1, i1 0
- %nop15816 = alloca i1, i1 0
- %nop15817 = alloca i1, i1 0
- %nop15818 = alloca i1, i1 0
- %nop15819 = alloca i1, i1 0
- %nop15820 = alloca i1, i1 0
- %nop15821 = alloca i1, i1 0
- %nop15822 = alloca i1, i1 0
- %nop15823 = alloca i1, i1 0
- %nop15824 = alloca i1, i1 0
- %nop15825 = alloca i1, i1 0
- %nop15826 = alloca i1, i1 0
- %nop15827 = alloca i1, i1 0
- %nop15828 = alloca i1, i1 0
- %nop15829 = alloca i1, i1 0
- %nop15830 = alloca i1, i1 0
- %nop15831 = alloca i1, i1 0
- %nop15832 = alloca i1, i1 0
- %nop15833 = alloca i1, i1 0
- %nop15834 = alloca i1, i1 0
- %nop15835 = alloca i1, i1 0
- %nop15836 = alloca i1, i1 0
- %nop15837 = alloca i1, i1 0
- %nop15838 = alloca i1, i1 0
- %nop15839 = alloca i1, i1 0
- %nop15840 = alloca i1, i1 0
- %nop15841 = alloca i1, i1 0
- %nop15842 = alloca i1, i1 0
- %nop15843 = alloca i1, i1 0
- %nop15844 = alloca i1, i1 0
- %nop15845 = alloca i1, i1 0
- %nop15846 = alloca i1, i1 0
- %nop15847 = alloca i1, i1 0
- %nop15848 = alloca i1, i1 0
- %nop15849 = alloca i1, i1 0
- %nop15850 = alloca i1, i1 0
- %nop15851 = alloca i1, i1 0
- %nop15852 = alloca i1, i1 0
- %nop15853 = alloca i1, i1 0
- %nop15854 = alloca i1, i1 0
- %nop15855 = alloca i1, i1 0
- %nop15856 = alloca i1, i1 0
- %nop15857 = alloca i1, i1 0
- %nop15858 = alloca i1, i1 0
- %nop15859 = alloca i1, i1 0
- %nop15860 = alloca i1, i1 0
- %nop15861 = alloca i1, i1 0
- %nop15862 = alloca i1, i1 0
- %nop15863 = alloca i1, i1 0
- %nop15864 = alloca i1, i1 0
- %nop15865 = alloca i1, i1 0
- %nop15866 = alloca i1, i1 0
- %nop15867 = alloca i1, i1 0
- %nop15868 = alloca i1, i1 0
- %nop15869 = alloca i1, i1 0
- %nop15870 = alloca i1, i1 0
- %nop15871 = alloca i1, i1 0
- %nop15872 = alloca i1, i1 0
- %nop15873 = alloca i1, i1 0
- %nop15874 = alloca i1, i1 0
- %nop15875 = alloca i1, i1 0
- %nop15876 = alloca i1, i1 0
- %nop15877 = alloca i1, i1 0
- %nop15878 = alloca i1, i1 0
- %nop15879 = alloca i1, i1 0
- %nop15880 = alloca i1, i1 0
- %nop15881 = alloca i1, i1 0
- %nop15882 = alloca i1, i1 0
- %nop15883 = alloca i1, i1 0
- %nop15884 = alloca i1, i1 0
- %nop15885 = alloca i1, i1 0
- %nop15886 = alloca i1, i1 0
- %nop15887 = alloca i1, i1 0
- %nop15888 = alloca i1, i1 0
- %nop15889 = alloca i1, i1 0
- %nop15890 = alloca i1, i1 0
- %nop15891 = alloca i1, i1 0
- %nop15892 = alloca i1, i1 0
- %nop15893 = alloca i1, i1 0
- %nop15894 = alloca i1, i1 0
- %nop15895 = alloca i1, i1 0
- %nop15896 = alloca i1, i1 0
- %nop15897 = alloca i1, i1 0
- %nop15898 = alloca i1, i1 0
- %nop15899 = alloca i1, i1 0
- %nop15900 = alloca i1, i1 0
- %nop15901 = alloca i1, i1 0
- %nop15902 = alloca i1, i1 0
- %nop15903 = alloca i1, i1 0
- %nop15904 = alloca i1, i1 0
- %nop15905 = alloca i1, i1 0
- %nop15906 = alloca i1, i1 0
- %nop15907 = alloca i1, i1 0
- %nop15908 = alloca i1, i1 0
- %nop15909 = alloca i1, i1 0
- %nop15910 = alloca i1, i1 0
- %nop15911 = alloca i1, i1 0
- %nop15912 = alloca i1, i1 0
- %nop15913 = alloca i1, i1 0
- %nop15914 = alloca i1, i1 0
- %nop15915 = alloca i1, i1 0
- %nop15916 = alloca i1, i1 0
- %nop15917 = alloca i1, i1 0
- %nop15918 = alloca i1, i1 0
- %nop15919 = alloca i1, i1 0
- %nop15920 = alloca i1, i1 0
- %nop15921 = alloca i1, i1 0
- %nop15922 = alloca i1, i1 0
- %nop15923 = alloca i1, i1 0
- %nop15924 = alloca i1, i1 0
- %nop15925 = alloca i1, i1 0
- %nop15926 = alloca i1, i1 0
- %nop15927 = alloca i1, i1 0
- %nop15928 = alloca i1, i1 0
- %nop15929 = alloca i1, i1 0
- %nop15930 = alloca i1, i1 0
- %nop15931 = alloca i1, i1 0
- %nop15932 = alloca i1, i1 0
- %nop15933 = alloca i1, i1 0
- %nop15934 = alloca i1, i1 0
- %nop15935 = alloca i1, i1 0
- %nop15936 = alloca i1, i1 0
- %nop15937 = alloca i1, i1 0
- %nop15938 = alloca i1, i1 0
- %nop15939 = alloca i1, i1 0
- %nop15940 = alloca i1, i1 0
- %nop15941 = alloca i1, i1 0
- %nop15942 = alloca i1, i1 0
- %nop15943 = alloca i1, i1 0
- %nop15944 = alloca i1, i1 0
- %nop15945 = alloca i1, i1 0
- %nop15946 = alloca i1, i1 0
- %nop15947 = alloca i1, i1 0
- %nop15948 = alloca i1, i1 0
- %nop15949 = alloca i1, i1 0
- %nop15950 = alloca i1, i1 0
- %nop15951 = alloca i1, i1 0
- %nop15952 = alloca i1, i1 0
- %nop15953 = alloca i1, i1 0
- %nop15954 = alloca i1, i1 0
- %nop15955 = alloca i1, i1 0
- %nop15956 = alloca i1, i1 0
- %nop15957 = alloca i1, i1 0
- %nop15958 = alloca i1, i1 0
- %nop15959 = alloca i1, i1 0
- %nop15960 = alloca i1, i1 0
- %nop15961 = alloca i1, i1 0
- %nop15962 = alloca i1, i1 0
- %nop15963 = alloca i1, i1 0
- %nop15964 = alloca i1, i1 0
- %nop15965 = alloca i1, i1 0
- %nop15966 = alloca i1, i1 0
- %nop15967 = alloca i1, i1 0
- %nop15968 = alloca i1, i1 0
- %nop15969 = alloca i1, i1 0
- %nop15970 = alloca i1, i1 0
- %nop15971 = alloca i1, i1 0
- %nop15972 = alloca i1, i1 0
- %nop15973 = alloca i1, i1 0
- %nop15974 = alloca i1, i1 0
- %nop15975 = alloca i1, i1 0
- %nop15976 = alloca i1, i1 0
- %nop15977 = alloca i1, i1 0
- %nop15978 = alloca i1, i1 0
- %nop15979 = alloca i1, i1 0
- %nop15980 = alloca i1, i1 0
- %nop15981 = alloca i1, i1 0
- %nop15982 = alloca i1, i1 0
- %nop15983 = alloca i1, i1 0
- %nop15984 = alloca i1, i1 0
- %nop15985 = alloca i1, i1 0
- %nop15986 = alloca i1, i1 0
- %nop15987 = alloca i1, i1 0
- %nop15988 = alloca i1, i1 0
- %nop15989 = alloca i1, i1 0
- %nop15990 = alloca i1, i1 0
- %nop15991 = alloca i1, i1 0
- %nop15992 = alloca i1, i1 0
- %nop15993 = alloca i1, i1 0
- %nop15994 = alloca i1, i1 0
- %nop15995 = alloca i1, i1 0
- %nop15996 = alloca i1, i1 0
- %nop15997 = alloca i1, i1 0
- %nop15998 = alloca i1, i1 0
- %nop15999 = alloca i1, i1 0
- %nop16000 = alloca i1, i1 0
- %nop16001 = alloca i1, i1 0
- %nop16002 = alloca i1, i1 0
- %nop16003 = alloca i1, i1 0
- %nop16004 = alloca i1, i1 0
- %nop16005 = alloca i1, i1 0
- %nop16006 = alloca i1, i1 0
- %nop16007 = alloca i1, i1 0
- %nop16008 = alloca i1, i1 0
- %nop16009 = alloca i1, i1 0
- %nop16010 = alloca i1, i1 0
- %nop16011 = alloca i1, i1 0
- %nop16012 = alloca i1, i1 0
- %nop16013 = alloca i1, i1 0
- %nop16014 = alloca i1, i1 0
- %nop16015 = alloca i1, i1 0
- %nop16016 = alloca i1, i1 0
- %nop16017 = alloca i1, i1 0
- %nop16018 = alloca i1, i1 0
- %nop16019 = alloca i1, i1 0
- %nop16020 = alloca i1, i1 0
- %nop16021 = alloca i1, i1 0
- %nop16022 = alloca i1, i1 0
- %nop16023 = alloca i1, i1 0
- %nop16024 = alloca i1, i1 0
- %nop16025 = alloca i1, i1 0
- %nop16026 = alloca i1, i1 0
- %nop16027 = alloca i1, i1 0
- %nop16028 = alloca i1, i1 0
- %nop16029 = alloca i1, i1 0
- %nop16030 = alloca i1, i1 0
- %nop16031 = alloca i1, i1 0
- %nop16032 = alloca i1, i1 0
- %nop16033 = alloca i1, i1 0
- %nop16034 = alloca i1, i1 0
- %nop16035 = alloca i1, i1 0
- %nop16036 = alloca i1, i1 0
- %nop16037 = alloca i1, i1 0
- %nop16038 = alloca i1, i1 0
- %nop16039 = alloca i1, i1 0
- %nop16040 = alloca i1, i1 0
- %nop16041 = alloca i1, i1 0
- %nop16042 = alloca i1, i1 0
- %nop16043 = alloca i1, i1 0
- %nop16044 = alloca i1, i1 0
- %nop16045 = alloca i1, i1 0
- %nop16046 = alloca i1, i1 0
- %nop16047 = alloca i1, i1 0
- %nop16048 = alloca i1, i1 0
- %nop16049 = alloca i1, i1 0
- %nop16050 = alloca i1, i1 0
- %nop16051 = alloca i1, i1 0
- %nop16052 = alloca i1, i1 0
- %nop16053 = alloca i1, i1 0
- %nop16054 = alloca i1, i1 0
- %nop16055 = alloca i1, i1 0
- %nop16056 = alloca i1, i1 0
- %nop16057 = alloca i1, i1 0
- %nop16058 = alloca i1, i1 0
- %nop16059 = alloca i1, i1 0
- %nop16060 = alloca i1, i1 0
- %nop16061 = alloca i1, i1 0
- %nop16062 = alloca i1, i1 0
- %nop16063 = alloca i1, i1 0
- %nop16064 = alloca i1, i1 0
- %nop16065 = alloca i1, i1 0
- %nop16066 = alloca i1, i1 0
- %nop16067 = alloca i1, i1 0
- %nop16068 = alloca i1, i1 0
- %nop16069 = alloca i1, i1 0
- %nop16070 = alloca i1, i1 0
- %nop16071 = alloca i1, i1 0
- %nop16072 = alloca i1, i1 0
- %nop16073 = alloca i1, i1 0
- %nop16074 = alloca i1, i1 0
- %nop16075 = alloca i1, i1 0
- %nop16076 = alloca i1, i1 0
- %nop16077 = alloca i1, i1 0
- %nop16078 = alloca i1, i1 0
- %nop16079 = alloca i1, i1 0
- %nop16080 = alloca i1, i1 0
- %nop16081 = alloca i1, i1 0
- %nop16082 = alloca i1, i1 0
- %nop16083 = alloca i1, i1 0
- %nop16084 = alloca i1, i1 0
- %nop16085 = alloca i1, i1 0
- %nop16086 = alloca i1, i1 0
- %nop16087 = alloca i1, i1 0
- %nop16088 = alloca i1, i1 0
- %nop16089 = alloca i1, i1 0
- %nop16090 = alloca i1, i1 0
- %nop16091 = alloca i1, i1 0
- %nop16092 = alloca i1, i1 0
- %nop16093 = alloca i1, i1 0
- %nop16094 = alloca i1, i1 0
- %nop16095 = alloca i1, i1 0
- %nop16096 = alloca i1, i1 0
- %nop16097 = alloca i1, i1 0
- %nop16098 = alloca i1, i1 0
- %nop16099 = alloca i1, i1 0
- %nop16100 = alloca i1, i1 0
- %nop16101 = alloca i1, i1 0
- %nop16102 = alloca i1, i1 0
- %nop16103 = alloca i1, i1 0
- %nop16104 = alloca i1, i1 0
- %nop16105 = alloca i1, i1 0
- %nop16106 = alloca i1, i1 0
- %nop16107 = alloca i1, i1 0
- %nop16108 = alloca i1, i1 0
- %nop16109 = alloca i1, i1 0
- %nop16110 = alloca i1, i1 0
- %nop16111 = alloca i1, i1 0
- %nop16112 = alloca i1, i1 0
- %nop16113 = alloca i1, i1 0
- %nop16114 = alloca i1, i1 0
- %nop16115 = alloca i1, i1 0
- %nop16116 = alloca i1, i1 0
- %nop16117 = alloca i1, i1 0
- %nop16118 = alloca i1, i1 0
- %nop16119 = alloca i1, i1 0
- %nop16120 = alloca i1, i1 0
- %nop16121 = alloca i1, i1 0
- %nop16122 = alloca i1, i1 0
- %nop16123 = alloca i1, i1 0
- %nop16124 = alloca i1, i1 0
- %nop16125 = alloca i1, i1 0
- %nop16126 = alloca i1, i1 0
- %nop16127 = alloca i1, i1 0
- %nop16128 = alloca i1, i1 0
- %nop16129 = alloca i1, i1 0
- %nop16130 = alloca i1, i1 0
- %nop16131 = alloca i1, i1 0
- %nop16132 = alloca i1, i1 0
- %nop16133 = alloca i1, i1 0
- %nop16134 = alloca i1, i1 0
- %nop16135 = alloca i1, i1 0
- %nop16136 = alloca i1, i1 0
- %nop16137 = alloca i1, i1 0
- %nop16138 = alloca i1, i1 0
- %nop16139 = alloca i1, i1 0
- %nop16140 = alloca i1, i1 0
- %nop16141 = alloca i1, i1 0
- %nop16142 = alloca i1, i1 0
- %nop16143 = alloca i1, i1 0
- %nop16144 = alloca i1, i1 0
- %nop16145 = alloca i1, i1 0
- %nop16146 = alloca i1, i1 0
- %nop16147 = alloca i1, i1 0
- %nop16148 = alloca i1, i1 0
- %nop16149 = alloca i1, i1 0
- %nop16150 = alloca i1, i1 0
- %nop16151 = alloca i1, i1 0
- %nop16152 = alloca i1, i1 0
- %nop16153 = alloca i1, i1 0
- %nop16154 = alloca i1, i1 0
- %nop16155 = alloca i1, i1 0
- %nop16156 = alloca i1, i1 0
- %nop16157 = alloca i1, i1 0
- %nop16158 = alloca i1, i1 0
- %nop16159 = alloca i1, i1 0
- %nop16160 = alloca i1, i1 0
- %nop16161 = alloca i1, i1 0
- %nop16162 = alloca i1, i1 0
- %nop16163 = alloca i1, i1 0
- %nop16164 = alloca i1, i1 0
- %nop16165 = alloca i1, i1 0
- %nop16166 = alloca i1, i1 0
- %nop16167 = alloca i1, i1 0
- %nop16168 = alloca i1, i1 0
- %nop16169 = alloca i1, i1 0
- %nop16170 = alloca i1, i1 0
- %nop16171 = alloca i1, i1 0
- %nop16172 = alloca i1, i1 0
- %nop16173 = alloca i1, i1 0
- %nop16174 = alloca i1, i1 0
- %nop16175 = alloca i1, i1 0
- %nop16176 = alloca i1, i1 0
- %nop16177 = alloca i1, i1 0
- %nop16178 = alloca i1, i1 0
- %nop16179 = alloca i1, i1 0
- %nop16180 = alloca i1, i1 0
- %nop16181 = alloca i1, i1 0
- %nop16182 = alloca i1, i1 0
- %nop16183 = alloca i1, i1 0
- %nop16184 = alloca i1, i1 0
- %nop16185 = alloca i1, i1 0
- %nop16186 = alloca i1, i1 0
- %nop16187 = alloca i1, i1 0
- %nop16188 = alloca i1, i1 0
- %nop16189 = alloca i1, i1 0
- %nop16190 = alloca i1, i1 0
- %nop16191 = alloca i1, i1 0
- %nop16192 = alloca i1, i1 0
- %nop16193 = alloca i1, i1 0
- %nop16194 = alloca i1, i1 0
- %nop16195 = alloca i1, i1 0
- %nop16196 = alloca i1, i1 0
- %nop16197 = alloca i1, i1 0
- %nop16198 = alloca i1, i1 0
- %nop16199 = alloca i1, i1 0
- %nop16200 = alloca i1, i1 0
- %nop16201 = alloca i1, i1 0
- %nop16202 = alloca i1, i1 0
- %nop16203 = alloca i1, i1 0
- %nop16204 = alloca i1, i1 0
- %nop16205 = alloca i1, i1 0
- %nop16206 = alloca i1, i1 0
- %nop16207 = alloca i1, i1 0
- %nop16208 = alloca i1, i1 0
- %nop16209 = alloca i1, i1 0
- %nop16210 = alloca i1, i1 0
- %nop16211 = alloca i1, i1 0
- %nop16212 = alloca i1, i1 0
- %nop16213 = alloca i1, i1 0
- %nop16214 = alloca i1, i1 0
- %nop16215 = alloca i1, i1 0
- %nop16216 = alloca i1, i1 0
- %nop16217 = alloca i1, i1 0
- %nop16218 = alloca i1, i1 0
- %nop16219 = alloca i1, i1 0
- %nop16220 = alloca i1, i1 0
- %nop16221 = alloca i1, i1 0
- %nop16222 = alloca i1, i1 0
- %nop16223 = alloca i1, i1 0
- %nop16224 = alloca i1, i1 0
- %nop16225 = alloca i1, i1 0
- %nop16226 = alloca i1, i1 0
- %nop16227 = alloca i1, i1 0
- %nop16228 = alloca i1, i1 0
- %nop16229 = alloca i1, i1 0
- %nop16230 = alloca i1, i1 0
- %nop16231 = alloca i1, i1 0
- %nop16232 = alloca i1, i1 0
- %nop16233 = alloca i1, i1 0
- %nop16234 = alloca i1, i1 0
- %nop16235 = alloca i1, i1 0
- %nop16236 = alloca i1, i1 0
- %nop16237 = alloca i1, i1 0
- %nop16238 = alloca i1, i1 0
- %nop16239 = alloca i1, i1 0
- %nop16240 = alloca i1, i1 0
- %nop16241 = alloca i1, i1 0
- %nop16242 = alloca i1, i1 0
- %nop16243 = alloca i1, i1 0
- %nop16244 = alloca i1, i1 0
- %nop16245 = alloca i1, i1 0
- %nop16246 = alloca i1, i1 0
- %nop16247 = alloca i1, i1 0
- %nop16248 = alloca i1, i1 0
- %nop16249 = alloca i1, i1 0
- %nop16250 = alloca i1, i1 0
- %nop16251 = alloca i1, i1 0
- %nop16252 = alloca i1, i1 0
- %nop16253 = alloca i1, i1 0
- %nop16254 = alloca i1, i1 0
- %nop16255 = alloca i1, i1 0
- %nop16256 = alloca i1, i1 0
- %nop16257 = alloca i1, i1 0
- %nop16258 = alloca i1, i1 0
- %nop16259 = alloca i1, i1 0
- %nop16260 = alloca i1, i1 0
- %nop16261 = alloca i1, i1 0
- %nop16262 = alloca i1, i1 0
- %nop16263 = alloca i1, i1 0
- %nop16264 = alloca i1, i1 0
- %nop16265 = alloca i1, i1 0
- %nop16266 = alloca i1, i1 0
- %nop16267 = alloca i1, i1 0
- %nop16268 = alloca i1, i1 0
- %nop16269 = alloca i1, i1 0
- %nop16270 = alloca i1, i1 0
- %nop16271 = alloca i1, i1 0
- %nop16272 = alloca i1, i1 0
- %nop16273 = alloca i1, i1 0
- %nop16274 = alloca i1, i1 0
- %nop16275 = alloca i1, i1 0
- %nop16276 = alloca i1, i1 0
- %nop16277 = alloca i1, i1 0
- %nop16278 = alloca i1, i1 0
- %nop16279 = alloca i1, i1 0
- %nop16280 = alloca i1, i1 0
- %nop16281 = alloca i1, i1 0
- %nop16282 = alloca i1, i1 0
- %nop16283 = alloca i1, i1 0
- %nop16284 = alloca i1, i1 0
- %nop16285 = alloca i1, i1 0
- %nop16286 = alloca i1, i1 0
- %nop16287 = alloca i1, i1 0
- %nop16288 = alloca i1, i1 0
- %nop16289 = alloca i1, i1 0
- %nop16290 = alloca i1, i1 0
- %nop16291 = alloca i1, i1 0
- %nop16292 = alloca i1, i1 0
- %nop16293 = alloca i1, i1 0
- %nop16294 = alloca i1, i1 0
- %nop16295 = alloca i1, i1 0
- %nop16296 = alloca i1, i1 0
- %nop16297 = alloca i1, i1 0
- %nop16298 = alloca i1, i1 0
- %nop16299 = alloca i1, i1 0
- %nop16300 = alloca i1, i1 0
- %nop16301 = alloca i1, i1 0
- %nop16302 = alloca i1, i1 0
- %nop16303 = alloca i1, i1 0
- %nop16304 = alloca i1, i1 0
- %nop16305 = alloca i1, i1 0
- %nop16306 = alloca i1, i1 0
- %nop16307 = alloca i1, i1 0
- %nop16308 = alloca i1, i1 0
- %nop16309 = alloca i1, i1 0
- %nop16310 = alloca i1, i1 0
- %nop16311 = alloca i1, i1 0
- %nop16312 = alloca i1, i1 0
- %nop16313 = alloca i1, i1 0
- %nop16314 = alloca i1, i1 0
- %nop16315 = alloca i1, i1 0
- %nop16316 = alloca i1, i1 0
- %nop16317 = alloca i1, i1 0
- %nop16318 = alloca i1, i1 0
- %nop16319 = alloca i1, i1 0
- %nop16320 = alloca i1, i1 0
- %nop16321 = alloca i1, i1 0
- %nop16322 = alloca i1, i1 0
- %nop16323 = alloca i1, i1 0
- %nop16324 = alloca i1, i1 0
- %nop16325 = alloca i1, i1 0
- %nop16326 = alloca i1, i1 0
- %nop16327 = alloca i1, i1 0
- %nop16328 = alloca i1, i1 0
- %nop16329 = alloca i1, i1 0
- %nop16330 = alloca i1, i1 0
- %nop16331 = alloca i1, i1 0
- %nop16332 = alloca i1, i1 0
- %nop16333 = alloca i1, i1 0
- %nop16334 = alloca i1, i1 0
- %nop16335 = alloca i1, i1 0
- %nop16336 = alloca i1, i1 0
- %nop16337 = alloca i1, i1 0
- %nop16338 = alloca i1, i1 0
- %nop16339 = alloca i1, i1 0
- %nop16340 = alloca i1, i1 0
- %nop16341 = alloca i1, i1 0
- %nop16342 = alloca i1, i1 0
- %nop16343 = alloca i1, i1 0
- %nop16344 = alloca i1, i1 0
- %nop16345 = alloca i1, i1 0
- %nop16346 = alloca i1, i1 0
- %nop16347 = alloca i1, i1 0
- %nop16348 = alloca i1, i1 0
- %nop16349 = alloca i1, i1 0
- %nop16350 = alloca i1, i1 0
- %nop16351 = alloca i1, i1 0
- %nop16352 = alloca i1, i1 0
- %nop16353 = alloca i1, i1 0
- %nop16354 = alloca i1, i1 0
- %nop16355 = alloca i1, i1 0
- %nop16356 = alloca i1, i1 0
- %nop16357 = alloca i1, i1 0
- %nop16358 = alloca i1, i1 0
- %nop16359 = alloca i1, i1 0
- %nop16360 = alloca i1, i1 0
- %nop16361 = alloca i1, i1 0
- %nop16362 = alloca i1, i1 0
- %nop16363 = alloca i1, i1 0
- %nop16364 = alloca i1, i1 0
- %nop16365 = alloca i1, i1 0
- %nop16366 = alloca i1, i1 0
- %nop16367 = alloca i1, i1 0
- %nop16368 = alloca i1, i1 0
- %nop16369 = alloca i1, i1 0
- %nop16370 = alloca i1, i1 0
- %nop16371 = alloca i1, i1 0
- %nop16372 = alloca i1, i1 0
- %nop16373 = alloca i1, i1 0
- %nop16374 = alloca i1, i1 0
- %nop16375 = alloca i1, i1 0
- %nop16376 = alloca i1, i1 0
- %nop16377 = alloca i1, i1 0
- br label %for.inc
-
-for.inc:
- %3 = load i32* %i, align 4
- %inc = add nsw i32 %3, 1
- store i32 %inc, i32* %i, align 4
- br label %for.cond
-
-; CHECK: addiu $sp, $sp, -8
-; CHECK: sw $ra, 0($sp)
-; CHECK: lui $[[REG1:[0-9]+]], 65534
-; CHECK: addiu $[[REG1]], $[[REG1]], -12
-; CHECK: addu $[[REG1]], $ra, $[[REG1]]
-; CHECK: lw $ra, 0($sp)
-; CHECK: jr $[[REG1]]
-; CHECK: addiu $sp, $sp, 8
-
-for.end:
- ret i32 0
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
- "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
- "no-infs-fp-math"="false" "no-nans-fp-math"="false"
- "stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
- "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips32r6/compatibility.ll b/test/CodeGen/Mips/mips32r6/compatibility.ll
new file mode 100644
index 0000000..8eac8d4
--- /dev/null
+++ b/test/CodeGen/Mips/mips32r6/compatibility.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck %s
+; RUN: not llc -march=mipsel -mcpu=mips32r6 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s
+
+; CHECK: foo:
+; DSP: MIPS32r6 is not compatible with the DSP ASE
+
+define void @foo() nounwind {
+ ret void
+}
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index dc8bbfd..4d590b6 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -1,5 +1,7 @@
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips4 -soft-float -O1 \
+; RUN: -disable-mips-delay-filler < %s | FileCheck %s
; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -soft-float -O1 \
-; RUN: -disable-mips-delay-filler < %s | FileCheck %s
+; RUN: -disable-mips-delay-filler < %s | FileCheck %s
@gld0 = external global fp128
@gld1 = external global fp128
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index e01609f..7a52c3d 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -1,16 +1,23 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O3 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
-%struct.S = type { [8 x i32] }
+define void @foo(i32* noalias sret %agg.result) nounwind {
+entry:
+; CHECK-LABEL: foo:
+; CHECK: sw {{.*}}, 0($4)
+; CHECK: jr $ra
+; CHECK-NEXT: move $2, $4
-@g = common global %struct.S zeroinitializer, align 4
+ store i32 42, i32* %agg.result
+ ret void
+}
-define void @f(%struct.S* noalias sret %agg.result) nounwind {
+define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
entry:
-; CHECK: move $2, $4
+; CHECK-LABEL: bar:
+; CHECK: sw $4, 0($5)
+; CHECK: jr $ra
+; CHECK-NEXT: move $2, $5
- %0 = bitcast %struct.S* %agg.result to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
+ store i32 %v, i32* %agg.result
ret void
}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
index b2b67e5..252f323 100644
--- a/test/CodeGen/Mips/mips64countleading.ll
+++ b/test/CodeGen/Mips/mips64countleading.ll
@@ -1,8 +1,11 @@
-; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS4 %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS64 %s
define i64 @t1(i64 %X) nounwind readnone {
entry:
-; CHECK: dclz
+; CHECK-LABEL: t1:
+; MIPS4-NOT: dclz
+; MIPS64: dclz
%tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
ret i64 %tmp1
}
@@ -11,7 +14,9 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
define i64 @t3(i64 %X) nounwind readnone {
entry:
-; CHECK: dclo
+; CHECK-LABEL: t3:
+; MIPS4-NOT: dclo
+; MIPS64: dclo
%neg = xor i64 %X, -1
%tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
ret i64 %tmp1
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
index fa81b72..3d95f51 100644
--- a/test/CodeGen/Mips/mips64directive.ll
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
@gl = global i64 1250999896321, align 8
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
index 02a35f8..22ea0eb 100644
--- a/test/CodeGen/Mips/mips64ext.ll
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
define i64 @zext64_32(i32 %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
index 17716da..19e076d 100644
--- a/test/CodeGen/Mips/mips64fpimm0.ll
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
define double @foo1() nounwind readnone {
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index 368ab83..2f42270 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,3 +1,5 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
index 1fc8636..c3fc61d 100644
--- a/test/CodeGen/Mips/mips64imm.ll
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
define i32 @foo1() nounwind readnone {
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index 2894d69..58f11f1 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips4 -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS4 %s
+; RUN: llc -march=mips64el -mcpu=mips64 -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=MIPS64 %s
@gll0 = common global i64 0, align 8
@gll1 = common global i64 0, align 8
@@ -135,14 +136,24 @@ declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
define i64 @f18(i64 %X) nounwind readnone {
entry:
-; CHECK: dclz $2, $4
+; CHECK-LABEL: f18:
+
+; The MIPS4 version is too long to reasonably test. At least check that we don't get dclz.
+; MIPS4-NOT: dclz
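+; (dclz was introduced with the MIPS64 architecture; on MIPS4 the ctlz.i64 is
+; expanded to a much longer inline sequence, hence only the negative check.)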
+
+; MIPS64: dclz $2, $4
%tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
ret i64 %tmp1
}
define i64 @f19(i64 %X) nounwind readnone {
entry:
-; CHECK: dclo $2, $4
+; CHECK-LABEL: f19:
+
+; The MIPS4 version is too long to reasonably test. At least check that we don't get dclo.
+; MIPS4-NOT: dclo
+
+; MIPS64: dclo $2, $4
%neg = xor i64 %X, -1
%tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
ret i64 %tmp1
@@ -150,6 +161,7 @@ entry:
define i64 @f20(i64 %a, i64 %b) nounwind readnone {
entry:
+; CHECK-LABEL: f20:
; CHECK: nor
%or = or i64 %b, %a
%neg = xor i64 %or, -1
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index 62244f6..c3607ba 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,3 +1,5 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
index 54d504f..e866b21 100644
--- a/test/CodeGen/Mips/mips64lea.ll
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
define void @foo3() nounwind {
diff --git a/test/CodeGen/Mips/mips64load-store-left-right.ll b/test/CodeGen/Mips/mips64load-store-left-right.ll
deleted file mode 100644
index 4561429..0000000
--- a/test/CodeGen/Mips/mips64load-store-left-right.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=EL %s
-; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=EB %s
-
-%struct.SLL = type { i64 }
-%struct.SI = type { i32 }
-%struct.SUI = type { i32 }
-
-@sll = common global %struct.SLL zeroinitializer, align 1
-@si = common global %struct.SI zeroinitializer, align 1
-@sui = common global %struct.SUI zeroinitializer, align 1
-
-define i64 @foo_load_ll() nounwind readonly {
-entry:
-; EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
-; EL: ldr $[[R0]], 0($[[R1]])
-; EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: ldr $[[R0]], 7($[[R1]])
-
- %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
- ret i64 %0
-}
-
-define i64 @foo_load_i() nounwind readonly {
-entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
-
- %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
- %conv = sext i32 %0 to i64
- ret i64 %conv
-}
-
-define i64 @foo_load_ui() nounwind readonly {
-entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EL: daddiu $[[R2:[0-9]+]], $zero, 1
-; EL: dsll $[[R3:[0-9]+]], $[[R2]], 32
-; EL: daddiu $[[R4:[0-9]+]], $[[R3]], -1
-; EL: and ${{[0-9]+}}, $[[R0]], $[[R4]]
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
-
-
- %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
- %conv = zext i32 %0 to i64
- ret i64 %conv
-}
-
-define void @foo_store_ll(i64 %a) nounwind {
-entry:
-; EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
-; EL: sdr $[[R0]], 0($[[R1]])
-; EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: sdr $[[R0]], 7($[[R1]])
-
- store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
- ret void
-}
-
-define void @foo_store_i(i32 %a) nounwind {
-entry:
-; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: swr $[[R0]], 0($[[R1]])
-; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: swr $[[R0]], 3($[[R1]])
-
- store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
- ret void
-}
-
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
index fd036a2..39c73e9 100644
--- a/test/CodeGen/Mips/mips64muldiv.ll
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
diff --git a/test/CodeGen/Mips/mips64r6/compatibility.ll b/test/CodeGen/Mips/mips64r6/compatibility.ll
new file mode 100644
index 0000000..429f68d
--- /dev/null
+++ b/test/CodeGen/Mips/mips64r6/compatibility.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=mipsel -mcpu=mips64r6 < %s | FileCheck %s
+; RUN: not llc -march=mipsel -mcpu=mips64r6 -mattr=+dsp < %s 2>&1 | FileCheck --check-prefix=DSP %s
+
+; CHECK: foo:
+; DSP: MIPS64r6 is not compatible with the DSP ASE
+
+define void @foo() nounwind {
+ ret void
+}
diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll
index 2725e9a..dbdf42b 100644
--- a/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/test/CodeGen/Mips/msa/basic_operations.ll
@@ -6,10 +6,11 @@
@v8i16 = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
@v4i32 = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@v2i64 = global <2 x i64> <i64 0, i64 0>
+@i32 = global i32 0
@i64 = global i64 0
define void @const_v16i8() nounwind {
- ; MIPS32-AE: const_v16i8:
+ ; MIPS32-AE-LABEL: const_v16i8:
store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8
; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -45,7 +46,7 @@ define void @const_v16i8() nounwind {
}
define void @const_v8i16() nounwind {
- ; MIPS32-AE: const_v8i16:
+ ; MIPS32-AE-LABEL: const_v8i16:
store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16
; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -76,7 +77,7 @@ define void @const_v8i16() nounwind {
}
define void @const_v4i32() nounwind {
- ; MIPS32-AE: const_v4i32:
+ ; MIPS32-AE-LABEL: const_v4i32:
store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32
; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -107,7 +108,7 @@ define void @const_v4i32() nounwind {
}
define void @const_v2i64() nounwind {
- ; MIPS32-AE: const_v2i64:
+ ; MIPS32-AE-LABEL: const_v2i64:
store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64
; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
@@ -137,7 +138,7 @@ define void @const_v2i64() nounwind {
}
define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) nounwind {
- ; MIPS32-AE: nonconst_v16i8:
+ ; MIPS32-AE-LABEL: nonconst_v16i8:
%1 = insertelement <16 x i8> undef, i8 %a, i32 0
%2 = insertelement <16 x i8> %1, i8 %b, i32 1
@@ -187,7 +188,7 @@ define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8
}
define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h) nounwind {
- ; MIPS32-AE: nonconst_v8i16:
+ ; MIPS32-AE-LABEL: nonconst_v8i16:
%1 = insertelement <8 x i16> undef, i16 %a, i32 0
%2 = insertelement <8 x i16> %1, i16 %b, i32 1
@@ -221,7 +222,7 @@ define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16
}
define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
- ; MIPS32-AE: nonconst_v4i32:
+ ; MIPS32-AE-LABEL: nonconst_v4i32:
%1 = insertelement <4 x i32> undef, i32 %a, i32 0
%2 = insertelement <4 x i32> %1, i32 %b, i32 1
@@ -239,7 +240,7 @@ define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
}
define void @nonconst_v2i64(i64 %a, i64 %b) nounwind {
- ; MIPS32-AE: nonconst_v2i64:
+ ; MIPS32-AE-LABEL: nonconst_v2i64:
%1 = insertelement <2 x i64> undef, i64 %a, i32 0
%2 = insertelement <2 x i64> %1, i64 %b, i32 1
@@ -255,7 +256,7 @@ define void @nonconst_v2i64(i64 %a, i64 %b) nounwind {
}
define i32 @extract_sext_v16i8() nounwind {
- ; MIPS32-AE: extract_sext_v16i8:
+ ; MIPS32-AE-LABEL: extract_sext_v16i8:
%1 = load <16 x i8>* @v16i8
; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
@@ -274,7 +275,7 @@ define i32 @extract_sext_v16i8() nounwind {
}
define i32 @extract_sext_v8i16() nounwind {
- ; MIPS32-AE: extract_sext_v8i16:
+ ; MIPS32-AE-LABEL: extract_sext_v8i16:
%1 = load <8 x i16>* @v8i16
; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
@@ -293,7 +294,7 @@ define i32 @extract_sext_v8i16() nounwind {
}
define i32 @extract_sext_v4i32() nounwind {
- ; MIPS32-AE: extract_sext_v4i32:
+ ; MIPS32-AE-LABEL: extract_sext_v4i32:
%1 = load <4 x i32>* @v4i32
; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -309,7 +310,7 @@ define i32 @extract_sext_v4i32() nounwind {
}
define i64 @extract_sext_v2i64() nounwind {
- ; MIPS32-AE: extract_sext_v2i64:
+ ; MIPS32-AE-LABEL: extract_sext_v2i64:
%1 = load <2 x i64>* @v2i64
; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -328,7 +329,7 @@ define i64 @extract_sext_v2i64() nounwind {
}
define i32 @extract_zext_v16i8() nounwind {
- ; MIPS32-AE: extract_zext_v16i8:
+ ; MIPS32-AE-LABEL: extract_zext_v16i8:
%1 = load <16 x i8>* @v16i8
; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
@@ -346,7 +347,7 @@ define i32 @extract_zext_v16i8() nounwind {
}
define i32 @extract_zext_v8i16() nounwind {
- ; MIPS32-AE: extract_zext_v8i16:
+ ; MIPS32-AE-LABEL: extract_zext_v8i16:
%1 = load <8 x i16>* @v8i16
; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
@@ -364,7 +365,7 @@ define i32 @extract_zext_v8i16() nounwind {
}
define i32 @extract_zext_v4i32() nounwind {
- ; MIPS32-AE: extract_zext_v4i32:
+ ; MIPS32-AE-LABEL: extract_zext_v4i32:
%1 = load <4 x i32>* @v4i32
; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -380,7 +381,7 @@ define i32 @extract_zext_v4i32() nounwind {
}
define i64 @extract_zext_v2i64() nounwind {
- ; MIPS32-AE: extract_zext_v2i64:
+ ; MIPS32-AE-LABEL: extract_zext_v2i64:
%1 = load <2 x i64>* @v2i64
; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -397,8 +398,200 @@ define i64 @extract_zext_v2i64() nounwind {
; MIPS32-AE: .size extract_zext_v2i64
}
+define i32 @extract_sext_v16i8_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_sext_v16i8_vidx:
+
+ %1 = load <16 x i8>* @v16i8
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
+ ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <16 x i8> %1, %1
+ ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <16 x i8> %2, i32 %3
+ %5 = sext i8 %4 to i32
+ ; MIPS32-AE-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24
+
+ ret i32 %5
+ ; MIPS32-AE: .size extract_sext_v16i8_vidx
+}
+
+define i32 @extract_sext_v8i16_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_sext_v8i16_vidx:
+
+ %1 = load <8 x i16>* @v8i16
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
+ ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <8 x i16> %1, %1
+ ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <8 x i16> %2, i32 %3
+ %5 = sext i16 %4 to i32
+ ; MIPS32-AE-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16
+
+ ret i32 %5
+ ; MIPS32-AE: .size extract_sext_v8i16_vidx
+}
+
+define i32 @extract_sext_v4i32_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_sext_v4i32_vidx:
+
+ %1 = load <4 x i32>* @v4i32
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
+ ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <4 x i32> %1, %1
+ ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <4 x i32> %2, i32 %3
+ ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-NOT: sra
+
+ ret i32 %4
+ ; MIPS32-AE: .size extract_sext_v4i32_vidx
+}
+
+define i64 @extract_sext_v2i64_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_sext_v2i64_vidx:
+
+ %1 = load <2 x i64>* @v2i64
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
+ ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <2 x i64> %1, %1
+ ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <2 x i64> %2, i32 %3
+ ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
+ ; MIPS32-AE-NOT: sra
+
+ ret i64 %4
+ ; MIPS32-AE: .size extract_sext_v2i64_vidx
+}
+
+define i32 @extract_zext_v16i8_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_zext_v16i8_vidx:
+
+ %1 = load <16 x i8>* @v16i8
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
+ ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <16 x i8> %1, %1
+ ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <16 x i8> %2, i32 %3
+ %5 = zext i8 %4 to i32
+ ; MIPS32-AE-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24
+
+ ret i32 %5
+ ; MIPS32-AE: .size extract_zext_v16i8_vidx
+}
+
+define i32 @extract_zext_v8i16_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_zext_v8i16_vidx:
+
+ %1 = load <8 x i16>* @v8i16
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
+ ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <8 x i16> %1, %1
+ ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <8 x i16> %2, i32 %3
+ %5 = zext i16 %4 to i32
+ ; MIPS32-AE-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16
+
+ ret i32 %5
+ ; MIPS32-AE: .size extract_zext_v8i16_vidx
+}
+
+define i32 @extract_zext_v4i32_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_zext_v4i32_vidx:
+
+ %1 = load <4 x i32>* @v4i32
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
+ ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <4 x i32> %1, %1
+ ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <4 x i32> %2, i32 %3
+ ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-NOT: srl
+
+ ret i32 %4
+ ; MIPS32-AE: .size extract_zext_v4i32_vidx
+}
+
+define i64 @extract_zext_v2i64_vidx() nounwind {
+ ; MIPS32-AE-LABEL: extract_zext_v2i64_vidx:
+
+ %1 = load <2 x i64>* @v2i64
+ ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
+ ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = add <2 x i64> %1, %1
+ ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <2 x i64> %2, i32 %3
+ ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-AE-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
+ ; MIPS32-AE-NOT: srl
+
+ ret i64 %4
+ ; MIPS32-AE: .size extract_zext_v2i64_vidx
+}
+
define void @insert_v16i8(i32 %a) nounwind {
- ; MIPS32-AE: insert_v16i8:
+ ; MIPS32-AE-LABEL: insert_v16i8:
%1 = load <16 x i8>* @v16i8
; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
@@ -420,7 +613,7 @@ define void @insert_v16i8(i32 %a) nounwind {
}
define void @insert_v8i16(i32 %a) nounwind {
- ; MIPS32-AE: insert_v8i16:
+ ; MIPS32-AE-LABEL: insert_v8i16:
%1 = load <8 x i16>* @v8i16
; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
@@ -442,7 +635,7 @@ define void @insert_v8i16(i32 %a) nounwind {
}
define void @insert_v4i32(i32 %a) nounwind {
- ; MIPS32-AE: insert_v4i32:
+ ; MIPS32-AE-LABEL: insert_v4i32:
%1 = load <4 x i32>* @v4i32
; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -461,7 +654,7 @@ define void @insert_v4i32(i32 %a) nounwind {
}
define void @insert_v2i64(i64 %a) nounwind {
- ; MIPS32-AE: insert_v2i64:
+ ; MIPS32-AE-LABEL: insert_v2i64:
%1 = load <2 x i64>* @v2i64
; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -480,8 +673,131 @@ define void @insert_v2i64(i64 %a) nounwind {
; MIPS32-AE: .size insert_v2i64
}
+define void @insert_v16i8_vidx(i32 %a) nounwind {
+ ; MIPS32-AE-LABEL: insert_v16i8_vidx:
+
+ %1 = load <16 x i8>* @v16i8
+ ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+
+ %2 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %a2 = trunc i32 %a to i8
+ %a3 = sext i8 %a2 to i32
+ %a4 = trunc i32 %a3 to i8
+ ; MIPS32-AE-NOT: andi
+ ; MIPS32-AE-NOT: sra
+
+ %3 = insertelement <16 x i8> %1, i8 %a4, i32 %2
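+ ; For a variable index, the element is rotated down to position 0 with sld.b,
+ ; inserted there, and then the vector is rotated back by the negated amount: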
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-AE-DAG: insert.b [[R1]][0], $4
+ ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ store <16 x i8> %3, <16 x i8>* @v16i8
+ ; MIPS32-AE-DAG: st.b [[R1]]
+
+ ret void
+ ; MIPS32-AE: .size insert_v16i8_vidx
+}
+
+define void @insert_v8i16_vidx(i32 %a) nounwind {
+ ; MIPS32-AE-LABEL: insert_v8i16_vidx:
+
+ %1 = load <8 x i16>* @v8i16
+ ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+
+ %2 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %a2 = trunc i32 %a to i16
+ %a3 = sext i16 %a2 to i32
+ %a4 = trunc i32 %a3 to i16
+ ; MIPS32-AE-NOT: andi
+ ; MIPS32-AE-NOT: sra
+
+ %3 = insertelement <8 x i16> %1, i16 %a4, i32 %2
+ ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-AE-DAG: insert.h [[R1]][0], $4
+ ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ store <8 x i16> %3, <8 x i16>* @v8i16
+ ; MIPS32-AE-DAG: st.h [[R1]]
+
+ ret void
+ ; MIPS32-AE: .size insert_v8i16_vidx
+}
+
+define void @insert_v4i32_vidx(i32 %a) nounwind {
+ ; MIPS32-AE-LABEL: insert_v4i32_vidx:
+
+ %1 = load <4 x i32>* @v4i32
+ ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+ %2 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ ; MIPS32-AE-NOT: andi
+ ; MIPS32-AE-NOT: sra
+
+ %3 = insertelement <4 x i32> %1, i32 %a, i32 %2
+ ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-AE-DAG: insert.w [[R1]][0], $4
+ ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ store <4 x i32> %3, <4 x i32>* @v4i32
+ ; MIPS32-AE-DAG: st.w [[R1]]
+
+ ret void
+ ; MIPS32-AE: .size insert_v4i32_vidx
+}
+
+define void @insert_v2i64_vidx(i64 %a) nounwind {
+ ; MIPS32-AE-LABEL: insert_v2i64_vidx:
+
+ %1 = load <2 x i64>* @v2i64
+ ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+
+ %2 = load i32* @i32
+ ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ ; MIPS32-AE-NOT: andi
+ ; MIPS32-AE-NOT: sra
+
+ %3 = insertelement <2 x i64> %1, i64 %a, i32 %2
+ ; TODO: This code could be a lot better but it works. The legalizer splits
+ ; 64-bit inserts into two 32-bit inserts because there is no i64 type on
+ ; MIPS32. The obvious optimisation is to perform both insert.w's at once while
+ ; the vector is rotated.
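+ ; A hypothetical improved sequence (illustrative register names; not what is
+ ; currently emitted) might rotate once, insert both halves, and rotate back:
+ ;   sll      $t0, IDX, 3          (byte offset of i64 element IDX)
+ ;   sld.b    W, W[$t0]            (rotate the element down to position 0)
+ ;   insert.w W[0], $4             (one 32-bit half)
+ ;   insert.w W[1], $5             (the other 32-bit half)
+ ;   neg      $t1, $t0
+ ;   sld.b    W, W[$t1]            (rotate back)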
+ ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-AE-DAG: insert.w [[R1]][0], $4
+ ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; MIPS32-AE-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1
+ ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-AE-DAG: insert.w [[R1]][0], $5
+ ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ store <2 x i64> %3, <2 x i64>* @v2i64
+ ; MIPS32-AE-DAG: st.w [[R1]]
+
+ ret void
+ ; MIPS32-AE: .size insert_v2i64_vidx
+}
+
define void @truncstore() nounwind {
- ; MIPS32-AE: truncstore:
+ ; MIPS32-AE-LABEL: truncstore:
store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8
; TODO: What code should be emitted?
diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll
index c8cef44..a0c9d29 100644
--- a/test/CodeGen/Mips/msa/basic_operations_float.ll
+++ b/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -3,11 +3,12 @@
@v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
@v2f64 = global <2 x double> <double 0.0, double 0.0>
+@i32 = global i32 0
@f32 = global float 0.0
@f64 = global double 0.0
define void @const_v4f32() nounwind {
- ; MIPS32: const_v4f32:
+ ; MIPS32-LABEL: const_v4f32:
store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
@@ -38,7 +39,7 @@ define void @const_v4f32() nounwind {
}
define void @const_v2f64() nounwind {
- ; MIPS32: const_v2f64:
+ ; MIPS32-LABEL: const_v2f64:
store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
@@ -72,7 +73,7 @@ define void @const_v2f64() nounwind {
}
define void @nonconst_v4f32() nounwind {
- ; MIPS32: nonconst_v4f32:
+ ; MIPS32-LABEL: nonconst_v4f32:
%1 = load float *@f32
%2 = insertelement <4 x float> undef, float %1, i32 0
@@ -88,7 +89,7 @@ define void @nonconst_v4f32() nounwind {
}
define void @nonconst_v2f64() nounwind {
- ; MIPS32: nonconst_v2f64:
+ ; MIPS32-LABEL: nonconst_v2f64:
%1 = load double *@f64
%2 = insertelement <2 x double> undef, double %1, i32 0
@@ -102,7 +103,7 @@ define void @nonconst_v2f64() nounwind {
}
define float @extract_v4f32() nounwind {
- ; MIPS32: extract_v4f32:
+ ; MIPS32-LABEL: extract_v4f32:
%1 = load <4 x float>* @v4f32
; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -120,7 +121,7 @@ define float @extract_v4f32() nounwind {
}
define float @extract_v4f32_elt0() nounwind {
- ; MIPS32: extract_v4f32_elt0:
+ ; MIPS32-LABEL: extract_v4f32_elt0:
%1 = load <4 x float>* @v4f32
; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -138,7 +139,7 @@ define float @extract_v4f32_elt0() nounwind {
}
define float @extract_v4f32_elt2() nounwind {
- ; MIPS32: extract_v4f32_elt2:
+ ; MIPS32-LABEL: extract_v4f32_elt2:
%1 = load <4 x float>* @v4f32
; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -155,8 +156,29 @@ define float @extract_v4f32_elt2() nounwind {
; MIPS32: .size extract_v4f32_elt2
}
+define float @extract_v4f32_vidx() nounwind {
+ ; MIPS32-LABEL: extract_v4f32_vidx:
+
+ %1 = load <4 x float>* @v4f32
+ ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
+ ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = fadd <4 x float> %1, %1
+ ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <4 x float> %2, i32 %3
+ ; MIPS32-DAG: splat.w $w0, [[R1]]{{\[}}[[IDX]]]
+
+ ret float %4
+ ; MIPS32: .size extract_v4f32_vidx
+}
+
define double @extract_v2f64() nounwind {
- ; MIPS32: extract_v2f64:
+ ; MIPS32-LABEL: extract_v2f64:
%1 = load <2 x double>* @v2f64
; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -179,7 +201,7 @@ define double @extract_v2f64() nounwind {
}
define double @extract_v2f64_elt0() nounwind {
- ; MIPS32: extract_v2f64_elt0:
+ ; MIPS32-LABEL: extract_v2f64_elt0:
%1 = load <2 x double>* @v2f64
; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -199,8 +221,29 @@ define double @extract_v2f64_elt0() nounwind {
; MIPS32: .size extract_v2f64_elt0
}
+define double @extract_v2f64_vidx() nounwind {
+ ; MIPS32-LABEL: extract_v2f64_vidx:
+
+ %1 = load <2 x double>* @v2f64
+ ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
+ ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = fadd <2 x double> %1, %1
+ ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+
+ %3 = load i32* @i32
+ ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %4 = extractelement <2 x double> %2, i32 %3
+ ; MIPS32-DAG: splat.d $w0, [[R1]]{{\[}}[[IDX]]]
+
+ ret double %4
+ ; MIPS32: .size extract_v2f64_vidx
+}
+
define void @insert_v4f32(float %a) nounwind {
- ; MIPS32: insert_v4f32:
+ ; MIPS32-LABEL: insert_v4f32:
%1 = load <4 x float>* @v4f32
; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
@@ -217,7 +260,7 @@ define void @insert_v4f32(float %a) nounwind {
}
define void @insert_v2f64(double %a) nounwind {
- ; MIPS32: insert_v2f64:
+ ; MIPS32-LABEL: insert_v2f64:
%1 = load <2 x double>* @v2f64
; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
@@ -232,3 +275,55 @@ define void @insert_v2f64(double %a) nounwind {
ret void
; MIPS32: .size insert_v2f64
}
+
+define void @insert_v4f32_vidx(float %a) nounwind {
+ ; MIPS32-LABEL: insert_v4f32_vidx:
+
+ %1 = load <4 x float>* @v4f32
+ ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
+ ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = load i32* @i32
+ ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %3 = insertelement <4 x float> %1, float %a, i32 %2
+ ; float argument passed in $f12
+ ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-DAG: insve.w [[R1]][0], $w12[0]
+ ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ store <4 x float> %3, <4 x float>* @v4f32
+ ; MIPS32-DAG: st.w [[R1]]
+
+ ret void
+ ; MIPS32: .size insert_v4f32_vidx
+}
+
+define void @insert_v2f64_vidx(double %a) nounwind {
+ ; MIPS32-LABEL: insert_v2f64_vidx:
+
+ %1 = load <2 x double>* @v2f64
+ ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
+ ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+
+ %2 = load i32* @i32
+ ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+
+ %3 = insertelement <2 x double> %1, double %a, i32 %2
+ ; double argument passed in $f12
+ ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-DAG: insve.d [[R1]][0], $w12[0]
+ ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ store <2 x double> %3, <2 x double>* @v2f64
+ ; MIPS32-DAG: st.d [[R1]]
+
+ ret void
+ ; MIPS32: .size insert_v2f64_vidx
+}
diff --git a/test/CodeGen/Mips/optimize-fp-math.ll b/test/CodeGen/Mips/optimize-fp-math.ll
index 8b71dc4..7886f29 100644
--- a/test/CodeGen/Mips/optimize-fp-math.ll
+++ b/test/CodeGen/Mips/optimize-fp-math.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
; 32-LABEL: test_sqrtf_float_:
diff --git a/test/CodeGen/Mips/remat-immed-load.ll b/test/CodeGen/Mips/remat-immed-load.ll
index d93964b..b53b156 100644
--- a/test/CodeGen/Mips/remat-immed-load.ll
+++ b/test/CodeGen/Mips/remat-immed-load.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck %s -check-prefix=64
; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=64
define void @f0() nounwind {
diff --git a/test/CodeGen/Mips/sint-fp-store_pattern.ll b/test/CodeGen/Mips/sint-fp-store_pattern.ll
index c44ea08..2735d78 100644
--- a/test/CodeGen/Mips/sint-fp-store_pattern.ll
+++ b/test/CodeGen/Mips/sint-fp-store_pattern.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips64el -mcpu=mips4 < %s | FileCheck %s -check-prefix=64
; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s -check-prefix=64
@gint_ = external global i32
diff --git a/test/CodeGen/Mips/start-asm-file.ll b/test/CodeGen/Mips/start-asm-file.ll
new file mode 100644
index 0000000..8872464
--- /dev/null
+++ b/test/CodeGen/Mips/start-asm-file.ll
@@ -0,0 +1,91 @@
+; Check the emission of directives at the start of an asm file.
+; This test is XFAILED until we fix the emission of '.option pic0' on
+; N32. At the moment we check if subtarget is Mips64 when we should be
+; checking the Subtarget's ABI.
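+;
+; In summary (matched by the CHECK lines below): '.option pic0' is expected
+; for static O32 and static N32, but never for N64 or for PIC compilation.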
+
+; ### O32 ABI ###
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=static %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-STATIC-O32 -check-prefix=CHECK-STATIC-O32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=pic %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-PIC-O32 -check-prefix=CHECK-PIC-O32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=static -mattr=+nan2008 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-STATIC-O32 -check-prefix=CHECK-STATIC-O32-N2008 %s
+
+; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
+; RUN: -relocation-model=pic -mattr=+nan2008 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-PIC-O32 -check-prefix=CHECK-PIC-O32-N2008 %s
+
+; ### N32 ABI ###
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=-n64,+n32 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-STATIC-N32 -check-prefix=CHECK-STATIC-N32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=-n64,+n32 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-PIC-N32 -check-prefix=CHECK-PIC-N32-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=-n64,+n32,+nan2008 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-STATIC-N32 -check-prefix=CHECK-STATIC-N32-N2008 %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=-n64,+n32,+nan2008 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-PIC-N32 -check-prefix=CHECK-PIC-N32-N2008 %s
+
+; ### N64 ABI ###
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=+n64 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-STATIC-N64 -check-prefix=CHECK-STATIC-N64-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=+n64 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-PIC-N64 -check-prefix=CHECK-PIC-N64-NLEGACY %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=static -mattr=+n64,+nan2008 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-STATIC-N64 -check-prefix=CHECK-STATIC-N64-N2008 %s
+
+; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
+; RUN: -relocation-model=pic -mattr=+n64,+nan2008 %s -o - | \
+; RUN: FileCheck -check-prefix=CHECK-PIC-N64 -check-prefix=CHECK-PIC-N64-N2008 %s
+
+; CHECK-STATIC-O32: .abicalls
+; CHECK-STATIC-O32: .option pic0
+; CHECK-STATIC-O32: .section .mdebug.abi32
+; CHECK-STATIC-O32-NLEGACY: .nan legacy
+; CHECK-STATIC-O32-N2008: .nan 2008
+
+; CHECK-PIC-O32: .abicalls
+; CHECK-PIC-O32-NOT: .option pic0
+; CHECK-PIC-O32: .section .mdebug.abi32
+; CHECK-PIC-O32-NLEGACY: .nan legacy
+; CHECK-PIC-O32-N2008: .nan 2008
+
+; CHECK-STATIC-N32: .abicalls
+; CHECK-STATIC-N32: .option pic0
+; CHECK-STATIC-N32: .section .mdebug.abiN32
+; CHECK-STATIC-N32-NLEGACY: .nan legacy
+; CHECK-STATIC-N32-N2008: .nan 2008
+
+; CHECK-PIC-N32: .abicalls
+; CHECK-PIC-N32-NOT: .option pic0
+; CHECK-PIC-N32: .section .mdebug.abiN32
+; CHECK-PIC-N32-NLEGACY: .nan legacy
+; CHECK-PIC-N32-N2008: .nan 2008
+
+; CHECK-STATIC-N64: .abicalls
+; CHECK-STATIC-N64-NOT: .option pic0
+; CHECK-STATIC-N64: .section .mdebug.abi64
+; CHECK-STATIC-N64-NLEGACY: .nan legacy
+; CHECK-STATIC-N64-N2008: .nan 2008
+
+; CHECK-PIC-N64: .abicalls
+; CHECK-PIC-N64-NOT: .option pic0
+; CHECK-PIC-N64: .section .mdebug.abi64
+; CHECK-PIC-N64-NLEGACY: .nan legacy
+; CHECK-PIC-N64-N2008: .nan 2008
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
index 3c81054..80fbe87 100644
--- a/test/CodeGen/Mips/tls-alias.ll
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -5,6 +5,6 @@
define i32* @zed() {
; CHECK-DAG: __tls_get_addr
-; CHECK-DAG: %tlsgd(bar)
+; CHECK-DAG: %tlsldm(bar)
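+; (bar resolves to a local definition through the alias, so the local-dynamic
+; model's %tlsldm can presumably be used instead of the general-dynamic %tlsgd)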
ret i32* @bar
}
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 19f3af7..2002b1c 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -1,5 +1,9 @@
-; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB
+; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32R6-EL
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32R6-EB
%struct.S2 = type { %struct.S1, %struct.S1 }
%struct.S1 = type { i8, i8 }
%struct.S4 = type { [7 x i8] }
@@ -7,21 +11,71 @@
@s2 = common global %struct.S2 zeroinitializer, align 1
@s4 = common global %struct.S4 zeroinitializer, align 1
-define void @foo1() nounwind {
+define void @bar1() nounwind {
entry:
-; CHECK-EL-DAG: lbu ${{[0-9]+}}, 2($[[R0:[0-9]+]])
-; CHECK-EL-DAG: lbu ${{[0-9]+}}, 3($[[R0]])
-; CHECK-EL: jalr
-; CHECK-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[R2:[0-9]+]])
-; CHECK-EL-DAG: lwr $[[R1]], 0($[[R2]])
-
-; CHECK-EB-DAG: lbu ${{[0-9]+}}, 3($[[R0:[0-9]+]])
-; CHECK-EB-DAG: lbu ${{[0-9]+}}, 2($[[R0]])
-; CHECK-EB: jalr
-; CHECK-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[R2:[0-9]+]])
-; CHECK-EB-DAG: lwr $[[R1]], 3($[[R2]])
+; ALL-LABEL: bar1:
+
+; ALL-DAG: lw $[[R0:[0-9]+]], %got(s2)(
+
+; MIPS32-EL-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]])
+; MIPS32-EL-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8
+; MIPS32-EL-DAG: or $4, $[[T0]], $[[PART1]]
+
+; MIPS32-EB-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]])
+; MIPS32-EB-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8
+; MIPS32-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[PART2]]
+; MIPS32-EB-DAG: sll $4, $[[T1]], 16
+
+; MIPS32R6-DAG: lhu $[[PART1:[0-9]+]], 2($[[R0]])
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
+ ret void
+}
+
+define void @bar2() nounwind {
+entry:
+; ALL-LABEL: bar2:
+
+; ALL-DAG: lw $[[R2:[0-9]+]], %got(s4)(
+
+; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[R2]])
+; MIPS32-EL-DAG: lwr $[[R1]], 0($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]])
+; MIPS32-EL-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8
+; MIPS32-EL-DAG: or $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+; MIPS32-EL-DAG: sll $[[T5:[0-9]+]], $[[T2]], 16
+; MIPS32-EL-DAG: or $5, $[[T4]], $[[T5]]
+
+; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[R2]])
+; MIPS32-EB-DAG: lwr $[[R1]], 3($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]])
+; MIPS32-EB-DAG: sll $[[T3:[0-9]+]], $[[T0]], 8
+; MIPS32-EB-DAG: or $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; MIPS32-EB-DAG: sll $[[T5:[0-9]+]], $[[T4]], 16
+; MIPS32-EB-DAG: sll $[[T6:[0-9]+]], $[[T2]], 8
+; MIPS32-EB-DAG: or $5, $[[T5]], $[[T6]]
+
+; FIXME: We should be able to do better than this using lhu
+; MIPS32R6-EL-DAG: lw $4, 0($[[R2]])
+; MIPS32R6-EL-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32R6-EL-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]])
+; MIPS32R6-EL-DAG: sll $[[T2:[0-9]+]], $[[T1]], 16
+; MIPS32R6-EL-DAG: or $5, $[[T0]], $[[T2]]
+
+; FIXME: We should be able to do better than this using lhu
+; MIPS32R6-EB-DAG: lw $4, 0($[[R2]])
+; MIPS32R6-EB-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32R6-EB-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]])
+; MIPS32R6-EB-DAG: sll $[[T2:[0-9]+]], $[[T0]], 16
+; MIPS32R6-EB-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8
+; MIPS32R6-EB-DAG: or $5, $[[T2]], $[[T3]]
+
tail call void @foo4(%struct.S4* byval @s4) nounwind
ret void
}
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
new file mode 100644
index 0000000..0622aa3
--- /dev/null
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix PTX
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
+; RUN: opt < %s -S -nvptx-favor-non-generic -dce | FileCheck %s --check-prefix IR
+
+@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
+@scalar = internal addrspace(3) global float 0.000000e+00, align 4
+
+; Verifies that nvptx-favor-non-generic rewrites generic address space
+; accesses into non-generic (shared) accesses for the patterns we claim to
+; handle:
+; 1. load cast
+; 2. store cast
+; 3. load gep cast
+; 4. store gep cast
+; gep and cast can be an instruction or a constant expression. This function
+; tries all possible combinations.
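+;
+; For instance, the pass is expected to turn a load such as (a before/after
+; sketch, not literal pass output):
+;   %v = load float* addrspacecast (float addrspace(3)* @scalar to float*)
+; into the direct shared-space load:
+;   %v = load float addrspace(3)* @scalar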
+define float @ld_st_shared_f32(i32 %i, float %v) {
+; IR-LABEL: @ld_st_shared_f32
+; IR-NOT: addrspacecast
+; PTX-LABEL: ld_st_shared_f32(
+ ; load cast
+ %1 = load float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
+; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
+ ; store cast
+ store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
+; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
+  ; use syncthreads to prevent the optimizer from combining accesses across
+  ; the sections below
+ call void @llvm.cuda.syncthreads()
+; PTX: bar.sync 0;
+
+ ; cast; load
+ %2 = addrspacecast float addrspace(3)* @scalar to float*
+ %3 = load float* %2, align 4
+; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
+ ; cast; store
+ store float %v, float* %2, align 4
+; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
+ call void @llvm.cuda.syncthreads()
+; PTX: bar.sync 0;
+
+ ; load gep cast
+ %4 = load float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
+ ; store gep cast
+ store float %v, float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
+ call void @llvm.cuda.syncthreads()
+; PTX: bar.sync 0;
+
+ ; gep cast; load
+ %5 = getelementptr inbounds [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %6 = load float* %5, align 4
+; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
+ ; gep cast; store
+ store float %v, float* %5, align 4
+; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
+ call void @llvm.cuda.syncthreads()
+; PTX: bar.sync 0;
+
+ ; cast; gep; load
+ %7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
+ %8 = getelementptr inbounds [10 x float]* %7, i32 0, i32 %i
+ %9 = load float* %8, align 4
+; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
+ ; cast; gep; store
+ store float %v, float* %8, align 4
+; PTX: st.shared.f32 [%{{(r|rl|rd)[0-9]+}}], %f{{[0-9]+}};
+ call void @llvm.cuda.syncthreads()
+; PTX: bar.sync 0;
+
+ %sum2 = fadd float %1, %3
+ %sum3 = fadd float %sum2, %4
+ %sum4 = fadd float %sum3, %6
+ %sum5 = fadd float %sum4, %9
+ ret float %sum5
+}
+
+; Verifies nvptx-favor-non-generic keeps addrspacecasts between pointers of
+; different element types.
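+; (Such a cast cannot simply be stripped, so the generic conversion survives
+; as the cvta.shared instruction checked below.)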
+define i32 @ld_int_from_float() {
+; IR-LABEL: @ld_int_from_float
+; IR: addrspacecast
+; PTX-LABEL: ld_int_from_float(
+; PTX: cvta.shared.u{{(32|64)}}
+ %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
+ ret i32 %1
+}
+
+declare void @llvm.cuda.syncthreads() #3
+
+attributes #3 = { noduplicate nounwind }
+
diff --git a/test/CodeGen/NVPTX/addrspacecast-gvar.ll b/test/CodeGen/NVPTX/addrspacecast-gvar.ll
new file mode 100644
index 0000000..6afbdb8
--- /dev/null
+++ b/test/CodeGen/NVPTX/addrspacecast-gvar.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK: .visible .global .align 4 .u32 g = 42;
+; CHECK: .visible .global .align 4 .u32 g2 = generic(g);
+; CHECK: .visible .global .align 4 .u32 g3 = g;
+
+@g = addrspace(1) global i32 42
+@g2 = addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @g to i32*)
+@g3 = addrspace(1) global i32 addrspace(1)* @g
diff --git a/test/CodeGen/NVPTX/addrspacecast.ll b/test/CodeGen/NVPTX/addrspacecast.ll
index 98ea655..03b9a98 100644
--- a/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/test/CodeGen/NVPTX/addrspacecast.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s -check-prefix=PTX32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefix=PTX64
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -disable-nvptx-favor-non-generic | FileCheck %s -check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -disable-nvptx-favor-non-generic | FileCheck %s -check-prefix=PTX64
define i32 @conv1(i32 addrspace(1)* %ptr) {
diff --git a/test/CodeGen/NVPTX/local-stack-frame.ll b/test/CodeGen/NVPTX/local-stack-frame.ll
index 178dff1..c0d7d1c 100644
--- a/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -3,16 +3,16 @@
; Ensure we access the local stack properly
-; PTX32: mov.u32 %r{{[0-9]+}}, __local_depot{{[0-9]+}};
-; PTX32: cvta.local.u32 %SP, %r{{[0-9]+}};
-; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
-; PTX32: st.u32 [%SP+0], %r{{[0-9]+}};
-; PTX64: mov.u64 %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
-; PTX64: cvta.local.u64 %SP, %rl{{[0-9]+}};
-; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
-; PTX64: st.u32 [%SP+0], %r{{[0-9]+}};
+; PTX32: mov.u32 %r{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX32: cvta.local.u32 %SP, %r{{[0-9]+}};
+; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
+; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
+; PTX64: mov.u64 %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
+; PTX64: cvta.local.u64 %SP, %rl{{[0-9]+}};
+; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
+; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
define void @foo(i32 %a) {
%local = alloca i32, align 4
- store i32 %a, i32* %local
+ store volatile i32 %a, i32* %local
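+  ; volatile keeps the store from being optimized away, so the local stack
+  ; access is actually exercised (hence the st.volatile.u32 checks above)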
ret void
}
diff --git a/test/CodeGen/NVPTX/surf-read.ll b/test/CodeGen/NVPTX/surf-read.ll
new file mode 100644
index 0000000..a69d03e
--- /dev/null
+++ b/test/CodeGen/NVPTX/surf-read.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
+
+; CHECK: .entry foo
+define void @foo(i64 %img, float* %red, i32 %idx) {
+; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
+ %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
+; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
+ %ret = sitofp i32 %val to float
+; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+ store float %ret, float* %red
+ ret void
+}
+
+!nvvm.annotations = !{!1, !2}
+!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0}
diff --git a/test/CodeGen/NVPTX/surf-write.ll b/test/CodeGen/NVPTX/surf-write.ll
new file mode 100644
index 0000000..880231f
--- /dev/null
+++ b/test/CodeGen/NVPTX/surf-write.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
+
+; CHECK: .entry foo
+define void @foo(i64 %img, i32 %val, i32 %idx) {
+; CHECK: sust.b.1d.b32.trap [foo_param_0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+ tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
+ ret void
+}
+
+!nvvm.annotations = !{!1, !2}
+!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0}
diff --git a/test/CodeGen/NVPTX/tex-read.ll b/test/CodeGen/NVPTX/tex-read.ll
new file mode 100644
index 0000000..291060b
--- /dev/null
+++ b/test/CodeGen/NVPTX/tex-read.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32)
+
+; CHECK: .entry foo
+define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
+; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
+ %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx)
+ %ret = extractvalue { float, float, float, float } %val, 0
+; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
+ store float %ret, float* %red
+ ret void
+}
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0}
+!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1}
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index ccf5297..df83f8b 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,4 +1,5 @@
; RUN: llc -mcpu=g5 < %s | FileCheck %s
+; RUN: llc -mcpu=g5 -addr-sink-using-gep=1 < %s | FileCheck %s
;; Formerly crashed, see PR 1508
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index 00a402e..8802b97 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
-; XFAIL: *
define <4 x i32> @test() nounwind {
ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/aa-tbaa.ll b/test/CodeGen/PowerPC/aa-tbaa.ll
index d7f80fa..1939841 100644
--- a/test/CodeGen/PowerPC/aa-tbaa.ll
+++ b/test/CodeGen/PowerPC/aa-tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: llc -enable-misched -misched=shuffle -enable-aa-sched-mi -post-RA-scheduler=0 -mcpu=ppc64 < %s | FileCheck %s
+; RUN: llc -enable-misched -misched=shuffle -enable-aa-sched-mi -use-tbaa-in-sched-mi=0 -post-RA-scheduler=0 -mcpu=ppc64 < %s | FileCheck %s
; REQUIRES: asserts
; -misched=shuffle is NDEBUG only!
diff --git a/test/CodeGen/PowerPC/alias.ll b/test/CodeGen/PowerPC/alias.ll
new file mode 100644
index 0000000..86e4114
--- /dev/null
+++ b/test/CodeGen/PowerPC/alias.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=medium | FileCheck --check-prefix=CHECK --check-prefix=MEDIUM %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK --check-prefix=LARGE %s
+
+@foo = global i32 42
+@fooa = alias i32* @foo
+
+@foo2 = global i64 42
+@foo2a = alias i64* @foo2
+
+; CHECK-LABEL: bar:
+define i32 @bar() {
+; MEDIUM: addis 3, 2, fooa@toc@ha
+; LARGE: addis 3, 2, .LC1@toc@ha
+ %a = load i32* @fooa
+ ret i32 %a
+}
+
+; CHECK-LABEL: bar2:
+define i64 @bar2() {
+; MEDIUM: addis 3, 2, foo2a@toc@ha
+; MEDIUM: addi 3, 3, foo2a@toc@l
+; LARGE: addis 3, 2, .LC3@toc@ha
+ %a = load i64* @foo2a
+ ret i64 %a
+}
+
+; LARGE: .LC1:
+; LARGE-NEXT: .tc fooa[TC],fooa
+
+; LARGE: .LC3:
+; LARGE-NEXT: .tc foo2a[TC],foo2a
diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll
new file mode 100644
index 0000000..f92121b
--- /dev/null
+++ b/test/CodeGen/PowerPC/cc.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @test1(i64 %a, i64 %b) {
+entry:
+ %c = icmp eq i64 %a, %b
+ br label %foo
+
+foo:
+ call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cr0},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
+ br i1 %c, label %bar, label %end
+
+bar:
+ ret i64 %b
+
+end:
+ ret i64 %a
+
+; CHECK-LABEL: @test1
+; CHECK: mfcr [[REG1:[0-9]+]]
+; CHECK-DAG: cmpd
+; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
+; CHECK-DAG: stw [[REG1]], 8(1)
+; CHECK-DAG: stw [[REG2]], -4(1)
+
+; CHECK: sc
+; CHECK: lwz [[REG3:[0-9]+]], -4(1)
+; CHECK: mtocrf 128, [[REG3]]
+
+; CHECK: lwz [[REG4:[0-9]+]], 8(1)
+; CHECK-DAG: mtocrf 32, [[REG4]]
+; CHECK-DAG: mtocrf 16, [[REG4]]
+; CHECK-DAG: mtocrf 8, [[REG4]]
+; CHECK: blr
+}
+
+define i64 @test2(i64 %a, i64 %b) {
+entry:
+ %c = icmp eq i64 %a, %b
+ br label %foo
+
+foo:
+ call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
+ br i1 %c, label %bar, label %end
+
+bar:
+ ret i64 %b
+
+end:
+ ret i64 %a
+
+; CHECK-LABEL: @test2
+; CHECK: mfcr [[REG1:[0-9]+]]
+; CHECK-DAG: cmpd
+; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
+; CHECK-DAG: stw [[REG1]], 8(1)
+; CHECK-DAG: stw [[REG2]], -4(1)
+
+; CHECK: sc
+; CHECK: lwz [[REG3:[0-9]+]], -4(1)
+; CHECK: mtocrf 128, [[REG3]]
+
+; CHECK: lwz [[REG4:[0-9]+]], 8(1)
+; CHECK-DAG: mtocrf 32, [[REG4]]
+; CHECK-DAG: mtocrf 16, [[REG4]]
+; CHECK-DAG: mtocrf 8, [[REG4]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/ctrloop-le.ll b/test/CodeGen/PowerPC/ctrloop-le.ll
index 7b8185e..60b0536 100644
--- a/test/CodeGen/PowerPC/ctrloop-le.ll
+++ b/test/CodeGen/PowerPC/ctrloop-le.ll
@@ -2,6 +2,9 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc < %s -march=ppc64 | FileCheck %s
+; XFAIL: *
+; ScalarEvolution needs improvement
+
; CHECK: test_pos1_ir_sle
; CHECK: bdnz
; a < b
diff --git a/test/CodeGen/PowerPC/ctrloop-lt.ll b/test/CodeGen/PowerPC/ctrloop-lt.ll
index eaab61a..a9dc42c 100644
--- a/test/CodeGen/PowerPC/ctrloop-lt.ll
+++ b/test/CodeGen/PowerPC/ctrloop-lt.ll
@@ -2,6 +2,9 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc < %s -march=ppc64 | FileCheck %s
+; XFAIL: *
+; ScalarEvolution needs improvement
+
; CHECK: test_pos1_ir_slt
; CHECK: bdnz
; a < b
diff --git a/test/CodeGen/PowerPC/ctrloop-sh.ll b/test/CodeGen/PowerPC/ctrloop-sh.ll
new file mode 100644
index 0000000..d8e6fc7
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-p:32:32-i128:64-n32"
+target triple = "powerpc-ellcc-linux"
+
+; Function Attrs: nounwind
+define void @foo1(i128* %a, i128* readonly %b, i128* readonly %c) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i128* %b, align 16
+ %1 = load i128* %c, align 16
+ %shl = shl i128 %0, %1
+ store i128 %shl, i128* %a, align 16
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 2048
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; CHECK-LABEL: @foo1
+; CHECK-NOT: mtctr
+}
+
+; Function Attrs: nounwind
+define void @foo2(i128* %a, i128* readonly %b, i128* readonly %c) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i128* %b, align 16
+ %1 = load i128* %c, align 16
+ %shl = ashr i128 %0, %1
+ store i128 %shl, i128* %a, align 16
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 2048
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; CHECK-LABEL: @foo2
+; CHECK-NOT: mtctr
+}
+
+; Function Attrs: nounwind
+define void @foo3(i128* %a, i128* readonly %b, i128* readonly %c) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i128* %b, align 16
+ %1 = load i128* %c, align 16
+ %shl = lshr i128 %0, %1
+ store i128 %shl, i128* %a, align 16
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 2048
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; CHECK-LABEL: @foo3
+; CHECK-NOT: mtctr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index 0d6c4a6..6beea55 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -28,8 +28,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!10 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
!11 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!13 = metadata !{metadata !14}
-!14 = metadata !{metadata !15, metadata !16}
+!13 = metadata !{metadata !15, metadata !16}
!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!17 = metadata !{i32 1, i32 14, metadata !5, null}
diff --git a/test/CodeGen/PowerPC/indexed-load.ll b/test/CodeGen/PowerPC/indexed-load.ll
new file mode 100644
index 0000000..59fc058
--- /dev/null
+++ b/test/CodeGen/PowerPC/indexed-load.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+; The SplitIndexingFromLoad transformation exposed an isel backend bug. This
+; testcase used to generate stwx 4, 3, 64. stwx has no immediate-offset form
+; (note the 64), so it should not have been matched.
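+; A correct encoding for a constant offset is the D-form store, e.g.
+; 'stw 4, 64(3)', or an explicit add of the offset into the base register.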
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%class.test = type { [64 x i8], [5 x i8] }
+
+; CHECK-LABEL: f:
+; CHECK-NOT: stwx {{[0-9]+}}, {{[0-9]+}}, 64
+define void @f(%class.test* %this) {
+entry:
+ %Subminor.i.i = getelementptr inbounds %class.test* %this, i64 0, i32 1
+ %0 = bitcast [5 x i8]* %Subminor.i.i to i40*
+ %bf.load2.i.i = load i40* %0, align 4
+ %bf.clear7.i.i = and i40 %bf.load2.i.i, -8589934592
+ store i40 %bf.clear7.i.i, i40* %0, align 4
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
index b479559..c3ab747 100644
--- a/test/CodeGen/PowerPC/mcm-10.ll
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -18,7 +18,8 @@ entry:
; CHECK-LABEL: test_fn_static:
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: lwa {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-NOT: extsw
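+; (lwa is the sign-extending word load, so no separate extsw should remain)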
; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
; CHECK: .type [[VAR]],@object
; CHECK: .local [[VAR]]
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
index c49e865..033045c 100644
--- a/test/CodeGen/PowerPC/mcm-11.ll
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -18,7 +18,8 @@ entry:
; CHECK-LABEL: test_file_static:
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
-; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: lwa {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK-NOT: extsw
; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
; CHECK: .type [[VAR]],@object
; CHECK: .data
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
index a6e9855..c42cf0c 100644
--- a/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -22,7 +22,7 @@ entry:
; CHECK: Relocations [
; CHECK: Section (2) .rela.text {
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
-; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
+; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
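+; (the _DS relocation variant is required because the access now uses a
+; DS-form load such as lwa/ld, whose displacement must be a multiple of 4)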
@gi = global i32 5, align 4
@@ -39,7 +39,7 @@ entry:
; accessing file-scope variable gi.
;
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
-; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
+; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM3]]
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
define double @test_double_const() nounwind {
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r0.ll b/test/CodeGen/PowerPC/named-reg-alloc-r0.ll
new file mode 100644
index 0000000..e683f99
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r0.ll
@@ -0,0 +1,15 @@
+; RUN: not llc < %s -mtriple=powerpc-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=powerpc-unknown-linux-gnu 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i32 @get_reg() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK: Invalid register name global variable
+ %reg = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %reg
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"r0\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll
new file mode 100644
index 0000000..b047f9f
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i64 @get_reg() nounwind {
+entry:
+ %reg = call i64 @llvm.read_register.i64(metadata !0)
+ ret i64 %reg
+
+; CHECK-LABEL: @get_reg
+; CHECK: mr 3, 1
+
+; CHECK-DARWIN-LABEL: @get_reg
+; CHECK-DARWIN: mr r3, r1
+}
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+
+!0 = metadata !{metadata !"r1\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r1.ll b/test/CodeGen/PowerPC/named-reg-alloc-r1.ll
new file mode 100644
index 0000000..9d0eb34
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r1.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i32 @get_reg() nounwind {
+entry:
+ %reg = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %reg
+
+; CHECK-LABEL: @get_reg
+; CHECK: mr 3, 1
+
+; CHECK-DARWIN-LABEL: @get_reg
+; CHECK-DARWIN: mr r3, r1
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"r1\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll
new file mode 100644
index 0000000..df5085b
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i64 @get_reg() nounwind {
+entry:
+ %reg = call i64 @llvm.read_register.i64(metadata !0)
+ ret i64 %reg
+
+; CHECK-LABEL: @get_reg
+; CHECK: mr 3, 13
+
+; CHECK-DARWIN-LABEL: @get_reg
+; CHECK-DARWIN: mr r3, r13
+}
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+
+!0 = metadata !{metadata !"r13\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r13.ll b/test/CodeGen/PowerPC/named-reg-alloc-r13.ll
new file mode 100644
index 0000000..900ebb2
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r13.ll
@@ -0,0 +1,18 @@
+; RUN: not llc < %s -mtriple=powerpc-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i32 @get_reg() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK-DARWIN: Invalid register name global variable
+ %reg = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %reg
+
+; CHECK-LABEL: @get_reg
+; CHECK: mr 3, 13
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"r13\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
new file mode 100644
index 0000000..0da33fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
@@ -0,0 +1,17 @@
+; RUN: not llc < %s -mtriple=powerpc64-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i64 @get_reg() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK-DARWIN: Invalid register name global variable
+ %reg = call i64 @llvm.read_register.i64(metadata !0)
+ ret i64 %reg
+
+; CHECK-LABEL: @get_reg
+; CHECK: mr 3, 2
+}
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+
+!0 = metadata !{metadata !"r2\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r2.ll b/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
new file mode 100644
index 0000000..51e7e3e
--- /dev/null
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
@@ -0,0 +1,18 @@
+; RUN: not llc < %s -mtriple=powerpc-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i32 @get_reg() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK-DARWIN: Invalid register name global variable
+ %reg = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %reg
+
+; CHECK-LABEL: @get_reg
+; CHECK: mr 3, 2
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"r2\00"}
diff --git a/test/CodeGen/PowerPC/rlwimi-dyn-and.ll b/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
new file mode 100644
index 0000000..e02801f
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1() #0 {
+entry:
+ %conv67.reload = load i32* undef
+ %const = bitcast i32 65535 to i32
+ br label %next
+
+next:
+ %shl161 = shl nuw nsw i32 %conv67.reload, 15
+ %0 = load i8* undef, align 1
+ %conv169 = zext i8 %0 to i32
+ %shl170 = shl nuw nsw i32 %conv169, 7
+ %const_mat = add i32 %const, -32767
+ %shl161.masked = and i32 %shl161, %const_mat
+ %conv174 = or i32 %shl170, %shl161.masked
+ ret i32 %conv174
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: rlwimi 3, {{[0-9]+}}, 15, 0, 16
+; CHECK: blr
+}
+
+define i32 @test2() #0 {
+entry:
+ %conv67.reload = load i32* undef
+ %const = bitcast i32 65535 to i32
+ br label %next
+
+next:
+ %shl161 = shl nuw nsw i32 %conv67.reload, 15
+ %0 = load i8* undef, align 1
+ %conv169 = zext i8 %0 to i32
+ %shl170 = shl nuw nsw i32 %conv169, 7
+ %shl161.masked = and i32 %shl161, 32768
+ %conv174 = or i32 %shl170, %shl161.masked
+ ret i32 %conv174
+
+; CHECK-LABEL: @test2
+; CHECK: slwi 3, {{[0-9]+}}, 7
+; CHECK: rlwimi 3, {{[0-9]+}}, 15, 16, 16
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/splat-bug.ll b/test/CodeGen/PowerPC/splat-bug.ll
new file mode 100644
index 0000000..4b5250b
--- /dev/null
+++ b/test/CodeGen/PowerPC/splat-bug.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mcpu=ppc64 -O0 -fast-isel=false < %s | FileCheck %s
+
+; Checks for a previous bug where vspltisb/vaddubm were issued in place
+; of vspltish/vadduhm.
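+; The stored constant is an <8 x i16> splat of 16; since vspltish immediates
+; only reach 15, the value is built as vspltish 8 followed by vadduhm (8+8).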
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external global <16 x i8>
+
+define void @foo() nounwind ssp {
+; CHECK: foo:
+ store <16 x i8> <i8 0, i8 16, i8 0, i8 16, i8 0, i8 16, i8 0, i8 16, i8 0, i8 16, i8 0, i8 16, i8 0, i8 16, i8 0, i8 16>, <16 x i8>* @a
+; CHECK: vspltish [[REG:[0-9]+]], 8
+; CHECK: vadduhm {{[0-9]+}}, [[REG]], [[REG]]
+ ret void
+}
+
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index fffaefe..7dec426 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -33,7 +33,7 @@ entry:
; CHECK-LABEL: @local_address_gep_const_offset
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 4,
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
%0 = getelementptr i32 addrspace(3)* %in, i32 1
@@ -44,7 +44,7 @@ entry:
; Offset too large, can't fold into 16-bit immediate offset.
; CHECK-LABEL: @local_address_gep_large_const_offset
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_READ_B32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
@@ -119,7 +119,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
; CHECK-LABEL: @local_address_gep_const_offset_store
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 4
+; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
%gep = getelementptr i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
@@ -128,7 +128,7 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
; Offset too large, can't fold into 16-bit immediate offset.
; CHECK-LABEL: @local_address_gep_large_const_offset_store
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 65540
+; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
index 0d6bfb1..2d82c1e 100644
--- a/test/CodeGen/R600/64bit-kernel-args.ll
+++ b/test/CodeGen/R600/64bit-kernel-args.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
; SI-CHECK: @f64_kernel_arg
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 9
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 11
+; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; SI-CHECK: BUFFER_STORE_DWORDX2
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index e9db52a..711a2bc 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -140,3 +140,28 @@ entry:
store i64 %1, i64 addrspace(1)* %out
ret void
}
+
+; Test i64 add inside a branch. We don't allow SALU instructions inside
+; branches.
+; FIXME: We are being conservative here. We could allow this in some cases.
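+; (Presumably because the branch may be divergent: SALU results are per-wave
+; rather than per-lane, so the add has to be done with VALU instructions.)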
+; FUNC-LABEL: @add64_in_branch
+; SI-CHECK-NOT: S_ADD_I32
+; SI-CHECK-NOT: S_ADDC_U32
+define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+entry:
+ %0 = icmp eq i64 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i64 addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = add i64 %a, %b
+ br label %endif
+
+endif:
+ %3 = phi i64 [%1, %if], [%2, %else]
+ store i64 %3, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index 7081b07..c9eaeda 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() readnone
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index 15d2ed2..f75a8ac 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
; Test that codegenprepare understands address space sizes
@@ -10,8 +10,8 @@
; CHECK-LABEL: @do_as_ptr_calcs:
; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 20
-; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 12
+; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0x14
+; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0xc
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index cb2a1c8..c2362da 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
index 652bbfe..e254c5f 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -1,5 +1,5 @@
; XFAIL: *
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
declare i32 @llvm.SI.tid() readnone
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
new file mode 100644
index 0000000..d803474
--- /dev/null
+++ b/test/CodeGen/R600/call.ll
@@ -0,0 +1,33 @@
+; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported call to function defined_function in test_call
+
+
+declare i32 @external_function(i32) nounwind
+
+define i32 @defined_function(i32 %x) nounwind noinline {
+ %y = add i32 %x, 8
+ ret i32 %y
+}
+
+define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %c = call i32 @defined_function(i32 %b) nounwind
+ %result = add i32 %a, %c
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %a = load i32 addrspace(1)* %in
+ %b = load i32 addrspace(1)* %b_ptr
+ %c = call i32 @external_function(i32 %b) nounwind
+ %result = add i32 %a, %c
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index 2e70d47..dc056e0 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @anyext_load_i8:
; EG: AND_INT
@@ -87,8 +87,9 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
}
; FUNC-LABEL: @zextload_global_i8_to_i64
+; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in, align 8
@@ -98,8 +99,9 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
}
; FUNC-LABEL: @zextload_global_i16_to_i64
+; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in, align 8
@@ -109,8 +111,9 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
}
; FUNC-LABEL: @zextload_global_i32_to_i64
+; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0
+; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
; SI: BUFFER_STORE_DWORDX2
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in, align 8
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
new file mode 100644
index 0000000..5cd1b04
--- /dev/null
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @extract_vector_elt_v2i16
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
+ %p0 = extractelement <2 x i16> %foo, i32 0
+ %p1 = extractelement <2 x i16> %foo, i32 1
+ %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ store i16 %p1, i16 addrspace(1)* %out, align 2
+ store i16 %p0, i16 addrspace(1)* %out1, align 2
+ ret void
+}
+
+; FUNC-LABEL: @extract_vector_elt_v4i16
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_STORE_SHORT
+define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
+ %p0 = extractelement <4 x i16> %foo, i32 0
+ %p1 = extractelement <4 x i16> %foo, i32 2
+ %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ store i16 %p1, i16 addrspace(1)* %out, align 2
+ store i16 %p0, i16 addrspace(1)* %out1, align 2
+ ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index 2cd3a4f..b87ce22 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -49,6 +49,17 @@ entry:
ret void
}
+; SI-CHECK-LABEL: @fabs_fold
+; SI-CHECK-NOT: V_AND_B32_e32
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
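+; (the fabs folds into the |...| source-operand modifier of the multiply,
+; so no separate V_AND_B32 sign-bit mask is emitted)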
+define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
+entry:
+ %0 = call float @fabs(float %in0)
+ %1 = fmul float %0, %in1
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
declare float @fabs(float ) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index 5c5ee7e..9c3a7e3 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: @fconst_f64
-; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00
-; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00
+; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
+; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double addrspace(1)* %in
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index f4e6be6..4cddc73 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -51,7 +51,7 @@ entry:
; R600-CHECK: -KC0[2].Z
; SI-CHECK-LABEL: @fneg_free
; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
-; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0
+; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0
define void @fneg_free(float addrspace(1)* %out, i32 %in) {
entry:
%0 = bitcast i32 %in to float
@@ -59,3 +59,14 @@ entry:
store float %1, float addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @fneg_fold
+; SI-CHECK-NOT: V_XOR_B32
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+define void @fneg_fold(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fsub float -0.0, %in
+ %1 = fmul float %0, %in
+ store float %1, float addrspace(1)* %out
+ ret void
+}
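+
+; As with @fabs_fold, the negate is expected to fold into a VOP3 source
+; modifier (-v...) on the multiply operand, so no V_XOR_B32 with
+; 0x80000000 should be needed.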
diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll
new file mode 100644
index 0000000..bf607ce
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @fp_to_uint_i32_f64
+; SI: V_CVT_U32_F64_e32
+define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
+ %cast = fptoui double %in to i32
+ store i32 %cast, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
index ee914fa..ab2c0bf 100644
--- a/test/CodeGen/R600/gep-address-space.ll
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: @use_gep_address_space:
; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 64
+; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
%p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll
new file mode 100644
index 0000000..ebd7811
--- /dev/null
+++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -0,0 +1,58 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1
+
+; FUNC-LABEL: @test_i8
+; EG: CF_END
+; SI: BUFFER_STORE_BYTE
+; SI: S_ENDPGM
+define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
+ %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
+ %1 = load i8 addrspace(2)* %arrayidx, align 1
+ store i8 %1, i8 addrspace(1)* %out
+ ret void
+}
+
+@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
+
+; FUNC-LABEL: @test_i16
+; EG: CF_END
+; SI: BUFFER_STORE_SHORT
+; SI: S_ENDPGM
+define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
+ %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
+ %1 = load i16 addrspace(2)* %arrayidx, align 2
+ store i16 %1, i16 addrspace(1)* %out
+ ret void
+}
+
+%struct.bar = type { float, [5 x i8] }
+
+; The illegal i8s aren't handled
+@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
+
+; FUNC-LABEL: @struct_bar_gv_load
+define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i8 addrspace(2)* %gep, align 1
+ store i8 %load, i8 addrspace(1)* %out, align 1
+ ret void
+}
+
+
+; The private load isn't scalarized.
+@array_vector_gv = internal addrspace(2) constant [4 x <4 x i32>] [ <4 x i32> <i32 1, i32 2, i32 3, i32 4>,
+ <4 x i32> <i32 5, i32 6, i32 7, i32 8>,
+ <4 x i32> <i32 9, i32 10, i32 11, i32 12>,
+ <4 x i32> <i32 13, i32 14, i32 15, i32 16> ]
+
+; FUNC-LABEL: @array_vector_gv_load
+define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
+ %load = load <4 x i32> addrspace(2)* %gep, align 16
+ store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16
+ ret void
+}
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index cda7ab1..0176061 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
; XXX: Test on SI once 64-bit adds are supported.
@@ -6,12 +10,12 @@
; FUNC-LABEL: @float
-; R600-DAG: MOV {{\** *}}T2.X
-; R600-DAG: MOV {{\** *}}T3.X
-; R600-DAG: MOV {{\** *}}T4.X
-; R600-DAG: MOV {{\** *}}T5.X
-; R600-DAG: MOV {{\** *}}T6.X
-; R600: MOVA_INT
+; EG-DAG: MOV {{\** *}}T2.X
+; EG-DAG: MOV {{\** *}}T3.X
+; EG-DAG: MOV {{\** *}}T4.X
+; EG-DAG: MOV {{\** *}}T5.X
+; EG-DAG: MOV {{\** *}}T6.X
+; EG: MOVA_INT
define void @float(float addrspace(1)* %out, i32 %index) {
entry:
@@ -25,12 +29,12 @@ entry:
; FUNC-LABEL: @i32
-; R600-DAG: MOV {{\** *}}T2.X
-; R600-DAG: MOV {{\** *}}T3.X
-; R600-DAG: MOV {{\** *}}T4.X
-; R600-DAG: MOV {{\** *}}T5.X
-; R600-DAG: MOV {{\** *}}T6.X
-; R600: MOVA_INT
+; EG-DAG: MOV {{\** *}}T2.X
+; EG-DAG: MOV {{\** *}}T3.X
+; EG-DAG: MOV {{\** *}}T4.X
+; EG-DAG: MOV {{\** *}}T5.X
+; EG-DAG: MOV {{\** *}}T6.X
+; EG: MOVA_INT
define void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
@@ -39,3 +43,30 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+
+%struct.foo = type { float, [5 x i32] }
+
+@struct_foo_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
+
+; FUNC-LABEL: @struct_foo_gv_load
+
+define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i32 addrspace(2)* %gep, align 4
+ store i32 %load, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> <i32 1>,
+ <1 x i32> <i32 2>,
+ <1 x i32> <i32 3>,
+ <1 x i32> <i32 4> ]
+
+; FUNC-LABEL: @array_v1_gv_load
+define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
+ %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
+ %load = load <1 x i32> addrspace(2)* %gep, align 4
+ store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll
index a60bc37..68ffaae 100644
--- a/test/CodeGen/R600/infinite-loop.ll
+++ b/test/CodeGen/R600/infinite-loop.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @infinite_loop:
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 999
+; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
; SI: BUFFER_STORE_DWORD [[REG]]
; SI: S_WAITCNT vmcnt(0) expcnt(0)
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 530d1cc..43b4efc 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
@@ -173,3 +173,29 @@ define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8>
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
ret void
}
+
+; This test requires handling INSERT_SUBREG in SIFixSGPRCopies. Check that
+; the compiler doesn't crash.
+; SI-LABEL: @insert_split_bb
+define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
+entry:
+ %0 = insertelement <2 x i32> undef, i32 %a, i32 0
+ %1 = icmp eq i32 %a, 0
+ br i1 %1, label %if, label %else
+
+if:
+ %2 = load i32 addrspace(1)* %in
+ %3 = insertelement <2 x i32> %0, i32 %2, i32 1
+ br label %endif
+
+else:
+ %4 = getelementptr i32 addrspace(1)* %in, i32 1
+ %5 = load i32 addrspace(1)* %4
+ %6 = insertelement <2 x i32> %0, i32 %5, i32 1
+ br label %endif
+
+endif:
+ %7 = phi <2 x i32> [%3, %if], [%6, %else]
+ store <2 x i32> %7, <2 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll b/test/CodeGen/R600/insert_vector_elt_f64.ll
index e334be1..595bc59 100644
--- a/test/CodeGen/R600/insert_vector_elt_f64.ll
+++ b/test/CodeGen/R600/insert_vector_elt_f64.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @dynamic_insertelement_v2f64:
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
index 247e316..6fc6979 100644
--- a/test/CodeGen/R600/kernel-args.ll
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -17,7 +17,7 @@ entry:
; EG-CHECK-LABEL: @i8_zext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i8_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -29,7 +29,7 @@ entry:
; EG-CHECK-LABEL: @i8_sext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i8_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -53,7 +53,7 @@ entry:
; EG-CHECK-LABEL: @i16_zext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i16_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -65,7 +65,7 @@ entry:
; EG-CHECK-LABEL: @i16_sext_arg
; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i16_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -77,7 +77,7 @@ entry:
; EG-CHECK-LABEL: @i32_arg
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @i32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
@@ -87,7 +87,7 @@ entry:
; EG-CHECK-LABEL: @f32_arg
; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI-CHECK-LABEL: @f32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 11
+; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
@@ -122,7 +122,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: @v2i32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
@@ -133,7 +133,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI-CHECK-LABEL: @v2f32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 11
+; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
@@ -166,7 +166,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: @v3i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
@@ -178,7 +178,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI-CHECK-LABEL: @v3f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
@@ -223,7 +223,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: @v4i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI-CHECK-LABEL: @v4f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 13
+; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
@@ -300,7 +300,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: @v8i32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17
+; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
@@ -317,7 +317,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI-CHECK-LABEL: @v8f32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 17
+; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
@@ -422,7 +422,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: @v16i32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25
+; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
@@ -447,7 +447,7 @@ entry:
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI-CHECK-LABEL: @v16f32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 25
+; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index c3f000a..eb50942 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -1,11 +1,12 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; FUNC-LABEL: @bfe_i32_arg_arg_arg
; SI: V_BFE_I32
; EG: BFE_INT
+; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
@@ -38,3 +39,388 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @v_bfe_print_arg
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
+define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
+ %load = load i32 addrspace(1)* %src0, align 4
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
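+; In the V_BFE_I32 check above, the assumed operand order is
+; (src, offset, width): extract 8 bits starting at bit 2 of the source
+; and sign-extend the result, mirroring the (i32 %load, i32 2, i32 8)
+; arguments to the intrinsic.
+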
+; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_6
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: S_ENDPGM
+define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_7
+; SI-NOT: SHL
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FIXME: The shifts should be 1 BFE
+; FUNC-LABEL: @bfe_i32_test_8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_9
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_10
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_11
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_12
+; SI-NOT: BFE
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_13
+; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = ashr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_test_14
+; SI-NOT: LSHR
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = lshr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_0
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_1
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_2
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_3
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_4
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_5
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_6
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_7
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_8
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_9
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_10
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_11
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
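+
+; A worked example for the -6 expected above, assuming signed BFE
+; extracts %width bits starting at %offset and sign-extends them:
+; 160 = 0b10100000, so bits [4,8) are 0b1010; the top extracted bit is
+; set, giving 10 - 16 = -6.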
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_12
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_13
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_14
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_15
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_16
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_17
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_i32_constant_fold_test_18
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
+ %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; XXX - This should really be a single BFE, but the sext_inreg of the
+; extended type i24 is never custom lowered.
+; FUNC-LABEL: @bfe_sext_in_reg_i24
+; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
+; XSI-NOT: SHL
+; XSI-NOT: SHR
+; XSI: BUFFER_STORE_DWORD [[BFE]],
+define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
+ %shl = shl i32 %bfe, 8
+ %ashr = ashr i32 %shl, 8
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
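+
+; For reference, a sketch of the single-BFE form the XXX comment above
+; alludes to: the shl/ashr pair sign-extends the low 24 bits, so it
+; should fold into the existing bfe as
+;   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
+; selecting to one V_BFE_I32 dst, src, 0, 24 with no shift pair.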
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 0d47863..1a62253 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -38,3 +38,517 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset
+; SI-NOT: BFE
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zextload_i8
+; SI: BUFFER_LOAD_UBYTE
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %load = load i8 addrspace(1)* %in
+ %ext = zext i8 %load to i32
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i16
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 65535
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
+; SI-NEXT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_7
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
+; SI-NEXT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 255
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8
+; SI: BUFFER_LOAD_DWORD
+; SI: V_ADD_I32
+; SI-NEXT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %load = load i32 addrspace(1)* %in, align 4
+ %add = add i32 %load, 1
+ %ext = and i32 %add, 65535
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_1
+; SI: BUFFER_LOAD_DWORD
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: S_ENDPGM
+; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
+define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_4
+; SI-NOT: LSHL
+; SI-NOT: SHR
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %shr = lshr i32 %shl, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_5
+; SI: BUFFER_LOAD_DWORD
+; SI-NOT: LSHL
+; SI-NOT: SHR
+; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %shr = ashr i32 %shl, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_6
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: S_ENDPGM
+define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_7
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_8
+; SI-NOT: BFE
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_9
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_10
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_11
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_12
+; SI-NOT: BFE
+; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_13
+; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = ashr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_test_14
+; SI-NOT: LSHR
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = lshr i32 %x, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
+  store i32 %bfe, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_0
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_1
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_2
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_3
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_4
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_5
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_6
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_7
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_8
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_9
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_10
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_11
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
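+
+; Worked example for the 10 expected above, assuming unsigned BFE
+; zero-extends the %width bits extracted at %offset:
+; 160 = 0b10100000, bits [4,8) = 0b1010 = 10.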
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_12
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_13
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_14
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_15
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_16
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_17
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_u32_constant_fold_test_18
+; SI-NOT: BFE
+; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
+; SI: BUFFER_STORE_DWORD [[VREG]],
+; SI: S_ENDPGM
+; EG-NOT: BFE
+define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
+ %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
+ store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
new file mode 100644
index 0000000..95795ea
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FIXME: Store of i32 seems to be broken pre-EG somehow?
+
+declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_imad24
+; SI: V_MAD_I32_I24
+; CM: MULADD_INT24
+; R600: MULLO_INT
+; R600: ADD_INT
+define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
+ %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out, align 4
+ ret void
+}
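+
+; A worked example, assuming mad24 behaves like V_MAD_I32_I24, i.e.
+; (sext24(src0) * sext24(src1)) + src2 with a 32-bit result:
+; imad24(3, 5, 7) = 3 * 5 + 7 = 22.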
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
new file mode 100644
index 0000000..8ee3520
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_imul24
+; SI: V_MUL_I32_I24
+; CM: MUL_INT24
+; R600: MULLO_INT
+define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
+ store i32 %mul, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
new file mode 100644
index 0000000..afdfb18
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_umad24
+; SI: V_MAD_U32_U24
+; EG: MULADD_UINT24
+; R600: MULLO_UINT
+; R600: ADD_INT
+define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
+ %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
+ store i32 %mad, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
new file mode 100644
index 0000000..72a3602
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
+
+; FUNC-LABEL: @test_umul24
+; SI: V_MUL_U32_U24
+; EG: MUL_UINT24
+; R600: MULLO_UINT
+define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
+ %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone
+ store i32 %mul, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
index 569efb6..740581a 100644
--- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
+++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
@@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: @test1
-;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 32, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test1(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -11,7 +11,7 @@ define void @test1(i32 %a1, i32 %vaddr) #0 {
}
;CHECK-LABEL: @test2
-;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 24, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test2(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -21,7 +21,7 @@ define void @test2(i32 %a1, i32 %vaddr) #0 {
}
;CHECK-LABEL: @test3
-;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 16, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test3(i32 %a1, i32 %vaddr) #0 {
%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
@@ -31,7 +31,7 @@ define void @test3(i32 %a1, i32 %vaddr) #0 {
}
;CHECK-LABEL: @test4
-;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
define void @test4(i32 %vdata, i32 %vaddr) #0 {
call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index aaf2305..9e7a4de 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,19 +1,40 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-;CHECK: MULADD_IEEE *
-;CHECK: FRACT *
-;CHECK: ADD *
-;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;FUNC-LABEL: @test
+;EG: MULADD_IEEE *
+;EG: FRACT *
+;EG: ADD *
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: COS
+;SI: V_COS_F32
+;SI-NOT: V_COS_F32
-define void @test(<4 x float> inreg %reg0) #0 {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = call float @llvm.cos.f32(float %r0)
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+define void @test(float addrspace(1)* %out, float %x) #1 {
+ %cos = call float @llvm.cos.f32(float %x)
+ store float %cos, float addrspace(1)* %out
+ ret void
+}
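+
+; The MULADD_IEEE/FRACT/ADD sequence checked above is range reduction:
+; the hardware COS appears to take its argument in revolutions, so %x is
+; scaled by 1/(2*pi) and wrapped into [-0.5, 0.5] before the COS itself.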
+
+;FUNC-LABEL: @testv
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: COS
+;SI: V_COS_F32
+;SI: V_COS_F32
+;SI: V_COS_F32
+;SI: V_COS_F32
+;SI-NOT: V_COS_F32
+
+define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
+ %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
+ store <4 x float> %cos, <4 x float> addrspace(1)* %out
ret void
}
declare float @llvm.cos.f32(float) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll
new file mode 100644
index 0000000..a7a909a
--- /dev/null
+++ b/test/CodeGen/R600/llvm.rint.f64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @f64
+; CI: V_RNDNE_F64_e32
+define void @f64(double addrspace(1)* %out, double %in) {
+entry:
+ %0 = call double @llvm.rint.f64(double %in)
+ store double %0, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @v2f64
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+define void @v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+entry:
+ %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
+ store <2 x double> %0, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @v4f64
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+; CI: V_RNDNE_F64_e32
+define void @v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+entry:
+ %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
+ store <4 x double> %0, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+declare double @llvm.rint.f64(double) #0
+declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
+declare <4 x double> @llvm.rint.v4f64(<4 x double>) #0
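
V_RNDNE is a round-to-nearest-even instruction, matching what llvm.rint computes under the default rounding mode. A few worked values for reference (standard round-half-to-even behavior, not from the commit):

  rint(2.5)  -> 2.0    (tie: round to the even neighbor)
  rint(3.5)  -> 4.0    (tie: round to the even neighbor)
  rint(2.4)  -> 2.0    (no tie: round to nearest)
  rint(-1.5) -> -2.0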
diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll
index c174b33..db8352f 100644
--- a/test/CodeGen/R600/llvm.rint.ll
+++ b/test/CodeGen/R600/llvm.rint.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; R600-CHECK: @f32
-; R600-CHECK: RNDNE
-; SI-CHECK: @f32
-; SI-CHECK: V_RNDNE_F32_e32
+; FUNC-LABEL: @f32
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.rint.f32(float %in)
@@ -12,12 +12,12 @@ entry:
ret void
}
-; R600-CHECK: @v2f32
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; SI-CHECK: @v2f32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
+; FUNC-LABEL: @v2f32
+; R600: RNDNE
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in)
@@ -25,16 +25,16 @@ entry:
ret void
}
-; R600-CHECK: @v4f32
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; R600-CHECK: RNDNE
-; SI-CHECK: @v4f32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
-; SI-CHECK: V_RNDNE_F32_e32
+; FUNC-LABEL: @v4f32
+; R600: RNDNE
+; R600: RNDNE
+; R600: RNDNE
+; R600: RNDNE
+
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
+; SI: V_RNDNE_F32_e32
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in)
@@ -42,13 +42,8 @@ entry:
ret void
}
-; Function Attrs: nounwind readonly
declare float @llvm.rint.f32(float) #0
-
-; Function Attrs: nounwind readonly
declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
-
-; Function Attrs: nounwind readonly
declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 9eb9983..41c363c 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,19 +1,41 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
-;CHECK: MULADD_IEEE *
-;CHECK: FRACT *
-;CHECK: ADD *
-;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;FUNC-LABEL: test
+;EG: MULADD_IEEE *
+;EG: FRACT *
+;EG: ADD *
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: SIN
+;SI: V_MUL_F32
+;SI: V_SIN_F32
+;SI-NOT: V_SIN_F32
-define void @test(<4 x float> inreg %reg0) #0 {
- %r0 = extractelement <4 x float> %reg0, i32 0
- %r1 = call float @llvm.sin.f32( float %r0)
- %vec = insertelement <4 x float> undef, float %r1, i32 0
- call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+define void @test(float addrspace(1)* %out, float %x) #1 {
+ %sin = call float @llvm.sin.f32(float %x)
+ store float %sin, float addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: testv
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+;EG-NOT: SIN
+;SI: V_SIN_F32
+;SI: V_SIN_F32
+;SI: V_SIN_F32
+;SI: V_SIN_F32
+;SI-NOT: V_SIN_F32
+
+define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+ %sin = call <4 x float> @llvm.sin.v4f32(<4 x float> %vx)
+ store <4 x float> %sin, <4 x float> addrspace(1)* %out
ret void
}
declare float @llvm.sin.f32(float) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll
index 0d0d186..4eee37f 100644
--- a/test/CodeGen/R600/llvm.sqrt.ll
+++ b/test/CodeGen/R600/llvm.sqrt.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 --mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK-LABEL: @sqrt_f32
; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll
new file mode 100644
index 0000000..9ba81b8
--- /dev/null
+++ b/test/CodeGen/R600/load-i1.ll
@@ -0,0 +1,107 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @global_copy_i1_to_i1
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_AND_B32_e32 v{{[0-9]+}}, 1
+; SI: BUFFER_STORE_BYTE
+; SI: S_ENDPGM
+define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ store i1 %load, i1 addrspace(1)* %out, align 1
+ ret void
+}
+
+; SI-LABEL: @global_sextload_i1_to_i32
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @global_zextload_i1_to_i32
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @global_sextload_i1_to_i64
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = sext i1 %load to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @global_zextload_i1_to_i64
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %load = load i1 addrspace(1)* %in
+ %ext = zext i1 %load to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @i1_arg
+; SI: BUFFER_LOAD_UBYTE
+; SI: V_AND_B32_e32
+; SI: BUFFER_STORE_BYTE
+; SI: S_ENDPGM
+define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
+ store i1 %x, i1 addrspace(1)* %out, align 1
+ ret void
+}
+
+; SI-LABEL: @i1_arg_zext_i32
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = zext i1 %x to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @i1_arg_zext_i64
+; SI: BUFFER_LOAD_UBYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = zext i1 %x to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: @i1_arg_sext_i32
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORD
+; SI: S_ENDPGM
+define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = sext i1 %x to i32
+ store i32 %ext, i32addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @i1_arg_sext_i64
+; XSI: BUFFER_LOAD_BYTE
+; SI: BUFFER_STORE_DWORDX2
+; SI: S_ENDPGM
+define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = sext i1 %x to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll
index 87f18ae..c52b41b 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/R600/local-64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @local_i32_load
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 28, [M0]
+; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
; SI: BUFFER_STORE_DWORD [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%gep = getelementptr i32 addrspace(3)* %in, i32 7
@@ -11,7 +11,7 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw
}
; SI-LABEL: @local_i32_load_0_offset
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0, [M0]
+; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
; SI: BUFFER_STORE_DWORD [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
%val = load i32 addrspace(3)* %in, align 4
@@ -21,7 +21,7 @@ define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %
; SI-LABEL: @local_i8_load_i16_max_offset
; SI-NOT: ADD
-; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, -1, [M0]
+; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
; SI: BUFFER_STORE_BYTE [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65535
@@ -31,9 +31,9 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
}
; SI-LABEL: @local_i8_load_over_i16_max_offset
-; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 65536
+; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0, [M0]
+; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
; SI: BUFFER_STORE_BYTE [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
%gep = getelementptr i8 addrspace(3)* %in, i32 65536
@@ -44,7 +44,7 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa
; SI-LABEL: @local_i64_load
; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 56, [M0]
+; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%gep = getelementptr i64 addrspace(3)* %in, i32 7
@@ -54,7 +54,7 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw
}
; SI-LABEL: @local_i64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0, [M0]
+; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
%val = load i64 addrspace(3)* %in, align 8
@@ -64,7 +64,7 @@ define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %
; SI-LABEL: @local_f64_load
; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 56, [M0]
+; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%gep = getelementptr double addrspace(3)* %in, i32 7
@@ -74,7 +74,7 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in)
}
; SI-LABEL: @local_f64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0, [M0]
+; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
; SI: BUFFER_STORE_DWORDX2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
%val = load double addrspace(3)* %in, align 8
@@ -84,7 +84,7 @@ define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace
; SI-LABEL: @local_i64_store
; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 56 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
%gep = getelementptr i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
@@ -93,7 +93,7 @@ define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
; SI-LABEL: @local_i64_store_0_offset
; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
@@ -101,7 +101,7 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
; SI-LABEL: @local_f64_store
; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 56 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
define void @local_f64_store(double addrspace(3)* %out) nounwind {
%gep = getelementptr double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
@@ -109,7 +109,7 @@ define void @local_f64_store(double addrspace(3)* %out) nounwind {
}
; SI-LABEL: @local_f64_store_0_offset
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
@@ -117,8 +117,8 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v2i64_store
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 120 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 112 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
@@ -127,8 +127,8 @@ define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v2i64_store_0_offset
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
ret void
@@ -136,10 +136,10 @@ define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v4i64_store
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 248 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 240 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 232 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 224 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
@@ -148,10 +148,10 @@ define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
; SI-LABEL: @local_v4i64_store_0_offset
; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 24 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 16 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
+; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index 616000d..1e42285 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -28,8 +28,8 @@
; constant offsets.
; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 16
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0,
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
+; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index 8a9cba2..128f661 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -1,5 +1,5 @@
; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "r600--"
diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll
index df063ec..abb5290 100644
--- a/test/CodeGen/R600/mad_int24.ll
+++ b/test/CodeGen/R600/mad_int24.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK: @i32_mad24
+; FUNC-LABEL: @i32_mad24
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG-CHECK: MULLO_INT
-; CM-CHECK: MULADD_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
-; SI-CHECK: V_MAD_I32_I24
+; EG: MULLO_INT
+; Make sure we aren't masking the inputs.
+; CM-NOT: AND
+; CM: MULADD_INT24
+; SI-NOT: AND
+; SI: V_MAD_I32_I24
define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = shl i32 %a, 8
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll
index 3dcadc9..0f0893b 100644
--- a/test/CodeGen/R600/mad_uint24.ll
+++ b/test/CodeGen/R600/mad_uint24.ll
@@ -1,11 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK-LABEL: @u32_mad24
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
-; SI-CHECK-LABEL: @u32_mad24
-; SI-CHECK: V_MAD_U32_U24
+; FUNC-LABEL: @u32_mad24
+; EG: MULADD_UINT24
+; SI: V_MAD_U32_U24
define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
@@ -19,18 +18,14 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_mad24
-; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
+; FUNC-LABEL: @i16_mad24
; The order of A and B does not matter.
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]]
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG-CHECK: 16
-; SI-CHECK-LABEL: @i16_mad24
-; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 16
+; SI: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
entry:
@@ -41,18 +36,13 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i8_mad24
-; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
-; The order of A and B does not matter.
-; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]], [[A]], [[B]], [[C]]
+; FUNC-LABEL: @i8_mad24
+; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG-CHECK: 8
-; SI-CHECK-LABEL: @i8_mad24
-; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
+; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
+; EG: 8
+; SI: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
entry:
@@ -62,3 +52,24 @@ entry:
store i32 %2, i32 addrspace(1)* %out
ret void
}
+
+; This tests for a bug where the mad_u24 pattern matcher would call
+; SimplifyDemandedBits on the first operand of the mul instruction
+; assuming that the pattern would be matched to a 24-bit mad. This
+; led to some instructions being incorrectly erased when the entire
+; 24-bit mad pattern wasn't being matched.
+
+; Check that the select instruction is not deleted.
+; FUNC-LABEL: @i24_i32_i32_mad
+; EG: CNDE_INT
+; SI: V_CNDMASK
+define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+ %0 = ashr i32 %a, 8
+ %1 = icmp ne i32 %c, 0
+ %2 = select i1 %1, i32 %0, i32 34
+ %3 = mul i32 %2, %c
+ %4 = add i32 %3, %d
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
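
In @i24_i32_i32_mad only the first mul operand can look like a 24-bit value, so the match fails partway, which is exactly the case that used to erase the select. For contrast, a minimal sketch (my reconstruction, not part of the commit) of the complete 24-bit mad pattern the matcher wants, with both multiply operands sign-extended from 24 bits the same way @i32_mad24 does it:

define void @mad24_pattern(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
  %a.shl = shl i32 %a, 8
  %a.24 = ashr i32 %a.shl, 8     ; sign-extend the low 24 bits of %a
  %b.shl = shl i32 %b, 8
  %b.24 = ashr i32 %b.shl, 8     ; sign-extend the low 24 bits of %b
  %mul = mul i32 %a.24, %b.24    ; 24-bit multiply...
  %mad = add i32 %mul, %c        ; ...plus the add makes it a mad
  store i32 %mad, i32 addrspace(1)* %out
  ret void
}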
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index 2d5ddeb..f465d3d 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -6,7 +6,7 @@
; MUBUF load with an immediate byte offset that fits into 12 bits
; CHECK-LABEL: @mubuf_load0
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1
@@ -17,7 +17,7 @@ entry:
; MUBUF load with the largest possible immediate offset
; CHECK-LABEL: @mubuf_load1
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4095 ; encoding: [0xff,0x8f
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = getelementptr i8 addrspace(1)* %in, i64 4095
@@ -28,7 +28,7 @@ entry:
; MUBUF load with an immediate byte offset that doesn't fit into 12 bits
; CHECK-LABEL: @mubuf_load2
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0 ; encoding: [0x00,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1024
@@ -40,7 +40,7 @@ entry:
; MUBUF load with a 12-bit immediate offset and a register offset
; CHECK-LABEL: @mubuf_load3
; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@@ -56,7 +56,7 @@ entry:
; MUBUF store with an immediate byte offset that fits into 12 bits
; CHECK-LABEL: @mubuf_store0
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1
@@ -66,7 +66,7 @@ entry:
; MUBUF store with the largest possible immediate offset
; CHECK-LABEL: @mubuf_store1
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4095 ; encoding: [0xff,0x8f
+; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
@@ -77,7 +77,7 @@ entry:
; MUBUF store with an immediate byte offset that doesn't fit into 12 bits
; CHECK-LABEL: @mubuf_store2
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0 ; encoding: [0x00,0x80
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1024
@@ -88,7 +88,7 @@ entry:
; MUBUF store with a 12-bit immediate offset and a register offset
; CHECK-LABEL: @mubuf_store3
; CHECK-NOT: ADD
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 4 ; encoding: [0x04,0x80
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 %offset
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index e176148..6ed754c 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -1,15 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; mul24 and mad24 are affected
-;EG-CHECK: @test2
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test2
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test2
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -20,17 +19,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;EG-CHECK: @test4
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test4
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test4
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -52,3 +50,32 @@ define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
store i32 %trunc, i32 addrspace(1)* %out, align 8
ret void
}
+
+; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top
+; 32 bits of both arguments are sign bits.
+; FUNC-LABEL: @mul64_sext_c
+; EG-DAG: MULLO_INT
+; EG-DAG: MULHI_INT
+; SI-DAG: V_MUL_LO_I32
+; SI-DAG: V_MUL_HI_I32
+define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = sext i32 %in to i64
+ %1 = mul i64 %0, 80
+ store i64 %1, i64 addrspace(1)* %out
+ ret void
+}
+
+; A standard 64-bit multiply. The expansion should be around 6 instructions.
+; It would be difficult to match the expansion correctly without writing
+; a really complicated list of FileCheck expressions. I don't want
+; to confuse people who may 'break' this test with a correct optimization,
+; so this test just uses FUNC-LABEL to make sure the compiler does not
+; crash with a 'failed to select' error.
+; FUNC-LABEL: @mul64
+define void @mul64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+ %0 = mul i64 %a, %b
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
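
Why MUL_HI and MUL_LO alone cover @mul64_sext_c (my summary of the standard argument): split each 64-bit operand into 32-bit halves, so that

  a*b mod 2^64 = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)

When both operands are sign-extensions of 32-bit values, the high halves are pure sign copies and a signed MUL_HI of the low halves already yields the correct upper 32 bits of the product, so neither cross term needs its own multiply.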
diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll
index 66a1a9e..046911b 100644
--- a/test/CodeGen/R600/mul_int24.ll
+++ b/test/CodeGen/R600/mul_int24.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK: @i32_mul24
+; FUNC-LABEL: @i32_mul24
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG-CHECK: MULLO_INT
-; CM-CHECK: MUL_INT24 {{[ *]*}}T{{[0-9].[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: V_MUL_I32_I24
+; EG: MULLO_INT
+; Make sure we are not masking the inputs.
+; CM-NOT: AND
+; CM: MUL_INT24
+; SI-NOT: AND
+; SI: V_MUL_I32_I24
define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
%0 = shl i32 %a, 8
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
index a413961..419f275 100644
--- a/test/CodeGen/R600/mul_uint24.ll
+++ b/test/CodeGen/R600/mul_uint24.ll
@@ -1,11 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-; EG-CHECK-LABEL: @u32_mul24
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK-LABEL: @u32_mul24
-; SI-CHECK: V_MUL_U32_U24
+; FUNC-LABEL: @u32_mul24
+; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI: V_MUL_U32_U24
define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
entry:
@@ -18,17 +17,13 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i16_mul24
-; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; The order of A and B does not matter.
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]]
+; FUNC-LABEL: @i16_mul24
+; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; EG-CHECK: 16
-; SI-CHECK-LABEL: @i16_mul24
-; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
+; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
+; EG: 16
+; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
entry:
%0 = mul i16 %a, %b
@@ -37,16 +32,12 @@ entry:
ret void
}
-; EG-CHECK-LABEL: @i8_mul24
-; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
-; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
-; The order of A and B does not matter.
-; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]], [[A]], [[B]]
+; FUNC-LABEL: @i8_mul24
+; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG-CHECK: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; SI-CHECK-LABEL: @i8_mul24
-; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI-CHECK: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
+; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
+; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
entry:
@@ -55,3 +46,21 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+; Multiply with 24-bit inputs and 64-bit output
+; FUNC-LABEL: @mul24_i64
+; EG: MUL_UINT24
+; EG: MULHI
+; SI: V_MUL_U32_U24
+; FIXME: SI supports 24-bit mulhi
+; SI: V_MUL_HI_U32
+define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+ %0 = shl i64 %a, 40
+ %a_24 = lshr i64 %0, 40
+ %1 = shl i64 %b, 40
+ %b_24 = lshr i64 %1, 40
+ %2 = mul i64 %a_24, %b_24
+ store i64 %2, i64 addrspace(1)* %out
+ ret void
+}
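
The shl/lshr-by-40 pairs in @mul24_i64 are one way to produce 24-bit inputs; masking with and is an equivalent spelling. A hedged variant sketch (not in the commit, and whether the 24-bit matcher accepts this form is not tested here):

define void @mul24_i64_and(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %a_24 = and i64 %a, 16777215   ; keep the low 24 bits (0xffffff)
  %b_24 = and i64 %b, 16777215
  %mul = mul i64 %a_24, %b_24
  store i64 %mul, i64 addrspace(1)* %out
  ret void
}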
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
index d5fc014..8640127 100644
--- a/test/CodeGen/R600/mulhu.ll
+++ b/test/CodeGen/R600/mulhu.ll
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, -1431655765
+;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, 0xaaaaaaab
;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 2cc991e..9878366 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -89,8 +89,8 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
}
; SI-LABEL: @vector_or_i64_loadimm
-; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], -545810305
-; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 5231
+; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
+; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 4920320..d3453f2 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
; This test checks that uses and defs of the AR register happen in the same
; instruction clause.
@@ -119,7 +119,7 @@ for.end:
; R600-CHECK: *
; R600-CHECK: MOVA_INT
-; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 65536
+; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 0x10000
; SI-CHECK: V_MOVRELS_B32_e32
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
@@ -142,7 +142,7 @@ entry:
; R600-CHECK: *
; R600-CHECK-NEXT: MOVA_INT
-; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 256
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 0x100
; SI-CHECK: V_MOVRELS_B32_e32
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 5a930b2..f322bc7 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 | FileCheck %s
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
+;CHECK: MAX T{{[0-9].[XYZW]}}, PV.X, 0.0
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
main_body:
diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll
index a64b280..329077c 100644
--- a/test/CodeGen/R600/register-count-comments.ll
+++ b/test/CodeGen/R600/register-count-comments.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index e461bf9..e7719b6 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -46,3 +46,45 @@ declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.r600.read.tidig.y() #1
attributes #1 = { nounwind readnone }
+
+; Test moving an SMRD instruction to the VALU
+
+; CHECK-LABEL: @smrd_valu
+; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]]
+; CHECK: BUFFER_STORE_DWORD [[OUT]]
+
+define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
+entry:
+ %0 = icmp ne i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = load i32 addrspace(2)* addrspace(1)* %in
+ br label %endif
+
+else:
+ %2 = getelementptr i32 addrspace(2)* addrspace(1)* %in
+ %3 = load i32 addrspace(2)* addrspace(1)* %2
+ br label %endif
+
+endif:
+ %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
+ %5 = getelementptr i32 addrspace(2)* %4, i32 3000
+ %6 = load i32 addrspace(2)* %5
+ store i32 %6, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test moving an SMRD with an immediate offset to the VALU
+
+; CHECK-LABEL: @smrd_valu2
+; CHECK: BUFFER_LOAD_DWORD
+define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
+entry:
+ %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %1 = add i32 %0, 4
+ %2 = getelementptr [8 x i32] addrspace(2)* %in, i32 %0, i32 4
+ %3 = load i32 addrspace(2)* %2
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
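
Both tests above force a divergent address, through a phi across divergent control flow and through a tidig.x-based index, which is why the scalar SMRD has to move to the VALU as a BUFFER_LOAD. For contrast, a hedged counterpart sketch (not in the commit) where the address stays uniform, so the load should be able to remain a scalar S_LOAD_DWORD:

define void @smrd_uniform(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %gep = getelementptr i32 addrspace(2)* %in, i32 4   ; uniform, constant offset
  %val = load i32 addrspace(2)* %gep
  store i32 %val, i32 addrspace(1)* %out
  ret void
}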
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 2a286d1..3d2142d 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -1,6 +1,6 @@
; XFAIL: *
; REQUIRES: asserts
-; RUN: llc -O0 -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI
+; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/R600/selectcc.ll
new file mode 100644
index 0000000..a8f57cf
--- /dev/null
+++ b/test/CodeGen/R600/selectcc.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @selectcc_i64
+; EG: XOR_INT
+; EG: XOR_INT
+; EG: OR_INT
+; EG: CNDE_INT
+; EG: CNDE_INT
+; SI: V_CMP_EQ_I64
+; SI: V_CNDMASK
+; SI: V_CNDMASK
+define void @selectcc_i64(i64 addrspace(1)* %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
+entry:
+ %0 = icmp eq i64 %lhs, %rhs
+ %1 = select i1 %0, i64 %true, i64 %false
+ store i64 %1, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index 8d34c4a..5bd95b7 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -1,5 +1,5 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: @setcc_v2i32
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
@@ -96,7 +96,9 @@ entry:
; R600-DAG: SETNE_INT
; SI: V_CMP_O_F32
; SI: V_CMP_NEQ_F32
-; SI: S_AND_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_AND_B32_e32
define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp one float %a, %b
@@ -128,7 +130,9 @@ entry:
; R600-DAG: SETNE_INT
; SI: V_CMP_U_F32
; SI: V_CMP_EQ_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ueq float %a, %b
@@ -142,7 +146,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_GT_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ugt float %a, %b
@@ -156,7 +162,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_GE_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp uge float %a, %b
@@ -170,7 +178,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_LT_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ult float %a, %b
@@ -184,7 +194,9 @@ entry:
; R600: SETE_DX10
; SI: V_CMP_U_F32
; SI: V_CMP_LE_F32
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fcmp ule float %a, %b
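
The S_AND_B64/S_OR_B64 checks above become V_CNDMASK pairs plus a 32-bit AND/OR because each compare result is first materialized into a VGPR before being combined. The IR-level identity behind the f32_one case, as a sketch (standard fcmp semantics, not from the commit):

define i1 @one_decomposed(float %a, float %b) {
  %ord = fcmp ord float %a, %b   ; true iff neither operand is NaN (V_CMP_O_F32)
  %neq = fcmp une float %a, %b   ; true iff NaN or a != b (V_CMP_NEQ_F32)
  %one = and i1 %ord, %neq       ; equal to fcmp one float %a, %b
  ret i1 %one
}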
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll
index 9202fc0..54a33b3 100644
--- a/test/CodeGen/R600/setcc64.ll
+++ b/test/CodeGen/R600/setcc64.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; XXX: Merge this into setcc, once R600 supports 64-bit operations
@@ -59,7 +59,9 @@ entry:
; FUNC-LABEL: @f64_one
; SI: V_CMP_O_F64
; SI: V_CMP_NEQ_F64
-; SI: S_AND_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_AND_B32_e32
define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp one double %a, %b
@@ -81,7 +83,9 @@ entry:
; FUNC-LABEL: @f64_ueq
; SI: V_CMP_U_F64
; SI: V_CMP_EQ_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ueq double %a, %b
@@ -93,7 +97,9 @@ entry:
; FUNC-LABEL: @f64_ugt
; SI: V_CMP_U_F64
; SI: V_CMP_GT_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ugt double %a, %b
@@ -105,7 +111,9 @@ entry:
; FUNC-LABEL: @f64_uge
; SI: V_CMP_U_F64
; SI: V_CMP_GE_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp uge double %a, %b
@@ -117,7 +125,9 @@ entry:
; FUNC-LABEL: @f64_ult
; SI: V_CMP_U_F64
; SI: V_CMP_LT_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ult double %a, %b
@@ -129,7 +139,9 @@ entry:
; FUNC-LABEL: @f64_ule
; SI: V_CMP_U_F64
; SI: V_CMP_LE_F64
-; SI: S_OR_B64
+; SI: V_CNDMASK_B32_e64
+; SI: V_CNDMASK_B32_e64
+; SI: V_OR_B32_e32
define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
entry:
%0 = fcmp ule double %a, %b
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index 8633a4b..e90e788 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,6 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
+;CHECK-LABEL: @main
+;CHECK: V_CMP_O_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
define void @main(float %p) {
main_body:
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index c77a37e..3b1db8b 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,6 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0, 0, 0
+;CHECK-LABEL: @main
+;CHECK: V_CMP_U_F32_e64 s[0:1], {{[sv][0-9]+, [sv][0-9]+}}, 0, 0
define void @main(float %p) {
main_body:
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index eef3f07..1b02e4b 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -1,15 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
; FUNC-LABEL: @sext_in_reg_i1_i32
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[ARG]], 0, 1
+; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
+; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-; EG: BFE_INT
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31
@@ -19,10 +22,14 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: @sext_in_reg_i8_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 24
@@ -33,10 +40,14 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw
; FUNC-LABEL: @sext_in_reg_i16_to_i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 16
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 16
@@ -47,10 +58,14 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun
; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[VAL]], 0, 8
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
-
-; EG: BFE_INT
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
+; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%c = add <1 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <1 x i32> %c, <i32 24>
@@ -59,13 +74,35 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
ret void
}
+; FUNC-LABEL: @sext_in_reg_i1_to_i64
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
+define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+ %c = add i64 %a, %b
+ %shl = shl i64 %c, 63
+ %ashr = ashr i64 %shl, 63
+ store i64 %ashr, i64 addrspace(1)* %out, align 8
+ ret void
+}
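
A note on the packed S_BFE_I32 source operands in these checks (my reading of the encoding, worth double-checking against the ISA docs): the second operand packs the field offset in its low bits and the field width at bit 16, so

  0x10000 = (1 << 16) | 0  -> width 1, offset 0  (the form V_BFE spelled as trailing '0, 1')
  0x80000 = (8 << 16) | 0  -> width 8, offset 0  (524288 decimal, as in the XSI check below)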
+
; FUNC-LABEL: @sext_in_reg_i8_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
-; EG: BFE_INT
-; EG: ASHR
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: ASHR [[RES_HI]]
+; EG-NOT: BFE_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO: Check the address computation; using | with variables in {{}} does not work,
+;; and the _LO/_HI order might be different.
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 56
@@ -75,12 +112,21 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
}
; FUNC-LABEL: @sext_in_reg_i16_to_i64
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 16
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
-; SI: BUFFER_STORE_DWORD
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
-; EG: BFE_INT
-; EG: ASHR
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG: ADD_INT
+; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: ASHR [[RES_HI]]
+; EG-NOT: BFE_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO: Check the address computation; using | with variables in {{}} does not work,
+;; and the _LO/_HI order might be different.
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 48
@@ -95,6 +141,17 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
+; EG-NOT: BFE_INT
+; EG: ADD_INT {{\*?}} [[RES_LO]]
+; EG: ASHR [[RES_HI]]
+; EG: ADD_INT
+; EG: LSHR
+; EG: LSHR
+;; TODO: Check the address computation; using | with variables in {{}} does not work,
+;; and the _LO/_HI order might be different.
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b
%shl = shl i64 %c, 32
@@ -105,8 +162,8 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
-; XSI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; XSI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31,
+; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
+; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; XEG: BFE_INT
; XEG: ASHR
@@ -122,7 +179,13 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; SI-NOT: BFE
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
+; EG: ADD_INT
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b
%x = shl i32 %c, 6
@@ -136,7 +199,15 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
+; EG: ADD_INT
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHL
+; EG: ASHR [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
@@ -147,11 +218,14 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out
; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 31, i32 31>
@@ -161,16 +235,18 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4
-; EG: BFE
-; EG: BFE
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -180,12 +256,14 @@ define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 24, i32 24>
@@ -195,16 +273,18 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4
-; EG: BFE
-; EG: BFE
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
@@ -214,16 +294,18 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
}
; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
-; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2
-; EG: BFE
-; EG: BFE
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
+; EG: BFE_INT [[RES]]
+; EG: BFE_INT [[RES]]
+; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
- %shl = shl <2 x i32> %c, <i32 24, i32 24>
- %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
+ %shl = shl <2 x i32> %c, <i32 16, i32 16>
+ %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
ret void
}
@@ -252,8 +334,36 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
ret void
}
+; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ %loada = load <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
+ %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
+ %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
+ store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
+ %loada = load <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
+ %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
+ %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
+ store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
; FIXME: The BFE should really be eliminated. I think it should happen
-; when computeMaskedBitsForTargetNode is implemented for imax.
+; when computeKnownBitsForTargetNode is implemented for imax.
; FUNC-LABEL: @sext_in_reg_to_illegal_type
; SI: BUFFER_LOAD_SBYTE
@@ -269,3 +379,146 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad
store i16 %tmp6, i16 addrspace(1)* %out, align 2
ret void
}
+
+declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
+
+; FUNC-LABEL: @bfe_0_width
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_8_bfe_8
+; SI: V_BFE_I32
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
+ %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
+ store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @bfe_8_bfe_16
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI: S_ENDPGM
+define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
+ %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
+ store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; This really should be folded into a single BFE
+; FUNC-LABEL: @bfe_16_bfe_8
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
+ %load = load i32 addrspace(1)* %ptr, align 4
+ %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
+ %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
+ store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; Make sure there isn't a redundant BFE
+; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
+; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %c = add i32 %a, %b ; add to prevent folding into extload
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
+define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %c = add i32 %a, %b ; add to prevent folding into extload
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sextload_i8_to_i32_bfe
+; SI: BUFFER_LOAD_SBYTE
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
+ %load = load i8 addrspace(1)* %ptr, align 1
+ %sext = sext i8 %load to i32
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
+; SI-NOT: BFE
+; SI: S_ENDPGM
+define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
+ %load = load i8 addrspace(1)* %ptr, align 1
+ %sext = sext i8 %load to i32
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
+ %shl = shl i32 %bfe, 24
+ %ashr = ashr i32 %shl, 24
+ store i32 %ashr, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
+; SI-NOT: SHR
+; SI-NOT: SHL
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 31
+ %shr = ashr i32 %shl, 31
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
+; SI: BUFFER_LOAD_DWORD
+; SI-NOT: SHL
+; SI-NOT: SHR
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: S_ENDPGM
+define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 30
+ %shr = ashr i32 %shl, 30
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
+; SI: BUFFER_LOAD_DWORD
+; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: S_ENDPGM
+define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %x = load i32 addrspace(1)* %in, align 4
+ %shl = shl i32 %x, 30
+ %shr = ashr i32 %shl, 30
+ %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
+ store i32 %bfe, i32 addrspace(1)* %out, align 4
+ ret void
+}
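An aside on the pattern under test (my summary, not wording from the patch): the sext_in_reg tests above all build the same shl/ashr idiom, where shifting left by (width - N) and then arithmetic-shifting right by the same amount sign-extends the low N bits in place. A minimal sketch in the same IR dialect (the function name is illustrative):

; Sign-extend the low 8 bits of %x within an i32.
; The shl discards bits 8..31; the ashr replicates the new sign bit (bit 7),
; so the result equals sext(trunc %x to i8) back to i32.
define i32 @sketch_sext_in_reg_i8(i32 %x) {
  %shl = shl i32 %x, 24
  %ashr = ashr i32 %shl, 24
  ret i32 %ashr
}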
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll
new file mode 100644
index 0000000..06ad24d
--- /dev/null
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+;
+;
+; Most SALU instructions ignore control flow, so we need to make sure
+; they don't overwrite values from other blocks.
+
+; SI-NOT: S_ADD
+
+define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+ %0 = icmp eq i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = add i32 %b, %c
+ br label %endif
+
+else:
+ %2 = add i32 %d, %e
+ br label %endif
+
+endif:
+ %3 = phi i32 [%1, %if], [%2, %else]
+ %4 = add i32 %3, %a
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index d74161b..9d8a623 100644
--- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; Copy VGPR -> SGPR used twice as an instruction operand, which is then
; used in an REG_SEQUENCE that also needs to be handled.
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index 5472c1b..c581d86 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll
index cd3ba2b..daa4667 100644
--- a/test/CodeGen/R600/si-annotate-cf-assertion.ll
+++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -asm-verbose=false < %s | FileCheck %s
define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
new file mode 100644
index 0000000..d9f60ea
--- /dev/null
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; 64-bit select was originally lowered with a build_pair, and this
+; could be simplified to 1 cndmask instead of 2, but that broke when
+; it started being implemented with a v2i32 build_vector and
+; bitcasting.
+define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, i64 %a, i64 %b
+ %trunc = trunc i64 %select to i32
+ store i32 %trunc, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @trunc_load_alloca_i64:
+; SI: V_MOVRELS_B32
+; SI-NOT: V_MOVRELS_B32
+; SI: S_ENDPGM
+define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+ %idx = add i32 %a, %b
+ %alloca = alloca i64, i32 4
+ %gep0 = getelementptr i64* %alloca, i64 0
+ %gep1 = getelementptr i64* %alloca, i64 1
+ %gep2 = getelementptr i64* %alloca, i64 2
+ %gep3 = getelementptr i64* %alloca, i64 3
+ store i64 24, i64* %gep0, align 8
+ store i64 9334, i64* %gep1, align 8
+ store i64 3935, i64* %gep2, align 8
+ store i64 9342, i64* %gep3, align 8
+ %gep = getelementptr i64* %alloca, i32 %idx
+ %load = load i64* %gep, align 8
+ %mask = and i64 %load, 4294967296
+ %add = add i64 %mask, -1
+ store i64 %add, i64 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll
index 43231df..dec6185 100644
--- a/test/CodeGen/R600/smrd.ll
+++ b/test/CodeGen/R600/smrd.ll
@@ -2,7 +2,7 @@
; SMRD load with an immediate offset.
; CHECK-LABEL: @smrd0
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 1 ; encoding: [0x01
+; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 1
@@ -13,7 +13,7 @@ entry:
; SMRD load with the largest possible immediate offset.
; CHECK-LABEL: @smrd1
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 255 ; encoding: [0xff
+; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
%0 = getelementptr i32 addrspace(2)* %ptr, i64 255
@@ -24,7 +24,7 @@ entry:
; SMRD load with an offset greater than the largest possible immediate.
; CHECK-LABEL: @smrd2
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 1024
+; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
@@ -34,9 +34,27 @@ entry:
ret void
}
+; SMRD load with a 64-bit offset
+; CHECK-LABEL: @smrd3
+; CHECK-DAG: S_MOV_B32 s[[SHI:[0-9]+]], 4
+; CHECK-DAG: S_MOV_B32 s[[SLO:[0-9]+]], 0
+; FIXME: We don't need to copy these values to VGPRs
+; CHECK-DAG: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; CHECK-DAG: V_MOV_B32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; FIXME: We should be able to use S_LOAD_DWORD here
+; BUFFER_LOAD_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v[[[VLO]]:[[VHI]]] + 0x0
+
+define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+ %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
+ %1 = load i32 addrspace(2)* %0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
; SMRD load using the load.const intrinsic with an immediate offset
; CHECK-LABEL: @smrd_load_const0
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 4 ; encoding: [0x04
+; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -49,7 +67,7 @@ main_body:
; SMRD load using the load.const intrinsic with an offset greater than the
; largest possible immediate offset.
; CHECK-LABEL: @smrd_load_const1
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 255 ; encoding: [0xff
+; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
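A note on the offset encodings, inferred from the checks above rather than stated anywhere in the patch: the SMRD immediate offset appears to be an 8-bit field counted in dwords, while an offset held in a register is counted in bytes. Worked out for the i32 loads above:

; index 255: byte offset 255*4 = 0x3fc -> dword offset 0xff, fits the immediate field
; index 256: dword offset 0x100 > 0xff -> materialized in bytes: S_MOV_B32 s[[OFFSET]], 0x400

Printing the offsets in hex (0x1, 0xff, 0x400) makes the two encodings easier to tell apart.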
diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll
index 58229f6..58d28b5 100644
--- a/test/CodeGen/R600/store-v3i64.ll
+++ b/test/CodeGen/R600/store-v3i64.ll
@@ -1,5 +1,5 @@
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_store_v3i64:
; SI: BUFFER_STORE_DWORDX4
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
index 3af7d91..41c5edc 100644
--- a/test/CodeGen/R600/store-vector-ptrs.ll
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
%p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index a3c5331..c0c8ccc 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -177,6 +177,26 @@ entry:
ret void
}
+; FUNC-LABEL: @store_i64_i8
+; EG-CHECK: MEM_RAT MSKOR
+; SI-CHECK: BUFFER_STORE_BYTE
+define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i8
+ store i8 %0, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: @store_i64_i16
+; EG-CHECK: MEM_RAT MSKOR
+; SI-CHECK: BUFFER_STORE_SHORT
+define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i16
+ store i16 %0, i16 addrspace(1)* %out
+ ret void
+}
+
;===------------------------------------------------------------------------===;
; Local Address Space
;===------------------------------------------------------------------------===;
@@ -272,6 +292,26 @@ entry:
ret void
}
+; FUNC-LABEL: @store_local_i64_i8
+; EG-CHECK: LDS_BYTE_WRITE
+; SI-CHECK: DS_WRITE_B8
+define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i8
+ store i8 %0, i8 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: @store_local_i64_i16
+; EG-CHECK: LDS_SHORT_WRITE
+; SI-CHECK: DS_WRITE_B16
+define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
+entry:
+ %0 = trunc i64 %in to i16
+ store i16 %0, i16 addrspace(3)* %out
+ ret void
+}
+
; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.
@@ -297,3 +337,29 @@ entry:
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; When i128 was a legal type, this program generated "cannot select" errors:
+
+; FUNC-LABEL: @i128-const-store
+; FIXME: We should be able to do this with one store instruction
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+define void @i128-const-store(i32 addrspace(1)* %out) {
+entry:
+ store i32 1, i32 addrspace(1)* %out, align 4
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ store i32 1, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
+ store i32 2, i32 addrspace(1)* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
+ store i32 2, i32 addrspace(1)* %arrayidx6, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index 5fdd2b8..e321ed6 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -1,13 +1,12 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-;EG-CHECK: @test2
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test2
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test2
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;EG-CHECK: @test4
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;FUNC-LABEL: @test4
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI-CHECK: @test4
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -38,3 +36,24 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
+
+;FUNC-LABEL: @test5
+
+;EG-DAG: SETGE_UINT
+;EG-DAG: CNDE_INT
+;EG-DAG: SUB_INT
+;EG-DAG: SUB_INT
+;EG-DAG: SUB_INT
+
+;SI: S_XOR_B64
+;SI-DAG: S_ADD_I32
+;SI-DAG: S_ADDC_U32
+;SI-DAG: S_ADD_I32
+;SI-DAG: S_ADDC_U32
+
+define void @test5(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+entry:
+ %0 = sub i64 %a, %b
+ store i64 %0, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll
index a888943..a3975c8 100644
--- a/test/CodeGen/R600/trunc-store-i1.ll
+++ b/test/CodeGen/R600/trunc-store-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @global_truncstore_i32_to_i1
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index 8a759dc..31cdfcd 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -3,7 +3,7 @@
define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
; SI-LABEL: @trunc_i64_to_i32_store
-; SI: S_LOAD_DWORD s0, s[0:1], 11
+; SI: S_LOAD_DWORD s0, s[0:1], 0xb
; SI: V_MOV_B32_e32 v0, s0
; SI: BUFFER_STORE_DWORD v0
@@ -31,8 +31,9 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
; SI-LABEL: @trunc_shl_i64:
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}},
-; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
-; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
+; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]],
+; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
+; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
%aa = add i64 %a, 234 ; Prevent shrinking store.
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
new file mode 100644
index 0000000..3b69687
--- /dev/null
+++ b/test/CodeGen/R600/uaddo.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+
+; SI-LABEL: @uaddo_i64_zext
+; SI: ADD
+; SI: ADDC
+; SI: ADDC
+define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+ %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
+ %val = extractvalue { i64, i1 } %uadd, 0
+ %carry = extractvalue { i64, i1 } %uadd, 1
+ %ext = zext i1 %carry to i64
+ %add2 = add i64 %val, %ext
+ store i64 %add2, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll
new file mode 100644
index 0000000..a71315a
--- /dev/null
+++ b/test/CodeGen/R600/udivrem64.ll
@@ -0,0 +1,82 @@
+;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+
+;FUNC-LABEL: @test_udiv
+;EG: RECIP_UINT
+;EG: LSHL {{.*}}, 1,
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;SI: S_ENDPGM
+define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %result = udiv i64 %x, %y
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: @test_urem
+;EG: RECIP_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: AND_INT {{.*}}, 1,
+;SI: S_ENDPGM
+define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %result = urem i64 %x, %y
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
new file mode 100644
index 0000000..75150c2
--- /dev/null
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @uint_to_fp_f64_i32
+; SI: V_CVT_F64_U32_e32
+define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
+ %cast = uitofp i32 %in to double
+ store double %cast, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 2824ff8..4df69d1 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: @unaligned_load_store_i32:
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
index f8e9655..84087ee 100644
--- a/test/CodeGen/R600/v_cndmask.ll
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -3,7 +3,8 @@
; SI: @v_cnd_nan
; SI: V_CNDMASK_B32_e64 v{{[0-9]}},
; SI-DAG: v{{[0-9]}}
-; SI-DAG: {{nan|#QNAN}}
+; All NaN values are converted to 0xffffffff
+; SI-DAG: -1
define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) {
entry:
%0 = icmp ne i32 %c, 0
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
new file mode 100644
index 0000000..5d5e3ff
--- /dev/null
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+
+; Make sure the i1 values created by the cfg structurizer pass are
+; moved using VALU instructions
+; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
+; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+entry:
+ switch i32 %a, label %default [
+ i32 0, label %case0
+ i32 1, label %case1
+ ]
+
+case0:
+ %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ store i32 0, i32 addrspace(1)* %arrayidx1, align 4
+ br label %end
+
+case1:
+ %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ store i32 1, i32 addrspace(1)* %arrayidx5, align 4
+ br label %end
+
+default:
+ %cmp8 = icmp eq i32 %a, 2
+ %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ br i1 %cmp8, label %if, label %else
+
+if:
+ store i32 2, i32 addrspace(1)* %arrayidx10, align 4
+ br label %end
+
+else:
+ store i32 3, i32 addrspace(1)* %arrayidx10, align 4
+ br label %end
+
+end:
+ ret void
+}
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 9618d7f..90079b0 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -19,7 +19,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[0].Y
; SI-CHECK: @ngroups_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 1
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
@@ -33,7 +33,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[0].Z
; SI-CHECK: @ngroups_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 2
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
@@ -47,7 +47,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[0].W
; SI-CHECK: @global_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 3
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
@@ -61,7 +61,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].X
; SI-CHECK: @global_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 4
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
@@ -75,7 +75,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].Y
; SI-CHECK: @global_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 5
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
@@ -89,7 +89,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].Z
; SI-CHECK: @local_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 6
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
@@ -103,7 +103,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[1].W
; SI-CHECK: @local_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 7
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
@@ -117,7 +117,7 @@ entry:
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV [[VAL]], KC0[2].X
; SI-CHECK: @local_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 8
+; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8
; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index 49ed12d..5a5c86d 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -72,3 +72,21 @@ define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
store i32 %result, i32 addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @scalar_not_i32
+; SI-CHECK: S_NOT_B32
+define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
+ %result = xor i32 %a, -1
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-CHECK-LABEL: @vector_not_i32
+; SI-CHECK: V_NOT_B32
+define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+ %a = load i32 addrspace(1)* %in0
+ %b = load i32 addrspace(1)* %in1
+ %result = xor i32 %a, -1
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index a114bfc..8585d4a 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -6,8 +6,9 @@
; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
; SI-CHECK: @test
-; SI-CHECK: V_MOV_B32_e32 v[[ZERO:[0-9]]], 0
-; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[ZERO]]{{\]}}
+; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0
+; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
+; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
%0 = mul i32 %a, %b
@@ -26,3 +27,14 @@ entry:
store i32 %1, i32 addrspace(1)* %out
ret void
}
+
+; SI-CHECK-LABEL: @zext_i1_to_i64
+; SI-CHECK: V_CMP_EQ_I32
+; SI-CHECK: V_CNDMASK_B32
+; SI-CHECK: S_MOV_B32 s{{[0-9]+}}, 0
+define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp eq i32 %a, %b
+ %ext = zext i1 %cmp to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
index 050b76d..1c8e7d8 100644
--- a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
+++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll
@@ -60,13 +60,13 @@ declare i8* @llvm.frameaddress(i32) nounwind readnone
define i8* @retaddr() nounwind readnone {
entry:
;V8-LABEL: retaddr:
-;V8: or %g0, %o7, {{.+}}
+;V8: mov %o7, {{.+}}
;V9-LABEL: retaddr:
-;V9: or %g0, %o7, {{.+}}
+;V9: mov %o7, {{.+}}
;SPARC64-LABEL: retaddr
-;SPARC64: or %g0, %o7, {{.+}}
+;SPARC64: mov %o7, {{.+}}
%0 = tail call i8* @llvm.returnaddress(i32 0)
ret i8* %0
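Background for the or/mov churn in this and the following SPARC tests (my gloss, not from the patch): mov is a synthetic SPARC instruction that assemblers define as an or with the zero register, so `mov %o7, %i0` and `or %g0, %o7, %i0` are the same encoding; the checks merely follow the new preferred printing. Any plain register copy triggers it, e.g. this illustrative function:

; Returning the second argument forces a copy into the return register,
; which now prints as "mov %i1, %i0" instead of "or %g0, %i1, %i0".
define i32 @sketch_copy(i32 %a, i32 %b) {
  ret i32 %b
}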
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 60bdf06..8a3edc6 100644
--- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -1,6 +1,7 @@
;RUN: llc -march=sparc < %s -verify-machineinstrs | FileCheck %s
;RUN: llc -march=sparc -O0 < %s -verify-machineinstrs | FileCheck %s -check-prefix=UNOPT
+target triple = "sparc-unknown-linux-gnu"
define i32 @test(i32 %a) nounwind {
entry:
@@ -59,7 +60,7 @@ entry:
;CHECK: !NO_APP
;CHECK-NEXT: cmp
;CHECK-NEXT: bg
-;CHECK-NEXT: or
+;CHECK-NEXT: mov
tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
%0 = icmp slt i32 %a, 0
br i1 %0, label %bb, label %bb1
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index 3771888..a88e19a5 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -44,7 +44,7 @@ define void @intarg(i8 %a0, ; %i0
; CHECK: sra %i0, 0, [[R:%[gilo][0-7]]]
; CHECK: stx [[R]], [%sp+2223]
; Use %o0-%o5 for outgoing arguments
-; CHECK: or %g0, 5, %o5
+; CHECK: mov 5, %o5
; CHECK: call intarg
; CHECK-NOT: add %sp
; CHECK: restore
@@ -208,7 +208,7 @@ define i32 @inreg_if(float inreg %a0, ; %f0
; CHECK: call_inreg_if
; CHECK: fmovs %f3, %f0
-; CHECK: or %g0, %i2, %o0
+; CHECK: mov %i2, %o0
; CHECK: call inreg_if
define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
%x = call i32 @inreg_if(float %f3, i32 %i2)
diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll
index 7ab19f3..b18f1bc 100644
--- a/test/CodeGen/SPARC/64bit.ll
+++ b/test/CodeGen/SPARC/64bit.ll
@@ -2,11 +2,11 @@
; RUN: llc < %s -march=sparcv9 -mattr=+popc | FileCheck %s -check-prefix=OPT
; CHECK-LABEL: ret2:
-; CHECK: or %g0, %i1, %i0
+; CHECK: mov %i1, %i0
; OPT-LABEL: ret2:
; OPT: retl
-; OPT: or %g0, %o1, %o0
+; OPT: mov %o1, %o0
define i64 @ret2(i64 %a, i64 %b) {
ret i64 %b
}
@@ -39,21 +39,21 @@ define i64 @sra_reg(i64 %a, i64 %b) {
; restore %g0, %g0, %o0
;
; CHECK: ret_imm0
-; CHECK: or %g0, 0, %i0
+; CHECK: mov 0, %i0
; OPT: ret_imm0
; OPT: retl
-; OPT: or %g0, 0, %o0
+; OPT: mov 0, %o0
define i64 @ret_imm0() {
ret i64 0
}
; CHECK: ret_simm13
-; CHECK: or %g0, -4096, %i0
+; CHECK: mov -4096, %i0
; OPT: ret_simm13
; OPT: retl
-; OPT: or %g0, -4096, %o0
+; OPT: mov -4096, %o0
define i64 @ret_simm13() {
ret i64 -4096
}
diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll
index 1bd17a4..e491d61 100644
--- a/test/CodeGen/SPARC/64cond.ll
+++ b/test/CodeGen/SPARC/64cond.ll
@@ -112,9 +112,9 @@ entry:
; CHECK-LABEL: setcc_resultty
; CHECK-DAG: srax %i0, 63, %o0
-; CHECK-DAG: or %g0, %i0, %o1
-; CHECK-DAG: or %g0, 0, %o2
-; CHECK-DAG: or %g0, 32, %o3
+; CHECK-DAG: mov %i0, %o1
+; CHECK-DAG: mov 0, %o2
+; CHECK-DAG: mov 32, %o3
; CHECK-DAG: call __multi3
; CHECK: cmp
; CHECK: movne %xcc, 1, [[R:%[gilo][0-7]]]
diff --git a/test/CodeGen/SPARC/atomics.ll b/test/CodeGen/SPARC/atomics.ll
index 4e3e7ae..5e41300 100644
--- a/test/CodeGen/SPARC/atomics.ll
+++ b/test/CodeGen/SPARC/atomics.ll
@@ -33,7 +33,7 @@ entry:
}
; CHECK-LABEL: test_cmpxchg_i32
-; CHECK: or %g0, 123, [[R:%[gilo][0-7]]]
+; CHECK: mov 123, [[R:%[gilo][0-7]]]
; CHECK: cas [%o1], %o0, [[R]]
define i32 @test_cmpxchg_i32(i32 %a, i32* %ptr) {
@@ -43,7 +43,7 @@ entry:
}
; CHECK-LABEL: test_cmpxchg_i64
-; CHECK: or %g0, 123, [[R:%[gilo][0-7]]]
+; CHECK: mov 123, [[R:%[gilo][0-7]]]
; CHECK: casx [%o1], %o0, [[R]]
define i64 @test_cmpxchg_i64(i64 %a, i64* %ptr) {
@@ -53,7 +53,7 @@ entry:
}
; CHECK-LABEL: test_swap_i32
-; CHECK: or %g0, 42, [[R:%[gilo][0-7]]]
+; CHECK: mov 42, [[R:%[gilo][0-7]]]
; CHECK: swap [%o1], [[R]]
define i32 @test_swap_i32(i32 %a, i32* %ptr) {
diff --git a/test/CodeGen/SPARC/exception.ll b/test/CodeGen/SPARC/exception.ll
index 3a3f59f..eca9c8b 100644
--- a/test/CodeGen/SPARC/exception.ll
+++ b/test/CodeGen/SPARC/exception.ll
@@ -1,9 +1,7 @@
; RUN: llc < %s -march=sparc -relocation-model=static | FileCheck -check-prefix=V8ABS %s
; RUN: llc < %s -march=sparc -relocation-model=pic | FileCheck -check-prefix=V8PIC %s
-; RUN: llc < %s -march=sparc -relocation-model=pic -disable-cfi | FileCheck -check-prefix=V8PIC_NOCFI %s
; RUN: llc < %s -march=sparcv9 -relocation-model=static | FileCheck -check-prefix=V9ABS %s
; RUN: llc < %s -march=sparcv9 -relocation-model=pic | FileCheck -check-prefix=V9PIC %s
-; RUN: llc < %s -march=sparcv9 -relocation-model=pic -disable-cfi | FileCheck -check-prefix=V9PIC_NOCFI %s
%struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo }
@@ -47,22 +45,6 @@
; V8PIC: .L_ZTIi.DW.stub:
; V8PIC-NEXT: .word _ZTIi
-; V8PIC_NOCFI-LABEL: main:
-; V8PIC_NOCFI: .section .gcc_except_table
-; V8PIC_NOCFI-NOT: .section
-; V8PIC_NOCFI: .word %r_disp32(.L_ZTIi.DW.stub)
-; V8PIC_NOCFI: .data
-; V8PIC_NOCFI: .L_ZTIi.DW.stub:
-; V8PIC_NOCFI-NEXT: .word _ZTIi
-; V8PIC_NOCFI: .section .eh_frame
-; V8PIC_NOCFI-NOT: .section
-; V8PIC_NOCFI: .byte 15 ! CIE Return Address Column
-; V8PIC_NOCFI: .word %r_disp32(DW.ref.__gxx_personality_v0)
-; V8PIC_NOCFI: .byte 12 ! DW_CFA_def_cfa
-; V8PIC_NOCFI: .byte 14 ! Reg 14
-; V8PIC_NOCFI-NEXT: .byte 0 ! Offset 0
-; V8PIC_NOCFI: .word %r_disp32(.Ltmp{{.+}}) ! FDE initial location
-
; V9ABS-LABEL: main:
; V9ABS: .cfi_startproc
@@ -89,22 +71,6 @@
; V9PIC: .L_ZTIi.DW.stub:
; V9PIC-NEXT: .xword _ZTIi
-; V9PIC_NOCFI-LABEL: main:
-; V9PIC_NOCFI: .section .gcc_except_table
-; V9PIC_NOCFI-NOT: .section
-; V9PIC_NOCFI: .word %r_disp32(.L_ZTIi.DW.stub)
-; V9PIC_NOCFI: .data
-; V9PIC_NOCFI: .L_ZTIi.DW.stub:
-; V9PIC_NOCFI-NEXT: .xword _ZTIi
-; V9PIC_NOCFI: .section .eh_frame
-; V9PIC_NOCFI-NOT: .section
-; V9PIC_NOCFI: .byte 15 ! CIE Return Address Column
-; V9PIC_NOCFI: .word %r_disp32(DW.ref.__gxx_personality_v0)
-; V9PIC_NOCFI: .byte 12 ! DW_CFA_def_cfa
-; V9PIC_NOCFI-NEXT: .byte 14 ! Reg 14
-; V9PIC_NOCFI: .ascii "\377\017" ! Offset 2047
-; V9PIC_NOCFI: .word %r_disp32(.Ltmp{{.+}}) ! FDE initial location
-
define i32 @main(i32 %argc, i8** nocapture readnone %argv) unnamed_addr #0 {
entry:
%0 = icmp eq i32 %argc, 2
diff --git a/test/CodeGen/SPARC/leafproc.ll b/test/CodeGen/SPARC/leafproc.ll
index 963fac0..abb8ed9 100644
--- a/test/CodeGen/SPARC/leafproc.ll
+++ b/test/CodeGen/SPARC/leafproc.ll
@@ -11,7 +11,7 @@ entry:
; CHECK-LABEL: return_int_const:
; CHECK: retl
-; CHECK-NEXT: or %g0, 1729, %o0
+; CHECK-NEXT: mov 1729, %o0
define i32 @return_int_const() {
entry:
ret i32 1729
@@ -58,9 +58,9 @@ entry:
; CHECK-LABEL: leaf_proc_with_local_array:
; CHECK: add %sp, -104, %sp
-; CHECK: or %g0, 1, [[R1:%[go][0-7]]]
+; CHECK: mov 1, [[R1:%[go][0-7]]]
; CHECK: st [[R1]], [%sp+96]
-; CHECK: or %g0, 2, [[R2:%[go][0-7]]]
+; CHECK: mov 2, [[R2:%[go][0-7]]]
; CHECK: st [[R2]], [%sp+100]
; CHECK: ld {{.+}}, %o0
; CHECK: retl
diff --git a/test/CodeGen/SPARC/parts.ll b/test/CodeGen/SPARC/parts.ll
index 57add49..47feb15 100644
--- a/test/CodeGen/SPARC/parts.ll
+++ b/test/CodeGen/SPARC/parts.ll
@@ -2,10 +2,10 @@
; CHECK-LABEL: test
; CHECK: srl %i1, 0, %o2
-; CHECK-NEXT: or %g0, %i2, %o0
+; CHECK-NEXT: mov %i2, %o0
; CHECK-NEXT: call __ashlti3
-; CHECK-NEXT: or %g0, %i3, %o1
-; CHECK-NEXT: or %g0, %o0, %i0
+; CHECK-NEXT: mov %i3, %o1
+; CHECK-NEXT: mov %o0, %i0
define i128 @test(i128 %a, i128 %b) {
entry:
diff --git a/test/CodeGen/SPARC/sret-secondary.ll b/test/CodeGen/SPARC/sret-secondary.ll
new file mode 100644
index 0000000..4efcabf
--- /dev/null
+++ b/test/CodeGen/SPARC/sret-secondary.ll
@@ -0,0 +1,8 @@
+; RUN: not llc -march=sparc < %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: sparc only supports sret on the first parameter
+
+define void @foo(i32 %a, i32* sret %out) {
+ store i32 %a, i32* %out
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/alias-01.ll b/test/CodeGen/SystemZ/alias-01.ll
index dc90481..8839aad 100644
--- a/test/CodeGen/SystemZ/alias-01.ll
+++ b/test/CodeGen/SystemZ/alias-01.ll
@@ -2,9 +2,6 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-; The use of TBAA in CodeGen has been temporarily disabled pending correctness fixes.
-; XFAIL: *
-
; Check that there are no spills.
define void @f1(<16 x i32> *%src1, <16 x float> *%dest) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
index 5c883b3..ca6df7c 100644
--- a/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
+++ b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb | grep r0 | count 1
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i32 @a(i32 %x, i32 %y) nounwind readnone {
entry:
@@ -6,3 +6,5 @@ entry:
ret i32 %mul
}
+; CHECK: r0
+
diff --git a/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll b/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll
index ad8b064..e1efd3b 100644
--- a/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll
+++ b/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=thumb < %s
+; RUN: llc -mtriple=thumb-eabi %s -o /dev/null
; rdar://8104457
define arm_apcscc void @t(i32* %m) nounwind {
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index b87bf24..ffc9584 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -151,5 +151,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!98 = metadata !{i32 52, i32 0, metadata !1, null}
!101 = metadata !{metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
!102 = metadata !{i32 0}
-!103 = metadata !{metadata !3}
+!103 = metadata !{metadata !3, metadata !77}
!104 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/CodeGen/Thumb/DbgValueOtherTargets.test b/test/CodeGen/Thumb/DbgValueOtherTargets.test
index afb18a4..557892b 100644
--- a/test/CodeGen/Thumb/DbgValueOtherTargets.test
+++ b/test/CodeGen/Thumb/DbgValueOtherTargets.test
@@ -1 +1 @@
-RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -mtriple=thumb-eabi -asm-verbose %S/../Inputs/DbgValueOtherTargets.ll -o - | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll
index 1c27fa0..92d9bb2 100644
--- a/test/CodeGen/Thumb/barrier.ll
+++ b/test/CodeGen/Thumb/barrier.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=-db | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -march=thumb -mcpu=cortex-m0 | FileCheck %s -check-prefix=V6M
+; RUN: llc -mtriple=thumbv6-apple-darwin %s -o - | FileCheck %s -check-prefix=V6
+; RUN: llc -mtriple=thumbv7-apple-darwin -mattr=-db %s -o - | FileCheck %s -check-prefix=V6
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m0 %s -o - | FileCheck %s -check-prefix=V6M
define void @t1() {
; V6-LABEL: t1:
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 6c6de55..6bc39af 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
; CHECK: sub sp, #
; CHECK: mov r[[R0:[0-9]+]], sp
; CHECK: str r{{[0-9+]}}, [r[[R0]]
-; CHECK: str r{{[0-9+]}}, [r[[R0]]
+; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
+; RA_BASIC: stm r[[R0]]!
; CHECK-NOT: ldr r0, [sp
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
diff --git a/test/CodeGen/Thumb/fpconv.ll b/test/CodeGen/Thumb/fpconv.ll
index 7da36dd..0ade798 100644
--- a/test/CodeGen/Thumb/fpconv.ll
+++ b/test/CodeGen/Thumb/fpconv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb
+; RUN: llc -mtriple=thumb-eabi %s -o /dev/null
define float @f1(double %x) {
entry:
diff --git a/test/CodeGen/Thumb/fpow.ll b/test/CodeGen/Thumb/fpow.ll
index be3dc0b..18b1c91 100644
--- a/test/CodeGen/Thumb/fpow.ll
+++ b/test/CodeGen/Thumb/fpow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb
+; RUN: llc -mtriple=thumb-eabi %s -o /dev/null
define double @t(double %x, double %y) nounwind optsize {
entry:
diff --git a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
index d557b9d..4e4f8fa 100644
--- a/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -no-integrated-as
+; RUN: llc -mtriple=thumb-eabi -no-integrated-as %s -o /dev/null
; Test Thumb-mode "I" constraint, for ADD immediate.
define i32 @testI(i32 %x) {
diff --git a/test/CodeGen/Thumb/inlineasm-thumb.ll b/test/CodeGen/Thumb/inlineasm-thumb.ll
index f2683c8..2547ce8 100644
--- a/test/CodeGen/Thumb/inlineasm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-thumb.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=thumb | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
+
define i32 @t1(i32 %x, i32 %y) nounwind {
entry:
; CHECK: mov r0, r12
diff --git a/test/CodeGen/Thumb/ispositive.ll b/test/CodeGen/Thumb/ispositive.ll
index 7b28227..8d39687 100644
--- a/test/CodeGen/Thumb/ispositive.ll
+++ b/test/CodeGen/Thumb/ispositive.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i32 @test1(i32 %X) {
entry:
diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll
index 9a28124..2d25af3 100644
--- a/test/CodeGen/Thumb/ldr_ext.ll
+++ b/test/CodeGen/Thumb/ldr_ext.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=V5
-; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s -check-prefix=V6
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=V5
+; RUN: llc -mtriple=thumb-eabi -mattr=+v6 %s -o - | FileCheck %s -check-prefix=V6
; rdar://7176514
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
index 6c58638..0e879d7 100644
--- a/test/CodeGen/Thumb/ldr_frame.ll
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i32 @f1() {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb/long-setcc.ll b/test/CodeGen/Thumb/long-setcc.ll
index 8f2d98f..3460edb 100644
--- a/test/CodeGen/Thumb/long-setcc.ll
+++ b/test/CodeGen/Thumb/long-setcc.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb | grep cmp | count 1
-
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i1 @t1(i64 %x) {
%B = icmp slt i64 %x, 0
@@ -15,3 +14,9 @@ define i1 @t3(i32 %x) {
%tmp = icmp ugt i32 %x, -1
ret i1 %tmp
}
+
+; CHECK: cmp
+; CHECK-NOT: cmp
+
+
+
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index 197e19e..2449e5a 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -1,10 +1,5 @@
-; RUN: llc < %s -march=thumb | \
-; RUN: grep mvn | count 1
-; RUN: llc < %s -march=thumb | \
-; RUN: grep adc | count 1
-; RUN: llc < %s -march=thumb | \
-; RUN: grep sbc | count 1
-; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __muldi3
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-apple-darwin %s -o - | FileCheck %s -check-prefix CHECK-DARWIN
define i64 @f1() {
entry:
@@ -74,3 +69,14 @@ entry:
ret i64 %retval
}
+; CHECK: mvn
+; CHECK-NOT: mvn
+
+; CHECK: adc
+; CHECK-NOT: adc
+
+; CHECK: sbc
+; CHECK-NOT: sbc
+
+; CHECK-DARWIN: __muldi3
+
diff --git a/test/CodeGen/Thumb/long_shift.ll b/test/CodeGen/Thumb/long_shift.ll
index 2431714..6aa1afd 100644
--- a/test/CodeGen/Thumb/long_shift.ll
+++ b/test/CodeGen/Thumb/long_shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb
+; RUN: llc -mtriple=thumb-eabi %s -o /dev/null
define i64 @f0(i64 %A, i64 %B) {
%tmp = bitcast i64 %A to i64
diff --git a/test/CodeGen/Thumb/mul.ll b/test/CodeGen/Thumb/mul.ll
index c1a2fb2..13a2cfb 100644
--- a/test/CodeGen/Thumb/mul.ll
+++ b/test/CodeGen/Thumb/mul.ll
@@ -1,22 +1,32 @@
-; RUN: llc < %s -march=thumb | grep mul | count 3
-; RUN: llc < %s -march=thumb | grep lsl | count 1
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i32 @f1(i32 %u) {
%tmp = mul i32 %u, %u
ret i32 %tmp
}
+; CHECK: mul{{s?}}
+
define i32 @f2(i32 %u, i32 %v) {
%tmp = mul i32 %u, %v
ret i32 %tmp
}
+; CHECK: mul{{s?}}
+
define i32 @f3(i32 %u) {
%tmp = mul i32 %u, 5
ret i32 %tmp
}
+; CHECK: mul{{s?}}
+
define i32 @f4(i32 %u) {
%tmp = mul i32 %u, 4
ret i32 %tmp
}
+
+; CHECK: lsl
+; CHECK-NOT: mul{{s?}}
+; CHECK-NOT: lsl
+
diff --git a/test/CodeGen/Thumb/rev.ll b/test/CodeGen/Thumb/rev.ll
index dcba00e..3e94702 100644
--- a/test/CodeGen/Thumb/rev.ll
+++ b/test/CodeGen/Thumb/rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+v6 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mattr=+v6 %s -o - | FileCheck %s
define i32 @test1(i32 %X) nounwind {
; CHECK: test1
diff --git a/test/CodeGen/Thumb/segmented-stacks-dynamic.ll b/test/CodeGen/Thumb/segmented-stacks-dynamic.ll
index 067c07b..5d51f40 100644
--- a/test/CodeGen/Thumb/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/Thumb/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
%mem = alloca i32, i32 %l
call void @dummy_use (i32* %mem, i32 %l)
%terminate = icmp eq i32 %l, 0
@@ -61,3 +61,5 @@ false:
; Thumb-android: pop {r4, r5}
}
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/Thumb/segmented-stacks.ll b/test/CodeGen/Thumb/segmented-stacks.ll
index 5649b00..d6e25c7 100644
--- a/test/CodeGen/Thumb/segmented-stacks.ll
+++ b/test/CodeGen/Thumb/segmented-stacks.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj
-; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define void @test_basic() {
+define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -54,9 +54,11 @@ define void @test_basic() {
}
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
%addend = load i32 * %closure
%result = add i32 %other, %addend
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
ret i32 %result
; Thumb-android: test_nested:
@@ -68,7 +70,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; Thumb-android-NEXT: cmp r4, r5
; Thumb-android-NEXT: blo .LBB1_2
-; Thumb-android: mov r4, #0
+; Thumb-android: mov r4, #56
; Thumb-android-NEXT: mov r5, #0
; Thumb-android-NEXT: push {lr}
; Thumb-android-NEXT: bl __morestack
@@ -88,7 +90,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; Thumb-linux-NEXT: cmp r4, r5
; Thumb-linux-NEXT: blo .LBB1_2
-; Thumb-linux: mov r4, #0
+; Thumb-linux: mov r4, #56
; Thumb-linux-NEXT: mov r5, #0
; Thumb-linux-NEXT: push {lr}
; Thumb-linux-NEXT: bl __morestack
@@ -101,7 +103,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
}
-define void @test_large() {
+define void @test_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
@@ -150,7 +152,7 @@ define void @test_large() {
}
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -197,7 +199,7 @@ define fastcc void @test_fastcc() {
}
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
@@ -245,3 +247,15 @@ define fastcc void @test_fastcc_large() {
; Thumb-linux: pop {r4, r5}
}
+
+define void @test_nostack() #0 {
+ ret void
+
+; Thumb-android-LABEL: test_nostack:
+; Thumb-android-NOT: bl __morestack
+
+; Thumb-linux-LABEL: test_nostack:
+; Thumb-linux-NOT: bl __morestack
+}
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
index 3f6407a..97c66d9 100644
--- a/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
+++ b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1022e
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1022e %s -o /dev/null
%iterator = type { i8**, i8**, i8**, i8*** }
%insert_iterator = type { %deque*, %iterator }
diff --git a/test/CodeGen/Thumb/stack-frame.ll b/test/CodeGen/Thumb/stack-frame.ll
index b103b33..09d480a 100644
--- a/test/CodeGen/Thumb/stack-frame.ll
+++ b/test/CodeGen/Thumb/stack-frame.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb
-; RUN: llc < %s -march=thumb | grep add | count 1
+; RUN: llc -mtriple=thumb-eabi < %s -o - | FileCheck %s
define void @f1() {
%c = alloca i8, align 1
@@ -10,4 +9,6 @@ define i32 @f2() {
ret i32 1
}
+; CHECK: add
+; CHECK-NOT: add
diff --git a/test/CodeGen/Thumb/thumb-imm.ll b/test/CodeGen/Thumb/thumb-imm.ll
index 74a57ff..592e694 100644
--- a/test/CodeGen/Thumb/thumb-imm.ll
+++ b/test/CodeGen/Thumb/thumb-imm.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb | not grep CPI
-
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i32 @test1() {
ret i32 1000
@@ -8,3 +7,6 @@ define i32 @test1() {
define i32 @test2() {
ret i32 -256
}
+
+; CHECK-NOT: CPI
+
diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll
new file mode 100644
index 0000000..dd98e6f
--- /dev/null
+++ b/test/CodeGen/Thumb/thumb-ldm.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
+
+@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK-LABEL: t1:
+; CHECK: push {r7, lr}
+; CHECK: ldm
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK-LABEL: t2:
+; CHECK: push {r7, lr}
+; CHECK: ldm
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK-LABEL: t3:
+; CHECK: push {r7, lr}
+; CHECK: ldm
+; CHECK: pop {r7, pc}
+ %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)
diff --git a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
new file mode 100644
index 0000000..06cfd9b
--- /dev/null
+++ b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s
+
+@d = external global [64 x i32]
+@s = external global [64 x i32]
+
+; Function Attrs: nounwind
+define void @t1() #0 {
+entry:
+; CHECK: ldr [[REG0:r[0-9]]],
+; CHECK: ldm [[REG0]]!,
+; CHECK: ldr [[REG1:r[0-9]]],
+; CHECK: stm [[REG1]]!,
+; CHECK: subs [[REG0]], #32
+; CHECK-NEXT: ldrb
+; CHECK: subs [[REG1]], #32
+; CHECK-NEXT: strb
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 33, i32 4, i1 false)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @t2() #0 {
+entry:
+; CHECK: ldr [[REG0:r[0-9]]],
+; CHECK: ldm [[REG0]]!,
+; CHECK: ldr [[REG1:r[0-9]]],
+; CHECK: stm [[REG1]]!,
+; CHECK: ldrh
+; CHECK: ldrb
+; CHECK: strb
+; CHECK: strh
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/Thumb/trap.ll b/test/CodeGen/Thumb/trap.ll
index e04059c..7d2f6f1 100644
--- a/test/CodeGen/Thumb/trap.ll
+++ b/test/CodeGen/Thumb/trap.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
; rdar://7961298
define void @t() nounwind {
diff --git a/test/CodeGen/Thumb/tst_teq.ll b/test/CodeGen/Thumb/tst_teq.ll
index 21ada3e..2b6d9a3 100644
--- a/test/CodeGen/Thumb/tst_teq.ll
+++ b/test/CodeGen/Thumb/tst_teq.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb | grep tst
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
define i32 @f(i32 %a) {
entry:
@@ -15,3 +15,6 @@ entry:
%retval = select i1 %0, i32 20, i32 10 ; <i32> [#uses=1]
ret i32 %retval
}
+
+; CHECK: tst
+
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index 50a1a07..4078b01 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=thumb
-; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 2
-; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2
+; RUN: llc -mtriple=thumb-eabi %s -o /dev/null
+; RUN: llc -mtriple=thumb-linux %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-darwin %s -o - | FileCheck %s
@str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
@@ -34,3 +34,8 @@ declare void @llvm.va_start(i8*)
declare i32 @printf(i8*, ...)
declare void @llvm.va_end(i8*)
+
+; CHECK: pop
+; CHECK: pop
+; CHECK-NOT: pop
+
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 3612e27..4f056d5 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mattr=+v6t2 %s -o - | FileCheck %s
%struct.F = type { [3 x i8], i8 }
diff --git a/test/CodeGen/Thumb2/bfx.ll b/test/CodeGen/Thumb2/bfx.ll
index e380b8f..9bd8d70 100644
--- a/test/CodeGen/Thumb2/bfx.ll
+++ b/test/CodeGen/Thumb2/bfx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @sbfx1(i32 %a) {
; CHECK: sbfx1
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
index 48fba4e..26622e2 100644
--- a/test/CodeGen/Thumb2/carry.ll
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
entry:
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index e783c88..b273a89 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -mcpu=arm1156t2-s -mattr=+thumb2 \
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-m3 -mattr=+thumb2 %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
-; RUN: llc < %s -march=thumb -mcpu=swift \
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=swift %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
-; RUN: llc < %s -march=thumb -mcpu=cortex-r5 \
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r5 %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
define i32 @f1(i32 %a, i32 %b) {
diff --git a/test/CodeGen/Thumb2/ifcvt-neon.ll b/test/CodeGen/Thumb2/ifcvt-neon.ll
index 6832053..501b0b6 100644
--- a/test/CodeGen/Thumb2/ifcvt-neon.ll
+++ b/test/CodeGen/Thumb2/ifcvt-neon.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
; rdar://7368193
@a = common global float 0.000000e+00 ; <float*> [#uses=2]
diff --git a/test/CodeGen/Thumb2/longMACt.ll b/test/CodeGen/Thumb2/longMACt.ll
index abe65f2..7322d0f 100644
--- a/test/CodeGen/Thumb2/longMACt.ll
+++ b/test/CodeGen/Thumb2/longMACt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; Check generated signed and unsigned multiply accumulate long.
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
diff --git a/test/CodeGen/Thumb2/mul_const.ll b/test/CodeGen/Thumb2/mul_const.ll
index 41de477..7064798 100644
--- a/test/CodeGen/Thumb2/mul_const.ll
+++ b/test/CodeGen/Thumb2/mul_const.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; rdar://7069502
define i32 @t1(i32 %v) nounwind readnone {
diff --git a/test/CodeGen/Thumb2/segmented-stacks.ll b/test/CodeGen/Thumb2/segmented-stacks.ll
index 602fc84..38bf915 100644
--- a/test/CodeGen/Thumb2/segmented-stacks.ll
+++ b/test/CodeGen/Thumb2/segmented-stacks.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
-; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define void @test_basic() {
+define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -30,3 +30,5 @@ define void @test_basic() {
; Thumb-android: pop {r4, r5}
}
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/Thumb2/thumb2-adc.ll b/test/CodeGen/Thumb2/thumb2-adc.ll
index 58e4c59..a97654c 100644
--- a/test/CodeGen/Thumb2/thumb2-adc.ll
+++ b/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 734439407618 = 0x000000ab00000002
define i64 @f1(i64 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index 5e81fcf..8ff931a 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @t2ADDrc_255(i32 %lhs) {
; CHECK-LABEL: t2ADDrc_255:
diff --git a/test/CodeGen/Thumb2/thumb2-add2.ll b/test/CodeGen/Thumb2/thumb2-add2.ll
index ff0e087..9d64fd2 100644
--- a/test/CodeGen/Thumb2/thumb2-add2.ll
+++ b/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll
index bb7788f..03a8170 100644
--- a/test/CodeGen/Thumb2/thumb2-add3.ll
+++ b/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
%tmp = add i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-add4.ll b/test/CodeGen/Thumb2/thumb2-add4.ll
index ed68d62..ad9642d 100644
--- a/test/CodeGen/Thumb2/thumb2-add4.ll
+++ b/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 171 = 0x000000ab
define i64 @f1(i64 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-add5.ll b/test/CodeGen/Thumb2/thumb2-add5.ll
index 7ef756f..f60e0be 100644
--- a/test/CodeGen/Thumb2/thumb2-add5.ll
+++ b/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-add6.ll b/test/CodeGen/Thumb2/thumb2-add6.ll
index c4a13be..af09293 100644
--- a/test/CodeGen/Thumb2/thumb2-add6.ll
+++ b/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-and.ll b/test/CodeGen/Thumb2/thumb2-and.ll
index 3ffcfd7..1984b3f 100644
--- a/test/CodeGen/Thumb2/thumb2-and.ll
+++ b/test/CodeGen/Thumb2/thumb2-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 3bfe9b2..70de9c9 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-asr.ll b/test/CodeGen/Thumb2/thumb2-asr.ll
index fbe3971..a4cccd5 100644
--- a/test/CodeGen/Thumb2/thumb2-asr.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-asr2.ll b/test/CodeGen/Thumb2/thumb2-asr2.ll
index 321b3f5..da050fb 100644
--- a/test/CodeGen/Thumb2/thumb2-asr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
index 61171ac..e7b3822 100644
--- a/test/CodeGen/Thumb2/thumb2-bcc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; If-conversion defeats the purpose of this test, which is to check CBZ
; generation, so use a memory barrier instruction to make sure it doesn't
; happen and we get actual branches.
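; Illustrative aside, not part of the patch: the shape described above looks
; roughly like the following, assuming @llvm.arm.dmb is the barrier used
; (a sketch, not the test's verbatim contents):
;
;   define i32 @sketch(i32 %a, i32 %b, i32 %c) {
;   entry:
;     %tst = icmp eq i32 %a, 0
;     br i1 %tst, label %t, label %f
;   t:                                  ; the barrier call is not predicable,
;     call void @llvm.arm.dmb(i32 15)   ; so the branch (and thus cbz) survives
;     %sum = add i32 %b, %c
;     ret i32 %sum
;   f:
;     ret i32 %c
;   }
;   declare void @llvm.arm.dmb(i32)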
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
index 844fb4a..dbf697cd 100644
--- a/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 4278190095 = 0xff00000f
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-bic.ll b/test/CodeGen/Thumb2/thumb2-bic.ll
index fc57ec8..68d92b8 100644
--- a/test/CodeGen/Thumb2/thumb2-bic.ll
+++ b/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index a5cd074..52b540b 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+v7 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+v7 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index da7d4b1..efa1505 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests could be improved by 'movs r0, #0' being rematerialized below the
; test as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
index a09a149..42473c2 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; -0x000000bb = 4294967109
define i1 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index 06c611d..8f08617 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests would be improved by 'movs r0, #0' being rematerialized below the
; test as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 8ca3caf..4d84003 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests would be improved by 'movs r0, #0' being rematerialized below the
; test as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-eor.ll b/test/CodeGen/Thumb2/thumb2-eor.ll
index 6dfc5cd..2028299 100644
--- a/test/CodeGen/Thumb2/thumb2-eor.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
index cf27448..f26aafe 100644
--- a/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
index 11620c2..ce7fb9f 100644
--- a/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -arm-adjust-jump-tables=0 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 -arm-adjust-jump-tables=0 %s -o - | FileCheck %s
; Do not use tbb / tbh if any destination is before the jumptable.
; rdar://7102917
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index 8716d80..adfcf2b 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -5,6 +5,7 @@
define i32 @t1() {
; CHECK-LABEL: t1:
; CHECK: push {r7, lr}
+; CHECK: ldrd
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
@@ -27,6 +28,7 @@ define i32 @t2() {
define i32 @t3() {
; CHECK-LABEL: t3:
; CHECK: push {r7, lr}
+; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index 09212d3..c25ed78 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32* %v) {
entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index b865cf4..b50b333 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrb | count 1
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrh | count 1
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrsb | count 1
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrsh | count 1
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @test1(i8* %v.pntr.s0.u1) {
%tmp.u = load i8* %v.pntr.s0.u1
@@ -26,3 +23,16 @@ define i32 @test4() {
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
}
+
+; CHECK: ldrb
+; CHECK-NOT: ldrb
+
+; CHECK: ldrh
+; CHECK-NOT: ldrh
+
+; CHECK: ldrsb
+; CHECK-NOT: ldrsb
+
+; CHECK: ldrsh
+; CHECK-NOT: ldrsh
+
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 4f04647..c26e6b1 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index 4907dec..cafb02a 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | \
-; RUN: grep "ldr.*\!" | count 3
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | \
-; RUN: grep "ldrsb.*\!" | count 1
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32* @test1(i32* %X, i32* %dest) {
%Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
@@ -10,6 +7,8 @@ define i32* @test1(i32* %X, i32* %dest) {
ret i32* %Y
}
+; CHECK: ldr{{.*}}!
+
define i32 @test2(i32 %a, i32 %b) {
%tmp1 = sub i32 %a, 64 ; <i32> [#uses=2]
%tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
@@ -19,6 +18,8 @@ define i32 @test2(i32 %a, i32 %b) {
ret i32 %tmp5
}
+; CHECK: ldr{{.*}}!
+
define i8* @test3(i8* %X, i32* %dest) {
%tmp1 = getelementptr i8* %X, i32 4
%tmp2 = load i8* %tmp1
@@ -26,3 +27,6 @@ define i8* @test3(i8* %X, i32* %dest) {
store i32 %tmp3, i32* %dest
ret i8* %tmp1
}
+
+; CHECK: ldrsb{{.*}}!
+
diff --git a/test/CodeGen/Thumb2/thumb2-ldrb.ll b/test/CodeGen/Thumb2/thumb2-ldrb.ll
index c79f732..0b3441e 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i8 @f1(i8* %v) {
entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index 7ba9f22..db5dcfa 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i16 @f1(i16* %v) {
entry:
diff --git a/test/CodeGen/Thumb2/thumb2-lsl.ll b/test/CodeGen/Thumb2/thumb2-lsl.ll
index 015a9dd..05441c8 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-lsl2.ll b/test/CodeGen/Thumb2/thumb2-lsl2.ll
index c64897a..5a456b0 100644
--- a/test/CodeGen/Thumb2/thumb2-lsl2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-lsr.ll b/test/CodeGen/Thumb2/thumb2-lsr.ll
index 24973c7..48c2ec4 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-lsr2.ll b/test/CodeGen/Thumb2/thumb2-lsr2.ll
index 0b199bb..5d158af 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-lsr3.ll b/test/CodeGen/Thumb2/thumb2-lsr3.ll
index c814123..c9344c8 100644
--- a/test/CodeGen/Thumb2/thumb2-lsr3.ll
+++ b/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i1 @test1(i64 %poscnt, i32 %work) {
entry:
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
index a99ffe7..0c97d50 100644
--- a/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 \
-; RUN: -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 \
+; RUN: -arm-use-mulops=false %s -o - | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 45d6d13..9b0e7ff 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 7c0dc01..e563362 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; Test #<const>
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index 5f68250..4815f4b 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll
index e32bd26..db9b644 100644
--- a/test/CodeGen/Thumb2/thumb2-mulhi.ll
+++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2dsp | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2dsp %s -o - | FileCheck %s
define i32 @smulhi(i32 %x, i32 %y) {
; CHECK: smulhi
diff --git a/test/CodeGen/Thumb2/thumb2-mvn.ll b/test/CodeGen/Thumb2/thumb2-mvn.ll
index a5592f6..adf982f 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-mvn2.ll b/test/CodeGen/Thumb2/thumb2-mvn2.ll
index cee6f23..323c2cc 100644
--- a/test/CodeGen/Thumb2/thumb2-mvn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-neg.ll b/test/CodeGen/Thumb2/thumb2-neg.ll
index 491e4de..bec6097 100644
--- a/test/CodeGen/Thumb2/thumb2-neg.ll
+++ b/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll
index 08676b1..e1f0bba 100644
--- a/test/CodeGen/Thumb2/thumb2-orn.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
-
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
%tmp = xor i32 %b, 4294967295
diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll
index a8f4a84..c8347df 100644
--- a/test/CodeGen/Thumb2/thumb2-orn2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
-
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-orr.ll b/test/CodeGen/Thumb2/thumb2-orr.ll
index 776d7fe..f962866 100644
--- a/test/CodeGen/Thumb2/thumb2-orr.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll
index 37885e2..045cc1d 100644
--- a/test/CodeGen/Thumb2/thumb2-orr2.ll
+++ b/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
-
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 0x000000bb = 187
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
index 9a0d889..4825628 100644
--- a/test/CodeGen/Thumb2/thumb2-pack.ll
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk %s -o - | FileCheck %s
; CHECK: test1
; CHECK: pkhbt r0, r0, r1, lsl #16
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index d710113..873a2d4 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+v7,+t2xtpk | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+v7,+t2xtpk %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
index 3a21560..71b0015 100644
--- a/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
-; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=THUMB1
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s -check-prefix=THUMB1
; CHECK-LABEL: f1:
; CHECK: ror.w r0, r0, #22
diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll
index 94a1fb0..1c5acad 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
%tmp = shl i32 %b, 5
diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll
index 248ab16..838e55e 100644
--- a/test/CodeGen/Thumb2/thumb2-rsb2.ll
+++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
index 7c69451..b04dae6 100644
--- a/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 < %s | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
; CHECK: f1
diff --git a/test/CodeGen/Thumb2/thumb2-select.ll b/test/CodeGen/Thumb2/thumb2-select.ll
index 949b611..105c267 100644
--- a/test/CodeGen/Thumb2/thumb2-select.ll
+++ b/test/CodeGen/Thumb2/thumb2-select.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -show-mc-encoding | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 -show-mc-encoding %s -o - \
+; RUN: | FileCheck %s
define i32 @f1(i32 %a.s) {
entry:
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index f8ceba2..20f0e5e 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 05dd90c..538fc22 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
-; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s --check-prefix=A8
+; RUN: llc -mtriple=thumb-eabi -mcpu=swift %s -o - | FileCheck %s --check-prefix=SWIFT
; rdar://12892707
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index f96263e..8573d39 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false %s -o - | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f3(i32 %a, i16 %x, i32 %y) {
; CHECK: f3
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index 742e766..67783d2 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp %s -o - | FileCheck %s
@x = weak global i16 0 ; <i16*> [#uses=1]
@y = weak global i16 0 ; <i16*> [#uses=0]
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index f800974..4008145 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32* %v) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index 716c2d2..aed849e 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i16 @test1(i32* %X, i16* %A) {
; CHECK-LABEL: test1:
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 83b3779..e957400 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define void @test1(i32* %X, i32* %A, i32** %dest) {
; CHECK: test1
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index 39e376d..a2558ec 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i8 @f1(i8 %a, i8* %v) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index 9444383..cbe73d5 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i16 @f1(i16 %a, i16* %v) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-sub.ll b/test/CodeGen/Thumb2/thumb2-sub.ll
index ad5eda1..1c69aeb 100644
--- a/test/CodeGen/Thumb2/thumb2-sub.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 171 = 0x000000ab
define i32 @f1(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-sub2.ll b/test/CodeGen/Thumb2/thumb2-sub2.ll
index f114892..8afc4cb 100644
--- a/test/CodeGen/Thumb2/thumb2-sub2.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a) {
%tmp = sub i32 %a, 4095
diff --git a/test/CodeGen/Thumb2/thumb2-sub3.ll b/test/CodeGen/Thumb2/thumb2-sub3.ll
index ae12b28..a3702f4 100644
--- a/test/CodeGen/Thumb2/thumb2-sub3.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 < %s | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; 171 = 0x000000ab
define i64 @f1(i64 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-sub4.ll b/test/CodeGen/Thumb2/thumb2-sub4.ll
index 873080a..0ff7567 100644
--- a/test/CodeGen/Thumb2/thumb2-sub4.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @f1(i32 %a, i32 %b) {
; CHECK-LABEL: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-sub5.ll b/test/CodeGen/Thumb2/thumb2-sub5.ll
index 02c83f6..e12d3e1 100644
--- a/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+32bit \
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+32bit %s -o - \
; RUN: | FileCheck %s
define i64 @f1(i64 %a, i64 %b) {
diff --git a/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll b/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
index 792ebef..47b94c5 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt-uxt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m3 %s -o - | FileCheck %s
define i32 @test1(i16 zeroext %z) nounwind {
; CHECK-LABEL: test1:
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 75bbd83..cef3490 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk %s -o - \
+; RUN: | FileCheck %s
define i32 @test0(i8 %A) {
; CHECK: test0
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 6b34e70..258b7e4 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests would be improved by 'movs r0, #0' being rematerialized below the
; test as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index ea43e560..3b4970b 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests would be improved by 'movs r0, #0' being rematerialized below the
; tst as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index c17510d..8cf6f14 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests would be improved by 'movs r0, #0' being rematerialized below the
; tst as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index 764e3d4..178a2a5 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
; These tests would be improved by 'movs r0, #0' being rematerialized below the
; tst as 'mov.w r0, #0'.
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 61e849e..bcd4a0f 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
-; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s --check-prefix=M3
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s --check-prefix=A8
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m3 %s -o - | FileCheck %s --check-prefix=M3
; rdar://11318438
define zeroext i8 @test1(i32 %A.u) {
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 2074f98..b8b1bc8 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARMv7A
-; RUN: llc < %s -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=ARMv7M
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s -check-prefix=ARMv7A
+; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m3 %s -o - | FileCheck %s -check-prefix=ARMv7M
define i32 @test1(i32 %x) {
; ARMv7A: test1
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index e1f8901..4d7c3a1 100644
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
-; RUN: grep push | count 3
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin -addr-sink-using-gep=1 | FileCheck %s
define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind {
entry:
+; CHECK-LABEL: @foo
+; CHECK: push
+; CHECK: push
+; CHECK: push
+; CHECK-NOT: push
+
icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %bb.preheader, label %return
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index e673d31..e64375a 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2
+; RUN: llc < %s -relocation-model=pic | FileCheck %s
; PR2137
; ModuleID = '1.c'
@@ -11,6 +11,8 @@ target triple = "i386-pc-linux-gnu"
@__libc_resp = hidden alias %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2]
define i32 @foo() {
+; CHECK-LABEL: foo:
+; CHECK: leal __libc_resp@TLSLD
entry:
%retval = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
@@ -24,6 +26,8 @@ return: ; preds = %entry
}
define i32 @bar() {
+; CHECK-LABEL: bar:
+; CHECK: leal __libc_resp@TLSLD
entry:
%retval = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index 91fec3b..09e34ef 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -76,7 +76,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!49}
-!46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
+!46 = metadata !{metadata !16, metadata !17, metadata !20}
!0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
!1 = metadata !{i32 786451, metadata !47, metadata !2, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index 0ef3aa5..f6d6852 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index d0a262d..8487c60 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -22,7 +22,7 @@ define i32 @foo_f() {
@bar_i = alias internal i32* @bar
; CHECK-DAG: .globl A
-@A = alias bitcast (i32* @bar to i64*)
+@A = alias i64, i32* @bar
; CHECK-DAG: .globl bar_h
; CHECK-DAG: .hidden bar_h
diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
index d1b52a4..5980b79 100644
--- a/test/CodeGen/X86/atom-bypass-slow-division-64.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mcpu=atom -march=x86-64 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
; Additional tests for 64-bit divide bypass
diff --git a/test/CodeGen/X86/avoid_complex_am.ll b/test/CodeGen/X86/avoid_complex_am.ll
new file mode 100644
index 0000000..7f09519
--- /dev/null
+++ b/test/CodeGen/X86/avoid_complex_am.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+; Complex addressing modes are costly.
+; Make loop-reduce prefer unscaled accesses.
+; On X86, reg1 + 1*reg2 has the same cost as reg1 + 8*reg2.
+; Therefore, LSR currently prefers to fold as much computation as possible
+; into the addressing mode.
+; (An illustrative sketch of the folded form follows this test.)
+; <rdar://problem/16730541>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+define void @mulDouble(double* nocapture %a, double* nocapture %b, double* nocapture %c) {
+; CHECK: @mulDouble
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: [[IV:%[^ ]+]] = phi i64 [ [[IVNEXT:%[^,]+]], %for.body ], [ 0, %entry ]
+; Only one induction variable should have been generated.
+; CHECK-NOT: phi
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+ %tmp = add nsw i64 %indvars.iv, -1
+ %arrayidx = getelementptr inbounds double* %b, i64 %tmp
+ %tmp1 = load double* %arrayidx, align 8
+; The induction variable should carry the scaling factor: 1.
+; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
+ %tmp2 = load double* %arrayidx2, align 8
+ %mul = fmul double %tmp1, %tmp2
+ %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv
+ store double %mul, double* %arrayidx4, align 8
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; The comparison constant should be 19 * 1 = 19 (scale 1; the rewritten
+; induction variable starts at 0 rather than 1).
+; CHECK: icmp eq i32 {{%[^,]+}}, 19
+ %exitcond = icmp eq i32 %lftr.wideiv, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
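; Illustrative aside, not part of the patch: had LSR folded the scale into
; the addressing computation instead, the loop would carry a second, scaled
; expression, roughly (hypothetical names, a sketch only):
;
;   %offset = shl i64 %indvars.iv, 3                ; 8 * iv, the double stride
;   %addr   = getelementptr i8* %b.i8, i64 %offset  ; reg + 8*reg form
;
; The CHECK lines above assert the opposite: one induction variable stepping
; by 1, with the element-size scaling left implicit in the typed
; getelementptr.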
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 5fcd5ff..e21c7a0 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -3,7 +3,16 @@
; AVX128 tests:
;CHECK-LABEL: vsel_float:
-;CHECK: vblendvps
+; The select mask is <i1 true, i1 false, i1 true, i1 false>.
+; Its big-endian representation is 0101 = 5.
+; '1' means take the first argument; '0' means take the second argument.
+; This is the opposite of the Intel immediate encoding, so we expect
+; the inverted mask: 1010 = 10.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+; result is in xmm0 => destination argument.
+; (A second worked mask follows this hunk below.)
+;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
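; Illustrative aside, not part of the patch: a second worked mask under the
; same encoding rule described above. For the select mask
; <i1 false, i1 true, i1 true, i1 false> the big-endian bits are 0110 = 6,
; so the expected vblendps immediate is the inverse, 1001 = 9:
;
;   define <4 x float> @vsel_float_0110(<4 x float> %v1, <4 x float> %v2) {
;     %vsel = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>,
;                    <4 x float> %v1, <4 x float> %v2
;     ret <4 x float> %vsel
;   }
;
; which, by the same rule, should lower to: vblendps $9, %xmm1, %xmm0, %xmm0
; (a sketch of the encoding arithmetic, not output verified against llc).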
@@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
;CHECK-LABEL: vsel_i32:
-;CHECK: vblendvps
+;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
;CHECK-LABEL: vsel_float8:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvps
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (big endian representation):
+; 00010001 = 17.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel syntax, thus we expect
+; the inverted mask: 11101110 = 238.
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
;CHECK: ret
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
;CHECK-LABEL: vsel_i328:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvps
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
;CHECK-NEXT: ret
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
@@ -69,7 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
}
;CHECK-LABEL: vsel_double8:
-;CHECK: vblendvpd
+; select mask is 2x: 0001 => Intel mask: ~0001 = 14
+; ABI:
+; v1 is in ymm0 and ymm1.
+; v2 is in ymm2 and ymm3.
+; result is in ymm0 and ymm1.
+; Compute the low part: res.low = blend v1.low, v2.low, blendmask
+;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
+; Compute the high part.
+;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
@@ -77,7 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
}
;CHECK-LABEL: vsel_i648:
-;CHECK: vblendvpd
+;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
+;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
@@ -86,7 +110,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
;CHECK-LABEL: vsel_double4:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvpd
+;CHECK: vshufpd $10
;CHECK-NEXT: ret
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
@@ -112,4 +136,25 @@ define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %min
}
+; If we can figure out that a blend has a constant mask, we should emit the
+; blend instruction with an immediate mask.
+define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: constant_blendvpd_avx:
+; CHECK-NOT: mov
+; CHECK: vblendpd
+; CHECK: ret
+ %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
+ ret <4 x double> %1
+}
+
+define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: constant_blendvps_avx:
+; CHECK-NOT: mov
+; CHECK: vblendps
+; CHECK: ret
+ %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
+ ret <8 x float> %1
+}
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
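As a sanity check on the mask arithmetic in the comments above, here is a minimal C sketch of how the blend immediates are derived from a select mask; the helper name is ours, not an LLVM API. A set bit in the immediate takes that lane from the second source, the inverse of the select mask, where true takes %v1.

/* Sketch: derive a vblendps/vblendpd immediate from a select mask.
   Bit i set takes lane i from the second source, i.e. the inverse of
   the select mask, where 'true' takes the first argument. */
unsigned blend_imm(const int *select_mask, int lanes) {
  unsigned imm = 0;
  for (int i = 0; i < lanes; ++i)
    if (!select_mask[i])
      imm |= 1u << i;
  return imm;
}
/* <1,0,1,0> -> 10 as in vsel_float; <1,0,0,0,1,0,0,0> -> 238. */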
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 02aa617..f407ba4 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -306,3 +306,29 @@ define void @test20() {
store <3 x double> %a1, <3 x double>* undef, align 1
ret void
}
+
+define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
+; CHECK-LABEL: test_insert_64_zext
+; CHECK-NOT: xor
+; CHECK: vmovq
+ %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %1
+}
+
+;; Ensure we don't use insertps from non-v4x32 vectors.
+;; On SSE4.1 it works because bigger vectors use more than 1 register.
+;; On AVX they get passed in a single register.
+;; FIXME: We could probably optimize this case, if we're only using the
+;; first 4 indices.
+define <4 x i32> @insert_from_diff_size(<8 x i32> %x) {
+; CHECK-LABEL: insert_from_diff_size:
+; CHECK-NOT: insertps
+; CHECK: ret
+ %vecext = extractelement <8 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
+ %a.0 = extractelement <8 x i32> %x, i32 0
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a.0, i32 3
+ ret <4 x i32> %vecinit3
+}
diff --git a/test/CodeGen/X86/avx.ll b/test/CodeGen/X86/avx.ll
new file mode 100644
index 0000000..6069c14
--- /dev/null
+++ b/test/CodeGen/X86/avx.ll
@@ -0,0 +1,136 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=X32 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=X64 --check-prefix=CHECK
+
+define <4 x i32> @blendvb_fallback_v4i32(<4 x i1> %mask, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @blendvb_fallback_v4i32
+; CHECK: vblendvps
+; CHECK: ret
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @blendvb_fallback_v8i32(<8 x i1> %mask, <8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: @blendvb_fallback_v8i32
+; CHECK: vblendvps
+; CHECK: ret
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
+ ret <8 x i32> %ret
+}
+
+define <8 x float> @blendvb_fallback_v8f32(<8 x i1> %mask, <8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @blendvb_fallback_v8f32
+; CHECK: vblendvps
+; CHECK: ret
+ %ret = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
+ ret <8 x float> %ret
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; CHECK-LABEL: insertps_from_vector_load:
+; On X32, account for the argument's move to registers
+; X32: movl 4(%esp), %eax
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK-NEXT: ret
+ %1 = load <4 x float>* %pb, align 16
+ %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
+ ret <4 x float> %2
+}
+
+;; Use a non-zero CountS for insertps
+define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; CHECK-LABEL: insertps_from_vector_load_offset:
+; On X32, account for the argument's move to registers
+; X32: movl 4(%esp), %eax
+; CHECK-NOT: mov
+;; Try to match a bit more of the instr, since we need the load's offset.
+; CHECK: insertps $96, 4(%{{...}}), %
+; CHECK-NEXT: ret
+ %1 = load <4 x float>* %pb, align 16
+ %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
+ ret <4 x float> %2
+}
+
+define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
+; CHECK-LABEL: insertps_from_vector_load_offset_2:
+; On X32, account for the argument's move to registers
+; X32: movl 4(%esp), %eax
+; X32: movl 8(%esp), %ecx
+; CHECK-NOT: mov
+;; Try to match a bit more of the instr, since we need the load's offset.
+; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
+ %2 = load <4 x float>* %1, align 16
+ %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
+ ret <4 x float> %3
+}
+
+define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) {
+; CHECK-LABEL: insertps_from_broadcast_loadf32:
+; On X32, account for the arguments' move to registers
+; X32: movl 8(%esp), %eax
+; X32: movl 4(%esp), %ecx
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds float* %fb, i64 %index
+ %2 = load float* %1, align 4
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ %4 = insertelement <4 x float> %3, float %2, i32 1
+ %5 = insertelement <4 x float> %4, float %2, i32 2
+ %6 = insertelement <4 x float> %5, float %2, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
+ ret <4 x float> %7
+}
+
+define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) {
+; CHECK-LABEL: insertps_from_broadcast_loadv4f32:
+; On X32, account for the arguments' move to registers
+; X32: movl 4(%esp), %{{...}}
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK-NEXT: ret
+ %1 = load <4 x float>* %b, align 4
+ %2 = extractelement <4 x float> %1, i32 0
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ %4 = insertelement <4 x float> %3, float %2, i32 1
+ %5 = insertelement <4 x float> %4, float %2, i32 2
+ %6 = insertelement <4 x float> %5, float %2, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
+ ret <4 x float> %7
+}
+
+;; FIXME: We're emitting an extraneous pshufd/vbroadcast.
+define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
+; CHECK-LABEL: insertps_from_broadcast_multiple_use:
+; On X32, account for the arguments' move to registers
+; X32: movl 8(%esp), %eax
+; X32: movl 4(%esp), %ecx
+; CHECK: vbroadcastss
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK: insertps $48
+; CHECK: insertps $48
+; CHECK: insertps $48
+; CHECK: vaddps
+; CHECK: vaddps
+; CHECK: vaddps
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds float* %fb, i64 %index
+ %2 = load float* %1, align 4
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ %4 = insertelement <4 x float> %3, float %2, i32 1
+ %5 = insertelement <4 x float> %4, float %2, i32 2
+ %6 = insertelement <4 x float> %5, float %2, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
+ %8 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %b, <4 x float> %6, i32 48)
+ %9 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %c, <4 x float> %6, i32 48)
+ %10 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %d, <4 x float> %6, i32 48)
+ %11 = fadd <4 x float> %7, %8
+ %12 = fadd <4 x float> %9, %10
+ %13 = fadd <4 x float> %11, %12
+ ret <4 x float> %13
+}
diff --git a/test/CodeGen/X86/avx1-logical-load-folding.ll b/test/CodeGen/X86/avx1-logical-load-folding.ll
new file mode 100644
index 0000000..32301b1
--- /dev/null
+++ b/test/CodeGen/X86/avx1-logical-load-folding.ll
@@ -0,0 +1,60 @@
+; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @test1(float* %A, float* %C) #0 {
+ %tmp1 = bitcast float* %A to <8 x float>*
+ %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %tmp4 = and <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+ %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
+ %tmp6 = extractelement <8 x float> %tmp5, i32 0
+ store float %tmp6, float* %C
+ ret void
+
+ ; CHECK: vandps LCPI0_0(%rip), %ymm0, %ymm0
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @test2(float* %A, float* %C) #0 {
+ %tmp1 = bitcast float* %A to <8 x float>*
+ %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %tmp4 = or <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+ %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
+ %tmp6 = extractelement <8 x float> %tmp5, i32 0
+ store float %tmp6, float* %C
+ ret void
+
+ ; CHECK: vorps LCPI1_0(%rip), %ymm0, %ymm0
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @test3(float* %A, float* %C) #0 {
+ %tmp1 = bitcast float* %A to <8 x float>*
+ %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %tmp4 = xor <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+ %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
+ %tmp6 = extractelement <8 x float> %tmp5, i32 0
+ store float %tmp6, float* %C
+ ret void
+
+ ; CHECK: vxorps LCPI2_0(%rip), %ymm0, %ymm0
+}
+
+define void @test4(float* %A, float* %C) #0 {
+ %tmp1 = bitcast float* %A to <8 x float>*
+ %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %tmp4 = xor <8 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %tmp5 = and <8 x i32> %tmp4, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+ %tmp6 = bitcast <8 x i32> %tmp5 to <8 x float>
+ %tmp7 = extractelement <8 x float> %tmp6, i32 0
+ store float %tmp7, float * %C
+ ret void
+
+ ;CHECK: vandnps LCPI3_0(%rip), %ymm0, %ymm0
+}
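The test4 pattern deserves a note: it is the NOT feeding the AND that lets the backend fold the pair into a single vandnps. Per element, in scalar C terms (a sketch of the semantics, not target code):

/* NOT + AND combine into one andn operation: (~x) & mask. */
unsigned andn_u32(unsigned x, unsigned mask) {
  return ~x & mask;
}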
diff --git a/test/CodeGen/X86/avx2-blend.ll b/test/CodeGen/X86/avx2-blend.ll
new file mode 100644
index 0000000..b02442b
--- /dev/null
+++ b/test/CodeGen/X86/avx2-blend.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: constant_pblendvb_avx2:
+; CHECK: vmovdqa
+; CHECK: vpblendvb
+ %1 = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
+ ret <32 x i8> %1
+}
+
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
index 4ae2905..e355301 100644
--- a/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -52,6 +52,16 @@ entry:
; CHECK: vpaddd %ymm0, %ymm0, %ymm0
; CHECK: ret
+define <8 x i32> @test_vpslld_var(i32 %shift) {
+ %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
+ %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
+ ret <8 x i32> %tmp
+}
+
+; CHECK-LABEL: test_vpslld_var:
+; CHECK: vpslld %xmm0, %ymm1, %ymm0
+; CHECK: ret
+
define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
entry:
%shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 1d83485..2476ea1 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -24,6 +24,22 @@ define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
ret <16 x i32> %b
}
+; CHECK-LABEL: fptoui_256
+; CHECK: vcvttps2udq
+; CHECK: ret
+define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
+ %b = fptoui <8 x float> %a to <8 x i32>
+ ret <8 x i32> %b
+}
+
+; CHECK-LABEL: fptoui_128
+; CHECK: vcvttps2udq
+; CHECK: ret
+define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
+ %b = fptoui <4 x float> %a to <4 x i32>
+ ret <4 x i32> %b
+}
+
; CHECK-LABEL: fptoui01
; CHECK: vcvttpd2udq
; CHECK: ret
@@ -184,6 +200,22 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind {
ret <16 x float> %b
}
+; CHECK-LABEL: uitof32_256
+; CHECK: vcvtudq2ps
+; CHECK: ret
+define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
+ %b = uitofp <8 x i32> %a to <8 x float>
+ ret <8 x float> %b
+}
+
+; CHECK-LABEL: uitof32_128
+; CHECK: vcvtudq2ps
+; CHECK: ret
+define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
+ %b = uitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %b
+}
+
; CHECK-LABEL: @fptosi02
; CHECK: vcvttss2si {{.*}} encoding: [0x62
; CHECK: ret
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index e429a22..20bf7e4 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-declare <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float>, i16, <16 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dps.mask.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
-declare <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double>, i8, <8 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dpd.mask.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
+declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
+declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
+declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
+declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
-declare <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float>, i8, <8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
-declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
+declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
+declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
+declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
+declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
;CHECK-LABEL: gather_mask_dps
;CHECK: kmovw
@@ -17,9 +17,9 @@ declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x dou
;CHECK: vscatterdps
;CHECK: ret
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
- %x = call <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4)
+ %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dps.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
ret void
}
@@ -30,9 +30,9 @@ define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8*
;CHECK: vscatterdpd
;CHECK: ret
define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = call <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4)
+ %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
ret void
}
@@ -43,9 +43,9 @@ define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %b
;CHECK: vscatterqps
;CHECK: ret
define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = call <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+ %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qps.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
ret void
}
@@ -56,23 +56,23 @@ define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %ba
;CHECK: vscatterqpd
;CHECK: ret
define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = call <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+ %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
ret void
}
;;
;; Integer Gather/Scatter
;;
-declare <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32>, i16, <16 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dpi.mask.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
-declare <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64>, i8, <8 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dpq.mask.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
+declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
+declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
+declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
+declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
-declare <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32>, i8, <8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
-declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
+declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
+declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
+declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
+declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
;CHECK-LABEL: gather_mask_dd
;CHECK: kmovw
@@ -81,9 +81,9 @@ declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64
;CHECK: vpscatterdd
;CHECK: ret
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
- %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4)
+ %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpi.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
ret void
}
@@ -94,9 +94,9 @@ define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %ba
;CHECK: vpscatterqd
;CHECK: ret
define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+ %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpi.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
ret void
}
@@ -107,9 +107,9 @@ define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base,
;CHECK: vpscatterqq
;CHECK: ret
define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+ %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
ret void
}
@@ -120,116 +120,19 @@ define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base,
;CHECK: vpscatterdq
;CHECK: ret
define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4)
+ %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
ret void
}
-;; FP Intinsics without masks
-
-declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dps.512 (i8*, <16 x i32>, <16 x float>, i32)
-declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32)
-declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32)
-
-;CHECK-LABEL: gather_dps
-;CHECK: kxnorw
-;CHECK: vgatherdps
-;CHECK: vscatterdps
-;CHECK: ret
-define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf) {
- %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>%ind, i8* %base, i32 4)
- %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, <16 x i32>%ind2, <16 x float> %x, i32 4)
- ret void
-}
-
-;CHECK-LABEL: gather_qps
-;CHECK: kxnorw
-;CHECK: vgatherqps
-;CHECK: vscatterqps
-;CHECK: ret
-define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf) {
- %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>%ind, i8* %base, i32 4)
- %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, <8 x i64>%ind2, <8 x float> %x, i32 4)
- ret void
-}
-
-;CHECK-LABEL: gather_qpd
-;CHECK: kxnorw
-;CHECK: vgatherqpd
-;CHECK: vpadd
-;CHECK: vscatterqpd
-;CHECK: ret
-define void @gather_qpd(<8 x i64> %ind, i8* %base, i8* %stbuf) {
- %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>%ind, i8* %base, i32 4)
- %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, <8 x i64>%ind2, <8 x double> %x, i32 4)
- ret void
-}
-
-;; Integer Intinsics without masks
-
-declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, <16 x i32>, <16 x i32>, i32)
-declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i32>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, <8 x i32>, <8 x i64>, i32)
-
-declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32)
-declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32)
-declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x i64>, i32)
-
-;CHECK-LABEL: gather_dpi
-;CHECK: kxnorw
-;CHECK: vpgatherdd
-;CHECK: vpscatterdd
-;CHECK: ret
-define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf) {
- %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>%ind, i8* %base, i32 4)
- %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, <16 x i32>%ind2, <16 x i32> %x, i32 4)
- ret void
-}
-
-;CHECK-LABEL: gather_qpq
-;CHECK: vpxord %zmm
-;CHECK: kxnorw
-;CHECK: vpgatherqq
-;CHECK: vpadd
-;CHECK: vpscatterqq
-;CHECK: ret
-define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf) {
- %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>%ind, i8* %base, i32 4)
- %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i64> %x, i32 4)
- ret void
-}
-
-;CHECK-LABEL: gather_qpi
-;CHECK: vpxor %ymm
-;CHECK: kxnorw
-;CHECK: vpgatherqd
-;CHECK: vpadd
-;CHECK: vpscatterqd
-;CHECK: ret
-define void @gather_qpi(<8 x i64> %ind, i8* %base, i8* %stbuf) {
- %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>%ind, i8* %base, i32 4)
- %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
- call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i32> %x, i32 4)
- ret void
-}
;CHECK-LABEL: gather_mask_dpd_execdomain
;CHECK: vgatherdpd
;CHECK: vmovapd
;CHECK: ret
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
- %x = call <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4)
+ %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
store <8 x double> %x, <8 x double>* %stbuf
ret void
}
@@ -239,7 +142,7 @@ define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %m
;CHECK: vmovapd
;CHECK: ret
define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
- %x = call <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+ %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
store <8 x double> %x, <8 x double>* %stbuf
ret void
}
@@ -249,7 +152,7 @@ define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %m
;CHECK: vmovaps
;CHECK: ret
define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
- %res = call <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4)
+ %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
ret <16 x float> %res;
}
@@ -258,7 +161,7 @@ define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %s
;CHECK: vmovaps
;CHECK: ret
define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
- %res = call <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4)
+ %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
ret <8 x float> %res;
}
@@ -268,7 +171,7 @@ define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src,
;CHECK: ret
define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
%x = load <8 x double>* %src, align 64
- call void @llvm.x86.avx512.scatter.dpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
ret void
}
@@ -278,7 +181,7 @@ define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8
;CHECK: ret
define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
%x = load <8 x double>* %src, align 64
- call void @llvm.x86.avx512.scatter.qpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
ret void
}
@@ -288,7 +191,7 @@ define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8
;CHECK: ret
define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
%x = load <16 x float>* %src, align 64
- call void @llvm.x86.avx512.scatter.dps.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
ret void
}
@@ -298,6 +201,35 @@ define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i1
;CHECK: ret
define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
%x = load <8 x float>* %src, align 32
- call void @llvm.x86.avx512.scatter.qps.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
+ call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
+ ret void
+}
+
+;CHECK-LABEL: gather_qps
+;CHECK: kxnorw
+;CHECK: vgatherqps
+;CHECK: vpadd
+;CHECK: vscatterqps
+;CHECK: ret
+define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
+ %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
+ %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
+ call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
+ ret void
+}
+
+;CHECK-LABEL: prefetch
+;CHECK: gatherpf0
+;CHECK: gatherpf1
+;CHECK: scatterpf0
+;CHECK: scatterpf1
+;CHECK: ret
+declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
+declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
+define void @prefetch(<8 x i64> %ind, i8* %base) {
+ call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
+ call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
+ call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
+ call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
ret void
}
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index 6557ac3..b360c71 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -158,3 +158,41 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
%res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
ret i64 %res
}
+
+;CHECK-LABEL: test15
+;CHECK: kshiftlw
+;CHECK: kmovw
+;CHECK: ret
+define i16 @test15(i1 *%addr) {
+ %x = load i1 * %addr, align 128
+ %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
+ %x2 = bitcast <16 x i1>%x1 to i16
+ ret i16 %x2
+}
+
+;CHECK-LABEL: test16
+;CHECK: kshiftlw
+;CHECK: kshiftrw
+;CHECK: korw
+;CHECK: ret
+define i16 @test16(i1 *%addr, i16 %a) {
+ %x = load i1 * %addr, align 128
+ %a1 = bitcast i16 %a to <16 x i1>
+ %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
+ %x2 = bitcast <16 x i1>%x1 to i16
+ ret i16 %x2
+}
+
+;CHECK-LABEL: test17
+;CHECK: kshiftlw
+;CHECK: kshiftrw
+;CHECK: korw
+;CHECK: ret
+define i8 @test17(i1 *%addr, i8 %a) {
+ %x = load i1 * %addr, align 128
+ %a1 = bitcast i8 %a to <8 x i1>
+ %x1 = insertelement <8 x i1> %a1, i1 %x, i32 10
+ %x2 = bitcast <8 x i1>%x1 to i8
+ ret i8 %x2
+}
+
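The kshiftlw/kshiftrw/korw sequences checked in test16 and test17 implement a single-bit insert into a mask register. The net effect is the usual clear-then-or idiom; a C sketch for the 16-bit case of test16 (the helper name is illustrative):

/* Insert bit x at position 10 of a 16-bit mask, matching the
   insertelement at index 10 that test16 lowers with k-register ops. */
unsigned short insert_bit10(unsigned short a, int x) {
  return (unsigned short)((a & ~(1u << 10)) | ((unsigned)(x & 1) << 10));
}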
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 3fb38ed..e19841a 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -78,7 +78,7 @@ declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8
define <8 x double> @test7(<8 x double> %a) {
; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
- %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> zeroinitializer, i8 -1, i32 4)
+ %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
ret <8 x double>%res
}
@@ -86,7 +86,7 @@ declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <1
define <16 x float> @test8(<16 x float> %a) {
; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
- %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
ret <16 x float>%res
}
@@ -536,4 +536,12 @@ define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
ret void
}
-declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 ) \ No newline at end of file
+declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
+
+define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
+; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll
index 13e6843..009802f 100644
--- a/test/CodeGen/X86/avx512-mov.ll
+++ b/test/CodeGen/X86/avx512-mov.ll
@@ -153,3 +153,31 @@ define void @test18(i8 * %addr, <8 x i64> %data) {
ret void
}
+; CHECK-LABEL: store_i1_1
+; CHECK: movb
+; CHECK: movb
+; CHECK: ret
+define void @store_i1_1() {
+ store i1 true, i1 addrspace(3)* undef, align 128
+ store i1 false, i1 addrspace(2)* undef, align 128
+ ret void
+}
+
+; CHECK-LABEL: store_i1_2
+; CHECK: movb
+; CHECK: ret
+define void @store_i1_2(i64 %a, i64 %b) {
+ %res = icmp eq i64 %a, %b
+ store i1 %res, i1 addrspace(3)* undef, align 128
+ ret void
+}
+
+; CHECK-LABEL: store_i1_3
+; CHECK: kmovw
+; CHECK: ret
+define void @store_i1_3(i16 %a) {
+ %a_vec = bitcast i16 %a to <16 x i1>
+ %res = extractelement <16 x i1> %a_vec, i32 4
+ store i1 %res, i1 addrspace(3)* undef, align 128
+ ret void
+}
diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll
index 59d7010..23ddc3a 100644
--- a/test/CodeGen/X86/avx512-shuffle.ll
+++ b/test/CodeGen/X86/avx512-shuffle.ll
@@ -231,3 +231,22 @@ define <16 x i32> @test27(<4 x i32>%a) {
%res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i32> %res
}
+
+; CHECK-LABEL: @test28
+; CHECK: vinserti64x4 $1
+; CHECK: ret
+define <16 x i32> @test28(<16 x i32>%x, <16 x i32>%y) {
+ %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i32> %res
+}
+
+; CHECK-LABEL: @test29
+; CHECK: vinserti64x4 $0
+; CHECK: ret
+define <16 x i32> @test29(<16 x i32>%x, <16 x i32>%y) {
+ %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i32> %res
+}
+
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 6b46596..34aaf2c 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -4,7 +4,7 @@
; Verify that we produce movss instead of blendvps when possible.
;CHECK-LABEL: vsel_float:
-;CHECK-NOT: blendvps
+;CHECK-NOT: blend
;CHECK: movss
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -13,7 +13,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
}
;CHECK-LABEL: vsel_4xi8:
-;CHECK-NOT: blendvps
+;CHECK-NOT: blend
;CHECK: movss
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
@@ -21,14 +21,18 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
ret <4 x i8> %vsel
}
-
-; We do not have native support for v8i16 blends and we have to use the
-; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not
-; reduce the mask in this case.
;CHECK-LABEL: vsel_8xi16:
-;CHECK: andps
-;CHECK: andps
-;CHECK: orps
+; The select mask is
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (big endian representation):
+; 00010001 = 17.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel syntax, thus we expect
+; the inverted mask: 11101110 = 238.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+;CHECK: pblendw $238, %xmm1, %xmm0
;CHECK: ret
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 242075a..a707209 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -216,6 +216,23 @@ entry:
; CHECK: bzhiq
}
+define i64 @bzhi64_constant_mask(i64 %x) #0 {
+entry:
+ %and = and i64 %x, 4611686018427387903
+ ret i64 %and
+; CHECK-LABEL: bzhi64_constant_mask:
+; CHECK: movb $62, %al
+; CHECK: bzhiq %rax, %r[[ARG1:di|cx]], %rax
+}
+
+define i64 @bzhi64_small_constant_mask(i64 %x) #0 {
+entry:
+ %and = and i64 %x, 2147483647
+ ret i64 %and
+; CHECK-LABEL: bzhi64_small_constant_mask:
+; CHECK: andq $2147483647, %r[[ARG1]]
+}
+
define i32 @blsi32(i32 %x) nounwind readnone {
%tmp = sub i32 0, %x
%tmp2 = and i32 %x, %tmp
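The bzhi64_constant_mask test above leans on the BMI2 BZHI semantics: an index of 62 zeroes every bit from position 62 upward, which is exactly masking with 4611686018427387903 = 2^62 - 1. A scalar model of the instruction, as a sketch:

/* Scalar model of BZHI: zero the high bits starting at position idx;
   indices >= the operand width leave the source unchanged. */
unsigned long long bzhi64(unsigned long long x, unsigned idx) {
  return idx >= 64 ? x : x & ((1ull << idx) - 1ull);
}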
diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll
index 5223463..fd1e73b 100644
--- a/test/CodeGen/X86/br-fold.ll
+++ b/test/CodeGen/X86/br-fold.ll
@@ -1,7 +1,19 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck -check-prefix=X64_DARWIN %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc -mtriple=x86_64-pc-windows < %s | FileCheck -check-prefix=X64_WINDOWS %s
+; RUN: llc -mtriple=x86_64-pc-windows-gnu < %s | FileCheck -check-prefix=X64_WINDOWS_GNU %s
-; CHECK: orq
-; CHECK-NEXT: %bb8.i329
+; X64_DARWIN: orq
+; X64_DARWIN-NEXT: %bb8.i329
+
+; X64_LINUX: orq %rax, %rcx
+; X64_LINUX-NEXT: %bb8.i329
+
+; X64_WINDOWS: orq %rax, %rcx
+; X64_WINDOWS-NEXT: ud2
+
+; X64_WINDOWS_GNU: orq %rax, %rcx
+; X64_WINDOWS_GNU-NEXT: ud2
@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1]
@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1]
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
index 6b77176..3c931db 100644
--- a/test/CodeGen/X86/bswap-vector.ll
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -1,19 +1,144 @@
-; RUN: llc < %s -mcpu=x86_64 | FileCheck %s
+; RUN: llc < %s -mcpu=x86-64 | FileCheck %s -check-prefix=CHECK-NOSSSE3
+; RUN: llc < %s -mcpu=core2 | FileCheck %s -check-prefix=CHECK-SSSE3
+; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
-define <2 x i64> @foo(<2 x i64> %v) #0 {
+define <8 x i16> @test1(<8 x i16> %v) #0 {
+entry:
+ %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
+ ret <8 x i16> %r
+
+; CHECK-NOSSSE3-LABEL: @test1
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: rolw
+; CHECK-NOSSSE3: retq
+
+; CHECK-SSSE3-LABEL: @test1
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test1
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2-NEXT: retq
+}
+
+define <4 x i32> @test2(<4 x i32> %v) #0 {
+entry:
+ %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
+ ret <4 x i32> %r
+
+; CHECK-NOSSSE3-LABEL: @test2
+; CHECK-NOSSSE3: bswapl
+; CHECK-NOSSSE3: bswapl
+; CHECK-NOSSSE3: bswapl
+; CHECK-NOSSSE3: bswapl
+; CHECK-NOSSSE3: retq
+
+; CHECK-SSSE3-LABEL: @test2
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test2
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2-NEXT: retq
+}
+
+define <2 x i64> @test3(<2 x i64> %v) #0 {
entry:
%r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
ret <2 x i64> %r
+
+; CHECK-NOSSSE3-LABEL: @test3
+; CHECK-NOSSSE3: bswapq
+; CHECK-NOSSSE3: bswapq
+; CHECK-NOSSSE3: retq
+
+; CHECK-SSSE3-LABEL: @test3
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test3
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2-NEXT: retq
+}
+
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
+
+define <16 x i16> @test4(<16 x i16> %v) #0 {
+entry:
+ %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
+ ret <16 x i16> %r
+
+; CHECK-SSSE3-LABEL: @test4
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test4
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2-NEXT: retq
+}
+
+define <8 x i32> @test5(<8 x i32> %v) #0 {
+entry:
+ %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
+ ret <8 x i32> %r
+
+; CHECK-SSSE3-LABEL: @test5
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test5
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2-NEXT: retq
+}
+
+define <4 x i64> @test6(<4 x i64> %v) #0 {
+entry:
+ %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
+ ret <4 x i64> %r
+
+; CHECK-SSSE3-LABEL: @test6
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test6
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2-NEXT: retq
}
-; CHECK-LABEL: @foo
-; CHECK: bswapq
-; CHECK: bswapq
-; CHECK: retq
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
+
+define <4 x i16> @test7(<4 x i16> %v) #0 {
+entry:
+ %r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
+ ret <4 x i16> %r
+
+; CHECK-SSSE3-LABEL: @test7
+; CHECK-SSSE3: pshufb
+; CHECK-SSSE3: psrld $16
+; CHECK-SSSE3-NEXT: retq
+
+; CHECK-AVX2-LABEL: @test7
+; CHECK-AVX2: vpshufb
+; CHECK-AVX2: vpsrld $16
+; CHECK-AVX2-NEXT: retq
+}
attributes #0 = { nounwind uwtable }
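The three prefixes capture how the lowering scales: without SSSE3 each lane is byte-swapped separately (one rolw or bswapl/bswapq per element), while SSSE3's pshufb and AVX2's vpshufb permute every byte of the vector in a single shuffle. The per-lane operation, for reference, is the standard byte-swap idiom:

/* What each bswapl in the CHECK-NOSSSE3 runs computes for one lane;
   pshufb performs all lanes in one byte shuffle. */
static inline unsigned bswap32(unsigned x) {
  return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
         ((x << 8) & 0x00ff0000u) | (x << 24);
}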
diff --git a/test/CodeGen/X86/cdecl-method-return.ll b/test/CodeGen/X86/cdecl-method-return.ll
deleted file mode 100644
index 2baa47a..0000000
--- a/test/CodeGen/X86/cdecl-method-return.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=core2 | FileCheck %s
-
-; The sret flag causes the first two parameters to be reordered on the stack.
-
-define x86_cdeclmethodcc void @foo(i32* sret %dst, i32* %src) {
- %v = load i32* %src
- store i32 %v, i32* %dst
- ret void
-}
-
-; CHECK-LABEL: _foo:
-; CHECK: movl 8(%esp), %[[dst:[^ ]*]]
-; CHECK: movl 4(%esp), %[[src:[^ ]*]]
-; CHECK: movl (%[[src]]), %[[v:[^ ]*]]
-; CHECK: movl %[[v]], (%[[dst]])
-; CHECK: retl
-
-define i32 @bar() {
- %src = alloca i32
- %dst = alloca i32
- store i32 42, i32* %src
- call x86_cdeclmethodcc void @foo(i32* sret %dst, i32* %src)
- %v = load i32* %dst
- ret i32 %v
-}
-
-; CHECK-LABEL: _bar:
-; CHECK: movl $42, [[src:[^,]*]]
-; CHECK: leal [[src]], %[[reg:[^ ]*]]
-; CHECK: movl %[[reg]], (%esp)
-; CHECK: leal [[dst:[^,]*]], %[[reg:[^ ]*]]
-; CHECK: movl %[[reg]], 4(%esp)
-; CHECK: calll _foo
-; CHECK: movl [[dst]], %eax
-; CHECK: retl
-
-; If we don't have the sret flag, parameters are not reordered.
-
-define x86_cdeclmethodcc void @baz(i32* %dst, i32* %src) {
- %v = load i32* %src
- store i32 %v, i32* %dst
- ret void
-}
-
-; CHECK-LABEL: _baz:
-; CHECK: movl 4(%esp), %[[dst:[^ ]*]]
-; CHECK: movl 8(%esp), %[[src:[^ ]*]]
-; CHECK: movl (%[[src]]), %[[v:[^ ]*]]
-; CHECK: movl %[[v]], (%[[dst]])
-; CHECK: retl
-
-define i32 @qux() {
- %src = alloca i32
- %dst = alloca i32
- store i32 42, i32* %src
- call x86_cdeclmethodcc void @baz(i32* %dst, i32* %src)
- %v = load i32* %dst
- ret i32 %v
-}
-
-; CHECK-LABEL: _qux:
-; CHECK: movl $42, [[src:[^,]*]]
-; CHECK: leal [[src]], %[[reg:[^ ]*]]
-; CHECK: movl %[[reg]], 4(%esp)
-; CHECK: leal [[dst:[^,]*]], %[[reg:[^ ]*]]
-; CHECK: movl %[[reg]], (%esp)
-; CHECK: calll _baz
-; CHECK: movl [[dst]], %eax
-; CHECK: retl
diff --git a/test/CodeGen/X86/cfi.ll b/test/CodeGen/X86/cfi.ll
new file mode 100644
index 0000000..b57ff45
--- /dev/null
+++ b/test/CodeGen/X86/cfi.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck --check-prefix=STATIC %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic | FileCheck --check-prefix=PIC %s
+
+; STATIC: .cfi_personality 3, __gxx_personality_v0
+; STATIC: .cfi_lsda 3, .Lexception0
+
+; PIC: .cfi_personality 155, DW.ref.__gxx_personality_v0
+; PIC: .cfi_lsda 27, .Lexception0
+
+
+define void @bar() {
+entry:
+ %call = invoke i32 @foo()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret void
+
+lpad:
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ catch i8* null
+ ret void
+}
+
+declare i32 @foo()
+
+declare i32 @__gxx_personality_v0(...)
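The immediates checked here are standard DWARF exception-handling pointer encodings; for reference, their composition (from the DW_EH_PE_* definitions, not part of this test):

/* 3   = DW_EH_PE_udata4                                      (static)
   155 = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x9b, PIC)
   27  = DW_EH_PE_pcrel | DW_EH_PE_sdata4                     (0x1b, PIC) */
enum {
  DW_EH_PE_udata4   = 0x03,
  DW_EH_PE_sdata4   = 0x0b,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80
};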
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index 551d9bc..cdcdc96 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -26,9 +26,22 @@ cond_true: ; preds = %0
ReturnBlock: ; preds = %0
ret i32 0
; CHECK-LABEL: test2:
-; CHECK: movl (%rsi), %eax
-; CHECK: shll $3, %eax
-; CHECK: testl %eax, %eax
+; CHECK: testl $536870911, (%rsi)
+}
+
+define i8 @test2b(i8 %X, i8* %y) nounwind {
+ %tmp = load i8* %y ; <i8> [#uses=1]
+ %tmp1 = shl i8 %tmp, 3 ; <i8> [#uses=1]
+ %tmp1.upgrd.2 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i8 1
+
+ReturnBlock: ; preds = %0
+ ret i8 0
+; CHECK-LABEL: test2b:
+; CHECK: testb $31, (%rsi)
}
define i64 @test3(i64 %x) nounwind {
@@ -68,8 +81,8 @@ define i32 @test5(double %A) nounwind {
bb12:; preds = %entry
ret i32 32
; CHECK-LABEL: test5:
-; CHECK: ucomisd LCPI4_0(%rip), %xmm0
-; CHECK: ucomisd LCPI4_1(%rip), %xmm0
+; CHECK: ucomisd LCPI5_0(%rip), %xmm0
+; CHECK: ucomisd LCPI5_1(%rip), %xmm0
}
declare i32 @foo(...)
@@ -163,3 +176,25 @@ define i32 @test12() uwtable ssp {
}
declare zeroext i1 @test12b()
+
+define i32 @test13(i32 %mask, i32 %base, i32 %intra) {
+ %and = and i32 %mask, 8
+ %tobool = icmp ne i32 %and, 0
+ %cond = select i1 %tobool, i32 %intra, i32 %base
+ ret i32 %cond
+
+; CHECK-LABEL: test13:
+; CHECK: testb $8, %dil
+; CHECK: cmovnel
+}
+
+define i32 @test14(i32 %mask, i32 %base, i32 %intra) #0 {
+ %s = lshr i32 %mask, 7
+ %tobool = icmp sgt i32 %s, -1
+ %cond = select i1 %tobool, i32 %intra, i32 %base
+ ret i32 %cond
+
+; CHECK-LABEL: test14:
+; CHECK: shrl $7, %edi
+; CHECK-NEXT: cmovnsl %edx, %esi
+}
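test2 and test2b encode the same fold at two widths: a left shift feeding a compare-with-zero only tests the bits that survive the shift, so the shift folds into the test immediate (2^29 - 1 = 536870911 for a 32-bit shl 3, 2^5 - 1 = 31 for the 8-bit case). In C terms, a sketch:

/* (x << 3) == 0 iff the bits not shifted out are all zero. */
int shl3_is_zero_u32(unsigned x) {
  return (x << 3) == 0;                 /* becomes (x & 536870911) == 0 */
}
int shl3_is_zero_u8(unsigned char x) {
  return (unsigned char)(x << 3) == 0;  /* becomes (x & 31) == 0 */
}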
diff --git a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
index e3d6b34..78e1dd2 100644
--- a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
+; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
; This file tests the different cases that are involved when codegen prepare
; tries to get sign extension out of the way of the addressing mode.
; These tests require an actual target as addressing mode decisions depend
@@ -281,6 +282,25 @@ define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
; CHECK: load i32* [[ADDR2]]
; CHECK: ret
+; CHECK-GEP-LABEL: @checkProfitability
+; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
+; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
+; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
+; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
+; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
+; BB then
+; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
+; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
+; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
+; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
+; CHECK-GEP: load i32* [[ADDR1]]
+; BB else
+; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
+; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
+; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
+; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
+; CHECK-GEP: load i32* [[ADDR2]]
+; CHECK-GEP: ret
define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
%shl = shl nsw i32 %arg1, 1
%add1 = add nsw i32 %shl, %arg2
diff --git a/test/CodeGen/X86/codegen-prepare-crash.ll b/test/CodeGen/X86/codegen-prepare-crash.ll
new file mode 100644
index 0000000..c328817
--- /dev/null
+++ b/test/CodeGen/X86/codegen-prepare-crash.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+target triple = "x86_64-unknown-linux-gnu"
+
+@g = external global [10 x i32]
+
+define void @f(i32 %u) {
+ %1 = add i32 %u, 4
+ br label %P.Proc8.exit
+
+P.Proc8.exit:
+ %valueindex35.i = getelementptr [10 x i32]* @g, i32 0, i32 %1
+ store i32 %u, i32* %valueindex35.i
+ ret void
+}
diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll
index 316accf..4ff0f1c 100644
--- a/test/CodeGen/X86/codegen-prepare.ll
+++ b/test/CodeGen/X86/codegen-prepare.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -addr-sink-using-gep=1 | FileCheck %s
; Check that the CodeGenPrepare Pass
; does not wrongly rewrite the address computed by Instruction %4
diff --git a/test/CodeGen/X86/combine-avx-intrinsics.ll b/test/CodeGen/X86/combine-avx-intrinsics.ll
new file mode 100644
index 0000000..f610f7f
--- /dev/null
+++ b/test/CodeGen/X86/combine-avx-intrinsics.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s
+
+
+define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0) {
+ %1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a0, i32 7)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: test_x86_avx_blend_pd_256
+; CHECK-NOT: vblendpd
+; CHECK: ret
+
+
+define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0) {
+ %1 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a0, i32 7)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: test_x86_avx_blend_ps_256
+; CHECK-NOT: vblendps
+; CHECK: ret
+
+
+define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: test_x86_avx_blendv_pd_256
+; CHECK-NOT: vblendvpd
+; CHECK: ret
+
+
+define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: test_x86_avx_blendv_ps_256
+; CHECK-NOT: vblendvps
+; CHECK: ret
+
+
+define <4 x double> @test2_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ %1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: test2_x86_avx_blend_pd_256
+; CHECK-NOT: vblendpd
+; CHECK: ret
+
+
+define <8 x float> @test2_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ %1 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: test2_x86_avx_blend_ps_256
+; CHECK-NOT: vblendps
+; CHECK: ret
+
+
+define <4 x double> @test2_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> zeroinitializer)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: test2_x86_avx_blendv_pd_256
+; CHECK-NOT: vblendvpd
+; CHECK: ret
+
+
+define <8 x float> @test2_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: test2_x86_avx_blendv_ps_256
+; CHECK-NOT: vblendvps
+; CHECK: ret
+
+
+define <4 x double> @test3_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ %1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 -1)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: test3_x86_avx_blend_pd_256
+; CHECK-NOT: vblendpd
+; CHECK: ret
+
+
+define <8 x float> @test3_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ %1 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 -1)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: test3_x86_avx_blend_ps_256
+; CHECK-NOT: vblendps
+; CHECK: ret
+
+
+define <4 x double> @test3_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <4 x double>
+ %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %Mask)
+ ret <4 x double> %1
+}
+; CHECK-LABEL: test3_x86_avx_blendv_pd_256
+; CHECK-NOT: vblendvpd
+; CHECK: ret
+
+
+define <8 x float> @test3_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x float>
+ %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %Mask)
+ ret <8 x float> %1
+}
+; CHECK-LABEL: test3_x86_avx_blendv_ps_256
+; CHECK-NOT: vblendvps
+; CHECK: ret
+
+
+
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32)
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
diff --git a/test/CodeGen/X86/combine-avx2-intrinsics.ll b/test/CodeGen/X86/combine-avx2-intrinsics.ll
new file mode 100644
index 0000000..8794f8b
--- /dev/null
+++ b/test/CodeGen/X86/combine-avx2-intrinsics.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s
+
+; Verify that the backend correctly combines AVX2 builtin intrinsics.
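+; Note on the psra tests below: a shift-by-immediate of 3, a shift by a count
+; vector whose low 64 bits hold 3 (psra with a vector count only reads its low
+; 64 bits), and a shift-by-immediate of 2 accumulate into one shift by 8.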
+
+
+define <8 x i32> @test_psra_1(<8 x i32> %A) {
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 3)
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 2)
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: test_psra_1
+; CHECK: vpsrad $8, %ymm0, %ymm0
+; CHECK-NEXT: ret
+
+define <16 x i16> @test_psra_2(<16 x i16> %A) {
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 3)
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 2)
+ ret <16 x i16> %3
+}
+; CHECK-LABEL: test_psra_2
+; CHECK: vpsraw $8, %ymm0, %ymm0
+; CHECK-NEXT: ret
+
+define <16 x i16> @test_psra_3(<16 x i16> %A) {
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
+ ret <16 x i16> %3
+}
+; CHECK-LABEL: test_psra_3
+; CHECK-NOT: vpsraw
+; CHECK: ret
+
+define <8 x i32> @test_psra_4(<8 x i32> %A) {
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: test_psra_4
+; CHECK-NOT: vpsrad
+; CHECK: ret
+
+
+define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
+ %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %res
+}
+; CHECK-LABEL: test_x86_avx2_pblendvb
+; CHECK-NOT: vpblendvb
+; CHECK: ret
+
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
+ ret <16 x i16> %res
+}
+; CHECK-LABEL: test_x86_avx2_pblendw
+; CHECK-NOT: vpblendw
+; CHECK: ret
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0) {
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a0, i32 7)
+ ret <4 x i32> %res
+}
+; CHECK-LABEL: test_x86_avx2_pblendd_128
+; CHECK-NOT: vpblendd
+; CHECK: ret
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a0, i32 7)
+ ret <8 x i32> %res
+}
+; CHECK-LABEL: test_x86_avx2_pblendd_256
+; CHECK-NOT: vpblendd
+; CHECK: ret
+
+
+define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
+ %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
+ ret <32 x i8> %res
+}
+; CHECK-LABEL: test2_x86_avx2_pblendvb
+; CHECK-NOT: vpblendvb
+; CHECK: ret
+
+
+define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
+ ret <16 x i16> %res
+}
+; CHECK-LABEL: test2_x86_avx2_pblendw
+; CHECK-NOT: vpblendw
+; CHECK: ret
+
+
+define <4 x i32> @test2_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 0)
+ ret <4 x i32> %res
+}
+; CHECK-LABEL: test2_x86_avx2_pblendd_128
+; CHECK-NOT: vpblendd
+; CHECK: ret
+
+
+define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 0)
+ ret <8 x i32> %res
+}
+; CHECK-LABEL: test2_x86_avx2_pblendd_256
+; CHECK-NOT: vpblendd
+; CHECK: ret
+
+
+define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
+ %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
+ %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
+ ret <32 x i8> %res
+}
+; CHECK-LABEL: test3_x86_avx2_pblendvb
+; CHECK-NOT: vpblendvb
+; CHECK: ret
+
+
+define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
+ ret <16 x i16> %res
+}
+; CHECK-LABEL: test3_x86_avx2_pblendw
+; CHECK-NOT: vpblendw
+; CHECK: ret
+
+
+define <4 x i32> @test3_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 -1)
+ ret <4 x i32> %res
+}
+; CHECK-LABEL: test3_x86_avx2_pblendd_128
+; CHECK-NOT: vpblendd
+; CHECK: ret
+
+
+define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 -1)
+ ret <8 x i32> %res
+}
+; CHECK-LABEL: test3_x86_avx2_pblendd_256
+; CHECK-NOT: vpblendd
+; CHECK: ret
+
+
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>)
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32)
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32)
+
diff --git a/test/CodeGen/X86/combine-sse2-intrinsics.ll b/test/CodeGen/X86/combine-sse2-intrinsics.ll
new file mode 100644
index 0000000..fa500e5
--- /dev/null
+++ b/test/CodeGen/X86/combine-sse2-intrinsics.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+; Verify that the backend correctly combines SSE2 builtin intrinsics.
+
+
+define <4 x i32> @test_psra_1(<4 x i32> %A) {
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 3)
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 2)
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test_psra_1
+; CHECK: psrad $8, %xmm0
+; CHECK-NEXT: ret
+
+define <8 x i16> @test_psra_2(<8 x i16> %A) {
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 3)
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 2)
+ ret <8 x i16> %3
+}
+; CHECK-LABEL: test_psra_2
+; CHECK: psraw $8, %xmm0
+; CHECK-NEXT: ret
+
+define <4 x i32> @test_psra_3(<4 x i32> %A) {
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test_psra_3
+; CHECK-NOT: psrad
+; CHECK: ret
+
+
+define <8 x i16> @test_psra_4(<8 x i16> %A) {
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
+ ret <8 x i16> %3
+}
+; CHECK-LABEL: test_psra_4
+; CHECK-NOT: psraw
+; CHECK: ret
+
+
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
+
diff --git a/test/CodeGen/X86/combine-sse41-intrinsics.ll b/test/CodeGen/X86/combine-sse41-intrinsics.ll
new file mode 100644
index 0000000..254991a
--- /dev/null
+++ b/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -0,0 +1,182 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=corei7 | FileCheck %s
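+; Verify that the backend correctly combines SSE4.1 blend intrinsics: a blend
+; whose constant mask is all zeroes or all ones, or whose operands are the
+; same value, should fold to a plain register move or to no instruction.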
+
+
+define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
+ %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 0)
+ ret <2 x double> %1
+}
+; CHECK-LABEL: test_x86_sse41_blend_pd
+; CHECK-NOT: blendpd
+; CHECK: ret
+
+
+define <4 x float> @test_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
+ %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 0)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: test_x86_sse41_blend_ps
+; CHECK-NOT: blendps
+; CHECK: ret
+
+
+define <2 x double> @test_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
+ %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer)
+ ret <2 x double> %1
+}
+; CHECK-LABEL: test_x86_sse41_blendv_pd
+; CHECK-NOT: blendvpd
+; CHECK: ret
+
+
+define <4 x float> @test_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
+ %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: test_x86_sse41_blendv_ps
+; CHECK-NOT: blendvps
+; CHECK: ret
+
+
+define <16 x i8> @test_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
+ %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+; CHECK-LABEL: test_x86_sse41_pblendv_b
+; CHECK-NOT: pblendvb
+; CHECK: ret
+
+
+define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
+ %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 0)
+ ret <8 x i16> %1
+}
+; CHECK-LABEL: test_x86_sse41_pblend_w
+; CHECK-NOT: pblendw
+; CHECK: ret
+
+
+define <2 x double> @test2_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
+ %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 -1)
+ ret <2 x double> %1
+}
+; CHECK-LABEL: test2_x86_sse41_blend_pd
+; CHECK-NOT: blendpd
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test2_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
+ %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 -1)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: test2_x86_sse41_blend_ps
+; CHECK-NOT: blendps
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
+
+
+define <2 x double> @test2_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
+ %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <2 x double>
+ %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %Mask )
+ ret <2 x double> %1
+}
+; CHECK-LABEL: test2_x86_sse41_blendv_pd
+; CHECK-NOT: blendvpd
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
+
+
+define <4 x float> @test2_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
+ %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x float>
+ %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %Mask)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: test2_x86_sse41_blendv_ps
+; CHECK-NOT: blendvps
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
+
+
+define <16 x i8> @test2_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <16 x i8>
+ %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %Mask)
+ ret <16 x i8> %1
+}
+; CHECK-LABEL: test2_x86_sse41_pblendv_b
+; CHECK-NOT: pblendvb
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
+
+
+define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
+ %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1)
+ ret <8 x i16> %1
+}
+; CHECK-LABEL: test2_x86_sse41_pblend_w
+; CHECK-NOT: pblendw
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
+
+
+define <2 x double> @test3_x86_sse41_blend_pd(<2 x double> %a0) {
+ %1 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a0, i32 7)
+ ret <2 x double> %1
+}
+; CHECK-LABEL: test3_x86_sse41_blend_pd
+; CHECK-NOT: blendpd
+; CHECK: ret
+
+
+define <4 x float> @test3_x86_sse41_blend_ps(<4 x float> %a0) {
+ %1 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a0, i32 7)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: test3_x86_sse41_blend_ps
+; CHECK-NOT: blendps
+; CHECK: ret
+
+
+define <2 x double> @test3_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
+ %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a1 )
+ ret <2 x double> %1
+}
+; CHECK-LABEL: test3_x86_sse41_blendv_pd
+; CHECK-NOT: blendvpd
+; CHECK: ret
+
+
+define <4 x float> @test3_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
+ %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %1
+}
+; CHECK-LABEL: test3_x86_sse41_blendv_ps
+; CHECK-NOT: blendvps
+; CHECK: ret
+
+
+define <16 x i8> @test3_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
+ %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %1
+}
+; CHECK-LABEL: test3_x86_sse41_pblendv_b
+; CHECK-NOT: pblendvb
+; CHECK: ret
+
+
+define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
+ %1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a0, i32 7)
+ ret <8 x i16> %1
+}
+; CHECK-LABEL: test3_x86_sse41_pblend_w
+; CHECK-NOT: pblendw
+; CHECK: ret
+
+
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32)
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32)
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32)
+declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>)
+
diff --git a/test/CodeGen/X86/constant-hoisting-shift-immediate.ll b/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
new file mode 100644
index 0000000..883be35
--- /dev/null
+++ b/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -O3 -march=x86-64 | FileCheck %s
+define i64 @foo(i1 %z, i192* %p, i192* %q) {
+; If the constant 128 were hoisted into a variable, then basic block L_val2
+; would contain %lshr2 = lshr i192 %data2, %const, with the definition of
+; %const in another basic block. As a result, very inefficient code might be
+; produced. Here we check that this doesn't occur.
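+; As an illustrative sketch (names are hypothetical), the hoisted form this
+; test guards against would look like:
+;   entry:
+;     %const = bitcast i192 128 to i192
+;   ...
+;   L_val2:
+;     %lshr2 = lshr i192 %data2, %const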
+entry:
+ %data1 = load i192* %p, align 8
+ %lshr1 = lshr i192 %data1, 128
+ %val1 = trunc i192 %lshr1 to i64
+ br i1 %z, label %End, label %L_val2
+
+; CHECK: movq 16(%rdx), %rax
+; CHECK-NEXT: retq
+L_val2:
+ %data2 = load i192* %q, align 8
+ %lshr2 = lshr i192 %data2, 128
+ %val2 = trunc i192 %lshr2 to i64
+ br label %End
+
+End:
+ %p1 = phi i64 [%val1,%entry], [%val2,%L_val2]
+ ret i64 %p1
+}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 98ae1d5..21225e3 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -7,7 +7,7 @@ entry:
%div = udiv i16 %x, 33
ret i16 %div
; CHECK-LABEL: test1:
-; CHECK: imull $63551, %eax, %eax
+; CHECK: imull $63551, %eax
; CHECK-NEXT: shrl $21, %eax
; CHECK-NEXT: ret
}
@@ -18,7 +18,7 @@ entry:
ret i16 %div
; CHECK-LABEL: test2:
-; CHECK: imull $43691, %eax, %eax
+; CHECK: imull $43691, %eax
; CHECK-NEXT: shrl $17, %eax
; CHECK-NEXT: ret
}
@@ -30,7 +30,7 @@ entry:
; CHECK-LABEL: test3:
; CHECK: movzbl 8(%esp), %eax
-; CHECK-NEXT: imull $171, %eax, %eax
+; CHECK-NEXT: imull $171, %eax
; CHECK-NEXT: shrl $9, %eax
; CHECK-NEXT: ret
}
@@ -40,7 +40,7 @@ entry:
%div = sdiv i16 %x, 33 ; <i32> [#uses=1]
ret i16 %div
; CHECK-LABEL: test4:
-; CHECK: imull $1986, %eax, %
+; CHECK: imull $1986, %eax
}
define i32 @test5(i32 %A) nounwind {
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index a38c2d8..f4dec4f 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -40,18 +40,18 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl Var1
@Var1 = dllexport global i32 1, align 4
-; CHECK: .rdata,"r"
+; CHECK: .rdata,"rd"
; CHECK: .globl Var2
@Var2 = dllexport unnamed_addr constant i32 1
; CHECK: .comm Var3
@Var3 = common dllexport global i32 0, align 4
-; CHECK: .section .data,"w",discard,WeakVar1
+; CHECK: .section .data,"wd",discard,WeakVar1
; CHECK: .globl WeakVar1
@WeakVar1 = weak_odr dllexport global i32 1, align 4
-; CHECK: .section .rdata,"r",discard,WeakVar2
+; CHECK: .section .rdata,"rd",discard,WeakVar2
; CHECK: .globl WeakVar2
@WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
@@ -66,39 +66,43 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl alias3
; CHECK: alias3 = notExported
-@alias3 = dllexport alias void()* @alias
+@alias3 = dllexport alias void()* @notExported
; CHECK: .weak weak_alias
; CHECK: weak_alias = f1
@weak_alias = dllexport alias weak_odr void()* @f1
+@blob = global [6 x i8] c"\B8*\00\00\00\C3", section ".text", align 16
+@blob_alias = dllexport alias i32 (), [6 x i8]* @blob
; CHECK: .section .drectve
-; WIN32: /EXPORT:Var1,DATA
-; WIN32: /EXPORT:Var2,DATA
-; WIN32: /EXPORT:Var3,DATA
-; WIN32: /EXPORT:WeakVar1,DATA
-; WIN32: /EXPORT:WeakVar2,DATA
-; WIN32: /EXPORT:f1
-; WIN32: /EXPORT:f2
-; WIN32: /EXPORT:lnk1
-; WIN32: /EXPORT:lnk2
-; WIN32: /EXPORT:weak1
-; WIN32: /EXPORT:alias
-; WIN32: /EXPORT:alias2
-; WIN32: /EXPORT:alias3
-; WIN32: /EXPORT:weak_alias
-; MINGW: -export:Var1,data
-; MINGW: -export:Var2,data
-; MINGW: -export:Var3,data
-; MINGW: -export:WeakVar1,data
-; MINGW: -export:WeakVar2,data
-; MINGW: -export:f1
-; MINGW: -export:f2
-; MINGW: -export:lnk1
-; MINGW: -export:lnk2
-; MINGW: -export:weak1
-; MINGW: -export:alias
-; MINGW: -export:alias2
-; MINGW: -export:alias3
-; MINGW: -export:weak_alias
+; WIN32: " /EXPORT:Var1,DATA"
+; WIN32: " /EXPORT:Var2,DATA"
+; WIN32: " /EXPORT:Var3,DATA"
+; WIN32: " /EXPORT:WeakVar1,DATA"
+; WIN32: " /EXPORT:WeakVar2,DATA"
+; WIN32: " /EXPORT:f1"
+; WIN32: " /EXPORT:f2"
+; WIN32: " /EXPORT:lnk1"
+; WIN32: " /EXPORT:lnk2"
+; WIN32: " /EXPORT:weak1"
+; WIN32: " /EXPORT:alias"
+; WIN32: " /EXPORT:alias2"
+; WIN32: " /EXPORT:alias3"
+; WIN32: " /EXPORT:weak_alias"
+; WIN32: " /EXPORT:blob_alias"
+; MINGW: " -export:Var1,data"
+; MINGW: " -export:Var2,data"
+; MINGW: " -export:Var3,data"
+; MINGW: " -export:WeakVar1,data"
+; MINGW: " -export:WeakVar2,data"
+; MINGW: " -export:f1"
+; MINGW: " -export:f2"
+; MINGW: " -export:lnk1"
+; MINGW: " -export:lnk2"
+; MINGW: " -export:weak1"
+; MINGW: " -export:alias"
+; MINGW: " -export:alias2"
+; MINGW: " -export:alias3"
+; MINGW: " -export:weak_alias"
+; MINGW: " -export:blob_alias"
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 1b34d23..e2c3f13 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -1,5 +1,9 @@
-; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck -check-prefix=CHECK -check-prefix=WIN32 %s
-; RUN: llc -mtriple i386-pc-mingw32 < %s | FileCheck -check-prefix=CHECK -check-prefix=MINGW %s
+; RUN: llc -mtriple i386-pc-win32 < %s \
+; RUN: | FileCheck -check-prefix CHECK -check-prefix CHECK-CL %s
+; RUN: llc -mtriple i386-pc-mingw32 < %s \
+; RUN: | FileCheck -check-prefix CHECK -check-prefix CHECK-GCC %s
+; RUN: llc -mtriple i686-pc-cygwin %s -o - \
+; RUN: | FileCheck -check-prefix CHECK -check-prefix CHECK-GCC %s
; CHECK: .text
@@ -55,18 +59,18 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl _Var1
@Var1 = dllexport global i32 1, align 4
-; CHECK: .rdata,"r"
+; CHECK: .rdata,"rd"
; CHECK: .globl _Var2
@Var2 = dllexport unnamed_addr constant i32 1
; CHECK: .comm _Var3
@Var3 = common dllexport global i32 0, align 4
-; CHECK: .section .data,"w",discard,_WeakVar1
+; CHECK: .section .data,"wd",discard,_WeakVar1
; CHECK: .globl _WeakVar1
@WeakVar1 = weak_odr dllexport global i32 1, align 4
-; CHECK: .section .rdata,"r",discard,_WeakVar2
+; CHECK: .section .rdata,"rd",discard,_WeakVar2
; CHECK: .globl _WeakVar2
@WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
@@ -81,7 +85,7 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl _alias3
; CHECK: _alias3 = _notExported
-@alias3 = dllexport alias void()* @alias
+@alias3 = dllexport alias void()* @notExported
; CHECK: .weak _weak_alias
; CHECK: _weak_alias = _f1
@@ -89,37 +93,38 @@ define weak_odr dllexport void @weak1() {
; CHECK: .section .drectve
-; WIN32: /EXPORT:_Var1,DATA
-; WIN32: /EXPORT:_Var2,DATA
-; WIN32: /EXPORT:_Var3,DATA
-; WIN32: /EXPORT:_WeakVar1,DATA
-; WIN32: /EXPORT:_WeakVar2,DATA
-; WIN32: /EXPORT:_f1
-; WIN32: /EXPORT:_f2
-; WIN32: /EXPORT:_stdfun@0
-; WIN32: /EXPORT:@fastfun@0
-; WIN32: /EXPORT:_thisfun
-; WIN32: /EXPORT:_lnk1
-; WIN32: /EXPORT:_lnk2
-; WIN32: /EXPORT:_weak1
-; WIN32: /EXPORT:_alias
-; WIN32: /EXPORT:_alias2
-; WIN32: /EXPORT:_alias3
-; WIN32: /EXPORT:_weak_alias
-; MINGW: -export:_Var1,data
-; MINGW: -export:_Var2,data
-; MINGW: -export:_Var3,data
-; MINGW: -export:_WeakVar1,data
-; MINGW: -export:_WeakVar2,data
-; MINGW: -export:_f1
-; MINGW: -export:_f2
-; MINGW: -export:_stdfun@0
-; MINGW: -export:@fastfun@0
-; MINGW: -export:_thisfun
-; MINGW: -export:_lnk1
-; MINGW: -export:_lnk2
-; MINGW: -export:_weak1
-; MINGW: -export:_alias
-; MINGW: -export:_alias2
-; MINGW: -export:_alias3
-; MINGW: -export:_weak_alias
+; CHECK-CL: " /EXPORT:_Var1,DATA"
+; CHECK-CL: " /EXPORT:_Var2,DATA"
+; CHECK-CL: " /EXPORT:_Var3,DATA"
+; CHECK-CL: " /EXPORT:_WeakVar1,DATA"
+; CHECK-CL: " /EXPORT:_WeakVar2,DATA"
+; CHECK-CL: " /EXPORT:_f1"
+; CHECK-CL: " /EXPORT:_f2"
+; CHECK-CL: " /EXPORT:_stdfun@0"
+; CHECK-CL: " /EXPORT:@fastfun@0"
+; CHECK-CL: " /EXPORT:_thisfun"
+; CHECK-CL: " /EXPORT:_lnk1"
+; CHECK-CL: " /EXPORT:_lnk2"
+; CHECK-CL: " /EXPORT:_weak1"
+; CHECK-CL: " /EXPORT:_alias"
+; CHECK-CL: " /EXPORT:_alias2"
+; CHECK-CL: " /EXPORT:_alias3"
+; CHECK-CL: " /EXPORT:_weak_alias"
+; CHECK-GCC: " -export:Var1,data"
+; CHECK-GCC: " -export:Var2,data"
+; CHECK-GCC: " -export:Var3,data"
+; CHECK-GCC: " -export:WeakVar1,data"
+; CHECK-GCC: " -export:WeakVar2,data"
+; CHECK-GCC: " -export:f1"
+; CHECK-GCC: " -export:f2"
+; CHECK-GCC: " -export:stdfun@0"
+; CHECK-GCC: " -export:@fastfun@0"
+; CHECK-GCC: " -export:thisfun"
+; CHECK-GCC: " -export:lnk1"
+; CHECK-GCC: " -export:lnk2"
+; CHECK-GCC: " -export:weak1"
+; CHECK-GCC: " -export:alias"
+; CHECK-GCC: " -export:alias2"
+; CHECK-GCC: " -export:alias3"
+; CHECK-GCC: " -export:weak_alias"
+
diff --git a/test/CodeGen/X86/expand-opaque-const.ll b/test/CodeGen/X86/expand-opaque-const.ll
new file mode 100644
index 0000000..6e461cf
--- /dev/null
+++ b/test/CodeGen/X86/expand-opaque-const.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mcpu=generic -O1 -relocation-model=pic < %s | FileCheck %s
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i686-apple-darwin"
+
+define i64 @test_lshr() {
+entry:
+; CHECK-NOT: movl $-1, 16(%esp)
+; CHECK-NOT: movl $-1, %eax
+ %retval = alloca i64
+ %op1 = alloca i64
+ %op2 = alloca i64
+ store i64 -6687208052682386272, i64* %op1
+ store i64 7106745059734980448, i64* %op2
+ %tmp1 = load i64* %op1
+ %tmp2 = load i64* %op2
+ %tmp = xor i64 %tmp2, 7106745059734980448
+ %tmp3 = lshr i64 %tmp1, %tmp
+ store i64 %tmp3, i64* %retval
+ %tmp4 = load i64* %retval
+ ret i64 %tmp4
+}
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll
index 2135f94..514d929 100644
--- a/test/CodeGen/X86/f16c-intrinsics.ll
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+avx,+f16c | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx,+f16c | FileCheck %s
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
; CHECK: vcvtph2ps
@@ -30,3 +31,16 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
+
+define <4 x float> @test_x86_vcvtps2ph_128_scalar(i64* %ptr) {
+; CHECK-LABEL: test_x86_vcvtps2ph_128_scalar
+; CHECK-NOT: vmov
+; CHECK: vcvtph2ps (%
+
+ %load = load i64* %ptr
+ %ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
+ %ins2 = insertelement <2 x i64> %ins1, i64 0, i32 1
+ %bc = bitcast <2 x i64> %ins2 to <8 x i16>
+  %res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc)
+ ret <4 x float> %res
+}
diff --git a/test/CodeGen/X86/fma-do-not-commute.ll b/test/CodeGen/X86/fma-do-not-commute.ll
new file mode 100644
index 0000000..4e21172
--- /dev/null
+++ b/test/CodeGen/X86/fma-do-not-commute.ll
@@ -0,0 +1,30 @@
+; RUN: llc -fp-contract=fast -mattr=+fma -disable-cgp < %s -o - | FileCheck %s
+; Check that the 2nd and 3rd arguments of fmaXXX231 reg1, reg2, mem3 are not commuted.
+; <rdar://problem/16800495>
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; CHECK-LABEL: test1:
+; %arg lives in xmm0 and it shouldn't be redefined until it is used in the FMA.
+; CHECK-NOT: {{.*}}, %xmm0
+; %addr lives in rdi.
+; %addr2 lives in rsi.
+; CHECK: vmovss (%rsi), [[ADDR2:%xmm[0-9]+]]
+; The assembly syntax is in the reverse order.
+; CHECK: vfmadd231ss (%rdi), [[ADDR2]], %xmm0
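+; (In AT&T syntax, vfmadd231ss mem, %xmm_b, %xmm_a computes
+; %xmm_a = %xmm_b * mem + %xmm_a, so %xmm0 must still hold %sum0 here.)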
+define void @test1(float* %addr, float* %addr2, float %arg) {
+entry:
+ br label %loop
+
+loop:
+ %sum0 = phi float [ %fma, %loop ], [ %arg, %entry ]
+ %addrVal = load float* %addr, align 4
+ %addr2Val = load float* %addr2, align 4
+ %fmul = fmul float %addrVal, %addr2Val
+ %fma = fadd float %sum0, %fmul
+ br i1 true, label %exit, label %loop
+
+exit:
+ store float %fma, float* %addr, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll
index e85d8f7..96c5be4 100644
--- a/test/CodeGen/X86/fold-load-vec.ll
+++ b/test/CodeGen/X86/fold-load-vec.ll
@@ -5,7 +5,7 @@
; loads from m32.
define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind {
; CHECK: sample_test
-; CHECK: movaps
+; CHECK-NOT: movaps
; CHECK: insertps
entry:
%source.addr = alloca <4 x float>*, align 8
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index 7a29b07..8c328ec 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -50,7 +50,3 @@ eh.resume:
declare void @_Z1fv() optsize
declare i32 @__gxx_personality_v0(...)
-
-; CHECK: Leh_func_end0:
-; CHECK: GCC_except_table0
-; CHECK: = Leh_func_end0-
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 5ad5047..c763f39 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -2,8 +2,8 @@
; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=static | FileCheck %s -check-prefix=DARWIN-STATIC
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -fdata-sections | FileCheck %s -check-prefix=LINUX-SECTIONS
-; RUN: llc < %s -mtriple=i686-pc-win32 -fdata-sections -ffunction-sections | FileCheck %s -check-prefix=WIN32-SECTIONS
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -data-sections | FileCheck %s -check-prefix=LINUX-SECTIONS
+; RUN: llc < %s -mtriple=i686-pc-win32 -data-sections -function-sections | FileCheck %s -check-prefix=WIN32-SECTIONS
define void @F1() {
ret void
@@ -18,13 +18,13 @@ define void @F1() {
; LINUX: .type G1,@object
; LINUX: .comm G1,4,4
-; DARWIN: .comm _G1,4,2
+; DARWIN: .comm _G1,4,2
; const int G2 __attribute__((weak)) = 42;
-@G2 = weak_odr unnamed_addr constant i32 42
+@G2 = weak_odr unnamed_addr constant i32 42
; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
@@ -48,7 +48,7 @@ define void @F1() {
; LINUX-SECTIONS: .section .rodata.G3,"a",@progbits
; LINUX-SECTIONS: .globl G3
-; WIN32-SECTIONS: .section .rdata,"r",one_only,_G3
+; WIN32-SECTIONS: .section .rdata,"rd",one_only,_G3
; WIN32-SECTIONS: .globl _G3
@@ -85,25 +85,25 @@ define void @F1() {
; PR4584
@"foo bar" = linkonce global i32 42
-; LINUX: .type "foo bar",@object
+; LINUX: .type "foo bar",@object
; LINUX: .section ".data.foo bar","aGw",@progbits,"foo bar",comdat
-; LINUX: .weak "foo bar"
+; LINUX: .weak "foo bar"
; LINUX: "foo bar":
-; DARWIN: .section __DATA,__datacoal_nt,coalesced
-; DARWIN: .globl "_foo bar"
-; DARWIN: .weak_definition "_foo bar"
+; DARWIN: .section __DATA,__datacoal_nt,coalesced
+; DARWIN: .globl "_foo bar"
+; DARWIN: .weak_definition "_foo bar"
; DARWIN: "_foo bar":
; PR4650
@G6 = weak_odr unnamed_addr constant [1 x i8] c"\01"
-; LINUX: .type G6,@object
-; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat
-; LINUX: .weak G6
+; LINUX: .type G6,@object
+; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat
+; LINUX: .weak G6
; LINUX: G6:
-; LINUX: .byte 1
-; LINUX: .size G6, 1
+; LINUX: .byte 1
+; LINUX: .size G6, 1
; DARWIN: .section __TEXT,__const_coal,coalesced
; DARWIN: .globl _G6
@@ -114,58 +114,58 @@ define void @F1() {
@G7 = unnamed_addr constant [10 x i8] c"abcdefghi\00"
-; DARWIN: __TEXT,__cstring,cstring_literals
-; DARWIN: .globl _G7
+; DARWIN: __TEXT,__cstring,cstring_literals
+; DARWIN: .globl _G7
; DARWIN: _G7:
-; DARWIN: .asciz "abcdefghi"
+; DARWIN: .asciz "abcdefghi"
-; LINUX: .section .rodata.str1.1,"aMS",@progbits,1
-; LINUX: .globl G7
+; LINUX: .section .rodata.str1.1,"aMS",@progbits,1
+; LINUX: .globl G7
; LINUX: G7:
-; LINUX: .asciz "abcdefghi"
+; LINUX: .asciz "abcdefghi"
; LINUX-SECTIONS: .section .rodata.G7,"aMS",@progbits,1
-; LINUX-SECTIONS: .globl G7
+; LINUX-SECTIONS: .globl G7
-; WIN32-SECTIONS: .section .rdata,"r",one_only,_G7
-; WIN32-SECTIONS: .globl _G7
+; WIN32-SECTIONS: .section .rdata,"rd",one_only,_G7
+; WIN32-SECTIONS: .globl _G7
@G8 = unnamed_addr constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
-; DARWIN: .section __TEXT,__const
-; DARWIN: .globl _G8
+; DARWIN: .section __TEXT,__const
+; DARWIN: .globl _G8
; DARWIN: _G8:
-; LINUX: .section .rodata.str2.2,"aMS",@progbits,2
-; LINUX: .globl G8
+; LINUX: .section .rodata.str2.2,"aMS",@progbits,2
+; LINUX: .globl G8
; LINUX:G8:
@G9 = unnamed_addr constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
-; DARWIN: .globl _G9
+; DARWIN: .globl _G9
; DARWIN: _G9:
-; LINUX: .section .rodata.str4.4,"aMS",@progbits,4
-; LINUX: .globl G9
+; LINUX: .section .rodata.str4.4,"aMS",@progbits,4
+; LINUX: .globl G9
; LINUX:G9
@G10 = weak global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=0]
-; DARWIN: .section __DATA,__datacoal_nt,coalesced
+; DARWIN: .section __DATA,__datacoal_nt,coalesced
; DARWIN: .globl _G10
-; DARWIN: .weak_definition _G10
-; DARWIN: .align 5
+; DARWIN: .weak_definition _G10
+; DARWIN: .align 5
; DARWIN: _G10:
-; DARWIN: .space 400
+; DARWIN: .space 400
-; LINUX: .bss
-; LINUX: .weak G10
-; LINUX: .align 32
+; LINUX: .bss
+; LINUX: .weak G10
+; LINUX: .align 32
; LINUX: G10:
-; LINUX: .zero 400
+; LINUX: .zero 400
@@ -190,7 +190,7 @@ define void @F1() {
; LINUX-SECTIONS: .asciz "foo"
; LINUX-SECTIONS: .size .LG14, 4
-; WIN32-SECTIONS: .section .rdata,"r"
+; WIN32-SECTIONS: .section .rdata,"rd"
; WIN32-SECTIONS: L_G14:
; WIN32-SECTIONS: .asciz "foo"
diff --git a/test/CodeGen/X86/indirect-hidden.ll b/test/CodeGen/X86/indirect-hidden.ll
new file mode 100644
index 0000000..309375d
--- /dev/null
+++ b/test/CodeGen/X86/indirect-hidden.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=i686-apple-macosx -o - %s | FileCheck %s
+
+; x86 doesn't normally use indirect symbols, particularly hidden ones, but it
+; can be tricked into it for exception-handling typeids.
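+; Both typeids should be given non-lazy symbol pointer entries in the
+; __IMPORT,__pointers section; the CHECK-NOT lines at the bottom guard against
+; the entries landing in a plain __DATA,__data section instead.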
+
+@hidden_typeid = external hidden constant i8*
+@normal_typeid = external constant i8*
+
+declare void @throws()
+
+define void @get_indirect_hidden() {
+ invoke void @throws() to label %end unwind label %lpad
+lpad:
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @hidden_typeid to i8*)
+ br label %end
+
+end:
+ ret void
+}
+
+define void @get_indirect() {
+ invoke void @throws() to label %end unwind label %lpad
+lpad:
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @normal_typeid to i8*)
+ br label %end
+
+end:
+ ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK: .section __IMPORT,__pointers,non_lazy_symbol_pointers
+
+; CHECK-NOT: __DATA,__data
+; CHECK: .indirect_symbol _normal_typeid
+; CHECK-NEXT: .long 0
+
+; CHECK-NOT: __DATA,__data
+; CHECK: .indirect_symbol _hidden_typeid
+; CHECK-NEXT: .long 0
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index 458f19d..e4af9b6 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -addr-sink-using-gep=1 | FileCheck %s
define i32 @test(i32* %X, i32 %B) {
; CHECK-LABEL: test:
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index 1637fa4..3d91b03 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -4,7 +4,7 @@
#
# It should be possible to remove this override once all the bots have cycled
# cleanly.
-config.suffixes = ['.ll', '.c', '.cpp', '.test', '.txt']
+config.suffixes = ['.ll', '.test', '.txt']
targets = set(config.root.targets_to_build.split())
if not 'X86' in targets:
diff --git a/test/CodeGen/X86/live-out-reg-info.ll b/test/CodeGen/X86/live-out-reg-info.ll
index 8cd9774..283ee3a 100644
--- a/test/CodeGen/X86/live-out-reg-info.ll
+++ b/test/CodeGen/X86/live-out-reg-info.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 | grep testb
; Make sure dagcombine doesn't eliminate the comparison due
-; to an off-by-one bug with ComputeMaskedBits information.
+; to an off-by-one bug with computeKnownBits information.
declare void @qux()
diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll
new file mode 100644
index 0000000..b9b29a5
--- /dev/null
+++ b/test/CodeGen/X86/lower-bitcast.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
+
+
+define double @test1(double %A) {
+ %1 = bitcast double %A to <2 x i32>
+ %add = add <2 x i32> %1, <i32 3, i32 5>
+ %2 = bitcast <2 x i32> %add to double
+ ret double %2
+}
+; FIXME: Ideally we should be able to fold the entire body of @test1 into a
+; single paddd instruction. At the moment we produce the sequence
+; pshufd+paddq+pshufd.
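+; An illustrative ideal sequence (the constant-pool label is hypothetical):
+;   paddd LCPI0_0(%rip), %xmm0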
+
+; CHECK-LABEL: test1
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define double @test2(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %add = add <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %add to double
+ ret double %3
+}
+; FIXME: Ideally we should be able to fold the entire body of @test2 into a
+; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
+; sequence pshufd+pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test2
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define i64 @test3(i64 %A) {
+ %1 = bitcast i64 %A to <2 x float>
+ %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+ %2 = bitcast <2 x float> %add to i64
+ ret i64 %2
+}
+; CHECK-LABEL: test3
+; CHECK-NOT: pshufd
+; CHECK: addps
+; CHECK-NOT: pshufd
+; CHECK: ret
+
+
+define i64 @test4(i64 %A) {
+ %1 = bitcast i64 %A to <2 x i32>
+ %add = add <2 x i32> %1, <i32 3, i32 5>
+ %2 = bitcast <2 x i32> %add to i64
+ ret i64 %2
+}
+; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; Ideally, we should fold that sequence into a single paddd.
+
+; CHECK-LABEL: test4
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK: ret
+
+
+define double @test5(double %A) {
+ %1 = bitcast double %A to <2 x float>
+ %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+ %2 = bitcast <2 x float> %add to double
+ ret double %2
+}
+; CHECK-LABEL: test5
+; CHECK: addps
+; CHECK-NEXT: ret
+
+
+define double @test6(double %A) {
+ %1 = bitcast double %A to <4 x i16>
+ %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>
+ %2 = bitcast <4 x i16> %add to double
+ ret double %2
+}
+; FIXME: Ideally we should be able to fold the entire body of @test6 into a
+; single paddw instruction.
+
+; CHECK-LABEL: test6
+; CHECK-NOT: movsd
+; CHECK: punpcklwd
+; CHECK-NEXT: paddd
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
+
+define double @test7(double %A, double %B) {
+ %1 = bitcast double %A to <4 x i16>
+ %2 = bitcast double %B to <4 x i16>
+ %add = add <4 x i16> %1, %2
+ %3 = bitcast <4 x i16> %add to double
+ ret double %3
+}
+; FIXME: Ideally we should be able to fold the entire body of @test7 into a
+; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the
+; sequence pshufd+pshufd+paddd+pshufd.
+
+; CHECK-LABEL: test7
+; CHECK-NOT: movsd
+; CHECK: punpcklwd
+; CHECK-NEXT: punpcklwd
+; CHECK-NEXT: paddd
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
+
+define double @test8(double %A) {
+ %1 = bitcast double %A to <8 x i8>
+ %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>
+ %2 = bitcast <8 x i8> %add to double
+ ret double %2
+}
+; FIXME: Ideally we should be able to fold the entire body of @test8 into a
+; single paddb instruction. At the moment we produce the sequence
+; pshufd+paddw+pshufd.
+
+; CHECK-LABEL: test8
+; CHECK-NOT: movsd
+; CHECK: punpcklbw
+; CHECK-NEXT: paddw
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
+
+define double @test9(double %A, double %B) {
+ %1 = bitcast double %A to <8 x i8>
+ %2 = bitcast double %B to <8 x i8>
+ %add = add <8 x i8> %1, %2
+ %3 = bitcast <8 x i8> %add to double
+ ret double %3
+}
+; FIXME: Ideally we should be able to fold the entire body of @test9 into a
+; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the
+; sequence pshufd+pshufd+paddw+pshufd.
+
+; CHECK-LABEL: test9
+; CHECK-NOT: movsd
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: paddw
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+
diff --git a/test/CodeGen/X86/lower-vec-shift.ll b/test/CodeGen/X86/lower-vec-shift.ll
new file mode 100644
index 0000000..c28f82a
--- /dev/null
+++ b/test/CodeGen/X86/lower-vec-shift.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+
+
+; Verify that the following shifts are lowered into a sequence of two shifts
+; plus a blend. On pre-AVX2 targets, instead of scalarizing a logical or
+; arithmetic packed shift right by a constant build_vector, the backend should
+; always try to emit the simpler sequence of two shifts plus a blend when
+; possible.
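+;
+; For example, lshr <8 x i16> %a, <i16 3, i16 3, i16 2, ...> can be emitted as
+; one uniform shift by 3, one uniform shift by 2, and a blend (movss/movsd)
+; that takes the low lanes from the shift-by-3 result and the remaining lanes
+; from the shift-by-2 result.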
+
+define <8 x i16> @test1(<8 x i16> %a) {
+ %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <8 x i16> %lshr
+}
+; CHECK-LABEL: test1
+; SSE: psrlw
+; SSE-NEXT: psrlw
+; SSE-NEXT: movss
+; AVX: vpsrlw
+; AVX-NEXT: vpsrlw
+; AVX-NEXT: vmovss
+; AVX2: vpsrlw
+; AVX2-NEXT: vpsrlw
+; AVX2-NEXT: vmovss
+; CHECK: ret
+
+
+define <8 x i16> @test2(<8 x i16> %a) {
+ %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
+ ret <8 x i16> %lshr
+}
+; CHECK-LABEL: test2
+; SSE: psrlw
+; SSE-NEXT: psrlw
+; SSE-NEXT: movsd
+; AVX: vpsrlw
+; AVX-NEXT: vpsrlw
+; AVX-NEXT: vmovsd
+; AVX2: vpsrlw
+; AVX2-NEXT: vpsrlw
+; AVX2-NEXT: vmovsd
+; CHECK: ret
+
+
+define <4 x i32> @test3(<4 x i32> %a) {
+ %lshr = lshr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
+ ret <4 x i32> %lshr
+}
+; CHECK-LABEL: test3
+; SSE: psrld
+; SSE-NEXT: psrld
+; SSE-NEXT: movss
+; AVX: vpsrld
+; AVX-NEXT: vpsrld
+; AVX-NEXT: vmovss
+; AVX2: vpsrlvd
+; CHECK: ret
+
+
+define <4 x i32> @test4(<4 x i32> %a) {
+ %lshr = lshr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
+ ret <4 x i32> %lshr
+}
+; CHECK-LABEL: test4
+; SSE: psrld
+; SSE-NEXT: psrld
+; SSE-NEXT: movsd
+; AVX: vpsrld
+; AVX-NEXT: vpsrld
+; AVX-NEXT: vmovsd
+; AVX2: vpsrlvd
+; CHECK: ret
+
+
+define <8 x i16> @test5(<8 x i16> %a) {
+ %lshr = ashr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <8 x i16> %lshr
+}
+
+define <8 x i16> @test6(<8 x i16> %a) {
+ %lshr = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
+ ret <8 x i16> %lshr
+}
+; CHECK-LABEL: test6
+; SSE: psraw
+; SSE-NEXT: psraw
+; SSE-NEXT: movsd
+; AVX: vpsraw
+; AVX-NEXT: vpsraw
+; AVX-NEXT: vmovsd
+; AVX2: vpsraw
+; AVX2-NEXT: vpsraw
+; AVX2-NEXT: vmovsd
+; CHECK: ret
+
+
+define <4 x i32> @test7(<4 x i32> %a) {
+ %lshr = ashr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
+ ret <4 x i32> %lshr
+}
+; CHECK-LABEL: test7
+; SSE: psrad
+; SSE-NEXT: psrad
+; SSE-NEXT: movss
+; AVX: vpsrad
+; AVX-NEXT: vpsrad
+; AVX-NEXT: vmovss
+; AVX2: vpsravd
+; CHECK: ret
+
+
+define <4 x i32> @test8(<4 x i32> %a) {
+ %lshr = ashr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
+ ret <4 x i32> %lshr
+}
+; CHECK-LABEL: test8
+; SSE: psrad
+; SSE-NEXT: psrad
+; SSE-NEXT: movsd
+; AVX: vpsrad
+; AVX-NEXT: vpsrad
+; AVX-NEXT: vmovsd
+; AVX2: vpsravd
+; CHECK: ret
+
diff --git a/test/CodeGen/X86/lzcnt-tzcnt.ll b/test/CodeGen/X86/lzcnt-tzcnt.ll
new file mode 100644
index 0000000..07e4b9d
--- /dev/null
+++ b/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -0,0 +1,447 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+lzcnt | FileCheck %s
+
+; LZCNT and TZCNT always produce the operand size when the input operand is
+; zero. This test verifies that we select LZCNT/TZCNT efficiently, based on
+; the fact that the 'icmp+select' sequence is always redundant in every
+; function defined below.
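+;
+; Concretely, since lzcnt/tzcnt of zero already yields the operand width W,
+; the IR idiom
+;   %cnt  = call iW @llvm.ctlz.iW(iW %v, i1 true)
+;   %cond = select (icmp eq iW %v, 0), iW W, iW %cnt
+; is exactly a single lzcnt instruction (and likewise cttz maps to tzcnt).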
+
+
+define i16 @test1_ctlz(i16 %v) {
+ %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 %v, 0
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test1_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test2_ctlz(i32 %v) {
+ %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 %v, 0
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test2_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test3_ctlz(i64 %v) {
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 %v, 0
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test3_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test4_ctlz(i16 %v) {
+ %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test4_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test5_ctlz(i32 %v) {
+ %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test5_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test6_ctlz(i64 %v) {
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test6_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test7_ctlz(i16 %v) {
+ %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 %cnt, i16 16
+ ret i16 %cond
+}
+; CHECK-LABEL: test7_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test8_ctlz(i32 %v) {
+ %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 %cnt, i32 32
+ ret i32 %cond
+}
+; CHECK-LABEL: test8_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test9_ctlz(i64 %v) {
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 %cnt, i64 64
+ ret i64 %cond
+}
+; CHECK-LABEL: test9_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test10_ctlz(i16* %ptr) {
+ %v = load i16* %ptr
+ %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 %v, 0
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test10_ctlz
+; CHECK-NOT: movw
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test11_ctlz(i32* %ptr) {
+ %v = load i32* %ptr
+ %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 %v, 0
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test11_ctlz
+; CHECK-NOT: movd
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test12_ctlz(i64* %ptr) {
+ %v = load i64* %ptr
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 %v, 0
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test12_ctlz
+; CHECK-NOT: movq
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test13_ctlz(i16* %ptr) {
+ %v = load i16* %ptr
+ %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test13_ctlz
+; CHECK-NOT: movw
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test14_ctlz(i32* %ptr) {
+ %v = load i32* %ptr
+ %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test14_ctlz
+; CHECK-NOT: movd
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test15_ctlz(i64* %ptr) {
+ %v = load i64* %ptr
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test15_ctlz
+; CHECK-NOT: movq
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test16_ctlz(i16* %ptr) {
+ %v = load i16* %ptr
+ %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 %cnt, i16 16
+ ret i16 %cond
+}
+; CHECK-LABEL: test16_ctlz
+; CHECK-NOT: movw
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test17_ctlz(i32* %ptr) {
+ %v = load i32* %ptr
+ %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 %cnt, i32 32
+ ret i32 %cond
+}
+; CHECK-LABEL: test17_ctlz
+; CHECK-NOT: movd
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test18_ctlz(i64* %ptr) {
+ %v = load i64* %ptr
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 %cnt, i64 64
+ ret i64 %cond
+}
+; CHECK-LABEL: test18_ctlz
+; CHECK-NOT: movq
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test1_cttz(i16 %v) {
+ %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 %v, 0
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test1_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test2_cttz(i32 %v) {
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 %v, 0
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test2_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test3_cttz(i64 %v) {
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 %v, 0
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test3_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test4_cttz(i16 %v) {
+ %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test4_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test5_cttz(i32 %v) {
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test5_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test6_cttz(i64 %v) {
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test6_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test7_cttz(i16 %v) {
+ %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 %cnt, i16 16
+ ret i16 %cond
+}
+; CHECK-LABEL: test7_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test8_cttz(i32 %v) {
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 %cnt, i32 32
+ ret i32 %cond
+}
+; CHECK-LABEL: test8_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test9_cttz(i64 %v) {
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 %cnt, i64 64
+ ret i64 %cond
+}
+; CHECK-LABEL: test9_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test10_cttz(i16* %ptr) {
+ %v = load i16* %ptr
+ %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 %v, 0
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test10_cttz
+; CHECK-NOT: movw
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test11_cttz(i32* %ptr) {
+ %v = load i32* %ptr
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 %v, 0
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test11_cttz
+; CHECK-NOT: movd
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test12_cttz(i64* %ptr) {
+ %v = load i64* %ptr
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 %v, 0
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test12_cttz
+; CHECK-NOT: movq
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test13_cttz(i16* %ptr) {
+ %v = load i16* %ptr
+ %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 16, i16 %cnt
+ ret i16 %cond
+}
+; CHECK-LABEL: test13_cttz
+; CHECK-NOT: movw
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test14_cttz(i32* %ptr) {
+ %v = load i32* %ptr
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 32, i32 %cnt
+ ret i32 %cond
+}
+; CHECK-LABEL: test14_cttz
+; CHECK-NOT: movd
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test15_cttz(i64* %ptr) {
+ %v = load i64* %ptr
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 64, i64 %cnt
+ ret i64 %cond
+}
+; CHECK-LABEL: test15_cttz
+; CHECK-NOT: movq
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test16_cttz(i16* %ptr) {
+ %v = load i16* %ptr
+ %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+ %tobool = icmp eq i16 0, %v
+ %cond = select i1 %tobool, i16 %cnt, i16 16
+ ret i16 %cond
+}
+; CHECK-LABEL: test16_cttz
+; CHECK-NOT: movw
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test17_cttz(i32* %ptr) {
+ %v = load i32* %ptr
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %tobool = icmp eq i32 0, %v
+ %cond = select i1 %tobool, i32 %cnt, i32 32
+ ret i32 %cond
+}
+; CHECK-LABEL: test17_cttz
+; CHECK-NOT: movd
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test18_cttz(i64* %ptr) {
+ %v = load i64* %ptr
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 0, %v
+ %cond = select i1 %tobool, i64 %cnt, i64 64
+ ret i64 %cond
+}
+; CHECK-LABEL: test18_cttz
+; CHECK-NOT: movq
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 4a4d178..9ddc847 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: count_up
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: inc
+; CHECK: incq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up(double* %d, i64 %n) nounwind {
@@ -71,7 +71,7 @@ return:
; CHECK-LABEL: count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: inc
+; CHECK: incq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up_signed(double* %d, i64 %n) nounwind {
@@ -174,7 +174,7 @@ return:
; CHECK-LABEL: another_count_down
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: decq
+; CHECK: addq $-8,
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down(double* %d, i64 %n) nounwind {
diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll
index 940688c..f98963d 100644
--- a/test/CodeGen/X86/merge_store.ll
+++ b/test/CodeGen/X86/merge_store.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -addr-sink-using-gep=1 | FileCheck %s
define void @merge_store(i32* nocapture %a) {
; CHECK-LABEL: merge_store:
diff --git a/test/CodeGen/X86/mod128.ll b/test/CodeGen/X86/mod128.ll
new file mode 100644
index 0000000..4fdee11
--- /dev/null
+++ b/test/CodeGen/X86/mod128.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-cygwin | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=WIN64
+
+define i64 @mod128(i128 %x) {
+ ; X86-64: movl $3, %edx
+ ; X86-64: xorl %ecx, %ecx
+ ; X86-64: callq __modti3
+ ; X86-64-NOT: movd %xmm0, %rax
+
+ ; WIN64-NOT: movl $3, %r8d
+ ; WIN64-NOT: xorl %r9d, %r9d
+ ; WIN64-DAG: movq %rdx, 56(%rsp)
+ ; WIN64-DAG: movq %rcx, 48(%rsp)
+ ; WIN64-DAG: leaq 48(%rsp), %rcx
+ ; WIN64-DAG: leaq 32(%rsp), %rdx
+ ; WIN64-DAG: movq $0, 40(%rsp)
+ ; WIN64-DAG: movq $3, 32(%rsp)
+ ; WIN64: callq __modti3
+ ; WIN64: movd %xmm0, %rax
+
+ %1 = srem i128 %x, 3
+ %2 = trunc i128 %1 to i64
+ ret i64 %2
+}
diff --git a/test/CodeGen/X86/musttail-indirect.ll b/test/CodeGen/X86/musttail-indirect.ll
new file mode 100644
index 0000000..9d21b5e
--- /dev/null
+++ b/test/CodeGen/X86/musttail-indirect.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=i686-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-win32 -O0 | FileCheck %s
+
+; IR simplified from the following C++ snippet compiled for i686-windows-msvc:
+
+; struct A { A(); ~A(); int a; };
+;
+; struct B {
+; virtual int f(int);
+; virtual int g(A, int, A);
+; virtual void h(A, int, A);
+; virtual A i(A, int, A);
+; virtual A j(int);
+; };
+;
+; int (B::*mp_f)(int) = &B::f;
+; int (B::*mp_g)(A, int, A) = &B::g;
+; void (B::*mp_h)(A, int, A) = &B::h;
+; A (B::*mp_i)(A, int, A) = &B::i;
+; A (B::*mp_j)(int) = &B::j;
+
+; Each member pointer creates a thunk. The ones with inalloca are required to
+; be tail calls by the ABI, even at O0.
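+; A rough sketch of the lowering expected for g_thunk below (register choice
+; is illustrative, assuming thiscall keeps "this" in %ecx):
+;   movl (%ecx), %eax    ; load the vtable pointer
+;   jmpl *4(%eax)        ; tail-jump through slot 1 with no stack writes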
+
+%struct.B = type { i32 (...)** }
+%struct.A = type { i32 }
+
+; CHECK-LABEL: f_thunk:
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc i32 @f_thunk(%struct.B* %this, i32) {
+entry:
+ %1 = bitcast %struct.B* %this to i32 (%struct.B*, i32)***
+ %vtable = load i32 (%struct.B*, i32)*** %1
+ %2 = load i32 (%struct.B*, i32)** %vtable
+ %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, i32 %0)
+ ret i32 %3
+}
+
+; Inalloca thunks shouldn't require any stores to the stack.
+; CHECK-LABEL: g_thunk:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc i32 @g_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
+entry:
+ %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
+ %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 1
+ %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+ %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
+ ret i32 %3
+}
+
+; CHECK-LABEL: h_thunk:
+; CHECK: jmpl
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK-NOT: ret
+define x86_thiscallcc void @h_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
+entry:
+ %1 = bitcast %struct.B* %this to void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
+ %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 2
+ %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+ musttail call x86_thiscallcc void %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
+ ret void
+}
+
+; CHECK-LABEL: i_thunk:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc %struct.A* @i_thunk(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca) {
+entry:
+ %1 = bitcast %struct.B* %this to %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)***
+ %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vtable, i32 3
+ %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn
+ %3 = musttail call x86_thiscallcc %struct.A* %2(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca %0)
+ ret %struct.A* %3
+}
+
+; CHECK-LABEL: j_thunk:
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc void @j_thunk(%struct.A* noalias sret %agg.result, %struct.B* %this, i32) {
+entry:
+ %1 = bitcast %struct.B* %this to void (%struct.A*, %struct.B*, i32)***
+ %vtable = load void (%struct.A*, %struct.B*, i32)*** %1
+ %vfn = getelementptr inbounds void (%struct.A*, %struct.B*, i32)** %vtable, i32 4
+ %2 = load void (%struct.A*, %struct.B*, i32)** %vfn
+ musttail call x86_thiscallcc void %2(%struct.A* sret %agg.result, %struct.B* %this, i32 %0)
+ ret void
+}
+
+; CHECK-LABEL: _stdcall_thunk@8:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_stdcallcc i32 @stdcall_thunk(<{ %struct.B*, %struct.A }>* inalloca) {
+entry:
+ %this_ptr = getelementptr inbounds <{ %struct.B*, %struct.A }>* %0, i32 0, i32 0
+ %this = load %struct.B** %this_ptr
+ %1 = bitcast %struct.B* %this to i32 (<{ %struct.B*, %struct.A }>*)***
+ %vtable = load i32 (<{ %struct.B*, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds i32 (<{ %struct.B*, %struct.A }>*)** %vtable, i32 1
+ %2 = load i32 (<{ %struct.B*, %struct.A }>*)** %vfn
+ %3 = musttail call x86_stdcallcc i32 %2(<{ %struct.B*, %struct.A }>* inalloca %0)
+ ret i32 %3
+}
+
+; CHECK-LABEL: @fastcall_thunk@8:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_fastcallcc i32 @fastcall_thunk(%struct.B* inreg %this, <{ %struct.A }>* inalloca) {
+entry:
+ %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A }>*)***
+ %vtable = load i32 (%struct.B*, <{ %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A }>*)** %vtable, i32 1
+ %2 = load i32 (%struct.B*, <{ %struct.A }>*)** %vfn
+ %3 = musttail call x86_fastcallcc i32 %2(%struct.B* inreg %this, <{ %struct.A }>* inalloca %0)
+ ret i32 %3
+}
diff --git a/test/CodeGen/X86/musttail-thiscall.ll b/test/CodeGen/X86/musttail-thiscall.ll
new file mode 100644
index 0000000..8ea1248
--- /dev/null
+++ b/test/CodeGen/X86/musttail-thiscall.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+
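+; These functions mimic this-adjusting thunks: %this is bumped by 4 before
+; being forwarded, and each musttail call must still lower to a plain jmp.
+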
+; CHECK-LABEL: t1:
+; CHECK: jmp {{_?}}t1_callee
+define x86_thiscallcc void @t1(i8* %this) {
+ %adj = getelementptr i8* %this, i32 4
+ musttail call x86_thiscallcc void @t1_callee(i8* %adj)
+ ret void
+}
+declare x86_thiscallcc void @t1_callee(i8* %this)
+
+; CHECK-LABEL: t2:
+; CHECK: jmp {{_?}}t2_callee
+define x86_thiscallcc i32 @t2(i8* %this, i32 %a) {
+ %adj = getelementptr i8* %this, i32 4
+ %rv = musttail call x86_thiscallcc i32 @t2_callee(i8* %adj, i32 %a)
+ ret i32 %rv
+}
+declare x86_thiscallcc i32 @t2_callee(i8* %this, i32 %a)
+
+; CHECK-LABEL: t3:
+; CHECK: jmp {{_?}}t3_callee
+define x86_thiscallcc i8* @t3(i8* %this, <{ i8*, i32 }>* inalloca %args) {
+ %adj = getelementptr i8* %this, i32 4
+ %a_ptr = getelementptr <{ i8*, i32 }>* %args, i32 0, i32 1
+ store i32 0, i32* %a_ptr
+ %rv = musttail call x86_thiscallcc i8* @t3_callee(i8* %adj, <{ i8*, i32 }>* inalloca %args)
+ ret i8* %rv
+}
+declare x86_thiscallcc i8* @t3_callee(i8* %this, <{ i8*, i32 }>* inalloca %args);
diff --git a/test/CodeGen/X86/musttail.ll b/test/CodeGen/X86/musttail.ll
new file mode 100644
index 0000000..ca5d311
--- /dev/null
+++ b/test/CodeGen/X86/musttail.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+; RUN: llc -march=x86 -disable-tail-calls < %s | FileCheck %s
+
+declare void @t1_callee(i8*)
+define void @t1(i32* %a) {
+; CHECK-LABEL: t1:
+; CHECK: jmp {{_?}}t1_callee
+ %b = bitcast i32* %a to i8*
+ musttail call void @t1_callee(i8* %b)
+ ret void
+}
+
+declare i8* @t2_callee()
+define i32* @t2() {
+; CHECK-LABEL: t2:
+; CHECK: jmp {{_?}}t2_callee
+ %v = musttail call i8* @t2_callee()
+ %w = bitcast i8* %v to i32*
+ ret i32* %w
+}
+
+; Complex frame layout: stack realignment with dynamic alloca.
+define void @t3(i32 %n) alignstack(32) nounwind {
+entry:
+; CHECK: t3:
+; CHECK: pushl %ebp
+; CHECK: pushl %esi
+; CHECK: andl $-32, %esp
+; CHECK: movl %esp, %esi
+; CHECK: popl %esi
+; CHECK: popl %ebp
+; CHECK-NEXT: jmp {{_?}}t3_callee
+ %a = alloca i8, i32 %n
+ call void @capture(i8* %a)
+ musttail call void @t3_callee(i32 %n) nounwind
+ ret void
+}
+
+declare void @capture(i8*)
+declare void @t3_callee(i32)
+
+; Test that we actually copy in and out stack arguments that aren't forwarded
+; without modification.
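+; The updated values have to be written back into the incoming argument
+; slots before the jump, which is what the movl stores to (%esp) below check.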
+define i32 @t4({}* %fn, i32 %n, i32 %r) {
+; CHECK-LABEL: t4:
+; CHECK: incl %[[r:.*]]
+; CHECK: decl %[[n:.*]]
+; CHECK: movl %[[r]], {{[0-9]+}}(%esp)
+; CHECK: movl %[[n]], {{[0-9]+}}(%esp)
+; CHECK: jmpl *%{{.*}}
+
+entry:
+ %r1 = add i32 %r, 1
+ %n1 = sub i32 %n, 1
+ %fn_cast = bitcast {}* %fn to i32 ({}*, i32, i32)*
+ %r2 = musttail call i32 %fn_cast({}* %fn, i32 %n1, i32 %r1)
+ ret i32 %r2
+}
+
+; Combine the complex stack frame with the parameter modification.
+define i32 @t5({}* %fn, i32 %n, i32 %r) alignstack(32) {
+; CHECK-LABEL: t5:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: pushl %esi
+; Align the stack.
+; CHECK: andl $-32, %esp
+; CHECK: movl %esp, %esi
+; Modify the args.
+; CHECK: incl %[[r:.*]]
+; CHECK: decl %[[n:.*]]
+; Store them through ebp, since that's the only stable arg pointer.
+; CHECK: movl %[[r]], {{[0-9]+}}(%ebp)
+; CHECK: movl %[[n]], {{[0-9]+}}(%ebp)
+; Epilogue.
+; CHECK: leal {{[-0-9]+}}(%ebp), %esp
+; CHECK: popl %esi
+; CHECK: popl %ebp
+; CHECK: jmpl *%{{.*}}
+
+entry:
+ %a = alloca i8, i32 %n
+ call void @capture(i8* %a)
+ %r1 = add i32 %r, 1
+ %n1 = sub i32 %n, 1
+ %fn_cast = bitcast {}* %fn to i32 ({}*, i32, i32)*
+ %r2 = musttail call i32 %fn_cast({}* %fn, i32 %n1, i32 %r1)
+ ret i32 %r2
+}
diff --git a/test/CodeGen/X86/named-reg-alloc.ll b/test/CodeGen/X86/named-reg-alloc.ll
new file mode 100644
index 0000000..9463ea3
--- /dev/null
+++ b/test/CodeGen/X86/named-reg-alloc.ll
@@ -0,0 +1,14 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=x86_64-linux-gnueabi 2>&1 | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; FIXME: Include an allocatable-specific error message
+; CHECK: Invalid register name global variable
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"eax\00"}
diff --git a/test/CodeGen/X86/named-reg-notareg.ll b/test/CodeGen/X86/named-reg-notareg.ll
new file mode 100644
index 0000000..d85dddd
--- /dev/null
+++ b/test/CodeGen/X86/named-reg-notareg.ll
@@ -0,0 +1,13 @@
+; RUN: not llc < %s -mtriple=x86_64-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=x86_64-linux-gnueabi 2>&1 | FileCheck %s
+
+define i32 @get_stack() nounwind {
+entry:
+; CHECK: Invalid register name global variable
+ %sp = call i32 @llvm.read_register.i32(metadata !0)
+ ret i32 %sp
+}
+
+declare i32 @llvm.read_register.i32(metadata) nounwind
+
+!0 = metadata !{metadata !"notareg\00"}
diff --git a/test/CodeGen/X86/no-cfi.ll b/test/CodeGen/X86/no-cfi.ll
deleted file mode 100644
index 5bb9bb2..0000000
--- a/test/CodeGen/X86/no-cfi.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-cfi | FileCheck --check-prefix=STATIC %s
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -disable-cfi -relocation-model=pic | FileCheck --check-prefix=PIC %s
-
-; STATIC: .ascii "zPLR"
-; STATIC: .byte 3
-; STATIC-NEXT: .long __gxx_personality_v0
-; STATIC-NEXT: .byte 3
-; STATIC-NEXT: .byte 3
-
-; PIC: .ascii "zPLR"
-; PIC: .byte 155
-; PIC-NEXT: .L
-; PIC-NEXT: .long DW.ref.__gxx_personality_v0-.L
-; PIC-NEXT: .byte 27
-; PIC-NEXT: .byte 27
-
-
-define void @bar() {
-entry:
- %call = invoke i32 @foo()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont:
- ret void
-
-lpad:
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
- catch i8* null
- ret void
-}
-
-declare i32 @foo()
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/X86/peep-test-4.ll b/test/CodeGen/X86/peep-test-4.ll
index 884ee7c..1ae621f 100644
--- a/test/CodeGen/X86/peep-test-4.ll
+++ b/test/CodeGen/X86/peep-test-4.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt,+lzcnt | FileCheck %s
declare void @foo(i32)
+declare void @foo32(i32)
declare void @foo64(i64)
; CHECK-LABEL: neg:
@@ -189,3 +190,76 @@ bb:
return:
ret void
}
+
+; CHECK-LABEL: testCTZ
+; CHECK: tzcntq
+; CHECK-NOT: test
+; CHECK: cmovaeq
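+; (TZCNT sets CF exactly when its input is zero, so the "v == 0" select can
+; be implemented as cmovae on the flags tzcnt already produced, without a
+; separate test instruction.)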
+declare i64 @llvm.cttz.i64(i64, i1)
+define i64 @testCTZ(i64 %v) nounwind {
+ %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+ %tobool = icmp eq i64 %v, 0
+ %cond = select i1 %tobool, i64 255, i64 %cnt
+ ret i64 %cond
+}
+
+; CHECK-LABEL: testCTZ2
+; CHECK: tzcntl
+; CHECK-NEXT: jb
+; CHECK: jmp foo
+declare i32 @llvm.cttz.i32(i32, i1)
+define void @testCTZ2(i32 %v) nounwind {
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %cmp = icmp eq i32 %v, 0
+ br i1 %cmp, label %return, label %bb
+
+bb:
+ tail call void @foo(i32 %cnt)
+ br label %return
+
+return:
+ tail call void @foo32(i32 %cnt)
+ ret void
+}
+
+; CHECK-LABEL: testCTZ3
+; CHECK: tzcntl
+; CHECK-NEXT: jae
+; CHECK: jmp foo
+define void @testCTZ3(i32 %v) nounwind {
+ %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+ %cmp = icmp ne i32 %v, 0
+ br i1 %cmp, label %return, label %bb
+
+bb:
+ tail call void @foo(i32 %cnt)
+ br label %return
+
+return:
+ tail call void @foo32(i32 %cnt)
+ ret void
+}
+
+; CHECK-LABEL: testCLZ
+; CHECK: lzcntq
+; CHECK-NOT: test
+; CHECK: cmovaeq
+declare i64 @llvm.ctlz.i64(i64, i1)
+define i64 @testCLZ(i64 %v) nounwind {
+ %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+ %tobool = icmp ne i64 %v, 0
+ %cond = select i1 %tobool, i64 %cnt, i64 255
+ ret i64 %cond
+}
+
+; CHECK-LABEL: testPOPCNT
+; CHECK: popcntq
+; CHECK-NOT: test
+; CHECK: cmovneq
+declare i64 @llvm.ctpop.i64(i64)
+define i64 @testPOPCNT(i64 %v) nounwind {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %v)
+ %tobool = icmp ne i64 %v, 0
+ %cond = select i1 %tobool, i64 %cnt, i64 255
+ ret i64 %cond
+}
diff --git a/test/CodeGen/X86/peephole-multiple-folds.ll b/test/CodeGen/X86/peephole-multiple-folds.ll
index d184569..a6cec66 100644
--- a/test/CodeGen/X86/peephole-multiple-folds.ll
+++ b/test/CodeGen/X86/peephole-multiple-folds.ll
@@ -9,8 +9,8 @@ entry:
loopbody:
; CHECK: test_peephole_multi_fold:
-; CHECK: vfmadd231ps (%rdi),
-; CHECK: vfmadd231ps (%rsi),
+; CHECK: vfmadd231ps ({{%rdi|%rcx}}),
+; CHECK: vfmadd231ps ({{%rsi|%rdx}}),
%vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ]
%vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer, %entry ]
%m1 = load <8 x float>* %p1, align 1
diff --git a/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll b/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
index f3669fb..d8e4572 100644
--- a/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
+++ b/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
@@ -2,6 +2,16 @@
; Without the last chance recoloring, this test fails with:
; "ran out of registers".
+; RUN: not llc -regalloc=greedy -relocation-model=pic -lcr-max-depth=0 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEPTH
+; Test whether failure due to cutoff for depth is reported
+
+; RUN: not llc -regalloc=greedy -relocation-model=pic -lcr-max-interf=1 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-INTERF
+; Test whether failure due to cutoff for interference is reported
+
+; RUN: llc -regalloc=greedy -relocation-model=pic -lcr-max-interf=1 -lcr-max-depth=0 -exhaustive-register-search < %s > %t 2>&1
+; RUN: FileCheck --input-file=%t %s --check-prefix=CHECK-EXHAUSTIVE
+; Test whether exhaustive-register-search can bypass the depth and interference cutoffs of last chance recoloring
+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
target triple = "i386-apple-macosx"
@@ -12,6 +22,9 @@ target triple = "i386-apple-macosx"
; Function Attrs: nounwind ssp
; CHECK-NOT: ran out of registers during register allocation
+; CHECK-INTERF: error: register allocation failed: maximum interference for recoloring reached
+; CHECK-DEPTH: error: register allocation failed: maximum depth for recoloring reached
+; CHECK-EXHAUSTIVE-NOT: error: register allocation failed: maximum {{depth|interference}} for recoloring reached
define void @fp_dh_f870bf31fd8ffe068450366e3f05389a(i8* %arg) #0 {
bb:
indirectbr i8* undef, [label %bb85, label %bb206]
diff --git a/test/CodeGen/X86/rdtsc.ll b/test/CodeGen/X86/rdtsc.ll
index f21a44c..dba614a 100644
--- a/test/CodeGen/X86/rdtsc.ll
+++ b/test/CodeGen/X86/rdtsc.ll
@@ -1,8 +1,49 @@
-; RUN: llc < %s -march=x86 | grep rdtsc
-; RUN: llc < %s -march=x86-64 | grep rdtsc
-declare i64 @llvm.readcyclecounter()
+; RUN: llc < %s -march=x86-64 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+
+; Verify that we correctly lower ISD::READCYCLECOUNTER.
+
+
+define i64 @test_builtin_readcyclecounter() {
+ %1 = tail call i64 @llvm.readcyclecounter()
+ ret i64 %1
+}
+; CHECK-LABEL: test_builtin_readcyclecounter
+; CHECK: rdtsc
+; X86-NOT: shlq
+; X86-NOT: or
+; CHECK-NOT: mov
+; CHECK: ret
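+; On 32-bit targets rdtsc already leaves the i64 result split across
+; edx:eax, the register pair used to return an i64, so no shift/or sequence
+; is needed to merge the halves there (the X86-NOT checks); the 64-bit run
+; still combines edx:eax into rax.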
+
+
+; Verify that we correctly lower the Read Cycle Counter GCC x86 builtins
+; (i.e. RDTSC and RDTSCP).
-define i64 @foo() {
- %tmp.1 = call i64 @llvm.readcyclecounter( ) ; <i64> [#uses=1]
- ret i64 %tmp.1
+define i64 @test_builtin_rdtsc() {
+ %1 = tail call i64 @llvm.x86.rdtsc()
+ ret i64 %1
}
+; CHECK-LABEL: test_builtin_rdtsc
+; CHECK: rdtsc
+; X86-NOT: shlq
+; X86-NOT: or
+; CHECK-NOT: mov
+; CHECK: ret
+
+
+define i64 @test_builtin_rdtscp(i8* %A) {
+ %1 = tail call i64 @llvm.x86.rdtscp(i8* %A)
+ ret i64 %1
+}
+; CHECK-LABEL: test_builtin_rdtscp
+; CHECK: rdtscp
+; X86-NOT: shlq
+; CHECK: movl %ecx, (%{{[a-z0-9]+}})
+; X86-NOT: shlq
+; CHECK: ret
+
+
+declare i64 @llvm.readcyclecounter()
+declare i64 @llvm.x86.rdtscp(i8*)
+declare i64 @llvm.x86.rdtsc()
+
diff --git a/test/CodeGen/X86/remat-invalid-liveness.ll b/test/CodeGen/X86/remat-invalid-liveness.ll
new file mode 100644
index 0000000..d285e83
--- /dev/null
+++ b/test/CodeGen/X86/remat-invalid-liveness.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -mcpu=core2 -o - | FileCheck %s
+; This test was failing while tracking the liveness in the register scavenger
+; during the branch folding pass. The allocation of the subregisters was
+; incorrect.
+; I.e., the faulty pattern looked like:
+; CH = movb 64
+; ECX = movl 3 <- CH was killed here.
+; CH = subb CH, ...
+;
+; This reduced test case triggers the crash before the fix, but does not,
+; strictly speaking, check that the resulting code is correct.
+; To check that the code is actually correct we would need to check the
+; liveness of the produced code.
+;
+; Currently, we check that after ECX = movl 3, we do not have subb CH,
+; whereas CH could have been redefined in between and that would have been
+; totally fine.
+; <rdar://problem/16582185>
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9"
+
+%struct.A = type { %struct.B, %struct.C, %struct.D*, [1 x i8*] }
+%struct.B = type { i32, [4 x i8] }
+%struct.C = type { i128 }
+%struct.D = type { {}*, [0 x i32] }
+%union.E = type { i32 }
+
+; CHECK-LABEL: __XXX1:
+; CHECK: movl $3, %ecx
+; CHECK-NOT: subb %{{[a-z]+}}, %ch
+; Function Attrs: nounwind optsize ssp
+define fastcc void @__XXX1(%struct.A* %ht) #0 {
+entry:
+ %const72 = bitcast i128 72 to i128
+ %const3 = bitcast i128 3 to i128
+ switch i32 undef, label %if.end196 [
+ i32 1, label %sw.bb.i
+ i32 3, label %sw.bb2.i
+ ]
+
+sw.bb.i: ; preds = %entry
+ %call.i.i.i = tail call i32 undef(%struct.A* %ht, i8 zeroext 22, i32 undef, i32 0, %struct.D* undef)
+ %bf.load.i.i = load i128* undef, align 4
+ %bf.lshr.i.i = lshr i128 %bf.load.i.i, %const72
+ %shl1.i.i = shl nuw nsw i128 %bf.lshr.i.i, 8
+ %shl.i.i = trunc i128 %shl1.i.i to i32
+ br i1 undef, label %cond.false10.i.i, label %__XXX2.exit.i.i
+
+__XXX2.exit.i.i: ; preds = %sw.bb.i
+ %extract11.i.i.i = lshr i128 %bf.load.i.i, %const3
+ %extract.t12.i.i.i = trunc i128 %extract11.i.i.i to i32
+ %bf.cast7.i.i.i = and i32 %extract.t12.i.i.i, 3
+ %arrayidx.i.i.i = getelementptr inbounds %struct.A* %ht, i32 0, i32 3, i32 %bf.cast7.i.i.i
+ br label %cond.end12.i.i
+
+cond.false10.i.i: ; preds = %sw.bb.i
+ %arrayidx.i6.i.i = getelementptr inbounds %struct.A* %ht, i32 0, i32 3, i32 0
+ br label %cond.end12.i.i
+
+cond.end12.i.i: ; preds = %cond.false10.i.i, %__XXX2.exit.i.i
+ %.sink.in.i.i = phi i8** [ %arrayidx.i.i.i, %__XXX2.exit.i.i ], [ %arrayidx.i6.i.i, %cond.false10.i.i ]
+ %.sink.i.i = load i8** %.sink.in.i.i, align 4
+ %tmp = bitcast i8* %.sink.i.i to %union.E*
+ br i1 undef, label %for.body.i.i, label %if.end196
+
+for.body.i.i: ; preds = %for.body.i.i, %cond.end12.i.i
+ %weak.i.i = getelementptr inbounds %union.E* %tmp, i32 undef, i32 0
+ %tmp1 = load i32* %weak.i.i, align 4
+ %cmp36.i.i = icmp ne i32 %tmp1, %shl.i.i
+ %or.cond = and i1 %cmp36.i.i, false
+ br i1 %or.cond, label %for.body.i.i, label %if.end196
+
+sw.bb2.i: ; preds = %entry
+ %bf.lshr.i85.i = lshr i128 undef, %const72
+ br i1 undef, label %if.end196, label %__XXX2.exit.i95.i
+
+__XXX2.exit.i95.i: ; preds = %sw.bb2.i
+ %extract11.i.i91.i = lshr i128 undef, %const3
+ br label %if.end196
+
+if.end196: ; preds = %__XXX2.exit.i95.i, %sw.bb2.i, %for.body.i.i, %cond.end12.i.i, %entry
+ ret void
+}
+
+attributes #0 = { nounwind optsize ssp "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 091fd53..fc9c78d 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -34,6 +34,7 @@ define double @t4() nounwind {
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
; CHECK-LABEL: t4:
; CHECK: movl $1
+; CHECK-NOT: pshufd
; CHECK: movd {{.*}}, %xmm0
}
diff --git a/test/CodeGen/X86/rotate3.ll b/test/CodeGen/X86/rotate3.ll
deleted file mode 100644
index b92f7c2..0000000
--- a/test/CodeGen/X86/rotate3.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; Check that (or (shl x, y), (srl x, (sub 32, y))) is folded into (rotl x, y)
-; and (or (shl x, (sub 32, y)), (srl x, r)) into (rotr x, y) even if the
-; argument is zero extended. Fix for PR16726.
-
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
-
-define zeroext i8 @rolbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
-entry:
- %tmp1 = zext i8 %x_arg to i32
- %tmp3 = shl i32 %tmp1, %nBits_arg
- %tmp8 = sub i32 8, %nBits_arg
- %tmp10 = lshr i32 %tmp1, %tmp8
- %tmp11 = or i32 %tmp3, %tmp10
- %tmp12 = trunc i32 %tmp11 to i8
- ret i8 %tmp12
-}
-; CHECK: rolb %cl, %{{[a-z0-9]+}}
-
-
-define zeroext i8 @rorbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
-entry:
- %tmp1 = zext i8 %x_arg to i32
- %tmp3 = lshr i32 %tmp1, %nBits_arg
- %tmp8 = sub i32 8, %nBits_arg
- %tmp10 = shl i32 %tmp1, %tmp8
- %tmp11 = or i32 %tmp3, %tmp10
- %tmp12 = trunc i32 %tmp11 to i8
- ret i8 %tmp12
-}
-; CHECK: rorb %cl, %{{[a-z0-9]+}}
-
-define zeroext i16 @rolword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
-entry:
- %tmp1 = zext i16 %x_arg to i32
- %tmp3 = shl i32 %tmp1, %nBits_arg
- %tmp8 = sub i32 16, %nBits_arg
- %tmp10 = lshr i32 %tmp1, %tmp8
- %tmp11 = or i32 %tmp3, %tmp10
- %tmp12 = trunc i32 %tmp11 to i16
- ret i16 %tmp12
-}
-; CHECK: rolw %cl, %{{[a-z0-9]+}}
-
-define zeroext i16 @rorword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
-entry:
- %tmp1 = zext i16 %x_arg to i32
- %tmp3 = lshr i32 %tmp1, %nBits_arg
- %tmp8 = sub i32 16, %nBits_arg
- %tmp10 = shl i32 %tmp1, %tmp8
- %tmp11 = or i32 %tmp3, %tmp10
- %tmp12 = trunc i32 %tmp11 to i16
- ret i16 %tmp12
-}
-; CHECK: rorw %cl, %{{[a-z0-9]+}}
-
-define i64 @roldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
-entry:
- %tmp1 = zext i32 %x_arg to i64
- %tmp3 = shl i64 %tmp1, %nBits_arg
- %tmp8 = sub i64 32, %nBits_arg
- %tmp10 = lshr i64 %tmp1, %tmp8
- %tmp11 = or i64 %tmp3, %tmp10
- ret i64 %tmp11
-}
-; CHECK: roll %cl, %{{[a-z0-9]+}}
-
-define zeroext i64 @rordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
-entry:
- %tmp1 = zext i32 %x_arg to i64
- %tmp3 = lshr i64 %tmp1, %nBits_arg
- %tmp8 = sub i64 32, %nBits_arg
- %tmp10 = shl i64 %tmp1, %tmp8
- %tmp11 = or i64 %tmp3, %tmp10
- ret i64 %tmp11
-}
-; CHECK: rorl %cl, %{{[a-z0-9]+}}
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index e170762..b82be41 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define i32 @test_basic(i32 %l) {
+define i32 @test_basic(i32 %l) #0 {
%mem = alloca i32, i32 %l
call void @dummy_use (i32* %mem, i32 %l)
%terminate = icmp eq i32 %l, 0
@@ -62,3 +62,5 @@ false:
; X64: movq %rax, %rdi
}
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index c02152b..9dab3cd 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
; We used to crash with filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks -filetype=obj
-
-; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj
+
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris 2> %t.log
; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
-; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd 2> %t.log
; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
; X64-Solaris: Segmented stacks not supported on this platform
@@ -26,7 +26,7 @@
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define void @test_basic() {
+define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -104,16 +104,18 @@ define void @test_basic() {
}
-define i32 @test_nested(i32 * nest %closure, i32 %other) {
+define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
%addend = load i32 * %closure
%result = add i32 %other, %addend
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
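+; The alloca gives test_nested a real frame, so the frame-size argument
+; passed to __morestack changes from 0 to a target-dependent non-zero value
+; (e.g. 60 bytes on 32-bit Linux, 56 on x86-64 Linux) in the checks below.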
ret i32 %result
; X32-Linux: cmpl %gs:48, %esp
; X32-Linux-NEXT: ja .LBB1_2
; X32-Linux: pushl $4
-; X32-Linux-NEXT: pushl $0
+; X32-Linux-NEXT: pushl $60
; X32-Linux-NEXT: calll __morestack
; X32-Linux-NEXT: ret
@@ -121,7 +123,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X64-Linux-NEXT: ja .LBB1_2
; X64-Linux: movq %r10, %rax
-; X64-Linux-NEXT: movabsq $0, %r10
+; X64-Linux-NEXT: movabsq $56, %r10
; X64-Linux-NEXT: movabsq $0, %r11
; X64-Linux-NEXT: callq __morestack
; X64-Linux-NEXT: ret
@@ -132,7 +134,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X32-Darwin-NEXT: ja LBB1_2
; X32-Darwin: pushl $4
-; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: pushl $60
; X32-Darwin-NEXT: calll ___morestack
; X32-Darwin-NEXT: ret
@@ -140,7 +142,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X64-Darwin-NEXT: ja LBB1_2
; X64-Darwin: movq %r10, %rax
-; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $56, %r10
; X64-Darwin-NEXT: movabsq $0, %r11
; X64-Darwin-NEXT: callq ___morestack
; X64-Darwin-NEXT: ret
@@ -150,7 +152,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X32-MinGW-NEXT: ja LBB1_2
; X32-MinGW: pushl $4
-; X32-MinGW-NEXT: pushl $0
+; X32-MinGW-NEXT: pushl $52
; X32-MinGW-NEXT: calll ___morestack
; X32-MinGW-NEXT: ret
@@ -159,7 +161,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X64-MinGW-NEXT: ja .LBB1_2
; X64-MinGW: movq %r10, %rax
-; X64-MinGW-NEXT: movabsq $0, %r10
+; X64-MinGW-NEXT: movabsq $88, %r10
; X64-MinGW-NEXT: movabsq $32, %r11
; X64-MinGW-NEXT: callq __morestack
; X64-MinGW-NEXT: retq
@@ -169,7 +171,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X64-FreeBSD-NEXT: ja .LBB1_2
; X64-FreeBSD: movq %r10, %rax
-; X64-FreeBSD-NEXT: movabsq $0, %r10
+; X64-FreeBSD-NEXT: movabsq $56, %r10
; X64-FreeBSD-NEXT: movabsq $0, %r11
; X64-FreeBSD-NEXT: callq __morestack
; X64-FreeBSD-NEXT: ret
@@ -177,7 +179,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
}
-define void @test_large() {
+define void @test_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
@@ -249,7 +251,7 @@ define void @test_large() {
}
-define fastcc void @test_fastcc() {
+define fastcc void @test_fastcc() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -327,7 +329,7 @@ define fastcc void @test_fastcc() {
}
-define fastcc void @test_fastcc_large() {
+define fastcc void @test_fastcc_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
@@ -412,7 +414,7 @@ define fastcc void @test_fastcc_large() {
}
-define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 %a)
ret void
@@ -434,3 +436,30 @@ define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
; X32-Darwin-NEXT: ret
}
+
+define void @test_nostack() #0 {
+ ret void
+
+; X32-Linux-LABEL: test_nostack:
+; X32-Linux-NOT: calll __morestack
+
+; X64-Linux-LABEL: test_nostack:
+; X64-Linux-NOT: callq __morestack
+
+; X32-Darwin-LABEL: test_nostack:
+; X32-Darwin-NOT: calll __morestack
+
+; X64-Darwin-LABEL: test_nostack:
+; X64-Darwin-NOT: callq __morestack
+
+; X32-MinGW-LABEL: test_nostack:
+; X32-MinGW-NOT: calll __morestack
+
+; X64-MinGW-LABEL: test_nostack:
+; X64-MinGW-NOT: callq __morestack
+
+; X64-FreeBSD-LABEL: test_nostack:
+; X64-FreeBSD-NOT: callq __morestack
+}
+
+attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 628dba0..e8d3d6f 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -221,3 +221,21 @@ entry:
%double2float.i = fptrunc <4 x double> %0 to <4 x float>
ret <4 x float> %double2float.i
}
+
+define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
+; CHECK-LABEL: test_insert_64_zext
+; CHECK-NOT: xor
+; CHECK: movq
+ %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %1
+}
+
+define <4 x i32> @PR19721(<4 x i32> %i) {
+ %bc = bitcast <4 x i32> %i to i128
+ %insert = and i128 %bc, -4294967296
+ %bc2 = bitcast i128 %insert to <4 x i32>
+ ret <4 x i32> %bc2
+
+; CHECK-LABEL: PR19721
+; CHECK: punpckldq
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 6d5b192..18bdcb3 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -209,7 +209,7 @@ entry:
; X64-LABEL: t13:
; X64: punpcklqdq %xmm0, %xmm1
; X64: pextrw $3, %xmm1, %eax
-; X64: pshufd $52, %xmm1, %xmm0
+; X64: pshufhw $12, %xmm1, %xmm0
; X64: pinsrw $4, %eax, %xmm0
; X64: ret
}
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index 4681fde..8ad7987 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
;CHECK-LABEL: vsel_float:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %v1, <4 x float> %v2
@@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
;CHECK-LABEL: vsel_4xi8:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
@@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
}
;CHECK-LABEL: vsel_4xi16:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
@@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
;CHECK-LABEL: vsel_i32:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
@@ -88,3 +88,35 @@ entry:
store double %extract214vector_func.i, double addrspace(1)* undef, align 8
ret void
}
+
+; If we can figure out that a blend has a constant mask, we should emit the
+; blend instruction with an immediate mask.
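+; (For blendps the low imm8 bits form a per-lane mask: a set bit takes that
+; lane from the second source operand, so the $7 checked in constant_blendvps
+; below corresponds to the three false lanes of the select.)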
+define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
+; In this case, we emit a simple movsd.
+; CHECK-LABEL: constant_blendvpd
+; CHECK: movsd
+; CHECK: ret
+ %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %xy, <2 x double> %ab
+ ret <2 x double> %1
+}
+
+define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: constant_blendvps
+; CHECK-NOT: mov
+; CHECK: blendps $7
+; CHECK: ret
+ %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %xyzw, <4 x float> %abcd
+ ret <4 x float> %1
+}
+
+define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: constant_pblendvb:
+; CHECK: movaps
+; CHECK: pblendvb
+; CHECK: ret
+ %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %xyzw, <16 x i8> %abcd
+ ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index c15e24c..a3c6201 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64 --check-prefix=CHECK
@g16 = external global i16
@@ -249,3 +249,446 @@ entry:
; X64: ret
}
+define <4 x float> @insertps_from_shufflevector_1(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+entry:
+ %0 = load <4 x float>* %pb, align 16
+ %vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x float> %vecinit6
+; CHECK-LABEL: insertps_from_shufflevector_1:
+; CHECK-NOT: movss
+; CHECK-NOT: shufps
+; CHECK: insertps $48,
+; CHECK: ret
+}
+
+define <4 x float> @insertps_from_shufflevector_2(<4 x float> %a, <4 x float> %b) {
+entry:
+ %vecinit6 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+ ret <4 x float> %vecinit6
+; CHECK-LABEL: insertps_from_shufflevector_2:
+; CHECK-NOT: shufps
+; CHECK: insertps $96,
+; CHECK: ret
+}
+
+; For loading an i32 from memory into an xmm register, we use pinsrd
+; instead of insertps.
+define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocapture readonly %pb) {
+entry:
+ %0 = load <4 x i32>* %pb, align 16
+ %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %vecinit6
+; CHECK-LABEL: pinsrd_from_shufflevector_i32:
+; CHECK-NOT: movss
+; CHECK-NOT: shufps
+; CHECK: pinsrd $3,
+; CHECK: ret
+}
+
+define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
+entry:
+ %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+ ret <4 x i32> %vecinit6
+; CHECK-LABEL: insertps_from_shufflevector_i32_2:
+; CHECK-NOT: shufps
+; CHECK-NOT: movaps
+; CHECK: insertps $208,
+; CHECK: ret
+}
+
+define <4 x float> @insertps_from_load_ins_elt_undef(<4 x float> %a, float* %b) {
+; CHECK-LABEL: insertps_from_load_ins_elt_undef:
+; CHECK-NOT: movss
+; CHECK-NOT: shufps
+; CHECK: insertps $16,
+; CHECK: ret
+ %1 = load float* %b, align 4
+ %2 = insertelement <4 x float> undef, float %1, i32 0
+ %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+ ret <4 x float> %result
+}
+
+define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
+; CHECK-LABEL: insertps_from_load_ins_elt_undef_i32:
+; TODO: Like on pinsrd_from_shufflevector_i32, remove this mov instr
+;; aCHECK-NOT: movd
+; CHECK-NOT: shufps
+; CHECK: insertps $32,
+; CHECK: ret
+ %1 = load i32* %b, align 4
+ %2 = insertelement <4 x i32> undef, i32 %1, i32 0
+ %result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+ ret <4 x i32> %result
+}
+
+;;;;;; Shuffles optimizable with a single insertps instruction
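+; (The insertps imm8 encodes: bits [7:6] select the source element, bits
+; [5:4] the destination lane, and bits [3:0] a zero mask. So $8 zeroes only
+; lane 3, $12 zeroes lanes 2 and 3, and $104 = 0b01101000 copies source
+; element 1 into lane 2 while zeroing lane 3.)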
+define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_XYZ0:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $8
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecext1 = extractelement <4 x float> %x, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+ %vecext3 = extractelement <4 x float> %x, i32 2
+ %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2
+ %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
+ ret <4 x float> %vecinit5
+}
+
+define <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_XY00:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $12
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecext1 = extractelement <4 x float> %x, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+ %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2
+ %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
+ ret <4 x float> %vecinit4
+}
+
+define <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_XYY0:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $104
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecext1 = extractelement <4 x float> %x, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+ %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext1, i32 2
+ %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
+ ret <4 x float> %vecinit5
+}
+
+define <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_XYW0:
+; CHECK: insertps $232
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecext1 = extractelement <4 x float> %x, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+ %vecext2 = extractelement <4 x float> %x, i32 3
+ %vecinit3 = insertelement <4 x float> %vecinit2, float %vecext2, i32 2
+ %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
+ ret <4 x float> %vecinit4
+}
+
+define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_W00W:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $198
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 3
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecinit2 = insertelement <4 x float> %vecinit, float 0.0, i32 1
+ %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2
+ %vecinit4 = insertelement <4 x float> %vecinit3, float %vecext, i32 3
+ ret <4 x float> %vecinit4
+}
+
+define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_X00A:
+; CHECK-NOT: movaps
+; CHECK-NOT: shufps
+; CHECK: insertps $48
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2
+ %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x float> %vecinit4
+}
+
+define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_X00X:
+; CHECK-NOT: movaps
+; CHECK-NOT: shufps
+; CHECK: insertps $48
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2
+ %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x float> %vecinit4
+}
+
+define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
+; CHECK-LABEL: shuf_X0YC:
+; CHECK: shufps
+; CHECK-NOT: movhlps
+; CHECK-NOT: shufps
+; CHECK: insertps $176
+; CHECK: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
+ %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+ %vecinit5 = shufflevector <4 x float> %vecinit3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x float> %vecinit5
+}
+
+define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_XYZ0:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $8
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <4 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecext3 = extractelement <4 x i32> %x, i32 2
+ %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
+ %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3
+ ret <4 x i32> %vecinit5
+}
+
+define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_XY00:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $12
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <4 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2
+ %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3
+ ret <4 x i32> %vecinit4
+}
+
+define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_XYY0:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $104
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <4 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext1, i32 2
+ %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3
+ ret <4 x i32> %vecinit5
+}
+
+define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_XYW0:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $232
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecext1 = extractelement <4 x i32> %x, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
+ %vecext2 = extractelement <4 x i32> %x, i32 3
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %vecext2, i32 2
+ %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3
+ ret <4 x i32> %vecinit4
+}
+
+define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_W00W:
+; CHECK-NOT: pextrd
+; CHECK-NOT: punpckldq
+; CHECK: insertps $198
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 3
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecinit2 = insertelement <4 x i32> %vecinit, i32 0, i32 1
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2
+ %vecinit4 = insertelement <4 x i32> %vecinit3, i32 %vecext, i32 3
+ ret <4 x i32> %vecinit4
+}
+
+define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_X00A:
+; CHECK-NOT: movaps
+; CHECK-NOT: shufps
+; CHECK: insertps $48
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
+ %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %vecinit4
+}
+
+define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_X00X:
+; CHECK-NOT: movaps
+; CHECK-NOT: shufps
+; CHECK: insertps $48
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
+ %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %vecinit4
+}
+
+define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
+; CHECK-LABEL: i32_shuf_X0YC:
+; CHECK: shufps
+; CHECK-NOT: movhlps
+; CHECK-NOT: shufps
+; CHECK: insertps $176
+; CHECK: ret
+ %vecext = extractelement <4 x i32> %x, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
+ %vecinit3 = shufflevector <4 x i32> %vecinit1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+ %vecinit5 = shufflevector <4 x i32> %vecinit3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x i32> %vecinit5
+}
+
+;; Test for a bug in the first implementation of LowerBuildVectorv4x32
+define < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
+; CHECK-LABEL: test_insertps_no_undef:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: insertps $8, %xmm1, %xmm1
+; CHECK-NEXT: maxps %xmm1, %xmm0
+; CHECK-NEXT: ret
+ %vecext = extractelement <4 x float> %x, i32 0
+ %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+ %vecext1 = extractelement <4 x float> %x, i32 1
+ %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+ %vecext3 = extractelement <4 x float> %x, i32 2
+ %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2
+ %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
+ %mask = fcmp olt <4 x float> %vecinit5, %x
+ %res = select <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5
+ ret <4 x float> %res
+}
+
+define <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: blendvb_fallback
+; CHECK: blendvb
+; CHECK: ret
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
+ ret <8 x i16> %ret
+}
+
+define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; CHECK-LABEL: insertps_from_vector_load:
+; On X32, account for the argument's move to registers
+; X32: movl 4(%esp), %eax
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK-NEXT: ret
+ %1 = load <4 x float>* %pb, align 16
+ %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
+ ret <4 x float> %2
+}
+
+;; Use a non-zero CountS for insertps
+define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; CHECK-LABEL: insertps_from_vector_load_offset:
+; On X32, account for the argument's move to registers
+; X32: movl 4(%esp), %eax
+; CHECK-NOT: mov
+;; Try to match a bit more of the instr, since we need the load's offset.
+; CHECK: insertps $96, 4(%{{...}}), %
+; CHECK-NEXT: ret
+ %1 = load <4 x float>* %pb, align 16
+ %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
+ ret <4 x float> %2
+}
+
+define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
+; CHECK-LABEL: insertps_from_vector_load_offset_2:
+; On X32, account for the argument's move to registers
+; X32: movl 4(%esp), %eax
+; X32: movl 8(%esp), %ecx
+; CHECK-NOT: mov
+;; Try to match a bit more of the instr, since we need the load's offset.
+; CHECK: insertps $192, 12(%{{...}},%{{...}}), %
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
+ %2 = load <4 x float>* %1, align 16
+ %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
+ ret <4 x float> %3
+}
+
+define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) {
+; CHECK-LABEL: insertps_from_broadcast_loadf32:
+; On X32, account for the arguments' move to registers
+; X32: movl 8(%esp), %eax
+; X32: movl 4(%esp), %ecx
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds float* %fb, i64 %index
+ %2 = load float* %1, align 4
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ %4 = insertelement <4 x float> %3, float %2, i32 1
+ %5 = insertelement <4 x float> %4, float %2, i32 2
+ %6 = insertelement <4 x float> %5, float %2, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
+ ret <4 x float> %7
+}
+
+define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) {
+; CHECK-LABEL: insertps_from_broadcast_loadv4f32:
+; On X32, account for the arguments' move to registers
+; X32: movl 4(%esp), %{{...}}
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK-NEXT: ret
+ %1 = load <4 x float>* %b, align 4
+ %2 = extractelement <4 x float> %1, i32 0
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ %4 = insertelement <4 x float> %3, float %2, i32 1
+ %5 = insertelement <4 x float> %4, float %2, i32 2
+ %6 = insertelement <4 x float> %5, float %2, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
+ ret <4 x float> %7
+}
+
+;; FIXME: We're emitting an extraneous pshufd/vbroadcast.
+define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
+; CHECK-LABEL: insertps_from_broadcast_multiple_use:
+; On X32, account for the arguments' move to registers
+; X32: movl 8(%esp), %eax
+; X32: movl 4(%esp), %ecx
+; CHECK: movss
+; CHECK-NOT: mov
+; CHECK: insertps $48
+; CHECK: insertps $48
+; CHECK: insertps $48
+; CHECK: insertps $48
+; CHECK: addps
+; CHECK: addps
+; CHECK: addps
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds float* %fb, i64 %index
+ %2 = load float* %1, align 4
+ %3 = insertelement <4 x float> undef, float %2, i32 0
+ %4 = insertelement <4 x float> %3, float %2, i32 1
+ %5 = insertelement <4 x float> %4, float %2, i32 2
+ %6 = insertelement <4 x float> %5, float %2, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
+ %8 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %b, <4 x float> %6, i32 48)
+ %9 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %c, <4 x float> %6, i32 48)
+ %10 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %d, <4 x float> %6, i32 48)
+ %11 = fadd <4 x float> %7, %8
+ %12 = fadd <4 x float> %9, %10
+ %13 = fadd <4 x float> %11, %12
+ ret <4 x float> %13
+}
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
index fb7e2db..cf88ade 100644
--- a/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -33,7 +33,7 @@ attributes #0 = { sspreq }
!5 = metadata !{}
!6 = metadata !{metadata !7}
!7 = metadata !{i32 786472, metadata !"max_frame_size", i64 0} ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
-!8 = metadata !{metadata !9}
+!8 = metadata !{metadata !9, metadata !24, metadata !41, metadata !65}
!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"read_response_size", metadata !"read_response_size", metadata !"_Z18read_response_sizev", i32 27, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_Z18read_response_sizev, null, null, metadata !14, i32 27} ; [ DW_TAG_subprogram ] [line 27] [def] [read_response_size]
!10 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/matt/ryan_bug/<unknown>]
!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll
index 265ec80..4db0f9a 100644
--- a/test/CodeGen/X86/stack-protector.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -16,13 +16,14 @@
%struct.anon.0 = type { %union.anon.1 }
%union.anon.1 = type { [2 x i8] }
%struct.small = type { i8 }
+%struct.small_char = type { i32, [5 x i8] }
@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
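; Editorial note: every test below comes in four variants keyed to the four
; stack-protector tiers. A minimal sketch of the IR spelling of each tier
; (function names are illustrative):

define void @no_attr() { ret void }                   ; pass skips the function
define void @with_ssp() ssp { ret void }              ; guards "large" char buffers
define void @with_sspstrong() sspstrong { ret void }  ; also addr-taken locals etc.
define void @with_sspreq() sspreq { ret void }        ; always guarded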
; test1a: array of [16 x i8]
; no ssp attribute
; Requires no protector.
-define void @test1a(i8* %a) nounwind uwtable {
+define void @test1a(i8* %a) {
entry:
; LINUX-I386-LABEL: test1a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -53,7 +54,8 @@ entry:
; test1b: array of [16 x i8]
; ssp attribute
; Requires protector.
-define void @test1b(i8* %a) nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test1b(i8* %a) #0 {
entry:
; LINUX-I386-LABEL: test1b:
; LINUX-I386: mov{{l|q}} %gs:
@@ -88,7 +90,8 @@ entry:
; test1c: array of [16 x i8]
; sspstrong attribute
; Requires protector.
-define void @test1c(i8* %a) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test1c(i8* %a) #1 {
entry:
; LINUX-I386-LABEL: test1c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -119,7 +122,8 @@ entry:
; test1d: array of [16 x i8]
; sspreq attribute
; Requires protector.
-define void @test1d(i8* %a) nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test1d(i8* %a) #2 {
entry:
; LINUX-I386-LABEL: test1d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -150,7 +154,7 @@ entry:
; test2a: struct { [16 x i8] }
; no ssp attribute
; Requires no protector.
-define void @test2a(i8* %a) nounwind uwtable {
+define void @test2a(i8* %a) {
entry:
; LINUX-I386-LABEL: test2a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -183,7 +187,8 @@ entry:
; test2b: struct { [16 x i8] }
; ssp attribute
; Requires protector.
-define void @test2b(i8* %a) nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test2b(i8* %a) #0 {
entry:
; LINUX-I386-LABEL: test2b:
; LINUX-I386: mov{{l|q}} %gs:
@@ -216,7 +221,8 @@ entry:
; test2c: struct { [16 x i8] }
; sspstrong attribute
; Requires protector.
-define void @test2c(i8* %a) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test2c(i8* %a) #1 {
entry:
; LINUX-I386-LABEL: test2c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -249,7 +255,8 @@ entry:
; test2d: struct { [16 x i8] }
; sspreq attribute
; Requires protector.
-define void @test2d(i8* %a) nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test2d(i8* %a) #2 {
entry:
; LINUX-I386-LABEL: test2d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -282,7 +289,7 @@ entry:
; test3a: array of [4 x i8]
; no ssp attribute
; Requires no protector.
-define void @test3a(i8* %a) nounwind uwtable {
+define void @test3a(i8* %a) {
entry:
; LINUX-I386-LABEL: test3a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -313,7 +320,8 @@ entry:
; test3b: array [4 x i8]
; ssp attribute
; Requires no protector.
-define void @test3b(i8* %a) nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test3b(i8* %a) #0 {
entry:
; LINUX-I386-LABEL: test3b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -344,7 +352,8 @@ entry:
; test3c: array of [4 x i8]
; sspstrong attribute
; Requires protector.
-define void @test3c(i8* %a) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test3c(i8* %a) #1 {
entry:
; LINUX-I386-LABEL: test3c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -375,7 +384,8 @@ entry:
; test3d: array of [4 x i8]
; sspreq attribute
; Requires protector.
-define void @test3d(i8* %a) nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test3d(i8* %a) #2 {
entry:
; LINUX-I386-LABEL: test3d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -406,7 +416,7 @@ entry:
; test4a: struct { [4 x i8] }
; no ssp attribute
; Requires no protector.
-define void @test4a(i8* %a) nounwind uwtable {
+define void @test4a(i8* %a) {
entry:
; LINUX-I386-LABEL: test4a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -439,7 +449,8 @@ entry:
; test4b: struct { [4 x i8] }
; ssp attribute
; Requires no protector.
-define void @test4b(i8* %a) nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test4b(i8* %a) #0 {
entry:
; LINUX-I386-LABEL: test4b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -472,7 +483,8 @@ entry:
; test4c: struct { [4 x i8] }
; sspstrong attribute
; Requires protector.
-define void @test4c(i8* %a) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test4c(i8* %a) #1 {
entry:
; LINUX-I386-LABEL: test4c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -505,7 +517,8 @@ entry:
; test4d: struct { [4 x i8] }
; sspreq attribute
; Requires protector.
-define void @test4d(i8* %a) nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test4d(i8* %a) #2 {
entry:
; LINUX-I386-LABEL: test4d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -538,7 +551,7 @@ entry:
; test5a: no arrays / no nested arrays
; no ssp attribute
; Requires no protector.
-define void @test5a(i8* %a) nounwind uwtable {
+define void @test5a(i8* %a) {
entry:
; LINUX-I386-LABEL: test5a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -565,7 +578,8 @@ entry:
; test5b: no arrays / no nested arrays
; ssp attribute
; Requires no protector.
-define void @test5b(i8* %a) nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test5b(i8* %a) #0 {
entry:
; LINUX-I386-LABEL: test5b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -592,7 +606,8 @@ entry:
; test5c: no arrays / no nested arrays
; sspstrong attribute
; Requires no protector.
-define void @test5c(i8* %a) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test5c(i8* %a) #1 {
entry:
; LINUX-I386-LABEL: test5c:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -619,7 +634,8 @@ entry:
; test5d: no arrays / no nested arrays
; sspreq attribute
; Requires protector.
-define void @test5d(i8* %a) nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test5d(i8* %a) #2 {
entry:
; LINUX-I386-LABEL: test5d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -646,7 +662,7 @@ entry:
; test6a: Address-of local taken (j = &a)
; no ssp attribute
; Requires no protector.
-define void @test6a() nounwind uwtable {
+define void @test6a() {
entry:
; LINUX-I386-LABEL: test6a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -677,7 +693,8 @@ entry:
; test6b: Address-of local taken (j = &a)
; ssp attribute
; Requires no protector.
-define void @test6b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test6b() #0 {
entry:
; LINUX-I386-LABEL: test6b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -708,7 +725,8 @@ entry:
; test6c: Address-of local taken (j = &a)
; sspstrong attribute
; Requires protector.
-define void @test6c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test6c() #1 {
entry:
; LINUX-I386-LABEL: test6c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -739,7 +757,8 @@ entry:
; test6d: Address-of local taken (j = &a)
; sspreq attribute
; Requires protector.
-define void @test6d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test6d() #2 {
entry:
; LINUX-I386-LABEL: test6d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -770,7 +789,7 @@ entry:
; test7a: PtrToInt Cast
; no ssp attribute
; Requires no protector.
-define void @test7a() nounwind uwtable readnone {
+define void @test7a() {
entry:
; LINUX-I386-LABEL: test7a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -796,7 +815,8 @@ entry:
; test7b: PtrToInt Cast
; ssp attribute
; Requires no protector.
-define void @test7b() nounwind uwtable readnone ssp {
+; Function Attrs: ssp
+define void @test7b() #0 {
entry:
; LINUX-I386-LABEL: test7b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -822,7 +842,8 @@ entry:
; test7c: PtrToInt Cast
; sspstrong attribute
; Requires protector.
-define void @test7c() nounwind uwtable readnone sspstrong {
+; Function Attrs: sspstrong
+define void @test7c() #1 {
entry:
; LINUX-I386-LABEL: test7c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -848,7 +869,8 @@ entry:
; test7d: PtrToInt Cast
; sspreq attribute
; Requires protector.
-define void @test7d() nounwind uwtable readnone sspreq {
+; Function Attrs: sspreq
+define void @test7d() #2 {
entry:
; LINUX-I386-LABEL: test7d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -874,7 +896,7 @@ entry:
; test8a: Passing addr-of to function call
; no ssp attribute
; Requires no protector.
-define void @test8a() nounwind uwtable {
+define void @test8a() {
entry:
; LINUX-I386-LABEL: test8a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -892,14 +914,15 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%b = alloca i32, align 4
- call void @funcall(i32* %b) nounwind
+ call void @funcall(i32* %b)
ret void
}
; test8b: Passing addr-of to function call
; ssp attribute
; Requires no protector.
-define void @test8b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test8b() #0 {
entry:
; LINUX-I386-LABEL: test8b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -917,14 +940,15 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%b = alloca i32, align 4
- call void @funcall(i32* %b) nounwind
+ call void @funcall(i32* %b)
ret void
}
; test8c: Passing addr-of to function call
; sspstrong attribute
; Requires protector.
-define void @test8c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test8c() #1 {
entry:
; LINUX-I386-LABEL: test8c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -942,14 +966,15 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%b = alloca i32, align 4
- call void @funcall(i32* %b) nounwind
+ call void @funcall(i32* %b)
ret void
}
; test8d: Passing addr-of to function call
; sspreq attribute
; Requires protector.
-define void @test8d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test8d() #2 {
entry:
; LINUX-I386-LABEL: test8d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -967,14 +992,14 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%b = alloca i32, align 4
- call void @funcall(i32* %b) nounwind
+ call void @funcall(i32* %b)
ret void
}
; test9a: Addr-of in select instruction
; no ssp attribute
; Requires no protector.
-define void @test9a() nounwind uwtable {
+define void @test9a() {
entry:
; LINUX-I386-LABEL: test9a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -992,7 +1017,7 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
@@ -1003,7 +1028,8 @@ entry:
; test9b: Addr-of in select instruction
; ssp attribute
; Requires no protector.
-define void @test9b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test9b() #0 {
entry:
; LINUX-I386-LABEL: test9b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1021,7 +1047,7 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
@@ -1032,7 +1058,8 @@ entry:
; test9c: Addr-of in select instruction
; sspstrong attribute
; Requires protector.
-define void @test9c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test9c() #1 {
entry:
; LINUX-I386-LABEL: test9c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1050,7 +1077,7 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
@@ -1061,7 +1088,8 @@ entry:
; test9d: Addr-of in select instruction
; sspreq attribute
; Requires protector.
-define void @test9d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test9d() #2 {
entry:
; LINUX-I386-LABEL: test9d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1079,7 +1107,7 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
@@ -1090,7 +1118,7 @@ entry:
; test10a: Addr-of in phi instruction
; no ssp attribute
; Requires no protector.
-define void @test10a() nounwind uwtable {
+define void @test10a() {
entry:
; LINUX-I386-LABEL: test10a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1108,13 +1136,13 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp = fcmp ogt double %call, 3.140000e+00
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %call1 = call double @testi_aux() nounwind
+ %call1 = call double @testi_aux()
store double %call1, double* %x, align 8
br label %if.end4
@@ -1127,14 +1155,15 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
; test10b: Addr-of in phi instruction
; ssp attribute
; Requires no protector.
-define void @test10b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test10b() #0 {
entry:
; LINUX-I386-LABEL: test10b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1152,13 +1181,13 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp = fcmp ogt double %call, 3.140000e+00
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %call1 = call double @testi_aux() nounwind
+ %call1 = call double @testi_aux()
store double %call1, double* %x, align 8
br label %if.end4
@@ -1171,14 +1200,15 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
; test10c: Addr-of in phi instruction
; sspstrong attribute
; Requires protector.
-define void @test10c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test10c() #1 {
entry:
; LINUX-I386-LABEL: test10c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1196,13 +1226,13 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp = fcmp ogt double %call, 3.140000e+00
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %call1 = call double @testi_aux() nounwind
+ %call1 = call double @testi_aux()
store double %call1, double* %x, align 8
br label %if.end4
@@ -1215,14 +1245,15 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
; test10d: Addr-of in phi instruction
; sspreq attribute
; Requires protector.
-define void @test10d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test10d() #2 {
entry:
; LINUX-I386-LABEL: test10d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1240,13 +1271,13 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%x = alloca double, align 8
- %call = call double @testi_aux() nounwind
+ %call = call double @testi_aux()
store double %call, double* %x, align 8
%cmp = fcmp ogt double %call, 3.140000e+00
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %call1 = call double @testi_aux() nounwind
+ %call1 = call double @testi_aux()
store double %call1, double* %x, align 8
br label %if.end4
@@ -1259,14 +1290,14 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
; test11a: Addr-of struct element. (GEP followed by store).
; no ssp attribute
; Requires no protector.
-define void @test11a() nounwind uwtable {
+define void @test11a() {
entry:
; LINUX-I386-LABEL: test11a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1295,7 +1326,8 @@ entry:
; test11b: Addr-of struct element. (GEP followed by store).
; ssp attribute
; Requires no protector.
-define void @test11b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test11b() #0 {
entry:
; LINUX-I386-LABEL: test11b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1324,7 +1356,8 @@ entry:
; test11c: Addr-of struct element. (GEP followed by store).
; sspstrong attribute
; Requires protector.
-define void @test11c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test11c() #1 {
entry:
; LINUX-I386-LABEL: test11c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1353,7 +1386,8 @@ entry:
; test11d: Addr-of struct element. (GEP followed by store).
; sspreq attribute
; Requires protector.
-define void @test11d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test11d() #2 {
entry:
; LINUX-I386-LABEL: test11d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1382,7 +1416,7 @@ entry:
; test12a: Addr-of struct element, GEP followed by ptrtoint.
; no ssp attribute
; Requires no protector.
-define void @test12a() nounwind uwtable {
+define void @test12a() {
entry:
; LINUX-I386-LABEL: test12a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1410,7 +1444,8 @@ entry:
; test12b: Addr-of struct element, GEP followed by ptrtoint.
; ssp attribute
; Requires no protector.
-define void @test12b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test12b() #0 {
entry:
; LINUX-I386-LABEL: test12b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1437,8 +1472,9 @@ entry:
; test12c: Addr-of struct element, GEP followed by ptrtoint.
; sspstrong attribute
; Requires protector.
-define void @test12c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test12c() #1 {
entry:
; LINUX-I386-LABEL: test12c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1466,7 +1501,8 @@ entry:
; test12d: Addr-of struct element, GEP followed by ptrtoint.
; sspreq attribute
; Requires protector.
-define void @test12d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test12d() #2 {
entry:
; LINUX-I386-LABEL: test12d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1494,7 +1530,7 @@ entry:
; test13a: Addr-of struct element, GEP followed by callinst.
; no ssp attribute
; Requires no protector.
-define void @test13a() nounwind uwtable {
+define void @test13a() {
entry:
; LINUX-I386-LABEL: test13a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1513,14 +1549,15 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
; test13b: Addr-of struct element, GEP followed by callinst.
; ssp attribute
; Requires no protector.
-define void @test13b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test13b() #0 {
entry:
; LINUX-I386-LABEL: test13b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1539,14 +1576,15 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
; test13c: Addr-of struct element, GEP followed by callinst.
; sspstrong attribute
; Requires protector.
-define void @test13c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test13c() #1 {
entry:
; LINUX-I386-LABEL: test13c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1565,14 +1603,15 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
; test13d: Addr-of struct element, GEP followed by callinst.
; sspreq attribute
; Requires protector.
-define void @test13d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test13d() #2 {
entry:
; LINUX-I386-LABEL: test13d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1591,14 +1630,14 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
; test14a: Addr-of a local, optimized into a GEP (e.g., &a - 12)
; no ssp attribute
; Requires no protector.
-define void @test14a() nounwind uwtable {
+define void @test14a() {
entry:
; LINUX-I386-LABEL: test14a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1617,14 +1656,15 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
; test14b: Addr-of a local, optimized into a GEP (e.g., &a - 12)
; ssp attribute
; Requires no protector.
-define void @test14b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test14b() #0 {
entry:
; LINUX-I386-LABEL: test14b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1643,14 +1683,15 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
; test14c: Addr-of a local, optimized into a GEP (e.g., &a - 12)
; sspstrong attribute
; Requires protector.
-define void @test14c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test14c() #1 {
entry:
; LINUX-I386-LABEL: test14c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1669,14 +1710,15 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
; test14d: Addr-of a local, optimized into a GEP (e.g., &a - 12)
; sspreq attribute
; Requires protector.
-define void @test14d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test14d() #2 {
entry:
; LINUX-I386-LABEL: test14d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1695,7 +1737,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1703,7 +1745,7 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; no ssp attribute
; Requires no protector.
-define void @test15a() nounwind uwtable {
+define void @test15a() {
entry:
; LINUX-I386-LABEL: test15a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1734,7 +1776,8 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; ssp attribute
; Requires no protector.
-define void @test15b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test15b() #0 {
entry:
; LINUX-I386-LABEL: test15b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1765,7 +1808,8 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; sspstrong attribute
; Requires protector.
-define void @test15c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test15c() #1 {
entry:
; LINUX-I386-LABEL: test15c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1796,7 +1840,8 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; sspreq attribute
; Requires protector.
-define void @test15d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test15d() #2 {
entry:
; LINUX-I386-LABEL: test15d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1827,7 +1872,7 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; no ssp attribute
; Requires no protector.
-define void @test16a() nounwind uwtable {
+define void @test16a() {
entry:
; LINUX-I386-LABEL: test16a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1847,7 +1892,7 @@ entry:
%a = alloca i32, align 4
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
- call void @funfloat(float* %0) nounwind
+ call void @funfloat(float* %0)
ret void
}
@@ -1855,7 +1900,8 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; ssp attribute
; Requires no protector.
-define void @test16b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test16b() #0 {
entry:
; LINUX-I386-LABEL: test16b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1875,7 +1921,7 @@ entry:
%a = alloca i32, align 4
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
- call void @funfloat(float* %0) nounwind
+ call void @funfloat(float* %0)
ret void
}
@@ -1883,7 +1929,8 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; sspstrong attribute
; Requires protector.
-define void @test16c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test16c() #1 {
entry:
; LINUX-I386-LABEL: test16c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1903,7 +1950,7 @@ entry:
%a = alloca i32, align 4
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
- call void @funfloat(float* %0) nounwind
+ call void @funfloat(float* %0)
ret void
}
@@ -1911,7 +1958,8 @@ entry:
; (e.g., int a; ... ; float *b = &a;)
; sspreq attribute
; Requires protector.
-define void @test16d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test16d() #2 {
entry:
; LINUX-I386-LABEL: test16d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -1931,14 +1979,14 @@ entry:
%a = alloca i32, align 4
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
- call void @funfloat(float* %0) nounwind
+ call void @funfloat(float* %0)
ret void
}
; test17a: Addr-of a vector nested in a struct
; no ssp attribute
; Requires no protector.
-define void @test17a() nounwind uwtable {
+define void @test17a() {
entry:
; LINUX-I386-LABEL: test17a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1958,14 +2006,15 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
; test17b: Addr-of a vector nested in a struct
; ssp attribute
; Requires no protector.
-define void @test17b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test17b() #0 {
entry:
; LINUX-I386-LABEL: test17b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -1985,14 +2034,15 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
; test17c: Addr-of a vector nested in a struct
; sspstrong attribute
; Requires protector.
-define void @test17c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test17c() #1 {
entry:
; LINUX-I386-LABEL: test17c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2012,14 +2062,15 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
; test17d: Addr-of a vector nested in a struct
; sspreq attribute
; Requires protector.
-define void @test17d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test17d() #2 {
entry:
; LINUX-I386-LABEL: test17d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2039,14 +2090,14 @@ entry:
%c = alloca %struct.vec, align 16
%y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
%add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
; test18a: Addr-of a variable passed into an invoke instruction.
; no ssp attribute
; Requires no protector.
-define i32 @test18a() uwtable {
+define i32 @test18a() {
entry:
; LINUX-I386-LABEL: test18a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2082,7 +2133,8 @@ lpad:
; test18b: Addr-of a variable passed into an invoke instruction.
; ssp attribute
; Requires no protector.
-define i32 @test18b() uwtable ssp {
+; Function Attrs: ssp
+define i32 @test18b() #0 {
entry:
; LINUX-I386-LABEL: test18b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2118,7 +2170,8 @@ lpad:
; test18c: Addr-of a variable passed into an invoke instruction.
; sspstrong attribute
; Requires protector.
-define i32 @test18c() uwtable sspstrong {
+; Function Attrs: sspstrong
+define i32 @test18c() #1 {
entry:
; LINUX-I386-LABEL: test18c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2154,7 +2207,8 @@ lpad:
; test18d: Addr-of a variable passed into an invoke instruction.
; sspreq attribute
; Requires protector.
-define i32 @test18d() uwtable sspreq {
+; Function Attrs: sspreq
+define i32 @test18d() #2 {
entry:
; LINUX-I386-LABEL: test18d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2186,12 +2240,11 @@ lpad:
catch i8* null
ret i32 0
}
-
; test19a: Addr-of a struct element passed into an invoke instruction.
; (GEP followed by an invoke)
; no ssp attribute
; Requires no protector.
-define i32 @test19a() uwtable {
+define i32 @test19a() {
entry:
; LINUX-I386-LABEL: test19a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2230,7 +2283,8 @@ lpad:
; (GEP followed by an invoke)
; ssp attribute
; Requires no protector.
-define i32 @test19b() uwtable ssp {
+; Function Attrs: ssp
+define i32 @test19b() #0 {
entry:
; LINUX-I386-LABEL: test19b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2269,7 +2323,8 @@ lpad:
; (GEP followed by an invoke)
; sspstrong attribute
; Requires protector.
-define i32 @test19c() uwtable sspstrong {
+; Function Attrs: sspstrong
+define i32 @test19c() #1 {
entry:
; LINUX-I386-LABEL: test19c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2308,7 +2363,8 @@ lpad:
; (GEP followed by an invoke)
; sspreq attribute
; Requires protector.
-define i32 @test19d() uwtable sspreq {
+; Function Attrs: sspreq
+define i32 @test19d() #2 {
entry:
; LINUX-I386-LABEL: test19d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2350,7 +2406,7 @@ lpad:
; test20a: Addr-of a pointer
; no ssp attribute
; Requires no protector.
-define void @test20a() nounwind uwtable {
+define void @test20a() {
entry:
; LINUX-I386-LABEL: test20a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2380,7 +2436,8 @@ entry:
; test20b: Addr-of a pointer
; ssp attribute
; Requires no protector.
-define void @test20b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test20b() #0 {
entry:
; LINUX-I386-LABEL: test20b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2410,7 +2467,8 @@ entry:
; test20c: Addr-of a pointer
; sspstrong attribute
; Requires protector.
-define void @test20c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test20c() #1 {
entry:
; LINUX-I386-LABEL: test20c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2440,7 +2498,8 @@ entry:
; test20d: Addr-of a pointer
; sspreq attribute
; Requires protector.
-define void @test20d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test20d() #2 {
entry:
; LINUX-I386-LABEL: test20d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2470,7 +2529,7 @@ entry:
; test21a: Addr-of a casted pointer
; no ssp attribute
; Requires no protector.
-define void @test21a() nounwind uwtable {
+define void @test21a() {
entry:
; LINUX-I386-LABEL: test21a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2501,7 +2560,8 @@ entry:
; test21b: Addr-of a casted pointer
; ssp attribute
; Requires no protector.
-define void @test21b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test21b() #0 {
entry:
; LINUX-I386-LABEL: test21b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2532,7 +2592,8 @@ entry:
; test21c: Addr-of a casted pointer
; sspstrong attribute
; Requires protector.
-define void @test21c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test21c() #1 {
entry:
; LINUX-I386-LABEL: test21c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2563,7 +2624,8 @@ entry:
; test21d: Addr-of a casted pointer
; sspreq attribute
; Requires protector.
-define void @test21d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test21d() #2 {
entry:
; LINUX-I386-LABEL: test21d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2594,7 +2656,7 @@ entry:
; test22a: [2 x i8] in a class
; no ssp attribute
; Requires no protector.
-define signext i8 @test22a() nounwind uwtable {
+define signext i8 @test22a() {
entry:
; LINUX-I386-LABEL: test22a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2621,7 +2683,8 @@ entry:
; test22b: [2 x i8] in a class
; ssp attribute
; Requires no protector.
-define signext i8 @test22b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define signext i8 @test22b() #0 {
entry:
; LINUX-I386-LABEL: test22b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2648,7 +2711,8 @@ entry:
; test22c: [2 x i8] in a class
; sspstrong attribute
; Requires protector.
-define signext i8 @test22c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define signext i8 @test22c() #1 {
entry:
; LINUX-I386-LABEL: test22c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2675,7 +2739,8 @@ entry:
; test22d: [2 x i8] in a class
; sspreq attribute
; Requires protector.
-define signext i8 @test22d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define signext i8 @test22d() #2 {
entry:
; LINUX-I386-LABEL: test22d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2702,7 +2767,7 @@ entry:
; test23a: [2 x i8] nested in several layers of structs and unions
; no ssp attribute
; Requires no protector.
-define signext i8 @test23a() nounwind uwtable {
+define signext i8 @test23a() {
entry:
; LINUX-I386-LABEL: test23a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2733,7 +2798,8 @@ entry:
; test23b: [2 x i8] nested in several layers of structs and unions
; ssp attribute
; Requires no protector.
-define signext i8 @test23b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define signext i8 @test23b() #0 {
entry:
; LINUX-I386-LABEL: test23b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2764,7 +2830,8 @@ entry:
; test23c: [2 x i8] nested in several layers of structs and unions
; sspstrong attribute
; Requires protector.
-define signext i8 @test23c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define signext i8 @test23c() #1 {
entry:
; LINUX-I386-LABEL: test23c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2795,7 +2862,8 @@ entry:
; test23d: [2 x i8] nested in several layers of structs and unions
; sspreq attribute
; Requires protector.
-define signext i8 @test23d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define signext i8 @test23d() #2 {
entry:
; LINUX-I386-LABEL: test23d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2826,7 +2894,7 @@ entry:
; test24a: Variable sized alloca
; no ssp attribute
; Requires no protector.
-define void @test24a(i32 %n) nounwind uwtable {
+define void @test24a(i32 %n) {
entry:
; LINUX-I386-LABEL: test24a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2857,7 +2925,8 @@ entry:
; test24b: Variable sized alloca
; ssp attribute
; Requires protector.
-define void @test24b(i32 %n) nounwind uwtable ssp {
+; Function Attrs: ssp
+define void @test24b(i32 %n) #0 {
entry:
; LINUX-I386-LABEL: test24b:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2888,7 +2957,8 @@ entry:
; test24c: Variable sized alloca
; sspstrong attribute
; Requires protector.
-define void @test24c(i32 %n) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test24c(i32 %n) #1 {
entry:
; LINUX-I386-LABEL: test24c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2919,7 +2989,8 @@ entry:
; test24d: Variable sized alloca
; sspreq attribute
; Requires protector.
-define void @test24d(i32 %n) nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define void @test24d(i32 %n) #2 {
entry:
; LINUX-I386-LABEL: test24d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -2950,7 +3021,7 @@ entry:
; test25a: array of [4 x i32]
; no ssp attribute
; Requires no protector.
-define i32 @test25a() nounwind uwtable {
+define i32 @test25a() {
entry:
; LINUX-I386-LABEL: test25a:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -2976,7 +3047,8 @@ entry:
; test25b: array of [4 x i32]
; ssp attribute
; Requires no protector, except for Darwin which _does_ require a protector.
-define i32 @test25b() nounwind uwtable ssp {
+; Function Attrs: ssp
+define i32 @test25b() #0 {
entry:
; LINUX-I386-LABEL: test25b:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -3002,7 +3074,8 @@ entry:
; test25c: array of [4 x i32]
; sspstrong attribute
; Requires protector.
-define i32 @test25c() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define i32 @test25c() #1 {
entry:
; LINUX-I386-LABEL: test25c:
; LINUX-I386: mov{{l|q}} %gs:
@@ -3028,7 +3101,8 @@ entry:
; test25d: array of [4 x i32]
; sspreq attribute
; Requires protector.
-define i32 @test25d() nounwind uwtable sspreq {
+; Function Attrs: sspreq
+define i32 @test25d() #2 {
entry:
; LINUX-I386-LABEL: test25d:
; LINUX-I386: mov{{l|q}} %gs:
@@ -3056,7 +3130,8 @@ entry:
; a stack protector.
; sspstrong attribute
; Requires no protector.
-define void @test26() nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define void @test26() #1 {
entry:
; LINUX-I386-LABEL: test26:
; LINUX-I386-NOT: calll __stack_chk_fail
@@ -3087,7 +3162,8 @@ entry:
; Verify that the address-of analysis does not get stuck in infinite
; recursion when chasing the alloca through the PHI nodes.
; Requires protector.
-define i32 @test27(i32 %arg) nounwind uwtable sspstrong {
+; Function Attrs: sspstrong
+define i32 @test27(i32 %arg) #1 {
bb:
; LINUX-I386-LABEL: test27:
; LINUX-I386: mov{{l|q}} %gs:
@@ -3105,7 +3181,7 @@ bb:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%tmp = alloca %struct.small*, align 8
- %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp) nounwind
+ %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp)
%tmp2 = load %struct.small** %tmp, align 8
%tmp3 = ptrtoint %struct.small* %tmp2 to i64
%tmp4 = trunc i64 %tmp3 to i32
@@ -3133,10 +3209,239 @@ bb17: ; preds = %bb6
bb21: ; preds = %bb6, %bb
%tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ]
- %tmp23 = call i32 (...)* @dummy(i32 %tmp22) nounwind
+ %tmp23 = call i32 (...)* @dummy(i32 %tmp22)
ret i32 undef
}
+; test28a: An array of [32 x i8] and a requested ssp-buffer-size of 33.
+; Requires no protector.
+; Function Attrs: ssp stack-protector-buffer-size=33
+define i32 @test28a() #3 {
+entry:
+; LINUX-I386-LABEL: test28a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64-LABEL: test28a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64-LABEL: test28a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64-LABEL: test28a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %test = alloca [32 x i8], align 16
+ %arraydecay = getelementptr inbounds [32 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ ret i32 %call
+}
+
+; test28b: An array of [33 x i8] and a requested ssp-buffer-size of 33.
+; Requires protector.
+; Function Attrs: ssp stack-protector-buffer-size=33
+define i32 @test28b() #3 {
+entry:
+; LINUX-I386-LABEL: test28b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64-LABEL: test28b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64-LABEL: test28b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64-LABEL: test28b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %test = alloca [33 x i8], align 16
+ %arraydecay = getelementptr inbounds [33 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ ret i32 %call
+}
+
+; test29a: An array of [4 x i8] and a requested ssp-buffer-size of 5.
+; Requires no protector.
+; Function Attrs: ssp stack-protector-buffer-size=5
+define i32 @test29a() #4 {
+entry:
+; LINUX-I386-LABEL: test29a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64-LABEL: test29a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64-LABEL: test29a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64-LABEL: test29a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %test = alloca [4 x i8], align 1
+ %arraydecay = getelementptr inbounds [4 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ ret i32 %call
+}
+
+; test29b: An array of [5 x i8] and a requested ssp-buffer-size of 5.
+; Requires protector.
+; Function Attrs: ssp stack-protector-buffer-size=5
+define i32 @test29b() #4 {
+entry:
+; LINUX-I386-LABEL: test29b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64-LABEL: test29b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64-LABEL: test29b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64-LABEL: test29b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %test = alloca [5 x i8], align 1
+ %arraydecay = getelementptr inbounds [5 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ ret i32 %call
+}
+
+; test30a: A structure containing an i32 and an array of [5 x i8].
+; Requested ssp-buffer-size of 6.
+; Requires no protector.
+; Function Attrs: ssp stack-protector-buffer-size=6
+define i32 @test30a() #5 {
+entry:
+; LINUX-I386-LABEL: test30a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64-LABEL: test30a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64-LABEL: test30a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64-LABEL: test30a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %test = alloca %struct.small_char, align 4
+ %test.coerce = alloca { i64, i8 }
+ %0 = bitcast { i64, i8 }* %test.coerce to i8*
+ %1 = bitcast %struct.small_char* %test to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 12, i32 0, i1 false)
+ %2 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 0
+ %3 = load i64* %2, align 1
+ %4 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 1
+ %5 = load i8* %4, align 1
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
+ ret i32 %call
+}
+
+; test30b: A structure containing an i32 and an array of [5 x i8].
+; Requested ssp-buffer-size of 5.
+; Requires protector.
+; Function Attrs: ssp stack-protector-buffer-size=5
+define i32 @test30b() #4 {
+entry:
+; LINUX-I386-LABEL: test30b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64-LABEL: test30b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64-LABEL: test30b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64-LABEL: test30b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %test = alloca %struct.small_char, align 4
+ %test.coerce = alloca { i64, i8 }
+ %0 = bitcast { i64, i8 }* %test.coerce to i8*
+ %1 = bitcast %struct.small_char* %test to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 12, i32 0, i1 false)
+ %2 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 0
+ %3 = load i64* %2, align 1
+ %4 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 1
+ %5 = load i8* %4, align 1
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
+ ret i32 %call
+}
+
+; test31a: An alloca of size 4.
+; Requested ssp-buffer-size of 6.
+; Requires no protector.
+; Function Attrs: ssp stack-protector-buffer-size=6
+define i32 @test31a() #5 {
+entry:
+; LINUX-I386-LABEL: test31a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64-LABEL: test31a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64-LABEL: test31a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64-LABEL: test31a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %test = alloca i8*, align 8
+ %0 = alloca i8, i64 4
+ store i8* %0, i8** %test, align 8
+ %1 = load i8** %test, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %1)
+ ret i32 %call
+}
+
+; test31b: An alloca of size 5.
+; Requested ssp-buffer-size of 5.
+; Requires protector.
+define i32 @test31b() #4 {
+entry:
+; LINUX-I386-LABEL: test31b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64-LABEL: test31b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64-LABEL: test31b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64-LABEL: test31b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %test = alloca i8*, align 8
+ %0 = alloca i8, i64 5
+ store i8* %0, i8** %test, align 8
+ %1 = load i8** %test, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %1)
+ ret i32 %call
+}
+
declare double @testi_aux()
declare i8* @strcpy(i8*, i8*)
declare i32 @printf(i8*, ...)
@@ -3148,3 +3453,11 @@ declare void @_Z3exceptPi(i32*)
declare i32 @__gxx_personality_v0(...)
declare i32* @getp()
declare i32 @dummy(...)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+attributes #0 = { ssp }
+attributes #1 = { sspstrong }
+attributes #2 = { sspreq }
+attributes #3 = { ssp "stack-protector-buffer-size"="33" }
+attributes #4 = { ssp "stack-protector-buffer-size"="5" }
+attributes #5 = { ssp "stack-protector-buffer-size"="6" }
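; Editorial note: tests 28-31 above exercise the "stack-protector-buffer-size"
; string attribute, a per-function counterpart of the -stack-protector-buffer-size
; option (default 8): under plain ssp, a buffer triggers a guard only once its
; size reaches the threshold. A minimal sketch (the #6 group id is illustrative
; and unused by the file above):

define void @below_threshold() #6 {    ; 32 < 33, so no guard expected
  %buf = alloca [32 x i8]
  ret void
}

define void @at_threshold() #6 {       ; 33 >= 33, so a guard is expected
  %buf = alloca [33 x i8]
  ret void
}

attributes #6 = { ssp "stack-protector-buffer-size"="33" }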
diff --git a/test/CodeGen/X86/stackpointer.ll b/test/CodeGen/X86/stackpointer.ll
new file mode 100644
index 0000000..80bcfbf
--- /dev/null
+++ b/test/CodeGen/X86/stackpointer.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnueabi | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-linux-gnueabi | FileCheck %s --check-prefix=OPT
+
+define i64 @get_stack() nounwind {
+entry:
+; CHECK-LABEL: get_stack:
+; CHECK: movq %rsp, %rax
+ %sp = call i64 @llvm.read_register.i64(metadata !0)
+; OPT: @llvm.read_register.i64
+ ret i64 %sp
+}
+
+define void @set_stack(i64 %val) nounwind {
+entry:
+; CHECK-LABEL: set_stack:
+; CHECK: movq %rdi, %rsp
+ call void @llvm.write_register.i64(metadata !0, i64 %val)
+; OPT: @llvm.write_register.i64
+ ret void
+}
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+declare void @llvm.write_register.i64(metadata, i64) nounwind
+
+; register unsigned long current_stack_pointer asm("rsp");
+; CHECK-NOT: .asciz "rsp"
+!0 = metadata !{metadata !"rsp\00"}
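; Editorial note: the !0 metadata names the physical register the two
; intrinsics operate on, and the OPT run line checks that even -O3 leaves
; the calls intact rather than folding them. A minimal sketch reading a
; different register the same way (assuming the target reserves it; the
; !1 node is illustrative):

declare i64 @llvm.read_register.i64(metadata) nounwind

define i64 @get_frame_pointer() nounwind {
  %fp = call i64 @llvm.read_register.i64(metadata !1)
  ret i64 %fp
}

!1 = metadata !{metadata !"rbp\00"}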
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
index 76a8402..75e7fc4 100644
--- a/test/CodeGen/X86/tls.ll
+++ b/test/CodeGen/X86/tls.ll
@@ -2,6 +2,8 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-windows-gnu | FileCheck -check-prefix=MINGW32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-windows-gnu | FileCheck -check-prefix=X64_WIN %s
@i1 = thread_local global i32 15
@i2 = external thread_local global i32
@@ -30,6 +32,12 @@ define i32 @f1() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f1:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32* @i1
@@ -57,6 +65,12 @@ define i32* @f2() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f2:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret i32* @i1
@@ -83,6 +97,12 @@ define i32 @f3() nounwind {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f3:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32* @i2
@@ -110,6 +130,12 @@ define i32* @f4() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f4:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret i32* @i2
@@ -134,6 +160,12 @@ define i32 @f5() nounwind {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f5:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i3@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32* @i3
@@ -161,6 +193,12 @@ define i32* @f6() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f6:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i3@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret i32* @i3
@@ -173,6 +211,12 @@ define i32 @f7() {
; X64_LINUX-LABEL: f7:
; X64_LINUX: movl %fs:i4@TPOFF, %eax
; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f7:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i4@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32* @i4
@@ -188,6 +232,12 @@ define i32* @f8() {
; X64_LINUX: movq %fs:0, %rax
; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f8:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i4@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret i32* @i4
@@ -200,6 +250,12 @@ define i32 @f9() {
; X64_LINUX-LABEL: f9:
; X64_LINUX: movl %fs:i5@TPOFF, %eax
; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f9:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i5@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32* @i5
@@ -215,6 +271,12 @@ define i32* @f10() {
; X64_LINUX: movq %fs:0, %rax
; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
; X64_LINUX-NEXT: ret
+; MINGW32-LABEL: _f10:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i5@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret i32* @i5
@@ -239,6 +301,12 @@ define i16 @f11() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
; X64_WIN: ret
+; MINGW32-LABEL: _f11:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movzwl _s1@SECREL32(%eax), %eax
+; MINGW32: retl
entry:
%tmp1 = load i16* @s1
@@ -264,6 +332,13 @@ define i32 @f12() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f12:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movswl _s1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
+
entry:
%tmp1 = load i16* @s1
@@ -290,6 +365,12 @@ define i8 @f13() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f13:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movb _b1@SECREL32(%eax), %al
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i8* @b1
@@ -315,6 +396,12 @@ define i32 @f14() {
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
+; MINGW32-LABEL: _f14:
+; MINGW32: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movsbl _b1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i8* @b1
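; Editorial note: the repeated MINGW32 pattern above is the native Win32 TLS
; access sequence: %fs points at the TEB, whose ThreadLocalStoragePointer
; array lives at offset 0x2C = 44 on 32-bit Windows; __tls_index selects this
; module's slot in that array (the (%ecx,%eax,4) scaled load), and the
; @SECREL32 relocation adds the variable's offset inside the module's TLS
; block. A minimal IR sketch that produces that pattern on the
; x86-pc-windows-gnu triple:

@tls_i = thread_local global i32 0

define i32 @read_tls() {
entry:
  %v = load i32* @tls_i
  ret i32 %v
}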
diff --git a/test/CodeGen/X86/vec_shuffle-41.ll b/test/CodeGen/X86/vec_shuffle-41.ll
new file mode 100644
index 0000000..28fdd2f
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-41.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+
+; Use buildFromShuffleMostly, which allows this to be generated as two 128-bit
+; shuffles and an insert.
+
+; This is the (somewhat questionable) LLVM IR that is generated for:
+; x8.s0123456 = x8.s1234567; // x8 is a <8 x float> type
+; x8.s7 = f; // f is float
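+; (The shift/extend pair below just funnels lanes 1-7 into lanes 0-6; it is
+; equivalent to a single <8 x i32> <1,2,3,4,5,6,7,undef> shuffle.)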
+
+
+define <8 x float> @test1(<8 x float> %a, float %b) {
+; CHECK-LABEL: test1:
+; CHECK: vinsertps
+; CHECK-NOT: vinsertps
+entry:
+ %shift = shufflevector <8 x float> %a, <8 x float> undef, <7 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %extend = shufflevector <7 x float> %shift, <7 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
+ %insert = insertelement <8 x float> %extend, float %b, i32 7
+
+ ret <8 x float> %insert
+}
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 543c96e..a02e383 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -32,3 +32,19 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
; SSE3-LABEL: test_v2sd:
; SSE3: movddup
}
+
+; Fold extract of a load into the load's address computation. This avoids spilling to the stack.
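+; (With the variable lane index %j, the extract would otherwise be lowered
+; through a stack temporary; folded into the address it becomes a scalar load
+; that vbroadcastss can consume directly.)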
+define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
+ %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
+ %2 = load <4 x float>* %1, align 16
+ %3 = extractelement <4 x float> %2, i64 %j
+ %4 = insertelement <4 x float> undef, float %3, i32 0
+ %5 = insertelement <4 x float> %4, float %3, i32 1
+ %6 = insertelement <4 x float> %5, float %3, i32 2
+ %7 = insertelement <4 x float> %6, float %3, i32 3
+ ret <4 x float> %7
+
+; AVX-LABEL: load_extract_splat
+; AVX-NOT: movs
+; AVX: vbroadcastss
+}
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
new file mode 100644
index 0000000..4c30184
--- /dev/null
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -0,0 +1,217 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41
+; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX
+
+define <4 x i32> @test1(<4 x i32> %a) {
+ %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %div
+
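+; The checks below match the classic unsigned magic-number lowering; a sketch,
+; assuming the standard u32 magic 0x24924925 for 7:
+;   hi = mulhu(n, 0x24924925)          ; pmuludq + shuffles
+;   q  = (((n - hi) >> 1) + hi) >> 2   ; psubd, psrld $1, padd, psrld $2
+; e.g. n = 49: hi = 7, (49 - 7) >> 1 = 21, (21 + 7) >> 2 = 7.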
+; SSE41-LABEL: test1:
+; SSE41: pmuludq
+; SSE41: pshufd $57
+; SSE41: pmuludq
+; SSE41: shufps $-35
+; SSE41: psubd
+; SSE41: psrld $1
+; SSE41: padd
+; SSE41: psrld $2
+
+; AVX-LABEL: test1:
+; AVX: vpmuludq
+; AVX: vpshufd $57
+; AVX: vpmuludq
+; AVX: vshufps $-35
+; AVX: vpsubd
+; AVX: vpsrld $1
+; AVX: vpadd
+; AVX: vpsrld $2
+}
+
+define <8 x i32> @test2(<8 x i32> %a) {
+ %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %div
+
+; AVX-LABEL: test2:
+; AVX: vpermd
+; AVX: vpmuludq
+; AVX: vshufps $-35
+; AVX: vpmuludq
+; AVX: vshufps $-35
+; AVX: vpsubd
+; AVX: vpsrld $1
+; AVX: vpadd
+; AVX: vpsrld $2
+}
+
+define <8 x i16> @test3(<8 x i16> %a) {
+ %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %div
+
+; SSE41-LABEL: test3:
+; SSE41: pmulhuw
+; SSE41: psubw
+; SSE41: psrlw $1
+; SSE41: paddw
+; SSE41: psrlw $2
+
+; AVX-LABEL: test3:
+; AVX: vpmulhuw
+; AVX: vpsubw
+; AVX: vpsrlw $1
+; AVX: vpaddw
+; AVX: vpsrlw $2
+}
+
+define <16 x i16> @test4(<16 x i16> %a) {
+ %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+ ret <16 x i16> %div
+
+; AVX-LABEL: test4:
+; AVX: vpmulhuw
+; AVX: vpsubw
+; AVX: vpsrlw $1
+; AVX: vpaddw
+; AVX: vpsrlw $2
+; AVX-NOT: vpmulhuw
+}
+
+define <8 x i16> @test5(<8 x i16> %a) {
+ %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ ret <8 x i16> %div
+
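+; Signed i16 lowering, sketched under the assumption that 0x4925 is the i16
+; magic for 7:
+;   t = mulhs16(n, 0x4925)         ; pmulhw
+;   q = (t >>s 1) + (t >>u 15)     ; psrlw $15, psraw $1, paddw
+; e.g. n = 49: t = 14, (14 >> 1) + 0 = 7.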
+; SSE41-LABEL: test5:
+; SSE41: pmulhw
+; SSE41: psrlw $15
+; SSE41: psraw $1
+; SSE41: paddw
+
+; AVX-LABEL: test5:
+; AVX: vpmulhw
+; AVX: vpsrlw $15
+; AVX: vpsraw $1
+; AVX: vpaddw
+}
+
+define <16 x i16> @test6(<16 x i16> %a) {
+ %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
+ ret <16 x i16> %div
+
+; AVX-LABEL: test6:
+; AVX: vpmulhw
+; AVX: vpsrlw $15
+; AVX: vpsraw $1
+; AVX: vpaddw
+; AVX-NOT: vpmulhw
+}
+
+define <16 x i8> @test7(<16 x i8> %a) {
+ %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %div
+}
+
+define <4 x i32> @test8(<4 x i32> %a) {
+ %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+ ret <4 x i32> %div
+
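+; Signed i32 lowering, sketched under the assumption that 0x92492493 is the
+; s32 magic for 7:
+;   t = mulhs(n, 0x92492493) + n   ; pmuldq + shuffles, padd
+;   q = (t >>s 2) + (t >>u 31)     ; psrld $31, psrad $2, padd
+; e.g. n = -49: t = -29, (-29 >>s 2) + 1 = -7.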
+; SSE41-LABEL: test8:
+; SSE41: pmuldq
+; SSE41: pshufd $57
+; SSE41-NOT: pshufd $57
+; SSE41: pmuldq
+; SSE41: shufps $-35
+; SSE41: pshufd $-40
+; SSE41: padd
+; SSE41: psrld $31
+; SSE41: psrad $2
+; SSE41: padd
+
+; SSE-LABEL: test8:
+; SSE: psrad $31
+; SSE: pand
+; SSE: paddd
+; SSE: pmuludq
+; SSE: pshufd $57
+; SSE-NOT: pshufd $57
+; SSE: pmuludq
+; SSE: shufps $-35
+; SSE: pshufd $-40
+; SSE: psubd
+; SSE: padd
+; SSE: psrld $31
+; SSE: psrad $2
+; SSE: padd
+
+; AVX-LABEL: test8:
+; AVX: vpmuldq
+; AVX: vpshufd $57
+; AVX-NOT: vpshufd $57
+; AVX: vpmuldq
+; AVX: vshufps $-35
+; AVX: vpshufd $-40
+; AVX: vpadd
+; AVX: vpsrld $31
+; AVX: vpsrad $2
+; AVX: vpadd
+}
+
+define <8 x i32> @test9(<8 x i32> %a) {
+ %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %div
+
+; AVX-LABEL: test9:
+; AVX: vpbroadcastd
+; AVX: vpmuldq
+; AVX: vshufps $-35
+; AVX: vpmuldq
+; AVX: vshufps $-35
+; AVX: vpshufd $-40
+; AVX: vpadd
+; AVX: vpsrld $31
+; AVX: vpsrad $2
+; AVX: vpadd
+}
+
+define <8 x i32> @test10(<8 x i32> %a) {
+ %rem = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %rem
+
+; AVX-LABEL: test10:
+; AVX: vpbroadcastd
+; AVX: vpmuludq
+; AVX: vshufps $-35
+; AVX: vpmuludq
+; AVX: vshufps $-35
+; AVX: vpsubd
+; AVX: vpsrld $1
+; AVX: vpadd
+; AVX: vpsrld $2
+; AVX: vpmulld
+}
+
+define <8 x i32> @test11(<8 x i32> %a) {
+ %rem = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i32> %rem
+
+; AVX-LABEL: test11:
+; AVX: vpbroadcastd
+; AVX: vpmuldq
+; AVX: vshufps $-35
+; AVX: vpmuldq
+; AVX: vshufps $-35
+; AVX: vpshufd $-40
+; AVX: vpadd
+; AVX: vpsrld $31
+; AVX: vpsrad $2
+; AVX: vpadd
+; AVX: vpmulld
+}
+
+define <2 x i16> @test12() {
+ %I8 = insertelement <2 x i16> zeroinitializer, i16 -1, i32 0
+ %I9 = insertelement <2 x i16> %I8, i16 -1, i32 1
+ %B9 = urem <2 x i16> %I9, %I9
+ ret <2 x i16> %B9
+
+; AVX-LABEL: test12:
+; AVX: xorps
+}
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index d8ecd44..8728712 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -2,9 +2,11 @@
; some setups (e.g., Atom) from affecting the output.
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
@@ -21,6 +23,9 @@ entry:
; MINGW_X86-LABEL: _sret1:
; MINGW_X86: {{retl$}}
+; CYGWIN-LABEL: _sret1:
+; CYGWIN: retl $4
+
; LINUX-LABEL: sret1:
; LINUX: retl $4
@@ -38,6 +43,9 @@ entry:
; MINGW_X86-LABEL: _sret2:
; MINGW_X86: {{retl$}}
+; CYGWIN-LABEL: _sret2:
+; CYGWIN: retl $4
+
; LINUX-LABEL: sret2:
; LINUX: retl $4
@@ -56,6 +64,9 @@ entry:
; MINGW_X86-LABEL: _sret3:
; MINGW_X86: {{retl$}}
+; CYGWIN-LABEL: _sret3:
+; CYGWIN: retl $4
+
; LINUX-LABEL: sret3:
; LINUX: retl $4
@@ -77,6 +88,9 @@ entry:
; MINGW_X86-LABEL: _sret4:
; MINGW_X86: {{retl$}}
+; CYGWIN-LABEL: _sret4:
+; CYGWIN: retl $4
+
; LINUX-LABEL: sret4:
; LINUX: retl $4
@@ -98,6 +112,7 @@ entry:
ret void
; WIN32-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+; CYGWIN-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; LINUX-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; The address of the return structure is passed as an implicit parameter.
@@ -115,6 +130,7 @@ entry:
call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
; WIN32-LABEL: {{^}}_call_foo5:
; MINGW_X86-LABEL: {{^}}_call_foo5:
+; CYGWIN-LABEL: {{^}}_call_foo5:
; LINUX-LABEL: {{^}}call_foo5:
@@ -135,6 +151,7 @@ entry:
define void @test6_f(%struct.test6* %x) nounwind {
; WIN32-LABEL: _test6_f:
; MINGW_X86-LABEL: _test6_f:
+; CYGWIN-LABEL: _test6_f:
; LINUX-LABEL: test6_f:
; The %x argument is moved to %ecx. It will be the this pointer.
@@ -145,6 +162,9 @@ define void @test6_f(%struct.test6* %x) nounwind {
; MINGW_X86: movl 8(%ebp), %eax
; MINGW_X86: movl %eax, (%e{{([a-d]x)|(sp)}})
+; CYGWIN: movl 8(%ebp), %eax
+; CYGWIN: movl %eax, (%e{{([a-d]x)|(sp)}})
+
; The sret pointer is (%esp)
; WIN32: leal 8(%esp), %[[REG:e[a-d]x]]
; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
@@ -153,8 +173,71 @@ define void @test6_f(%struct.test6* %x) nounwind {
; MINGW_X86-NEXT: leal 8(%esp), %ecx
; MINGW_X86-NEXT: calll _test6_g
+; CYGWIN-NEXT: leal 8(%esp), %ecx
+; CYGWIN-NEXT: calll _test6_g
+
%tmp = alloca %struct.test6, align 4
call x86_thiscallcc void @test6_g(%struct.test6* sret %tmp, %struct.test6* %x)
ret void
}
declare x86_thiscallcc void @test6_g(%struct.test6* sret, %struct.test6*)
+
+; Flipping the parameters at the IR level generates the same code.
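+; (Under thiscall with sret on win32, 'this' goes in %ecx and the sret pointer
+; on the stack regardless of their order in the IR parameter list.)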
+%struct.test7 = type { i32, i32, i32 }
+define void @test7_f(%struct.test7* %x) nounwind {
+; WIN32-LABEL: _test7_f:
+; MINGW_X86-LABEL: _test7_f:
+; CYGWIN-LABEL: _test7_f:
+; LINUX-LABEL: test7_f:
+
+; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
+; WIN32: movl 8(%ebp), %ecx
+; MINGW_X86: movl 8(%ebp), %ecx
+; CYGWIN: movl 8(%ebp), %ecx
+
+; The sret pointer is (%esp)
+; WIN32: leal 8(%esp), %[[REG:e[a-d]x]]
+; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
+; MINGW_X86: leal 8(%esp), %[[REG:e[a-d]x]]
+; MINGW_X86-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
+; CYGWIN: leal 8(%esp), %[[REG:e[a-d]x]]
+; CYGWIN-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
+
+ %tmp = alloca %struct.test7, align 4
+ call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp)
+ ret void
+}
+
+define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
+ %s = getelementptr %struct.test7* %in, i32 0, i32 0
+ %d = getelementptr %struct.test7* %out, i32 0, i32 0
+ %v = load i32* %s
+ store i32 %v, i32* %d
+ call void @clobber_eax()
+ ret void
+
+; Make sure we return the sret pointer (the second parameter) in %eax.
+; WIN32-LABEL: _test7_g:
+; WIN32: calll _clobber_eax
+; WIN32: movl {{.*}}, %eax
+; WIN32: retl
+}
+
+declare void @clobber_eax()
+
+; Test what happens if the first parameter has to be split by codegen.
+; Realistically, no frontend will generate code like this, but here it is for
+; completeness.
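+; (An i64 inreg argument is split across %eax/%edx on x86-32, so the single
+; i64 store below becomes the two 32-bit stores matched by the WIN32-DAG
+; lines.)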
+define void @test8_f(i64 inreg %a, i64* sret %out) {
+ store i64 %a, i64* %out
+ call void @clobber_eax()
+ ret void
+
+; WIN32-LABEL: _test8_f:
+; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
+; WIN32-DAG: movl %edx, 4(%[[out]])
+; WIN32-DAG: movl %eax, (%[[out]])
+; WIN32: calll _clobber_eax
+; WIN32: movl {{.*}}, %eax
+; WIN32: retl
+}
diff --git a/test/CodeGen/X86/x86-64-sret-return-2.ll b/test/CodeGen/X86/x86-64-sret-return-2.ll
new file mode 100644
index 0000000..9f57ee1
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-sret-return-2.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+
+; FIXME: x32 doesn't know how to select this. This isn't a regression; it never
+; worked.
+; RUNX: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
+
+; This used to crash due to topological sorting issues in selection DAG.
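+; (The checks below also verify that the sret pointer comes back in %rax, as
+; the x86-64 SysV ABI requires.)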
+define void @foo(i32* sret %agg.result, i32, i32, i32, i32, i32, void (i32)* %pred) {
+entry:
+ call void %pred(i32 undef)
+ ret void
+
+; CHECK-LABEL: foo:
+; CHECK: callq
+; CHECK: movq {{.*}}, %rax
+; CHECK: ret
+}
diff --git a/test/CodeGen/XCore/epilogue_prologue.ll b/test/CodeGen/XCore/epilogue_prologue.ll
index 14f04a3..9997814 100644
--- a/test/CodeGen/XCore/epilogue_prologue.ll
+++ b/test/CodeGen/XCore/epilogue_prologue.ll
@@ -206,14 +206,41 @@ entry:
ret i32 %i
}
+; FP + large frame: spill FP+SR+LR = entsp 2 + 256 + extsp 1
+; CHECKFP-LABEL:f8
+; CHECKFP: entsp 258
+; CHECKFP-NEXT: stw r10, sp[1]
+; CHECKFP-NEXT: ldaw r10, sp[0]
+; CHECKFP-NEXT: mkmsk [[REG:r[0-9]+]], 8
+; CHECKFP-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}}
+; CHECKFP-NEXT: extsp 1
+; CHECKFP-NEXT: bl f5
+; CHECKFP-NEXT: ldaw sp, sp[1]
+; CHECKFP-NEXT: set sp, r10
+; CHECKFP-NEXT: ldw r10, sp[1]
+; CHECKFP-NEXT: retsp 258
+;
+; !FP + large frame: spill SR+SR+LR = entsp 3 + 256
+; CHECK-LABEL:f8
+; CHECK: entsp 257
+; CHECK-NEXT: ldaw r0, sp[254]
+; CHECK-NEXT: bl f5
+; CHECK-NEXT: retsp 257
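+; (In the CHECKFP sequence above, mkmsk 8 materializes 0xff = 255; element 253
+; of the array presumably sits 255 words above r10 once the LR and FP spill
+; slots are counted.)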
+define void @f8() nounwind {
+entry:
+ %0 = alloca [256 x i32]
+ %1 = getelementptr inbounds [256 x i32]* %0, i32 0, i32 253
+ call void @f5(i32* %1)
+ ret void
+}
; FP + large frame: spill FP+SR+LR = entsp 2 + 32768 + extsp 1
-; CHECKFP-LABEL:f8
+; CHECKFP-LABEL:f9
; CHECKFP: entsp 32770
; CHECKFP-NEXT: stw r10, sp[1]
; CHECKFP-NEXT: ldaw r10, sp[0]
-; CHECKFP-NEXT: mkmsk r1, 15
-; CHECKFP-NEXT: ldaw r0, r10[r1]
+; CHECKFP-NEXT: ldc [[REG:r[0-9]+]], 32767
+; CHECKFP-NEXT: ldaw r0, r10{{\[}}[[REG]]{{\]}}
; CHECKFP-NEXT: extsp 1
; CHECKFP-NEXT: bl f5
; CHECKFP-NEXT: ldaw sp, sp[1]
@@ -222,12 +249,12 @@ entry:
; CHECKFP-NEXT: retsp 32770
;
; !FP + large frame: spill SR+SR+LR = entsp 3 + 32768
-; CHECK-LABEL:f8
+; CHECK-LABEL:f9
; CHECK: entsp 32771
; CHECK-NEXT: ldaw r0, sp[32768]
; CHECK-NEXT: bl f5
; CHECK-NEXT: retsp 32771
-define void @f8() nounwind {
+define void @f9() nounwind {
entry:
%0 = alloca [32768 x i32]
%1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765
diff --git a/test/CodeGen/XCore/llvm-intrinsics.ll b/test/CodeGen/XCore/llvm-intrinsics.ll
index e0acd66..b436282 100644
--- a/test/CodeGen/XCore/llvm-intrinsics.ll
+++ b/test/CodeGen/XCore/llvm-intrinsics.ll
@@ -287,9 +287,8 @@ define void @Unwind1() {
; CHECKFP: .LBB{{[0-9_]+}}
; CHECKFP-NEXT: ldc r2, 40
; CHECKFP-NEXT: add r2, r10, r2
-; CHECKFP-NEXT: add r0, r2, r0
+; CHECKFP-NEXT: add r2, r2, r0
; CHECKFP-NEXT: mov r3, r1
-; CHECKFP-NEXT: mov r2, r0
; CHECKFP-NEXT: ldw r9, r10[4]
; CHECKFP-NEXT: ldw r8, r10[5]
; CHECKFP-NEXT: ldw r7, r10[6]
@@ -337,9 +336,8 @@ define void @Unwind1() {
; CHECK-NEXT: ldc r2, 36
; CHECK-NEXT: ldaw r3, sp[0]
; CHECK-NEXT: add r2, r3, r2
-; CHECK-NEXT: add r0, r2, r0
+; CHECK-NEXT: add r2, r2, r0
; CHECK-NEXT: mov r3, r1
-; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: ldw r10, sp[2]
; CHECK-NEXT: ldw r9, sp[3]
; CHECK-NEXT: ldw r8, sp[4]
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index 6fd7887..c78b8b8 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -13,13 +13,11 @@ entry:
!0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{i32 0}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
!6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9}
!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!12 = metadata !{metadata !14}
!14 = metadata !{i32 720948, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 2, metadata !9, i32 1, i32 1, null, null} ; [ DW_TAG_variable ]
!15 = metadata !{i32 3, i32 3, metadata !16, null}
diff --git a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
index 5a10459..9beab20 100644
--- a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
@@ -5,7 +5,7 @@
!llvm.module.flags = !{!9}
!0 = metadata !{i32 720913, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{i32 0}
+!2 = metadata !{}
!3 = metadata !{metadata !5}
!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i32* @0, null} ; [ DW_TAG_variable ]
!6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
index 0c0a4dc..94aa259 100644
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -verify -S -asm-verbose | FileCheck %s
+; RUN: opt < %s -verify -S | FileCheck %s
; CHECK: lang 0x8001
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index bec0318..5f7cb69 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -1,6 +1,22 @@
-; RUN: llvm-as < %s | %llc_dwarf -asm-verbose -O0 | grep AT_specification | count 2
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
; Radar 7833483
-; Do not emit AT_specification for nested function foo.
+; Do not emit a separate out-of-line definition DIE for the function-local 'foo'
+; function (member of the function-local 'A' type).
+; CHECK: DW_TAG_class_type
+; CHECK: DW_TAG_class_type
+; CHECK-NEXT: DW_AT_name {{.*}} "A"
+; Check that the subprogram inside the class definition has a low_pc, which is
+; only attached to the definition.
+; CHECK: [[FOO_INL:0x........]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_low_pc
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZZN1B2fnEvEN1A3fooEv"
+; And just double-check that there's no out-of-line definition that references
+; this subprogram.
+; CHECK-NOT: DW_AT_specification {{.*}} {[[FOO_INL]]}
%class.A = type { i8 }
%class.B = type { i8 }
diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll
index 6b6e61d..a10b10a 100644
--- a/test/DebugInfo/2010-07-19-Crash.ll
+++ b/test/DebugInfo/2010-07-19-Crash.ll
@@ -25,6 +25,6 @@ entry:
!10 = metadata !{i32 524299, metadata !12, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
!11 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
!12 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
-!13 = metadata !{metadata !0, metadata !6, metadata !11}
+!13 = metadata !{metadata !0}
!14 = metadata !{i32 0}
!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/AArch64/cfi-frame.ll b/test/DebugInfo/AArch64/cfi-frame.ll
deleted file mode 100644
index 7290ddf..0000000
--- a/test/DebugInfo/AArch64/cfi-frame.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-WITH-FP
-
-@bigspace = global [8 x i64] zeroinitializer
-
-declare void @use_addr(i8*)
-
-define void @test_frame([8 x i64] %val) {
-; CHECK: test_frame:
-; CHECK: .cfi_startproc
-
- %var = alloca i8, i32 1000000
-; CHECK: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
-; CHECK-NEXT: .Ltmp
-; CHECK-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
-
-; Make sure the prologue is reasonably efficient
-; CHECK-NEXT: stp x29, x30, [sp,
-; CHECK-NEXT: stp x25, x26, [sp,
-; CHECK-NEXT: stp x23, x24, [sp,
-; CHECK-NEXT: stp x21, x22, [sp,
-; CHECK-NEXT: stp x19, x20, [sp,
-; CHECK-NEXT: sub sp, sp, #160
-; CHECK-NEXT: sub sp, sp, #244, lsl #12
-; CHECK-NEXT: .Ltmp
-; CHECK-NEXT: .cfi_def_cfa sp, 1000080
-; CHECK-NEXT: .Ltmp
-; CHECK-NEXT: .cfi_offset x30, -8
-; CHECK-NEXT: .Ltmp
-; CHECK-NEXT: .cfi_offset x29, -16
-; [...]
-; CHECK: .cfi_offset x19, -80
-
-; CHECK: bl use_addr
- call void @use_addr(i8* %var)
-
- store [8 x i64] %val, [8 x i64]* @bigspace
- ret void
-; CHECK: ret
-; CHECK: .cfi_endproc
-}
-
-; CHECK-WITH-FP: test_frame:
-
-; CHECK-WITH-FP: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
-; CHECK-WITH-FP-NEXT: .Ltmp
-; CHECK-WITH-FP-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
-
-; CHECK-WITH-FP: stp x29, x30, [sp, [[OFFSET:#[0-9]+]]]
-; CHECK-WITH-FP-NEXT: add x29, sp, [[OFFSET]]
-; CHECK-WITH-FP-NEXT: .Ltmp
-; CHECK-WITH-FP-NEXT: .cfi_def_cfa x29, 16
-
- ; We shouldn't emit any kind of update for the second stack adjustment if the
- ; FP is in use.
-; CHECK-WITH-FP-NOT: .cfi_def_cfa_offset
-
-; CHECK-WITH-FP: bl use_addr
diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg
index 9a66a00..a75a42b 100644
--- a/test/DebugInfo/AArch64/lit.local.cfg
+++ b/test/DebugInfo/AArch64/lit.local.cfg
@@ -1,4 +1,4 @@
targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if not 'ARM64' in targets:
config.unsupported = True
diff --git a/test/DebugInfo/ARM64/struct_by_value.ll b/test/DebugInfo/AArch64/struct_by_value.ll
index 0023c3d..0023c3d 100644
--- a/test/DebugInfo/ARM64/struct_by_value.ll
+++ b/test/DebugInfo/AArch64/struct_by_value.ll
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
deleted file mode 100644
index f28ee76..0000000
--- a/test/DebugInfo/AArch64/variable-loc.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
-
-; This is a regression test making sure the location of variables is correct in
-; debugging information, even if they're addressed via the frame pointer.
-
-; In case it needs, regenerating, the following suffices:
-; int printf(const char *, ...);
-; void populate_array(int *, int);
-; int sum_array(int *, int);
-
-; int main() {
-; int main_arr[100], val;
-; populate_array(main_arr, 100);
-; val = sum_array(main_arr, 100);
-; printf("Total is %d\n", val);
-; return 0;
-; }
-
- ; First make sure main_arr is where we expect it: sp + 4 == x29 - 412:
-; CHECK: main:
-; CHECK: sub sp, sp, #432
-; CHECK: stp x29, x30, [sp, #416]
-; CHECK: add x29, sp, #416
-; CHECK: add {{x[0-9]+}}, sp, #4
-
-; CHECK: .Linfo_string7:
-; CHECK-NEXT: main_arr
-
-; Now check the debugging information reflects this:
-; CHECK: DW_TAG_variable
-; CHECK-NEXT: .word .Linfo_string7
-
- ; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is LEB128 encoded -412.
-; CHECK: DW_AT_location
-; CHECK-NEXT: .byte 145
-; CHECK-NEXT: .ascii "\344|"
-
-
-
-target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128"
-target triple = "aarch64-none-linux-gnu"
-
-@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 1
-
-declare void @populate_array(i32*, i32) nounwind
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare i32 @sum_array(i32*, i32) nounwind
-
-define i32 @main() nounwind {
-entry:
- %retval = alloca i32, align 4
- %main_arr = alloca [100 x i32], align 4
- %val = alloca i32, align 4
- store i32 0, i32* %retval
- call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22
- call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24
- %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25
- call void @populate_array(i32* %arraydecay, i32 100), !dbg !25
- %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
- %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
- store i32 %call, i32* %val, align 4, !dbg !26
- %0 = load i32* %val, align 4, !dbg !27
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
- ret i32 0, !dbg !28
-}
-
-declare i32 @printf(i8*, ...)
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!30}
-
-!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !11, metadata !14}
-!5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
-!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !10}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
-!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{metadata !10, metadata !9, metadata !10}
-!14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
-!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !10}
-!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
-!18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
-!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
-!20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99]
-!22 = metadata !{i32 19, i32 7, metadata !18, null}
-!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
-!24 = metadata !{i32 20, i32 7, metadata !18, null}
-!25 = metadata !{i32 22, i32 3, metadata !18, null}
-!26 = metadata !{i32 23, i32 9, metadata !18, null}
-!27 = metadata !{i32 24, i32 3, metadata !18, null}
-!28 = metadata !{i32 26, i32 3, metadata !18, null}
-!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/ARM64/lit.local.cfg b/test/DebugInfo/ARM64/lit.local.cfg
deleted file mode 100644
index a75a42b..0000000
--- a/test/DebugInfo/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
-
diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll
index 1ce3681..8c9dff0 100644
--- a/test/DebugInfo/COFF/asm.ll
+++ b/test/DebugInfo/COFF/asm.ll
@@ -21,7 +21,7 @@
; X86-NEXT: ret
; X86-NEXT: [[END_OF_F:.*]]:
;
-; X86-LABEL: .section .debug$S,"rn"
+; X86-LABEL: .section .debug$S,"rnd"
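+; (The added 'd' flag presumably marks .debug$S with
+; IMAGE_SCN_MEM_DISCARDABLE, making the debug section discardable.)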
; X86-NEXT: .long 4
; X86-NEXT: .long 242
; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
@@ -90,7 +90,7 @@
; X64-NEXT: ret
; X64-NEXT: [[END_OF_F:.*]]:
;
-; X64-LABEL: .section .debug$S,"rn"
+; X64-LABEL: .section .debug$S,"rnd"
; X64-NEXT: .long 4
; X64-NEXT: .long 242
; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
diff --git a/test/DebugInfo/COFF/multifile.ll b/test/DebugInfo/COFF/multifile.ll
index 53a645e..c04bdb3 100644
--- a/test/DebugInfo/COFF/multifile.ll
+++ b/test/DebugInfo/COFF/multifile.ll
@@ -28,7 +28,7 @@
; X86-NEXT: ret
; X86-NEXT: [[END_OF_F:.*]]:
;
-; X86-LABEL: .section .debug$S,"rn"
+; X86-LABEL: .section .debug$S,"rnd"
; X86-NEXT: .long 4
; X86-NEXT: .long 242
; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
@@ -122,7 +122,7 @@
; X64-NEXT: ret
; X64-NEXT: [[END_OF_F:.*]]:
;
-; X64-LABEL: .section .debug$S,"rn"
+; X64-LABEL: .section .debug$S,"rnd"
; X64-NEXT: .long 4
; X64-NEXT: .long 242
; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
diff --git a/test/DebugInfo/COFF/multifunction.ll b/test/DebugInfo/COFF/multifunction.ll
index d664716..5a65558 100644
--- a/test/DebugInfo/COFF/multifunction.ll
+++ b/test/DebugInfo/COFF/multifunction.ll
@@ -50,7 +50,7 @@
; X86-NEXT: ret
; X86-NEXT: [[END_OF_F:.*]]:
;
-; X86-LABEL: .section .debug$S,"rn"
+; X86-LABEL: .section .debug$S,"rnd"
; X86-NEXT: .long 4
; Line table subsection for x
; X86-NEXT: .long 242
@@ -200,7 +200,7 @@
; X64-NEXT: ret
; X64-NEXT: [[END_OF_F:.*]]:
;
-; X64-LABEL: .section .debug$S,"rn"
+; X64-LABEL: .section .debug$S,"rnd"
; X64-NEXT: .long 4
; Line table subsection for x
; X64-NEXT: .long 242
diff --git a/test/DebugInfo/COFF/simple.ll b/test/DebugInfo/COFF/simple.ll
index 8fa6870..2613a18 100644
--- a/test/DebugInfo/COFF/simple.ll
+++ b/test/DebugInfo/COFF/simple.ll
@@ -19,7 +19,7 @@
; X86-NEXT: ret
; X86-NEXT: [[END_OF_F:.*]]:
;
-; X86-LABEL: .section .debug$S,"rn"
+; X86-LABEL: .section .debug$S,"rnd"
; X86-NEXT: .long 4
; X86-NEXT: .long 242
; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
@@ -81,7 +81,7 @@
; X64-NEXT: ret
; X64-NEXT: [[END_OF_F:.*]]:
;
-; X64-LABEL: .section .debug$S,"rn"
+; X64-LABEL: .section .debug$S,"rnd"
; X64-NEXT: .long 4
; X64-NEXT: .long 242
; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
diff --git a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index f5e2eae..4d2e427 100644
--- a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -22,7 +22,7 @@
; X86-NEXT: [[END_OF_BAR:^L.*]]:{{$}}
; X86-NOT: ret
-; X86-LABEL: .section .debug$S,"rn"
+; X86-LABEL: .section .debug$S,"rnd"
; X86: .secrel32 "?bar@@YAXHZZ"
; X86-NEXT: .secidx "?bar@@YAXHZZ"
; X86: .long 0
diff --git a/test/DebugInfo/Inputs/llvm-symbolizer-dwo-test b/test/DebugInfo/Inputs/llvm-symbolizer-dwo-test
new file mode 100755
index 0000000..c28c3d2
--- /dev/null
+++ b/test/DebugInfo/Inputs/llvm-symbolizer-dwo-test
Binary files differ
diff --git a/test/DebugInfo/Inputs/llvm-symbolizer-dwo-test.cc b/test/DebugInfo/Inputs/llvm-symbolizer-dwo-test.cc
new file mode 100644
index 0000000..ea0967a
--- /dev/null
+++ b/test/DebugInfo/Inputs/llvm-symbolizer-dwo-test.cc
@@ -0,0 +1,18 @@
+int f(int a, int b) {
+ return a + b;
+}
+
+int g(int a) {
+ return a + 1;
+}
+
+
+int main() {
+ return f(2, g(2));
+}
+
+// Built with Clang 3.5.0:
+// $ mkdir -p /tmp/dbginfo
+// $ cp llvm-symbolizer-dwo-test.cc /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ clang -gsplit-dwarf llvm-symbolizer-dwo-test.cc
diff --git a/test/DebugInfo/Mips/delay-slot.ll b/test/DebugInfo/Mips/delay-slot.ll
new file mode 100644
index 0000000..9bce4ba
--- /dev/null
+++ b/test/DebugInfo/Mips/delay-slot.ll
@@ -0,0 +1,75 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple mips-unknown-linux-gnu | llvm-dwarfdump - | FileCheck %s
+; PR19815
+
+; Generated using clang -target mips-linux-gnu -g test.c -S -o - -flto|opt -sroa -S
+; test.c:
+;
+; int foo(int x) {
+; if (x)
+; return 0;
+; return 1;
+; }
+
+; CHECK: Address Line Column File ISA Discriminator Flags
+; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
+; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt prologue_end
+; CHECK: 0x0000000000000008 2 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000040 5 0 1 0 0 is_stmt
+; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence
+
+target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+target triple = "mips--linux-gnu"
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %x) #0 {
+entry:
+ call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !12), !dbg !13
+ %tobool = icmp ne i32 %x, 0, !dbg !14
+ br i1 %tobool, label %if.then, label %if.end, !dbg !14
+
+if.then: ; preds = %entry
+ br label %return, !dbg !16
+
+if.end: ; preds = %entry
+ br label %return, !dbg !17
+
+return: ; preds = %if.end, %if.then
+ %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
+ ret i32 %retval.0, !dbg !18
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"test.c", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!11 = metadata !{metadata !"clang version 3.5.0"}
+!12 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!13 = metadata !{i32 1, i32 0, metadata !4, null}
+!14 = metadata !{i32 2, i32 0, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/test.c]
+!16 = metadata !{i32 3, i32 0, metadata !15, null}
+!17 = metadata !{i32 4, i32 0, metadata !4, null}
+!18 = metadata !{i32 5, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/Mips/lit.local.cfg b/test/DebugInfo/Mips/lit.local.cfg
new file mode 100644
index 0000000..88262fb
--- /dev/null
+++ b/test/DebugInfo/Mips/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+ config.unsupported = True
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
index 2d92fd9..e0e4156 100644
--- a/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -1,4 +1,6 @@
; RUN: llc -mtriple=s390x-linux-gnu -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -disable-fp-elim -filetype=obj < %s \
+; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=DEBUG %s
;
; This is a regression test making sure the location of variables is correct in
; debugging information, even if they're addressed via the frame pointer.
@@ -10,20 +12,13 @@
; CHECK: aghi %r15, -568
; CHECK: la %r2, 164(%r11)
; CHECK: brasl %r14, populate_array@PLT
-;
-; CHECK: .Linfo_string7:
-; CHECK-NEXT: main_arr
-;
-; Now check that the debugging information reflects this:
-; CHECK: DW_TAG_variable
-; CHECK-NEXT: .long .Linfo_string7
-;
-; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is the sleb128
-; encoding of 164:
-; CHECK: DW_AT_location
-; CHECK-NEXT: .byte 145
-; CHECK-NEXT: .ascii "\244\001"
-;
+
+; DEBUG: DW_TAG_variable
+; DEBUG-NOT: DW_TAG
+; DEBUG: DW_AT_name {{.*}} "main_arr"
+; Rather hard-coded, but 0x91 => DW_OP_fbreg and 0xa4 0x01 is the SLEB128
+; encoding of 164.
+; DEBUG-NOT: DW_TAG
+; DEBUG: DW_AT_location {{.*}}(<0x3> 91 a4 01 )
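+; Worked encoding: 164 = 0b10100100; the low seven bits 0x24 plus the
+; continuation bit give 0xa4 and the remaining bit gives 0x01, hence "a4 01".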
@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 2
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 0c90587..1bbfbf4 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -22,13 +22,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @f, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [f]
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @f, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [f]
!6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9}
!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!12 = metadata !{metadata !14}
!14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB, null} ; [ DW_TAG_variable ]
!15 = metadata !{i32 786688, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 8898cf1..21dccd7 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -100,22 +100,18 @@ entry:
!10 = metadata !{metadata !11, metadata !13}
!11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!13 = metadata !{i32 720942, metadata !82, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 0} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720942, metadata !82, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 0} ; [ DW_TAG_subprogram ]
!14 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!15 = metadata !{null, metadata !16, metadata !12}
!16 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
!20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!21 = metadata !{i32 720942, metadata !82, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25, i32 0} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 720942, metadata !82, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 0} ; [ DW_TAG_subprogram ]
!22 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!23 = metadata !{null, metadata !24, metadata !12}
!24 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
-!25 = metadata !{metadata !26}
-!26 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
-!29 = metadata !{i32 720942, metadata !82, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 17] [def] [scope 0] [main]
+!29 = metadata !{i32 720942, metadata !82, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 17] [def] [scope 0] [main]
!30 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!31 = metadata !{metadata !12, metadata !12, metadata !32}
!32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
@@ -123,18 +119,16 @@ entry:
!34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
!35 = metadata !{metadata !36}
!36 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!37 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
+!37 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, null, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
!38 = metadata !{metadata !39}
!39 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!40 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
+!40 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, null, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
!41 = metadata !{metadata !42}
!42 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!43 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
+!43 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
!44 = metadata !{metadata !45}
!45 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!46 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
-!47 = metadata !{metadata !48}
-!48 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!46 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
!49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!50 = metadata !{i32 16, i32 14, metadata !29, null}
!51 = metadata !{i32 721153, metadata !29, metadata !"argv", metadata !6, i32 33554448, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index 6884c41..59921bd 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -29,7 +29,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9, metadata !10}
@@ -38,8 +38,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
!12 = metadata !{metadata !13}
!13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!17 = metadata !{i32 3, i32 13, metadata !5, null}
!18 = metadata !{i32 4, i32 3, metadata !19, null}
diff --git a/test/DebugInfo/X86/DW_AT_linkage_name.ll b/test/DebugInfo/X86/DW_AT_linkage_name.ll
new file mode 100644
index 0000000..dce234a
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_linkage_name.ll
@@ -0,0 +1,116 @@
+; RUN: llc -mtriple=x86_64-apple-macosx %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+;
+; struct A {
+; A(int i);
+; ~A();
+; };
+;
+; A::~A() {}
+;
+; void foo() {
+; A a(1);
+; }
+;
+; rdar://problem/16362674
+;
+; Test that we do not emit a linkage name for the declaration of a destructor.
+; Test that we do emit a linkage name for a specific instance of it.
+
+; CHECK: DW_TAG_subprogram
+; CHECK: [[A_DTOR:.*]]: DW_TAG_subprogram
+; CHECK: DW_AT_name {{.*}} "~A"
+; CHECK-NOT: DW_AT_MIPS_linkage_name
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1AD2Ev"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}}[[A_DTOR]]
+
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.A = type { i8 }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1AD2Ev(%struct.A* %this) unnamed_addr #0 align 2 {
+entry:
+ %this.addr = alloca %struct.A*, align 8
+ store %struct.A* %this, %struct.A** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.A** %this.addr}, metadata !26), !dbg !28
+ %this1 = load %struct.A** %this.addr
+ ret void, !dbg !29
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1AD1Ev(%struct.A* %this) unnamed_addr #0 align 2 {
+entry:
+ %this.addr = alloca %struct.A*, align 8
+ store %struct.A* %this, %struct.A** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.A** %this.addr}, metadata !30), !dbg !31
+ %this1 = load %struct.A** %this.addr
+ call void @_ZN1AD2Ev(%struct.A* %this1), !dbg !32
+ ret void, !dbg !33
+}
+
+; Function Attrs: ssp uwtable
+define void @_Z3foov() #2 {
+entry:
+ %a = alloca %struct.A, align 1
+ call void @llvm.dbg.declare(metadata !{%struct.A* %a}, metadata !34), !dbg !35
+ call void @_ZN1AC1Ei(%struct.A* %a, i32 1), !dbg !35
+ call void @_ZN1AD1Ev(%struct.A* %a), !dbg !36
+ ret void, !dbg !36
+}
+
+declare void @_ZN1AC1Ei(%struct.A*, i32)
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { ssp uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23, !24}
+!llvm.ident = !{!25}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !16, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [linkage-name.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"linkage-name.cpp", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6, metadata !12}
+!6 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !11, i32 2} ; [ DW_TAG_subprogram ] [line 2] [A]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786468}
+!12 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"", i32 3, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !15, i32 3} ; [ DW_TAG_subprogram ] [line 3] [~A]
+!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{null, metadata !9}
+!15 = metadata !{i32 786468}
+!16 = metadata !{metadata !17, metadata !18, metadata !19}
+!17 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 6, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*)* @_ZN1AD2Ev, null, metadata !12, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~A]
+!18 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", i32 6, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*)* @_ZN1AD1Ev, null, metadata !12, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~A]
+!19 = metadata !{i32 786478, metadata !1, metadata !20, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 10, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !2, i32 10} ; [ DW_TAG_subprogram ] [line 10] [def] [foo]
+!20 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [linkage-name.cpp]
+!21 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = metadata !{null}
+!23 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!25 = metadata !{metadata !"clang version 3.5.0 "}
+!26 = metadata !{i32 786689, metadata !17, metadata !"this", null, i32 16777216, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!28 = metadata !{i32 0, i32 0, metadata !17, null}
+!29 = metadata !{i32 8, i32 0, metadata !17, null} ; [ DW_TAG_imported_declaration ]
+!30 = metadata !{i32 786689, metadata !18, metadata !"this", null, i32 16777216, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = metadata !{i32 0, i32 0, metadata !18, null}
+!32 = metadata !{i32 6, i32 0, metadata !18, null}
+!33 = metadata !{i32 8, i32 0, metadata !18, null} ; [ DW_TAG_imported_declaration ]
+!34 = metadata !{i32 786688, metadata !19, metadata !"a", metadata !20, i32 11, metadata !"_ZTS1A", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 11]
+!35 = metadata !{i32 11, i32 0, metadata !19, null}
+!36 = metadata !{i32 12, i32 0, metadata !19, null}
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 4bdfd6f..6c5e32c0 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -1,8 +1,6 @@
; RUN: llc -O1 -filetype=obj -mtriple=x86_64-apple-darwin < %s > %t
; RUN: llvm-dwarfdump %t | FileCheck %s
-; FIXME: llvm-objdump is failing with an error when parsing some relocations
-; here, though it doesn't seem to adversely affect the test
-; RUN: not llvm-objdump -r %t | FileCheck -check-prefix=DARWIN %s
+; RUN: llvm-objdump -r %t | FileCheck -check-prefix=DARWIN %s
; RUN: llc -O1 -filetype=obj -mtriple=x86_64-pc-linux-gnu < %s > %t
; RUN: llvm-dwarfdump %t | FileCheck %s
; RUN: llvm-objdump -r %t | FileCheck -check-prefix=LINUX %s
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index c1e7d9c..b93cdf0 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -3,10 +3,10 @@
; test that the DW_AT_specification is a back edge in the file.
-; CHECK: DW_TAG_subprogram [{{[0-9]+}}] *
-; CHECK: DW_AT_specification [DW_FORM_ref4] (cu + 0x[[OFFSET:[0-9a-f]*]] => {0x0000[[OFFSET]]})
-; CHECK: 0x0000[[OFFSET]]: DW_TAG_subprogram [{{[0-9]+}}] *
-; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "bar")
+; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv"
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_specification {{.*}} {[[BAR_DECL]]}
@_ZZN3foo3barEvE1x = constant i32 0, align 4
@@ -22,19 +22,15 @@ entry:
!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [bar]
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, null, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [bar]
!6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{null, metadata !9}
!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ]
!12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
!13 = metadata !{metadata !11}
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!18 = metadata !{metadata !20}
!20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x, null} ; [ DW_TAG_variable ]
!21 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll
index 6735284..3597b2c 100644
--- a/test/DebugInfo/X86/arguments.ll
+++ b/test/DebugInfo/X86/arguments.ll
@@ -15,7 +15,8 @@
; CHECK: debug_info contents
; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_Z4func3fooS_"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name{{.*}}"_Z4func3fooS_"
; CHECK-NOT: NULL
; CHECK: DW_TAG_formal_parameter
; CHECK-NEXT: DW_AT_name{{.*}}"f"
diff --git a/test/DebugInfo/X86/array.ll b/test/DebugInfo/X86/array.ll
new file mode 100644
index 0000000..dc6c7a4
--- /dev/null
+++ b/test/DebugInfo/X86/array.ll
@@ -0,0 +1,101 @@
+; ModuleID = 'array.c'
+;
+; From (clang -g -c -O1):
+;
+; void f(int* p) {
+; p[0] = 42;
+; }
+;
+; int main(int argc, char** argv) {
+; int array[4] = { 0, 1, 2, 3 };
+; f(array);
+; return array[0];
+; }
+;
+; RUN: llc -filetype=asm %s -o - | FileCheck %s
+; Test that we only emit register-indirect locations for the array "array".
+; rdar://problem/14874886
+;
+; CHECK: ##DEBUG_VALUE: main:array <- [R{{.*}}+0]
+; CHECK-NOT: ##DEBUG_VALUE: main:array <- R{{.*}}
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@main.array = private unnamed_addr constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16
+
+; Function Attrs: nounwind ssp uwtable
+define void @f(i32* nocapture %p) #0 {
+ tail call void @llvm.dbg.value(metadata !{i32* %p}, i64 0, metadata !11), !dbg !28
+ store i32 42, i32* %p, align 4, !dbg !29, !tbaa !30
+ ret void, !dbg !34
+}
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+ %array = alloca [4 x i32], align 16
+ tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !19), !dbg !35
+ tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !20), !dbg !35
+ tail call void @llvm.dbg.value(metadata !{[4 x i32]* %array}, i64 0, metadata !21), !dbg !36
+ %1 = bitcast [4 x i32]* %array to i8*, !dbg !36
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([4 x i32]* @main.array to i8*), i64 16, i32 16, i1 false), !dbg !36
+ tail call void @llvm.dbg.value(metadata !{[4 x i32]* %array}, i64 0, metadata !21), !dbg !36
+ %2 = getelementptr inbounds [4 x i32]* %array, i64 0, i64 0, !dbg !37
+ call void @f(i32* %2), !dbg !37
+ tail call void @llvm.dbg.value(metadata !{[4 x i32]* %array}, i64 0, metadata !21), !dbg !36
+ %3 = load i32* %2, align 16, !dbg !38, !tbaa !30
+ ret i32 %3, !dbg !38
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!25, !26}
+!llvm.ident = !{!27}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/array.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"array.c", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !12}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*)* @f, null, null, metadata !10, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/array.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!12 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !18, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{metadata !9, metadata !9, metadata !15}
+!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!17 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!18 = metadata !{metadata !19, metadata !20, metadata !21}
+!19 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !5, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 5]
+!20 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !5, i32 33554437, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 5]
+!21 = metadata !{i32 786688, metadata !12, metadata !"array", metadata !5, i32 6, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [array] [line 6]
+!22 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !9, metadata !23, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from int]
+!23 = metadata !{metadata !24}
+!24 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3]
+!25 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!27 = metadata !{metadata !"clang version 3.5.0 "}
+!28 = metadata !{i32 1, i32 0, metadata !4, null}
+!29 = metadata !{i32 2, i32 0, metadata !4, null}
+!30 = metadata !{metadata !31, metadata !31, i64 0}
+!31 = metadata !{metadata !"int", metadata !32, i64 0}
+!32 = metadata !{metadata !"omnipotent char", metadata !33, i64 0}
+!33 = metadata !{metadata !"Simple C/C++ TBAA"}
+!34 = metadata !{i32 3, i32 0, metadata !4, null}
+!35 = metadata !{i32 5, i32 0, metadata !12, null}
+!36 = metadata !{i32 6, i32 0, metadata !12, null}
+!37 = metadata !{i32 7, i32 0, metadata !12, null}
+!38 = metadata !{i32 8, i32 0, metadata !12, null} ; [ DW_TAG_imported_declaration ]
diff --git a/test/DebugInfo/X86/array2.ll b/test/DebugInfo/X86/array2.ll
new file mode 100644
index 0000000..2dc2af3
--- /dev/null
+++ b/test/DebugInfo/X86/array2.ll
@@ -0,0 +1,107 @@
+; ModuleID = 'array.c'
+;
+; From (clang -g -c -O0):
+;
+; void f(int* p) {
+; p[0] = 42;
+; }
+;
+; int main(int argc, char** argv) {
+; int array[4] = { 0, 1, 2, 3 };
+; f(array);
+; return array[0];
+; }
+;
+; RUN: opt %s -O2 -S -o - | FileCheck %s
+; Test that we do not lower dbg.declares for arrays.
+;
+; CHECK: define i32 @main
+; CHECK: call void @llvm.dbg.value
+; CHECK: call void @llvm.dbg.value
+; CHECK: call void @llvm.dbg.declare
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@main.array = private unnamed_addr constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16
+
+; Function Attrs: nounwind ssp uwtable
+define void @f(i32* %p) #0 {
+entry:
+ %p.addr = alloca i32*, align 8
+ store i32* %p, i32** %p.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i32** %p.addr}, metadata !19), !dbg !20
+ %0 = load i32** %p.addr, align 8, !dbg !21
+ %arrayidx = getelementptr inbounds i32* %0, i64 0, !dbg !21
+ store i32 42, i32* %arrayidx, align 4, !dbg !21
+ ret void, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %array = alloca [4 x i32], align 16
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !23), !dbg !24
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !25), !dbg !24
+ call void @llvm.dbg.declare(metadata !{[4 x i32]* %array}, metadata !26), !dbg !30
+ %0 = bitcast [4 x i32]* %array to i8*, !dbg !30
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([4 x i32]* @main.array to i8*), i64 16, i32 16, i1 false), !dbg !30
+ %arraydecay = getelementptr inbounds [4 x i32]* %array, i32 0, i32 0, !dbg !31
+ call void @f(i32* %arraydecay), !dbg !31
+ %arrayidx = getelementptr inbounds [4 x i32]* %array, i32 0, i64 0, !dbg !32
+ %1 = load i32* %arrayidx, align 4, !dbg !32
+ ret i32 %1, !dbg !32
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [array.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"array.c", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !10}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [array.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !9, metadata !9, metadata !13}
+!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!15 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!18 = metadata !{metadata !"clang version 3.5.0 "}
+!19 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!20 = metadata !{i32 1, i32 0, metadata !4, null}
+!21 = metadata !{i32 2, i32 0, metadata !4, null}
+!22 = metadata !{i32 3, i32 0, metadata !4, null}
+!23 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !5, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 5]
+!24 = metadata !{i32 5, i32 0, metadata !10, null}
+!25 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !5, i32 33554437, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 5]
+!26 = metadata !{i32 786688, metadata !10, metadata !"array", metadata !5, i32 6, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [array] [line 6]
+!27 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !9, metadata !28, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from int]
+!28 = metadata !{metadata !29}
+!29 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3]
+!30 = metadata !{i32 6, i32 0, metadata !10, null}
+!31 = metadata !{i32 7, i32 0, metadata !10, null}
+!32 = metadata !{i32 8, i32 0, metadata !10, null} ; [ DW_TAG_imported_declaration ]
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 9f4c391..31b4fa9 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -4,15 +4,15 @@
; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DWARF3
; Checks that we emit debug info for the block variable declare.
-; CHECK: DW_TAG_subprogram [3]
-; CHECK: DW_TAG_variable [5]
-; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "block")
-; CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}})
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_name {{.*}} "block"
+; CHECK: DW_AT_location [DW_FORM_sec_offset]
-; DWARF3: DW_TAG_subprogram [3]
-; DWARF3: DW_TAG_variable [5]
-; DWARF3: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "block")
-; DWARF3: DW_AT_location [DW_FORM_data4] ({{.*}})
+; DWARF3: DW_TAG_subprogram
+; DWARF3: DW_TAG_variable
+; DWARF3: DW_AT_name {{.*}} "block"
+; DWARF3: DW_AT_location [DW_FORM_data4]
%struct.__block_descriptor = type { i64, i64 }
%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
@@ -72,7 +72,7 @@ declare i32 @__objc_personality_v0(...)
!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
-!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 5} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{null, metadata !9}
@@ -93,15 +93,13 @@ declare i32 @__objc_personality_v0(...)
!23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
!24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
!25 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
-!26 = metadata !{metadata !27}
-!27 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ]
+!28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!30 = metadata !{null, metadata !14}
-!31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
+!31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 10} ; [ DW_TAG_subprogram ]
!32 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!33 = metadata !{null, metadata !14, metadata !14}
-!34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
+!34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 10} ; [ DW_TAG_subprogram ]
!35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
!36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
!37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
diff --git a/test/DebugInfo/X86/coff_debug_info_type.ll b/test/DebugInfo/X86/coff_debug_info_type.ll
index e61c807..a0b8ccc 100644
--- a/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -4,7 +4,7 @@
; CHECK: .section .debug_info
; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
-; WIN32: .section .debug$S,"rn"
+; WIN32: .section .debug$S,"rnd"
; generated from:
; clang -g -S -emit-llvm test.c -o test.ll
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 2f5a7d1..40300de 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mtriple=x86_64-linux %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
; test that we add DW_AT_inline even when we only have concrete out of line
; instances.
@@ -8,21 +7,56 @@
; AT_inline.
; CHECK: DW_TAG_class_type
-; CHECK: DW_TAG_class_type
; CHECK: DW_TAG_subprogram
+; CHECK: [[ASSIGN_DECL:0x........]]: DW_TAG_subprogram
+
+; CHECK: DW_TAG_class_type
+; CHECK: [[RELEASE_DECL:0x........]]: DW_TAG_subprogram
; CHECK: [[DTOR_DECL:0x........]]: DW_TAG_subprogram
-; CHECK: [[DTOR_OOL:0x........]]: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]})
+; CHECK: [[D2_ABS:.*]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D2
+; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]}
+; CHECK-NEXT: DW_AT_inline
+; CHECK-NOT: DW_AT
+; CHECK: DW_TAG
+; CHECK: [[D1_ABS:.*]]: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D1
+; CHECK-NEXT: DW_AT_specification {{.*}} {[[DTOR_DECL]]}
; CHECK-NEXT: DW_AT_inline
+; CHECK-NOT: DW_AT
+; CHECK: [[D1_THIS_ABS:.*]]: DW_TAG_formal_parameter
+; CHECK: [[RELEASE:0x........]]: DW_TAG_subprogram
+; CHECK: DW_AT_specification {{.*}} {[[RELEASE_DECL]]}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[ASSIGN:0x........]]}
+; CHECK-NOT: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]}
+; CHECK-NOT: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]}
; and then that a TAG_subprogram refers to it with AT_abstract_origin.
; CHECK: DW_TAG_subprogram
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[DTOR_OOL]]})
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]}
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]}
+
define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
entry:
@@ -58,7 +92,7 @@ declare void @_Z8moz_freePv(i8*)
!15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ]
!16 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!17 = metadata !{null, metadata !10}
-!18 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!18 = metadata !{}
!20 = metadata !{metadata !22}
!22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
diff --git a/test/DebugInfo/X86/cu-ranges.ll b/test/DebugInfo/X86/cu-ranges.ll
index e6dc17e..405a498 100644
--- a/test/DebugInfo/X86/cu-ranges.ll
+++ b/test/DebugInfo/X86/cu-ranges.ll
@@ -1,4 +1,4 @@
-; RUN: llc -split-dwarf=Enable -O0 %s -ffunction-sections -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
+; RUN: llc -split-dwarf=Enable -O0 %s -function-sections -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck --check-prefix=FUNCTION-SECTIONS %s
; RUN: llvm-readobj --relocations %t | FileCheck --check-prefix=FUNCTION-SECTIONS-RELOCS %s
diff --git a/test/DebugInfo/X86/dbg-at-specficiation.ll b/test/DebugInfo/X86/dbg-at-specficiation.ll
index 8003a0f..c765367 100644
--- a/test/DebugInfo/X86/dbg-at-specficiation.ll
+++ b/test/DebugInfo/X86/dbg-at-specficiation.ll
@@ -9,7 +9,7 @@
!llvm.module.flags = !{!12}
!0 = metadata !{i32 720913, metadata !11, i32 12, metadata !"clang version 3.0 (trunk 140253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, i32 0} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{i32 0}
+!2 = metadata !{}
!3 = metadata !{metadata !5}
!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, [10 x i32]* @a, null} ; [ DW_TAG_variable ]
!6 = metadata !{i32 720937, metadata !11} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/X86/dbg-const.ll b/test/DebugInfo/X86/dbg-const.ll
index 12dc154..300c1ee 100644
--- a/test/DebugInfo/X86/dbg-const.ll
+++ b/test/DebugInfo/X86/dbg-const.ll
@@ -13,7 +13,7 @@
target triple = "x86_64-apple-darwin10.0.0"
-;CHECK: ## DW_OP_constu
+;CHECK: ## DW_OP_consts
;CHECK-NEXT: .byte 42
define i32 @foobar() nounwind readonly noinline ssp {
entry:
diff --git a/test/DebugInfo/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll
index 7bf6f4f..b537265 100644
--- a/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -122,6 +122,6 @@ entry:
!47 = metadata !{i32 2, i32 47, metadata !25, null}
!48 = metadata !{i32 2, i32 54, metadata !49, null}
!49 = metadata !{i32 786443, metadata !51, metadata !25, i32 2, i32 52, i32 2} ; [ DW_TAG_lexical_block ]
-!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25}
+!50 = metadata !{metadata !19, metadata !22, metadata !25}
!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"}
!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-declare.ll b/test/DebugInfo/X86/dbg-declare.ll
index 8bb574e..241a5a1 100644
--- a/test/DebugInfo/X86/dbg-declare.ll
+++ b/test/DebugInfo/X86/dbg-declare.ll
@@ -33,15 +33,13 @@ declare void @llvm.stackrestore(i8*) nounwind
!0 = metadata !{i32 786449, metadata !26, i32 12, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !26, metadata !0, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !26, metadata !0, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9, metadata !10}
!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
!11 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!15 = metadata !{i32 5, i32 21, metadata !5, null}
!16 = metadata !{i32 7, i32 13, metadata !17, null}
diff --git a/test/DebugInfo/X86/dbg-large-unsigned-const.ll b/test/DebugInfo/X86/dbg-large-unsigned-const.ll
deleted file mode 100644
index a037f3c..0000000
--- a/test/DebugInfo/X86/dbg-large-unsigned-const.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: llc -filetype=obj %s -o /dev/null
-; Handle large unsigned constant values.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-macosx10.7.0"
-
-define zeroext i1 @_Z3iseRKxS0_(i64* nocapture %LHS, i64* nocapture %RHS) nounwind readonly optsize ssp {
-entry:
- tail call void @llvm.dbg.value(metadata !{i64* %LHS}, i64 0, metadata !7), !dbg !13
- tail call void @llvm.dbg.value(metadata !{i64* %RHS}, i64 0, metadata !11), !dbg !14
- %tmp1 = load i64* %LHS, align 4, !dbg !15
- %tmp3 = load i64* %RHS, align 4, !dbg !15
- %cmp = icmp eq i64 %tmp1, %tmp3, !dbg !15
- ret i1 %cmp, !dbg !15
-}
-
-define zeroext i1 @_Z2fnx(i64 %a) nounwind readnone optsize ssp {
-entry:
- tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20
- tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !12), !dbg !20
- tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !21), !dbg !24
- tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !26), !dbg !27
- %cmp.i = icmp eq i64 %a, 9223372036854775807, !dbg !28
- ret i1 %cmp.i, !dbg !22
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!34}
-!29 = metadata !{metadata !1, metadata !6}
-!30 = metadata !{metadata !7, metadata !11}
-!31 = metadata !{metadata !12}
-
-!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, metadata !33, metadata !33, metadata !29, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !32, null, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !32, null, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [fn]
-!7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!9 = metadata !{i32 786470, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ]
-!10 = metadata !{i32 786468, null, metadata !0, metadata !"long long int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 2, i32 27, metadata !1, null}
-!14 = metadata !{i32 2, i32 49, metadata !1, null}
-!15 = metadata !{i32 3, i32 3, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !32, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
-!20 = metadata !{i32 6, i32 19, metadata !6, null}
-!21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 7, i32 10, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !32, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{i32 2, i32 27, metadata !1, metadata !22}
-!25 = metadata !{i64 9223372036854775807}
-!26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 2, i32 49, metadata !1, metadata !22}
-!28 = metadata !{i32 3, i32 3, metadata !16, metadata !22}
-!32 = metadata !{metadata !"lli.cc", metadata !"/private/tmp"}
-!33 = metadata !{i32 0}
-!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/DebugInfo/X86/dbg-subrange.ll b/test/DebugInfo/X86/dbg-subrange.ll
index f253cac..f8761d0 100644
--- a/test/DebugInfo/X86/dbg-subrange.ll
+++ b/test/DebugInfo/X86/dbg-subrange.ll
@@ -18,12 +18,10 @@ entry:
!0 = metadata !{i32 786449, metadata !21, i32 12, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !21, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @bar, null, null, metadata !9, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
+!5 = metadata !{i32 720942, metadata !21, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
!6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{null}
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!11 = metadata !{metadata !13}
!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ]
!14 = metadata !{i32 720897, null, null, null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 34359738368, align 8, offset 0] [from char]
diff --git a/test/DebugInfo/X86/dbg-value-const-byref.ll b/test/DebugInfo/X86/dbg-value-const-byref.ll
index 3d1e87d..baba0cd 100644
--- a/test/DebugInfo/X86/dbg-value-const-byref.ll
+++ b/test/DebugInfo/X86/dbg-value-const-byref.ll
@@ -14,7 +14,7 @@
; }
;
; Test that we generate valid debug info for optimized code,
-; particularily variables that are described as constants and passed
+; particularly variables that are described as constants and passed
; by reference.
; rdar://problem/14874886
;
@@ -25,18 +25,22 @@
; CHECK: DW_AT_location [DW_FORM_data4] ([[LOC:.*]])
; CHECK: .debug_loc contents:
; CHECK: [[LOC]]:
-; constu 0x00000003
+; consts 0x00000003
; CHECK: Beginning address offset: 0x0000000000000{{.*}}
; CHECK: Ending address offset: [[C1:.*]]
-; CHECK: Location description: 10 03
-; constu 0x00000007
+; CHECK: Location description: 11 03
+; consts 0x00000007
; CHECK: Beginning address offset: [[C1]]
; CHECK: Ending address offset: [[C2:.*]]
-; CHECK: Location description: 10 07
+; CHECK: Location description: 11 07
; rax, piece 0x00000004
; CHECK: Beginning address offset: [[C2]]
; CHECK: Ending address offset: [[R1:.*]]
; CHECK: Location description: 50 93 04
+; rdi+0
+; CHECK: Beginning address offset: [[R1]]
+; CHECK: Ending address offset: [[R2:.*]]
+; CHECK: Location description: 75 00
;
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -51,6 +55,7 @@ entry:
%call1 = call i32 (...)* @f1() #3, !dbg !19
call void @llvm.dbg.value(metadata !{i32 %call1}, i64 0, metadata !10), !dbg !19
store i32 %call1, i32* %i, align 4, !dbg !19, !tbaa !20
+ call void @llvm.dbg.value(metadata !{i32* %i}, i64 0, metadata !10), !dbg !24
call void @f2(i32* %i) #3, !dbg !24
ret i32 0, !dbg !25
}
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index 3db67ff..1922272 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -5,8 +5,24 @@
; RUN: llc -mtriple=x86_64-apple-darwin < %s -filetype=obj -regalloc=basic \
; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=DARWIN %s
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_abstract_origin {{.*}}{[[ABS:.*]]}
+; FIXME: An out-of-line definition preceding an inline usage doesn't properly
+; reference abstract variables.
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name {{.*}} "sp"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name {{.*}} "nums"
+
+; CHECK: [[ABS]]: DW_TAG_subprogram
+; CHECK: DW_AT_name {{.*}} "foo"
+; CHECK: [[ABS_SP:.*]]: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name {{.*}} "sp"
+; CHECK: [[ABS_NUMS:.*]]: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name {{.*}} "nums"
+
;CHECK: DW_TAG_inlined_subroutine
-;CHECK-NEXT: DW_AT_abstract_origin
+;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS]]}
;CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr]
;CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4]
;CHECK-NEXT: DW_AT_call_file
@@ -14,9 +30,10 @@
;CHECK: DW_TAG_formal_parameter
;FIXME: Linux shouldn't drop this parameter either...
-;LINUX-NOT: DW_TAG_formal_parameter
+;DARWIN-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS_SP]]}
;DARWIN: DW_TAG_formal_parameter
-;DARWIN-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000055] = "sp")
+;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS_NUMS]]}
+;CHECK-NOT: DW_TAG_formal_parameter
%struct.S1 = type { float*, i32 }
@@ -62,7 +79,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @foobar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15] [def] [scope 0] [foobar]
!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{null}
-!9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 16777223, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
!10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
!11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S1] [line 1, size 128, align 64, offset 0] [def] [from ]
@@ -71,7 +88,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
!16 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!17 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"nums", i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 33554439, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
!19 = metadata !{i32 786484, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p, null} ; [ DW_TAG_variable ]
!20 = metadata !{i32 7, i32 13, metadata !0, null}
!21 = metadata !{i32 7, i32 21, metadata !0, null}
diff --git a/test/DebugInfo/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll
index a9449c6..9184217 100644
--- a/test/DebugInfo/X86/dbg-value-location.ll
+++ b/test/DebugInfo/X86/dbg-value-location.ll
@@ -71,7 +71,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!18 = metadata !{i32 786443, metadata !26, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ]
!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
!23 = metadata !{i32 19524, i32 1, metadata !18, null}
-!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8}
+!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !16}
!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"}
!27 = metadata !{metadata !"f.i", metadata !"/tmp"}
diff --git a/test/DebugInfo/X86/debug-dead-local-var.ll b/test/DebugInfo/X86/debug-dead-local-var.ll
new file mode 100644
index 0000000..64f0b2a
--- /dev/null
+++ b/test/DebugInfo/X86/debug-dead-local-var.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=x86_64-linux-gnu %s -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Reconstruct this via clang and -O2.
+; static void foo() {
+; struct X { int a; int b; } xyz;
+; }
+
+; int bar() {
+; foo();
+; return 1;
+; }
+
+; Check that we still have the structure type for X even though we're not
+; going to emit a low/high_pc for foo.
+; CHECK: DW_TAG_structure_type
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @bar() #0 {
+entry:
+ ret i32 1, !dbg !21
+}
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !19}
+!llvm.ident = !{!20}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/debug-dead-local-var.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"debug-dead-local-var.c", metadata !"/usr/local/google/home/echristo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !9}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 11, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @bar, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [bar]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/debug-dead-local-var.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !10, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [local] [def] [foo]
+!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{null}
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786688, metadata !9, metadata !"xyz", metadata !5, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xyz] [line 8]
+!14 = metadata !{i32 786451, metadata !1, metadata !9, metadata !"X", i32 8, i64 64, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 8, size 64, align 32, offset 0] [def] [from ]
+!15 = metadata !{metadata !16, metadata !17}
+!16 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"a", i32 8, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 8, size 32, align 32, offset 0] [from int]
+!17 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"b", i32 8, i64 32, i64 32, i64 32, i32 0, metadata !8} ; [ DW_TAG_member ] [b] [line 8, size 32, align 32, offset 32] [from int]
+!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!20 = metadata !{metadata !"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)"}
+!21 = metadata !{i32 13, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
index 9daecee..430c157 100644
--- a/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -5,16 +5,26 @@
; rdar://problem/9279956
; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} )
+; CHECK: [[A:.*]]: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_APPLE_objc_complete_type
+; CHECK-NEXT: DW_AT_name{{.*}}"A"
+
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_object_pointer
+; CHECK-NOT: DW_TAG
; CHECK: DW_AT_name{{.*}}_block_invoke
-; CHECK-NOT: DW_TAG_subprogram
+; CHECK-NOT: {{DW_TAG|NULL}}
; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_name{{.*}}.block_descriptor
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}.block_descriptor
; CHECK-NOT: DW_TAG
; CHECK: DW_AT_location
-; CHECK-NOT: DW_TAG_subprogram
+; CHECK-NOT: {{DW_TAG|NULL}}
; CHECK: DW_TAG_variable
; CHECK-NEXT: DW_AT_name{{.*}}"self"
; CHECK-NOT: DW_TAG
@@ -27,10 +37,7 @@
; 0x91 = DW_OP_fbreg
; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} )
-; CHECK: [[A:.*]]: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_APPLE_objc_complete_type
-; CHECK-NEXT: DW_AT_name{{.*}}"A"
-; CHECK: [[APTR]]: DW_TAG_pointer_type [5]
+; CHECK: [[APTR]]: DW_TAG_pointer_type
; CHECK-NEXT: {[[A]]}
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
index f15f2c1..7d258f9 100644
--- a/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -114,7 +114,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
; PRESENT: DW_TAG_member
; PRESENT-NEXT: DW_AT_name {{.*}} "const_b"
; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x02)
-; PRESENT: DW_AT_const_value {{.*}} (0x4048f5c3)
+; PRESENT: DW_AT_const_value [DW_FORM_udata] (1078523331)
; PRESENT: 0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
; PRESENT-NEXT: DW_AT_name {{.*}} "c"
; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x01)
@@ -164,7 +164,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
; DARWINP: DW_TAG_member
; DARWINP-NEXT: DW_AT_name {{.*}} "const_b"
; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x02)
-; DARWINP: DW_AT_const_value {{.*}} (0x4048f5c3)
+; DARWINP: DW_AT_const_value [DW_FORM_udata] (1078523331)
; DARWINP: 0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
; DARWINP-NEXT: DW_AT_name {{.*}} "c"
; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x01)
diff --git a/test/DebugInfo/X86/debug-loc-offset.ll b/test/DebugInfo/X86/debug-loc-offset.ll
index b10309c..3f4d39d 100644
--- a/test/DebugInfo/X86/debug-loc-offset.ll
+++ b/test/DebugInfo/X86/debug-loc-offset.ll
@@ -37,7 +37,9 @@
; CHECK: DW_AT_high_pc
; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name [DW_FORM_strp]{{.*}}"_Z1a1A"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name [DW_FORM_strp]{{.*}}"_Z1a1A"
+; CHECK-NOT: {{DW_TAG|NULL}}
; CHECK: DW_TAG_formal_parameter
; CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"var"
; CHECK: DW_AT_location [DW_FORM_sec_offset] (0x00000000)
diff --git a/test/DebugInfo/X86/debug-ranges-offset.ll b/test/DebugInfo/X86/debug-ranges-offset.ll
new file mode 100644
index 0000000..365ba17
--- /dev/null
+++ b/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -0,0 +1,241 @@
+; RUN: llc -filetype=obj -mtriple=x86_64-pc-linux-gnu %s -o %t
+; RUN: llvm-readobj --relocations %t | FileCheck %s
+
+; Check that we don't have any relocations in the ranges section -
+; to show that we're producing this as a relative offset to the
+; low_pc for the compile unit.
+; CHECK-NOT: .rela.debug_ranges
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 0, void ()* @__msan_init }]
+@str = private unnamed_addr constant [4 x i8] c"zzz\00"
+@__msan_retval_tls = external thread_local(initialexec) global [8 x i64]
+@__msan_retval_origin_tls = external thread_local(initialexec) global i32
+@__msan_param_tls = external thread_local(initialexec) global [1000 x i64]
+@__msan_param_origin_tls = external thread_local(initialexec) global [1000 x i32]
+@__msan_va_arg_tls = external thread_local(initialexec) global [1000 x i64]
+@__msan_va_arg_overflow_size_tls = external thread_local(initialexec) global i64
+@__msan_origin_tls = external thread_local(initialexec) global i32
+@__executable_start = external hidden global i32
+@_end = external hidden global i32
+
+; Function Attrs: sanitize_memory uwtable
+define void @_Z1fv() #0 {
+entry:
+ %p = alloca i32*, align 8
+ %0 = ptrtoint i32** %p to i64, !dbg !19
+ %1 = and i64 %0, -70368744177672, !dbg !19
+ %2 = inttoptr i64 %1 to i64*, !dbg !19
+ store i64 -1, i64* %2, align 8, !dbg !19
+ store i64 0, i64* getelementptr inbounds ([1000 x i64]* @__msan_param_tls, i64 0, i64 0), align 8, !dbg !19
+ store i64 0, i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !19
+ %call = call i8* @_Znwm(i64 4) #4, !dbg !19
+ %_msret = load i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !19
+ %3 = bitcast i8* %call to i32*, !dbg !19
+ tail call void @llvm.dbg.value(metadata !{i32* %3}, i64 0, metadata !9), !dbg !19
+ %4 = inttoptr i64 %1 to i64*, !dbg !19
+ store i64 %_msret, i64* %4, align 8, !dbg !19
+ store volatile i32* %3, i32** %p, align 8, !dbg !19
+ tail call void @llvm.dbg.value(metadata !{i32** %p}, i64 0, metadata !9), !dbg !19
+ %p.0.p.0. = load volatile i32** %p, align 8, !dbg !20
+ %_msld = load i64* %4, align 8, !dbg !20
+ %_mscmp = icmp eq i64 %_msld, 0, !dbg !20
+ br i1 %_mscmp, label %6, label %5, !dbg !20, !prof !22
+
+; <label>:5 ; preds = %entry
+ call void @__msan_warning_noreturn(), !dbg !20
+ call void asm sideeffect "", ""() #3, !dbg !20
+ unreachable, !dbg !20
+
+; <label>:6 ; preds = %entry
+ %7 = load i32* %p.0.p.0., align 4, !dbg !20, !tbaa !23
+ %8 = ptrtoint i32* %p.0.p.0. to i64, !dbg !20
+ %9 = and i64 %8, -70368744177665, !dbg !20
+ %10 = inttoptr i64 %9 to i32*, !dbg !20
+ %_msld2 = load i32* %10, align 4, !dbg !20
+ %11 = icmp ne i32 %_msld2, 0, !dbg !20
+ %12 = xor i32 %_msld2, -1, !dbg !20
+ %13 = and i32 %7, %12, !dbg !20
+ %14 = icmp eq i32 %13, 0, !dbg !20
+ %_msprop_icmp = and i1 %11, %14, !dbg !20
+ br i1 %_msprop_icmp, label %15, label %16, !dbg !20, !prof !27
+
+; <label>:15 ; preds = %6
+ call void @__msan_warning_noreturn(), !dbg !20
+ call void asm sideeffect "", ""() #3, !dbg !20
+ unreachable, !dbg !20
+
+; <label>:16 ; preds = %6
+ %tobool = icmp eq i32 %7, 0, !dbg !20
+ br i1 %tobool, label %if.end, label %if.then, !dbg !20
+
+if.then: ; preds = %16
+ store i64 0, i64* getelementptr inbounds ([1000 x i64]* @__msan_param_tls, i64 0, i64 0), align 8, !dbg !28
+ store i32 0, i32* bitcast ([8 x i64]* @__msan_retval_tls to i32*), align 8, !dbg !28
+ %puts = call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i64 0, i64 0)), !dbg !28
+ br label %if.end, !dbg !28
+
+if.end: ; preds = %16, %if.then
+ ret void, !dbg !29
+}
+
+; Function Attrs: nobuiltin
+declare i8* @_Znwm(i64) #1
+
+; Function Attrs: sanitize_memory uwtable
+define i32 @main() #0 {
+entry:
+ %p.i = alloca i32*, align 8
+ %0 = ptrtoint i32** %p.i to i64, !dbg !30
+ %1 = and i64 %0, -70368744177672, !dbg !30
+ %2 = inttoptr i64 %1 to i64*, !dbg !30
+ store i64 -1, i64* %2, align 8, !dbg !30
+ %p.i.0..sroa_cast = bitcast i32** %p.i to i8*, !dbg !30
+ call void @llvm.lifetime.start(i64 8, i8* %p.i.0..sroa_cast), !dbg !30
+ store i64 0, i64* getelementptr inbounds ([1000 x i64]* @__msan_param_tls, i64 0, i64 0), align 8, !dbg !30
+ store i64 0, i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !30
+ %call.i = call i8* @_Znwm(i64 4) #4, !dbg !30
+ %_msret = load i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !30
+ %3 = bitcast i8* %call.i to i32*, !dbg !30
+ tail call void @llvm.dbg.value(metadata !{i32* %3}, i64 0, metadata !32), !dbg !30
+ %4 = inttoptr i64 %1 to i64*, !dbg !30
+ store i64 %_msret, i64* %4, align 8, !dbg !30
+ store volatile i32* %3, i32** %p.i, align 8, !dbg !30
+ tail call void @llvm.dbg.value(metadata !{i32** %p.i}, i64 0, metadata !32), !dbg !30
+ %p.i.0.p.0.p.0..i = load volatile i32** %p.i, align 8, !dbg !33
+ %_msld = load i64* %4, align 8, !dbg !33
+ %_mscmp = icmp eq i64 %_msld, 0, !dbg !33
+ br i1 %_mscmp, label %6, label %5, !dbg !33, !prof !22
+
+; <label>:5 ; preds = %entry
+ call void @__msan_warning_noreturn(), !dbg !33
+ call void asm sideeffect "", ""() #3, !dbg !33
+ unreachable, !dbg !33
+
+; <label>:6 ; preds = %entry
+ %7 = load i32* %p.i.0.p.0.p.0..i, align 4, !dbg !33, !tbaa !23
+ %8 = ptrtoint i32* %p.i.0.p.0.p.0..i to i64, !dbg !33
+ %9 = and i64 %8, -70368744177665, !dbg !33
+ %10 = inttoptr i64 %9 to i32*, !dbg !33
+ %_msld2 = load i32* %10, align 4, !dbg !33
+ %11 = icmp ne i32 %_msld2, 0, !dbg !33
+ %12 = xor i32 %_msld2, -1, !dbg !33
+ %13 = and i32 %7, %12, !dbg !33
+ %14 = icmp eq i32 %13, 0, !dbg !33
+ %_msprop_icmp = and i1 %11, %14, !dbg !33
+ br i1 %_msprop_icmp, label %15, label %16, !dbg !33, !prof !27
+
+; <label>:15 ; preds = %6
+ call void @__msan_warning_noreturn(), !dbg !33
+ call void asm sideeffect "", ""() #3, !dbg !33
+ unreachable, !dbg !33
+
+; <label>:16 ; preds = %6
+ %tobool.i = icmp eq i32 %7, 0, !dbg !33
+ br i1 %tobool.i, label %_Z1fv.exit, label %if.then.i, !dbg !33
+
+if.then.i: ; preds = %16
+ store i64 0, i64* getelementptr inbounds ([1000 x i64]* @__msan_param_tls, i64 0, i64 0), align 8, !dbg !34
+ store i32 0, i32* bitcast ([8 x i64]* @__msan_retval_tls to i32*), align 8, !dbg !34
+ %puts.i = call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i64 0, i64 0)), !dbg !34
+ br label %_Z1fv.exit, !dbg !34
+
+_Z1fv.exit: ; preds = %16, %if.then.i
+ call void @llvm.lifetime.end(i64 8, i8* %p.i.0..sroa_cast), !dbg !35
+ store i32 0, i32* bitcast ([8 x i64]* @__msan_retval_tls to i32*), align 8, !dbg !36
+ ret i32 0, !dbg !36
+}
+
+declare void @__msan_init()
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #3
+
+declare void @__msan_warning_noreturn()
+
+declare void @__msan_maybe_warning_1(i8, i32)
+
+declare void @__msan_maybe_store_origin_1(i8, i8*, i32)
+
+declare void @__msan_maybe_warning_2(i16, i32)
+
+declare void @__msan_maybe_store_origin_2(i16, i8*, i32)
+
+declare void @__msan_maybe_warning_4(i32, i32)
+
+declare void @__msan_maybe_store_origin_4(i32, i8*, i32)
+
+declare void @__msan_maybe_warning_8(i64, i32)
+
+declare void @__msan_maybe_store_origin_8(i64, i8*, i32)
+
+declare void @__msan_set_alloca_origin4(i8*, i64, i8*, i64)
+
+declare void @__msan_poison_stack(i8*, i64)
+
+declare i32 @__msan_chain_origin(i32)
+
+declare i8* @__msan_memmove(i8*, i8*, i64)
+
+declare i8* @__msan_memcpy(i8*, i8*, i64)
+
+declare i8* @__msan_memset(i8*, i32, i64)
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
+
+attributes #0 = { sanitize_memory uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { builtin }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !13}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"_Z1fv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z1fv, null, null, metadata !8, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786688, metadata !4, metadata !"p", metadata !5, i32 4, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 4]
+!10 = metadata !{i32 786485, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [main]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !12}
+!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!18 = metadata !{metadata !"clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)"}
+!19 = metadata !{i32 4, i32 0, metadata !4, null}
+!20 = metadata !{i32 5, i32 0, metadata !21, null}
+!21 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.cpp]
+!22 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
+!23 = metadata !{metadata !24, metadata !24, i64 0}
+!24 = metadata !{metadata !"int", metadata !25, i64 0}
+!25 = metadata !{metadata !"omnipotent char", metadata !26, i64 0}
+!26 = metadata !{metadata !"Simple C/C++ TBAA"}
+!27 = metadata !{metadata !"branch_weights", i32 1, i32 1000}
+!28 = metadata !{i32 6, i32 0, metadata !21, null}
+!29 = metadata !{i32 7, i32 0, metadata !4, null}
+!30 = metadata !{i32 4, i32 0, metadata !4, metadata !31}
+!31 = metadata !{i32 10, i32 0, metadata !13, null}
+!32 = metadata !{i32 786688, metadata !4, metadata !"p", metadata !5, i32 4, metadata !10, i32 0, metadata !31} ; [ DW_TAG_auto_variable ] [p] [line 4]
+!33 = metadata !{i32 5, i32 0, metadata !21, metadata !31}
+!34 = metadata !{i32 6, i32 0, metadata !21, metadata !31}
+!35 = metadata !{i32 7, i32 0, metadata !4, metadata !31}
+!36 = metadata !{i32 11, i32 0, metadata !13, null}
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index 7eef2de..176c2af 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -87,13 +87,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
!25 = metadata !{metadata !26}
!26 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!27 = metadata !{metadata !28}
-!28 = metadata !{metadata !29}
+!27 = metadata !{metadata !29}
!29 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777228, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 12]
!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
!31 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
-!32 = metadata !{metadata !33}
-!33 = metadata !{metadata !34, metadata !35}
+!32 = metadata !{metadata !34, metadata !35}
!34 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777235, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 19]
!35 = metadata !{i32 786689, metadata !31, metadata !"d", metadata !6, i32 33554451, metadata !23, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [d] [line 19]
!36 = metadata !{i32 12, i32 0, metadata !5, null}
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index f5c37df..974bd73 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -28,11 +28,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
; An empty array should not have an AT_upper_bound attribute. But an array of 1
; should.
-; CHECK: DW_TAG_base_type
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "int")
-; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05)
-; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04)
-
; int foo::b[1]:
; CHECK: DW_TAG_structure_type
; CHECK: DW_AT_name{{.*}}"foo"
@@ -41,6 +36,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "b")
; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
+; CHECK: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[{{.*}}] = "int")
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05)
+; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04)
+
; int[1]:
; CHECK: DW_TAG_array_type [{{.*}}] *
; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 6dd15af..165074e 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -32,13 +32,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !19, metadata !6, metadata !"callee", metadata !"callee", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !19, metadata !6, metadata !"callee", metadata !"callee", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9, metadata !9}
!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!13 = metadata !{i32 5, i32 5, metadata !5, null}
!14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll
index 416e7cb..057039c 100644
--- a/test/DebugInfo/X86/fission-ranges.ll
+++ b/test/DebugInfo/X86/fission-ranges.ll
@@ -26,7 +26,7 @@
; CHECK: [[A]]: Beginning address index: 2
; CHECK-NEXT: Length: 199
-; CHECK-NEXT: Location description: 10 00
+; CHECK-NEXT: Location description: 11 00
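+; (Decoding the bytes, assuming the standard DWARF opcode numbering:
+; 11 00 is DW_OP_consts 0, where the previously expected 10 00 was
+; DW_OP_constu 0.)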
; CHECK-NEXT: {{^$}}
; CHECK-NEXT: Beginning address index: 3
; CHECK-NEXT: Length: 23
diff --git a/test/DebugInfo/X86/formal_parameter.ll b/test/DebugInfo/X86/formal_parameter.ll
new file mode 100644
index 0000000..3445f46
--- /dev/null
+++ b/test/DebugInfo/X86/formal_parameter.ll
@@ -0,0 +1,83 @@
+; ModuleID = 'formal_parameter.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+;
+; From (clang -g -c -O1):
+;
+; int lookup(int* map);
+; int verify(int val);
+; void foo(int map)
+; {
+; lookup(&map);
+; if (!verify(map)) { }
+; }
+;
+; RUN: opt %s -O2 -S -o %t
+; RUN: cat %t | FileCheck --check-prefix=LOWERING %s
+; RUN: llc -filetype=obj %t -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; Test that we emit only one DW_AT_formal_parameter "map" for this function.
+; rdar://problem/14874886
+;
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name {{.*}}map
+; CHECK-NOT: DW_AT_name {{.*}}map
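+;
+; A rough sketch of the dump shape the checks above pin down (offsets
+; and forms elided, purely illustrative):
+;
+;   DW_TAG_subprogram
+;     DW_AT_name ("foo")
+;     DW_TAG_formal_parameter
+;       DW_AT_name ("map")
+;
+; with no second parameter DIE named "map" anywhere later in the dump.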
+
+; Function Attrs: nounwind ssp uwtable
+define void @foo(i32 %map) #0 {
+entry:
+ %map.addr = alloca i32, align 4
+ store i32 %map, i32* %map.addr, align 4, !tbaa !15
+ call void @llvm.dbg.declare(metadata !{i32* %map.addr}, metadata !10), !dbg !14
+ %call = call i32 (i32*, ...)* bitcast (i32 (...)* @lookup to i32 (i32*, ...)*)(i32* %map.addr) #3, !dbg !19
+ ; Ensure that all dbg intrinsics have the same scope after
+ ; LowerDbgDeclare is finished with them.
+ ;
+ ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC:.*]]
+ ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]]
+ ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]]
+  %0 = load i32* %map.addr, align 4, !dbg !20, !tbaa !15
+ %call1 = call i32 (i32, ...)* bitcast (i32 (...)* @verify to i32 (i32, ...)*)(i32 %0) #3, !dbg !20
+ ret void, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare i32 @lookup(...)
+
+declare i32 @verify(...)
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [formal_parameter.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"formal_parameter.c", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32)* @foo, null, null, metadata !9, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [foo]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [formal_parameter.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786689, metadata !4, metadata !"map", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [map] [line 1]
+!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!13 = metadata !{metadata !"clang version 3.5.0 "}
+!14 = metadata !{i32 1, i32 0, metadata !4, null}
+!15 = metadata !{metadata !16, metadata !16, i64 0}
+!16 = metadata !{metadata !"int", metadata !17, i64 0}
+!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
+!18 = metadata !{metadata !"Simple C/C++ TBAA"}
+!19 = metadata !{i32 3, i32 0, metadata !4, null}
+!20 = metadata !{i32 4, i32 0, metadata !21, null}
+!21 = metadata !{i32 786443, metadata !1, metadata !4, i32 4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [formal_parameter.c]
+!22 = metadata !{i32 5, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index f4001e3..4e35dbe 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -86,8 +86,10 @@
; CHECK-NEXT: DW_AT_name {{.*}} "D"
; CHECK: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name
-; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_function"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "global_namespace_function"
; CHECK: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable
; CHECK-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]]
@@ -96,14 +98,18 @@
; CHECK-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]]
; CHECK: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
; CHECK: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
; CHECK: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name
-; CHECK-NEXT: DW_AT_name {{.*}} "global_function"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "global_function"
; CHECK-LABEL: .debug_gnu_pubnames contents:
; CHECK-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = [[UNIT_SIZE]]
@@ -166,8 +172,10 @@
; DWARF3-NEXT: DW_AT_name {{.*}} "D"
; DWARF3: [[GLOB_NS_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NEXT: DW_AT_MIPS_linkage_name
-; DWARF3-NEXT: DW_AT_name {{.*}} "global_namespace_function"
+; DWARF3-NOT: DW_TAG
+; DWARF3: DW_AT_MIPS_linkage_name
+; DWARF3-NOT: DW_TAG
+; DWARF3: DW_AT_name {{.*}} "global_namespace_function"
; DWARF3: [[GLOB_NS_VAR:[0-9a-f]+]]: DW_TAG_variable
; DWARF3-NEXT: DW_AT_specification {{.*}}[[GLOB_NS_VAR_DECL]]
@@ -176,14 +184,18 @@
; DWARF3-NEXT: DW_AT_specification {{.*}}[[D_VAR_DECL]]
; DWARF3: [[MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NEXT: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
+; DWARF3-NOT: DW_TAG
+; DWARF3: DW_AT_specification {{.*}}[[MEM_FUNC_DECL]]
; DWARF3: [[STATIC_MEM_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NEXT: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
+; DWARF3-NOT: DW_TAG
+; DWARF3: DW_AT_specification {{.*}}[[STATIC_MEM_FUNC_DECL]]
; DWARF3: [[GLOBAL_FUNC:[0-9a-f]+]]: DW_TAG_subprogram
-; DWARF3-NEXT: DW_AT_MIPS_linkage_name
-; DWARF3-NEXT: DW_AT_name {{.*}} "global_function"
+; DWARF3-NOT: DW_TAG
+; DWARF3: DW_AT_MIPS_linkage_name
+; DWARF3-NOT: DW_TAG
+; DWARF3: DW_AT_name {{.*}} "global_function"
; DWARF3-LABEL: .debug_gnu_pubnames contents:
; DWARF3-NEXT: length = 0x000000e7 version = 0x0002 unit_offset = 0x00000000 unit_size = [[UNIT_SIZE]]
diff --git a/test/DebugInfo/X86/inline-member-function.ll b/test/DebugInfo/X86/inline-member-function.ll
new file mode 100644
index 0000000..3dc6043
--- /dev/null
+++ b/test/DebugInfo/X86/inline-member-function.ll
@@ -0,0 +1,95 @@
+; REQUIRES: object-emission
+
+; RUN: llc -mtriple=x86_64-linux -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; From source:
+; struct foo {
+; int __attribute__((always_inline)) func(int x) { return x + 2; }
+; };
+
+; int i;
+
+; int main() {
+; return foo().func(i);
+; }
+
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_TAG_subprogram
+
+; Make sure we do emit DW_AT_object_pointer on the abstract definition.
+; CHECK: [[ABSTRACT_ORIGIN:.*]]: DW_TAG_subprogram
+; CHECK-NOT: NULL
+; CHECK-NOT: TAG
+; CHECK: DW_AT_object_pointer
+
+; Ensure we omit DW_AT_object_pointer on inlined subroutines.
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABSTRACT_ORIGIN]]}
+; CHECK-NOT: NULL
+; CHECK-NOT: DW_AT_object_pointer
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_AT_artificial
+; CHECK: DW_TAG
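+;
+; Illustrative DIE shape these checks are guarding (hand-drawn, not
+; actual dwarfdump output):
+;
+;   DW_TAG_subprogram              abstract definition of func
+;     DW_AT_object_pointer         -> its artificial "this" parameter
+;   DW_TAG_inlined_subroutine
+;     DW_AT_abstract_origin        -> the subprogram above
+;     DW_TAG_formal_parameter      concrete "this"; the DW_AT_artificial
+;                                  flag stays on the abstract copy only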
+
+%struct.foo = type { i8 }
+
+@i = global i32 0, align 4
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+ %this.addr.i = alloca %struct.foo*, align 8
+ %x.addr.i = alloca i32, align 4
+ %retval = alloca i32, align 4
+ %tmp = alloca %struct.foo, align 1
+ store i32 0, i32* %retval
+ %0 = load i32* @i, align 4, !dbg !23
+ store %struct.foo* %tmp, %struct.foo** %this.addr.i, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr.i}, metadata !24), !dbg !26
+ store i32 %0, i32* %x.addr.i, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr.i}, metadata !27), !dbg !28
+ %this1.i = load %struct.foo** %this.addr.i
+ %1 = load i32* %x.addr.i, align 4, !dbg !28
+ %add.i = add nsw i32 %1, 2, !dbg !28
+ ret i32 %add.i, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20, !21}
+!llvm.ident = !{!22}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !12, metadata !18, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"inline.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 786478, metadata !1, metadata !"_ZTS3foo", metadata !"func", metadata !"func", metadata !"_ZN3foo4funcEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !11, i32 2} ; [ DW_TAG_subprogram ] [line 2] [func]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10, metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
+!11 = metadata !{i32 786468}
+!12 = metadata !{metadata !13, metadata !17}
+!13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!14 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline.cpp]
+!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !9}
+!17 = metadata !{i32 786478, metadata !1, metadata !"_ZTS3foo", metadata !"func", metadata !"func", metadata !"_ZN3foo4funcEi", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, metadata !6, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"", metadata !14, i32 5, metadata !9, i32 0, i32 1, i32* @i, null} ; [ DW_TAG_variable ] [i] [line 5] [def]
+!20 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!22 = metadata !{metadata !"clang version 3.5.0 "}
+!23 = metadata !{i32 8, i32 0, metadata !13, null} ; [ DW_TAG_imported_declaration ]
+!24 = metadata !{i32 786689, metadata !17, metadata !"this", null, i32 16777216, metadata !25, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
+!26 = metadata !{i32 0, i32 0, metadata !17, metadata !23}
+!27 = metadata !{i32 786689, metadata !17, metadata !"x", metadata !14, i32 33554434, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 2]
+!28 = metadata !{i32 2, i32 0, metadata !17, metadata !23}
diff --git a/test/DebugInfo/X86/inline-seldag-test.ll b/test/DebugInfo/X86/inline-seldag-test.ll
new file mode 100644
index 0000000..615f03a
--- /dev/null
+++ b/test/DebugInfo/X86/inline-seldag-test.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=x86_64-linux-gnu -fast-isel=false -filetype=obj < %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux-gnu -fast-isel=false -filetype=asm < %s -o - | FileCheck --check-prefix=ASM %s
+
+; Generated from:
+; clang-tot -c -S -emit-llvm -g inline-seldag-test.c
+; inline int __attribute__((always_inline)) f(int y) {
+; return y ? 4 : 7;
+; }
+; void func() {
+; volatile int x;
+; x = f(x);
+; }
+
+; CHECK: [[F:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "f"
+
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[F]]}
+
+
+; Make sure the condition test is attributed to the inline function, not the
+; location of the test's operands within the caller.
+
+; ASM: # inline-seldag-test.c:2:0
+; ASM-NOT: .loc
+; ASM: testl
+
+; Function Attrs: nounwind uwtable
+define void @func() #0 {
+entry:
+ %y.addr.i = alloca i32, align 4
+ %x = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x}, metadata !15), !dbg !17
+ %0 = load volatile i32* %x, align 4, !dbg !18
+ store i32 %0, i32* %y.addr.i, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %y.addr.i}, metadata !19), !dbg !20
+ %1 = load i32* %y.addr.i, align 4, !dbg !21
+ %tobool.i = icmp ne i32 %1, 0, !dbg !21
+ %cond.i = select i1 %tobool.i, i32 4, i32 7, !dbg !21
+ store volatile i32 %cond.i, i32* %x, align 4, !dbg !18
+ ret void, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline-seldag-test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"inline-seldag-test.c", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @func, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [func]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline-seldag-test.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{metadata !11, metadata !11}
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!14 = metadata !{metadata !"clang version 3.5.0 "}
+!15 = metadata !{i32 786688, metadata !4, metadata !"x", metadata !5, i32 5, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 5]
+!16 = metadata !{i32 786485, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from int]
+!17 = metadata !{i32 5, i32 0, metadata !4, null}
+!18 = metadata !{i32 6, i32 7, metadata !4, null}
+!19 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 16777217, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [y] [line 1]
+!20 = metadata !{i32 1, i32 0, metadata !8, metadata !18}
+!21 = metadata !{i32 2, i32 0, metadata !8, metadata !18}
+!22 = metadata !{i32 7, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/X86/instcombine-instrinsics.ll b/test/DebugInfo/X86/instcombine-instrinsics.ll
index 41dd09f..2fd7ee3 100644
--- a/test/DebugInfo/X86/instcombine-instrinsics.ll
+++ b/test/DebugInfo/X86/instcombine-instrinsics.ll
@@ -1,102 +1,79 @@
-; RUN: opt < %s -O2 -S | FileCheck %s
+; RUN: opt %s -O2 -S -o - | FileCheck %s
; Verify that we emit the same intrinsic at most once.
-; CHECK: call void @llvm.dbg.value(metadata !{%struct.i14** %i14}
-; CHECK-NOT: call void @llvm.dbg.value(metadata !{%struct.i14** %i14}
+; rdar://problem/13056109
+;
+; CHECK: call void @llvm.dbg.value(metadata !{%struct.i14** %p}
+; CHECK-NOT: call void @llvm.dbg.value(metadata !{%struct.i14** %p}
+; CHECK-NEXT: call i32 @foo
; CHECK: ret
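+;
+; A hedged sketch of the optimized IR these checks expect (value names
+; illustrative):
+;
+;   call void @llvm.dbg.value(metadata !{%struct.i14** %p}, ...)
+;   %1 = call i32 @foo(%struct.i14** %p)
+;   ...
+;   ret void
+;
+; i.e. a single dbg.value for %p, immediately followed by the first
+; call to foo.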
+;
+;
+; typedef struct {
+; long i;
+; } i14;
+;
+; int foo(i14**);
+;
+; void init() {
+; i14* p = 0;
+; foo(&p);
+; p->i |= 4;
+; foo(&p);
+; }
+;
+; ModuleID = 'instcombine_intrinsics.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
-;*** IR Dump After Dead Argument Elimination ***
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+%struct.i14 = type { i64 }
-%struct.i3 = type { i32 }
-%struct.i14 = type { i32 }
-%struct.i24 = type opaque
-
-define %struct.i3* @barz(i64 %i9) nounwind {
-entry:
- br label %while.cond
-
-while.cond: ; preds = %while.body, %entry
- br label %while.body
-
-while.body: ; preds = %while.cond
- br label %while.cond
-}
-
-declare void @llvm.dbg.declare(metadata, metadata)
-
-define void @init() nounwind {
-entry:
- %i14 = alloca %struct.i14*, align 8
- call void @llvm.dbg.declare(metadata !{%struct.i14** %i14}, metadata !25)
- store %struct.i14* null, %struct.i14** %i14, align 8
- %call = call i32 @foo(i8* bitcast (void ()* @bar to i8*), %struct.i14** %i14)
- %0 = load %struct.i14** %i14, align 8
- %i16 = getelementptr inbounds %struct.i14* %0, i32 0, i32 0
- %1 = load i32* %i16, align 4
- %or = or i32 %1, 4
- store i32 %or, i32* %i16, align 4
- %call4 = call i32 @foo(i8* bitcast (void ()* @baz to i8*), %struct.i14** %i14)
- ret void
+; Function Attrs: nounwind ssp uwtable
+define void @init() #0 {
+ %p = alloca %struct.i14*, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.i14** %p}, metadata !11), !dbg !18
+ store %struct.i14* null, %struct.i14** %p, align 8, !dbg !18
+ %1 = call i32 @foo(%struct.i14** %p), !dbg !19
+ %2 = load %struct.i14** %p, align 8, !dbg !20
+ %3 = getelementptr inbounds %struct.i14* %2, i32 0, i32 0, !dbg !20
+ %4 = load i64* %3, align 8, !dbg !20
+ %5 = or i64 %4, 4, !dbg !20
+ store i64 %5, i64* %3, align 8, !dbg !20
+ %6 = call i32 @foo(%struct.i14** %p), !dbg !21
+ ret void, !dbg !22
}
-declare i32 @foo(i8*, %struct.i14**) nounwind
-
-define internal void @bar() nounwind {
-entry:
- %i9 = alloca i64, align 8
- store i64 0, i64* %i9, align 8
- %call = call i32 @put(i64 0, i64* %i9, i64 0, %struct.i24* null)
- ret void
-}
-
-define internal void @baz() nounwind {
-entry:
- ret void
-}
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
-declare i32 @put(i64, i64*, i64, %struct.i24*) nounwind readnone
+declare i32 @foo(%struct.i14**)
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!73}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !48, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !"i1", metadata !""}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !21, metadata !33, metadata !47}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i2", metadata !"i2", metadata !"", i32 31, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %struct.i3* (i64)* @barz, null, null, metadata !16, i32 32} ; [ DW_TAG_subprogram ] [line 31] [scope 32]
-!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [instcombine_intrinsics.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"instcombine_intrinsics.c", metadata !""}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"init", metadata !"init", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @init, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [init]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [instcombine_intrinsics.c]
!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !13}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i3]
-!9 = metadata !{i32 786451, metadata !1, null, metadata !"i3", i32 25, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [i3] [line 25, size 32, align 32, offset 0] [def] [from ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"i4", i32 26, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [line 26, size 32, align 32, offset 0] [from i5]
-!12 = metadata !{i32 786468, null, null, metadata !"i5", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
-!13 = metadata !{i32 786454, metadata !1, null, metadata !"i6", i32 5, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ] [line 5, size 0, align 0, offset 0] [from i7]
-!14 = metadata !{i32 786454, metadata !1, null, metadata !"i7", i32 2, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [line 2, size 0, align 0, offset 0] [from i8]
-!15 = metadata !{i32 786468, null, null, metadata !"i8", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!16 = metadata !{}
-!21 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i13", metadata !"i13", metadata !"", i32 42, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @init, null, null, metadata !24, i32 43} ; [ DW_TAG_subprogram ] [line 42] [scope 43]
-!22 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{null}
-!24 = metadata !{metadata !25}
-!25 = metadata !{i32 786688, metadata !21, metadata !"i14", metadata !5, i32 45, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [line 45]
-!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14]
-!28 = metadata !{i32 786451, metadata !1, null, metadata !"i14", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [i14] [line 16, size 32, align 32, offset 0] [def] [from ]
-!29 = metadata !{metadata !30}
-!30 = metadata !{i32 786445, metadata !1, metadata !28, metadata !"i16", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !31} ; [ DW_TAG_member ] [line 17, size 32, align 32, offset 0] [from i17]
-!31 = metadata !{i32 786454, metadata !1, null, metadata !"i17", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ] [line 7, size 0, align 0, offset 0] [from int]
-!32 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!33 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i18", metadata !"i18", metadata !"", i32 54, metadata !22, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @bar, null, null, metadata !34, i32 55} ; [ DW_TAG_subprogram ] [line 54] [scope 55]
-!34 = metadata !{null}
-!47 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i29", metadata !"i29", metadata !"", i32 53, metadata !22, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @baz, null, null, metadata !2, i32 53} ; [ DW_TAG_subprogram ] [line 53]
-!48 = metadata !{metadata !49}
-!49 = metadata !{i32 786484, i32 0, metadata !21, metadata !"i30", metadata !"i30", metadata !"", metadata !5, i32 44, metadata !50, i32 1, i32 1, null, null}
-!50 = metadata !{i32 786454, metadata !1, null, metadata !"i31", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ] [line 6, size 0, align 0, offset 0] [from int]
-!52 = metadata !{i64 0}
-!55 = metadata !{%struct.i3* null}
-!72 = metadata !{%struct.i24* null}
-!73 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5.0 "}
+!11 = metadata !{i32 786688, metadata !4, metadata !"p", metadata !5, i32 8, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 8]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14]
+!13 = metadata !{i32 786454, metadata !1, null, metadata !"i14", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ] [i14] [line 3, size 0, align 0, offset 0] [from ]
+!14 = metadata !{i32 786451, metadata !1, null, metadata !"", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 1, size 64, align 64, offset 0] [def] [from ]
+!15 = metadata !{metadata !16}
+!16 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"i", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ] [i] [line 2, size 64, align 64, offset 0] [from long int]
+!17 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!18 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!19 = metadata !{i32 9, i32 0, metadata !4, null}
+!20 = metadata !{i32 10, i32 0, metadata !4, null}
+!21 = metadata !{i32 11, i32 0, metadata !4, null}
+!22 = metadata !{i32 12, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index a89869d..2b1647b 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -30,7 +30,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ]
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, null, i32 5} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9, metadata !10, metadata !9}
@@ -38,11 +38,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, null, i32 0} ; [ DW_TAG_subprogram ]
!18 = metadata !{metadata !20}
!20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
!21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 19032fa..11c409c 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -3,11 +3,14 @@
; test that the DW_AT_specification is a back edge in the file.
+; Skip the definition of zed(foo*)
; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "zed")
+; CHECK: DW_TAG_class_type
+; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv"
; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_specification [DW_FORM_ref4] (cu + {{.*}} => {[[BACK:0x[0-9a-f]*]]})
-; CHECK: [[BACK]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}} {[[BAR_DECL]]}
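+; Illustrative DIE ordering being asserted (hand-drawn, offsets omitted):
+;   DW_TAG_subprogram                    zed definition (skipped above)
+;   DW_TAG_class_type                    foo
+;     [[BAR_DECL]]: DW_TAG_subprogram    bar declaration
+;   DW_TAG_subprogram                    bar definition
+;     DW_AT_specification -> [[BAR_DECL]], i.e. a back edge in the file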
%struct.foo = type { i8 }
@@ -38,7 +41,7 @@ entry:
!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
!1 = metadata !{}
!3 = metadata !{metadata !5, metadata !20}
-!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !21, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [zed]
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, null, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [zed]
!6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{null, metadata !9}
@@ -53,9 +56,7 @@ entry:
!17 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!18 = metadata !{metadata !19}
!19 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
!23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!24 = metadata !{i32 4, i32 15, metadata !5, null}
!25 = metadata !{i32 4, i32 20, metadata !26, null}
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index 6dea4a0..117e426 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -79,7 +79,7 @@ entry:
!llvm.module.flags = !{!162}
!0 = metadata !{i32 786449, metadata !161, i32 4, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 0}
+!1 = metadata !{}
!3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
!5 = metadata !{i32 786478, metadata !6, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/X86/sret.ll b/test/DebugInfo/X86/sret.ll
index c43b045..fed4334 100644
--- a/test/DebugInfo/X86/sret.ll
+++ b/test/DebugInfo/X86/sret.ll
@@ -3,8 +3,8 @@
; Based on the debuginfo-tests/sret.cpp code.
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0xc68148e4333befda)
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0xc68148e4333befda)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x72aabf538392d298)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x72aabf538392d298)
%class.A = type { i32 (...)**, i32 }
%class.B = type { i8 }
@@ -277,40 +277,33 @@ attributes #7 = { builtin nounwind }
!11 = metadata !{metadata !12}
!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
!13 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1A", metadata !"m_int", i32 13, i64 32, i64 32, i64 64, i32 2, metadata !12} ; [ DW_TAG_member ] [m_int] [line 13, size 32, align 32, offset 64] [protected] [from int]
-!14 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 4, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ] [line 4] [A]
+!14 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 4, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 4} ; [ DW_TAG_subprogram ] [line 4] [A]
!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!16 = metadata !{null, metadata !17, metadata !12}
!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!18 = metadata !{i32 786468}
-!19 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 5, metadata !20, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !24, i32 5} ; [ DW_TAG_subprogram ] [line 5] [A]
+!19 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 5, metadata !20, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 5} ; [ DW_TAG_subprogram ] [line 5] [A]
!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!21 = metadata !{null, metadata !17, metadata !22}
!22 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !23} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
!23 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
-!24 = metadata !{i32 786468}
-!25 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"operator=", metadata !"operator=", metadata !"_ZN1AaSERKS_", i32 7, metadata !26, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !28, i32 7} ; [ DW_TAG_subprogram ] [line 7] [operator=]
+!25 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"operator=", metadata !"operator=", metadata !"_ZN1AaSERKS_", i32 7, metadata !26, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [operator=]
!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!27 = metadata !{metadata !22, metadata !17, metadata !22}
-!28 = metadata !{i32 786468}
-!29 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"", i32 8, metadata !30, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, metadata !32, i32 8} ; [ DW_TAG_subprogram ] [line 8] [~A]
+!29 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"", i32 8, metadata !30, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, null, i32 8} ; [ DW_TAG_subprogram ] [line 8] [~A]
!30 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!31 = metadata !{null, metadata !17}
-!32 = metadata !{i32 786468}
-!33 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"get_int", metadata !"get_int", metadata !"_ZN1A7get_intEv", i32 10, metadata !34, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !36, i32 10} ; [ DW_TAG_subprogram ] [line 10] [get_int]
+!33 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"get_int", metadata !"get_int", metadata !"_ZN1A7get_intEv", i32 10, metadata !34, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 10} ; [ DW_TAG_subprogram ] [line 10] [get_int]
!34 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !35, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!35 = metadata !{metadata !12, metadata !17}
-!36 = metadata !{i32 786468}
!37 = metadata !{i32 786434, metadata !1, null, metadata !"B", i32 38, i64 8, i64 8, i32 0, i32 0, null, metadata !38, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 38, size 8, align 8, offset 0] [def] [from ]
!38 = metadata !{metadata !39, metadata !44}
-!39 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"B", metadata !"B", metadata !"", i32 41, metadata !40, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !43, i32 41} ; [ DW_TAG_subprogram ] [line 41] [B]
+!39 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"B", metadata !"B", metadata !"", i32 41, metadata !40, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 41} ; [ DW_TAG_subprogram ] [line 41] [B]
!40 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!41 = metadata !{null, metadata !42}
!42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
-!43 = metadata !{i32 786468}
-!44 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"AInstance", metadata !"AInstance", metadata !"_ZN1B9AInstanceEv", i32 43, metadata !45, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !47, i32 43} ; [ DW_TAG_subprogram ] [line 43] [AInstance]
+!44 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"AInstance", metadata !"AInstance", metadata !"_ZN1B9AInstanceEv", i32 43, metadata !45, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 43} ; [ DW_TAG_subprogram ] [line 43] [AInstance]
!45 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !46, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!46 = metadata !{metadata !4, metadata !42}
-!47 = metadata !{i32 786468}
!48 = metadata !{metadata !49, metadata !50, metadata !51, metadata !52, metadata !53, metadata !54, metadata !61, metadata !62, metadata !63}
!49 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC2Ei", i32 16, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, i32)* @_ZN1AC2Ei, null, metadata !14, metadata !2, i32 18} ; [ DW_TAG_subprogram ] [line 16] [def] [scope 18] [A]
!50 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC2ERKS_", i32 21, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, %class.A*)* @_ZN1AC2ERKS_, null, metadata !19, metadata !2, i32 23} ; [ DW_TAG_subprogram ] [line 21] [def] [scope 23] [A]
diff --git a/test/DebugInfo/X86/type_units_with_addresses.ll b/test/DebugInfo/X86/type_units_with_addresses.ll
new file mode 100644
index 0000000..ff278f6
--- /dev/null
+++ b/test/DebugInfo/X86/type_units_with_addresses.ll
@@ -0,0 +1,151 @@
+; REQUIRES: object-emission
+
+; RUN: llc -split-dwarf=Enable -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu < %s \
+; RUN: | llvm-dwarfdump - | FileCheck %s
+
+; RUN: llc -split-dwarf=Disable -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu < %s \
+; RUN: | llvm-dwarfdump - | FileCheck --check-prefix=SINGLE %s
+
+; Test case built from:
+;int i;
+;
+;template <int *I>
+;struct S1 {};
+;
+;S1<&i> s1;
+;
+;template <int *I>
+;struct S2_1 {};
+;
+;struct S2 {
+; S2_1<&i> s2_1;
+;};
+;
+;S2 s2;
+;
+;template <int *I>
+;struct S3_1 {};
+;
+;struct S3_2 {};
+;
+;struct S3 {
+; S3_1<&i> s3_1;
+; S3_2 s3_2;
+;};
+;
+;S3 s3;
+;
+;struct S4_1 {};
+;
+;template <int *T>
+;struct S4_2 {};
+;
+;struct S4 {
+; S4_1 s4_1;
+; S4_2<&::i> s4_2;
+;};
+;
+;S4 s4;
+
+
+; CHECK: .debug_info.dwo contents:
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S1<&i>"
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S2"
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S2_1<&i>"
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S3"
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S3_1<&i>"
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_declaration
+; CHECK-NEXT: DW_AT_signature
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S4"
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_declaration
+; CHECK-NEXT: DW_AT_signature
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"S4_2<&i>"
+
+; SINGLE: .debug_info contents:
+
+; SINGLE: DW_TAG_structure_type
+; SINGLE-NEXT: DW_AT_declaration
+; SINGLE-NEXT: DW_AT_signature
+
+; SINGLE: DW_TAG_structure_type
+; SINGLE-NEXT: DW_AT_declaration
+; SINGLE-NEXT: DW_AT_signature
+
+; SINGLE: DW_TAG_structure_type
+; SINGLE-NEXT: DW_AT_declaration
+; SINGLE-NEXT: DW_AT_signature
+
+; SINGLE: DW_TAG_structure_type
+; SINGLE-NEXT: DW_AT_declaration
+; SINGLE-NEXT: DW_AT_signature
+
+%struct.S1 = type { i8 }
+%struct.S2 = type { %struct.S2_1 }
+%struct.S2_1 = type { i8 }
+%struct.S3 = type { %struct.S3_1, %struct.S3_2 }
+%struct.S3_1 = type { i8 }
+%struct.S3_2 = type { i8 }
+%struct.S4 = type { %struct.S4_1, %struct.S4_2 }
+%struct.S4_1 = type { i8 }
+%struct.S4_2 = type { i8 }
+
+@i = global i32 0, align 4
+@a = global %struct.S1 zeroinitializer, align 1
+@s2 = global %struct.S2 zeroinitializer, align 1
+@s3 = global %struct.S3 zeroinitializer, align 1
+@s4 = global %struct.S4 zeroinitializer, align 1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!34, !35}
+!llvm.ident = !{!36}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !27, metadata !2, metadata !"tu.dwo", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/tu.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"tu.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !9, metadata !12, metadata !13, metadata !17, metadata !18, metadata !19, metadata !23, metadata !24}
+!4 = metadata !{i32 786451, metadata !1, null, metadata !"S1<&i>", i32 4, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS2S1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S1<&i>] [line 4, size 8, align 8, offset 0] [def] [from ]
+!5 = metadata !{metadata !6}
+!6 = metadata !{i32 786480, null, metadata !"I", metadata !7, i32* @i, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786451, metadata !1, null, metadata !"S2", i32 11, i64 8, i64 8, i32 0, i32 0, null, metadata !10, i32 0, null, null, metadata !"_ZTS2S2"} ; [ DW_TAG_structure_type ] [S2] [line 11, size 8, align 8, offset 0] [def] [from ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S2", metadata !"s2_1", i32 12, i64 8, i64 8, i64 0, i32 0, metadata !"_ZTS4S2_1IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s2_1] [line 12, size 8, align 8, offset 0] [from _ZTS4S2_1IXadL_Z1iEEE]
+!12 = metadata !{i32 786451, metadata !1, null, metadata !"S2_1<&i>", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS4S2_1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S2_1<&i>] [line 9, size 8, align 8, offset 0] [def] [from ]
+!13 = metadata !{i32 786451, metadata !1, null, metadata !"S3", i32 22, i64 16, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null, metadata !"_ZTS2S3"} ; [ DW_TAG_structure_type ] [S3] [line 22, size 16, align 8, offset 0] [def] [from ]
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S3", metadata !"s3_1", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !"_ZTS4S3_1IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s3_1] [line 23, size 8, align 8, offset 0] [from _ZTS4S3_1IXadL_Z1iEEE]
+!16 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S3", metadata !"s3_2", i32 24, i64 8, i64 8, i64 8, i32 0, metadata !"_ZTS4S3_2"} ; [ DW_TAG_member ] [s3_2] [line 24, size 8, align 8, offset 8] [from _ZTS4S3_2]
+!17 = metadata !{i32 786451, metadata !1, null, metadata !"S3_1<&i>", i32 18, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS4S3_1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S3_1<&i>] [line 18, size 8, align 8, offset 0] [def] [from ]
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"S3_2", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS4S3_2"} ; [ DW_TAG_structure_type ] [S3_2] [line 20, size 8, align 8, offset 0] [def] [from ]
+!19 = metadata !{i32 786451, metadata !1, null, metadata !"S4", i32 34, i64 16, i64 8, i32 0, i32 0, null, metadata !20, i32 0, null, null, metadata !"_ZTS2S4"} ; [ DW_TAG_structure_type ] [S4] [line 34, size 16, align 8, offset 0] [def] [from ]
+!20 = metadata !{metadata !21, metadata !22}
+!21 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S4", metadata !"s4_1", i32 35, i64 8, i64 8, i64 0, i32 0, metadata !"_ZTS4S4_1"} ; [ DW_TAG_member ] [s4_1] [line 35, size 8, align 8, offset 0] [from _ZTS4S4_1]
+!22 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S4", metadata !"s4_2", i32 36, i64 8, i64 8, i64 8, i32 0, metadata !"_ZTS4S4_2IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s4_2] [line 36, size 8, align 8, offset 8] [from _ZTS4S4_2IXadL_Z1iEEE]
+!23 = metadata !{i32 786451, metadata !1, null, metadata !"S4_1", i32 29, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS4S4_1"} ; [ DW_TAG_structure_type ] [S4_1] [line 29, size 8, align 8, offset 0] [def] [from ]
+!24 = metadata !{i32 786451, metadata !1, null, metadata !"S4_2<&i>", i32 32, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !25, metadata !"_ZTS4S4_2IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S4_2<&i>] [line 32, size 8, align 8, offset 0] [def] [from ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 786480, null, metadata !"T", metadata !7, i32* @i, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!27 = metadata !{metadata !28, metadata !30, metadata !31, metadata !32, metadata !33}
+!28 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"", metadata !29, i32 1, metadata !8, i32 0, i32 1, i32* @i, null} ; [ DW_TAG_variable ] [i] [line 1] [def]
+!29 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/tu.cpp]
+!30 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !29, i32 6, metadata !"_ZTS2S1IXadL_Z1iEEE", i32 0, i32 1, %struct.S1* @a, null} ; [ DW_TAG_variable ] [a] [line 6] [def]
+!31 = metadata !{i32 786484, i32 0, null, metadata !"s2", metadata !"s2", metadata !"", metadata !29, i32 15, metadata !"_ZTS2S2", i32 0, i32 1, %struct.S2* @s2, null} ; [ DW_TAG_variable ] [s2] [line 15] [def]
+!32 = metadata !{i32 786484, i32 0, null, metadata !"s3", metadata !"s3", metadata !"", metadata !29, i32 27, metadata !"_ZTS2S3", i32 0, i32 1, %struct.S3* @s3, null} ; [ DW_TAG_variable ] [s3] [line 27] [def]
+!33 = metadata !{i32 786484, i32 0, null, metadata !"s4", metadata !"s4", metadata !"", metadata !29, i32 39, metadata !"_ZTS2S4", i32 0, i32 1, %struct.S4* @s4, null} ; [ DW_TAG_variable ] [s4] [line 39] [def]
+!34 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!36 = metadata !{metadata !"clang version 3.5.0 "}
diff --git a/test/DebugInfo/constant-pointers.ll b/test/DebugInfo/constant-pointers.ll
new file mode 100644
index 0000000..fdde06d
--- /dev/null
+++ b/test/DebugInfo/constant-pointers.ll
@@ -0,0 +1,51 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Ensure that pointer constants are emitted as unsigned data. Alternatively,
+; these could be signless data (dataN).
+
+; Built with Clang from:
+; template <void *V, void (*F)(), int i>
+; void func() {}
+; template void func<nullptr, nullptr, 42>();
+
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_template_value_parameter
+; CHECK: DW_AT_name {{.*}} "V"
+; CHECK: DW_AT_const_value [DW_FORM_udata] (0)
+; CHECK: DW_TAG_template_value_parameter
+; CHECK: DW_AT_name {{.*}} "F"
+; CHECK: DW_AT_const_value [DW_FORM_udata] (0)
+
+; Function Attrs: nounwind uwtable
+define weak_odr void @_Z4funcILPv0ELPFvvE0ELi42EEvv() #0 {
+entry:
+ ret void, !dbg !18
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/constant-pointers.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"constant-pointers.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func<nullptr, nullptr, 42>", metadata !"func<nullptr, nullptr, 42>", metadata !"_Z4funcILPv0ELPFvvE0ELi42EEvv", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcILPv0ELPFvvE0ELi42EEvv, metadata !8, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func<nullptr, nullptr, 42>]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/constant-pointers.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{metadata !9, metadata !11, metadata !13}
+!9 = metadata !{i32 786480, null, metadata !"V", metadata !10, i8 0, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!11 = metadata !{i32 786480, null, metadata !"F", metadata !12, i8 0, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = metadata !{i32 786480, null, metadata !"i", metadata !14, i32 42, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
+!14 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!16 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!17 = metadata !{metadata !"clang version 3.5.0 "}
+!18 = metadata !{i32 3, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll
new file mode 100644
index 0000000..266a24d
--- /dev/null
+++ b/test/DebugInfo/cross-cu-inlining.ll
@@ -0,0 +1,137 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Built from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+; $ opt -inline ab.bc -o ab-opt.bc
+; $ cat a.cpp
+; extern int i;
+; int func(int);
+; int main() {
+; return func(i);
+; }
+; $ cat b.cpp
+; int __attribute__((always_inline)) func(int x) {
+; return x * 2;
+; }
+
+; Ensure that func inlined into main is described and references the abstract
+; definition in b.cpp's CU.
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_name {{.*}} "a.cpp"
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT:.*]])
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]])
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]])
+
+; Check that the abstract definition is in the 'b.cpp' CU and doesn't contain
+; any concrete information (address range or variable location).
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_name {{.*}} "b.cpp"
+; CHECK: 0x[[ABS_FUNC]]: DW_TAG_subprogram
+; CHECK-NOT: DW_AT_low_pc
+; CHECK: 0x[[ABS_VAR]]: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK-NOT: DW_AT_location
+; CHECK: DW_AT_type [DW_FORM_ref4] {{.*}} {0x[[INT]]}
+; CHECK-NOT: DW_AT_location
+
+; CHECK: 0x[[INT]]: DW_TAG_base_type
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "int"
+
+; Check that the concrete out-of-line definition references the abstract one
+; and provides the address range and variable location.
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_low_pc
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]}
+; CHECK: DW_AT_location
+
+
+@i = external global i32
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+ %x.addr.i = alloca i32, align 4
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @i, align 4, !dbg !19
+ %1 = bitcast i32* %x.addr.i to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %1)
+ store i32 %0, i32* %x.addr.i, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr.i}, metadata !20), !dbg !21
+ %2 = load i32* %x.addr.i, align 4, !dbg !22
+ %mul.i = mul nsw i32 %2, 2, !dbg !22
+ %3 = bitcast i32* %x.addr.i to i8*, !dbg !22
+ call void @llvm.lifetime.end(i64 4, i8* %3), !dbg !22
+ ret i32 %mul.i, !dbg !19
+}
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @_Z4funci(i32 %x) #1 {
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !20), !dbg !23
+ %0 = load i32* %x.addr, align 4, !dbg !24
+ %mul = mul nsw i32 %0, 2, !dbg !24
+ ret i32 %mul, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #3
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0, !9}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18, !18}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !11, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
+!10 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"}
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786478, metadata !10, metadata !13, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!13 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp]
+!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !8, metadata !8}
+!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!17 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!18 = metadata !{metadata !"clang version 3.5.0 "}
+!19 = metadata !{i32 4, i32 0, metadata !4, null}
+!20 = metadata !{i32 786689, metadata !12, metadata !"x", metadata !13, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!21 = metadata !{i32 1, i32 0, metadata !12, metadata !19}
+!22 = metadata !{i32 2, i32 0, metadata !12, metadata !19}
+!23 = metadata !{i32 1, i32 0, metadata !12, null}
+!24 = metadata !{i32 2, i32 0, metadata !12, null}
+
diff --git a/test/DebugInfo/cross-cu-linkonce.ll b/test/DebugInfo/cross-cu-linkonce.ll
new file mode 100644
index 0000000..16a5012
--- /dev/null
+++ b/test/DebugInfo/cross-cu-linkonce.ll
@@ -0,0 +1,74 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Built from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+; $ opt -inline ab.bc -o ab-opt.bc
+; $ cat a.cpp
+; # 1 "func.h"
+; inline int func(int i) {
+; return i * 2;
+; }
+; int (*x)(int) = &func;
+; $ cat b.cpp
+; # 1 "func.h"
+; inline int func(int i) {
+; return i * 2;
+; }
+; int (*y)(int) = &func;
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "func"
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG_subprogram
+
+@x = global i32 (i32)* @_Z4funci, align 8
+@y = global i32 (i32)* @_Z4funci, align 8
+
+; Function Attrs: inlinehint nounwind uwtable
+define linkonce_odr i32 @_Z4funci(i32 %i) #0 {
+ %1 = alloca i32, align 4
+ store i32 %i, i32* %1, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !20), !dbg !21
+ %2 = load i32* %1, align 4, !dbg !22
+ %3 = mul nsw i32 %2, 2, !dbg !22
+ ret i32 %3, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0, !13}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19, !19}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !10, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!5 = metadata !{metadata !"func.h", metadata !"/tmp/dbginfo"}
+!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/tmp/dbginfo/func.h]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !12, i32 0, i32 1, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = metadata !{i32 786449, metadata !14, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
+!14 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"}
+!15 = metadata !{metadata !16}
+!16 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 4, metadata !12, i32 0, i32 1, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def]
+!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!19 = metadata !{metadata !"clang version 3.5.0 "}
+!20 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!21 = metadata !{i32 1, i32 0, metadata !4, null}
+!22 = metadata !{i32 2, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/cu-line-tables.ll b/test/DebugInfo/cu-line-tables.ll
index d404a66..2496f3f 100644
--- a/test/DebugInfo/cu-line-tables.ll
+++ b/test/DebugInfo/cu-line-tables.ll
@@ -23,7 +23,6 @@ define i32 @f(i32 %a) #0 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !12), !dbg !13
%0 = load i32* %a.addr, align 4, !dbg !14
%add = add nsw i32 %0, 4, !dbg !14
ret i32 %add, !dbg !14
@@ -45,12 +44,8 @@ attributes #1 = { nounwind readnone }
!3 = metadata !{metadata !4}
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
!11 = metadata !{metadata !"clang version 3.5 (trunk 197756) (llvm/trunk 197768)"}
-!12 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
-!13 = metadata !{i32 1, i32 0, metadata !4, null}
!14 = metadata !{i32 2, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/debug-info-qualifiers.ll b/test/DebugInfo/debug-info-qualifiers.ll
index 2aea736..b624d38 100644
--- a/test/DebugInfo/debug-info-qualifiers.ll
+++ b/test/DebugInfo/debug-info-qualifiers.ll
@@ -21,8 +21,6 @@
; CHECK-NEXT: DW_AT_rvalue_reference DW_FORM_flag_present
;
; CHECK: DW_TAG_subprogram
-;
-; CHECK: DW_TAG_subprogram
; CHECK-NOT: DW_TAG_subprogram
; CHECK: DW_AT_name {{.*}}"l"
; CHECK-NOT: DW_TAG_subprogram
diff --git a/test/DebugInfo/dwarfdump-inlining.test b/test/DebugInfo/dwarfdump-inlining.test
deleted file mode 100644
index e926634..0000000
--- a/test/DebugInfo/dwarfdump-inlining.test
+++ /dev/null
@@ -1,28 +0,0 @@
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x710 \
-RUN: --inlining --functions | FileCheck %s -check-prefix DEEP_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x7d1 \
-RUN: --inlining | FileCheck %s -check-prefix SHORTER_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x785 \
-RUN: --inlining | FileCheck %s -check-prefix SHORT_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x737 \
-RUN: --functions | FileCheck %s -check-prefix INL_FUNC_NAME
-
-DEEP_STACK: inlined_h
-DEEP_STACK-NEXT: dwarfdump-inl-test.h:2
-DEEP_STACK-NEXT: inlined_g
-DEEP_STACK-NEXT: dwarfdump-inl-test.h:7
-DEEP_STACK-NEXT: inlined_f
-DEEP_STACK-NEXT: dwarfdump-inl-test.cc:3
-DEEP_STACK-NEXT: main
-DEEP_STACK-NEXT: dwarfdump-inl-test.cc:8
-
-SHORTER_STACK: dwarfdump-inl-test.h:7
-SHORTER_STACK-NEXT: dwarfdump-inl-test.cc:3
-SHORTER_STACK-NEXT: dwarfdump-inl-test.cc:8
-
-SHORT_STACK: dwarfdump-inl-test.cc:3
-SHORT_STACK-NEXT: dwarfdump-inl-test.cc:8
-
-INL_FUNC_NAME: inlined_g
-INL_FUNC_NAME-NEXT: dwarfdump-inl-test.h:7
-
diff --git a/test/DebugInfo/dwarfdump-ranges.test b/test/DebugInfo/dwarfdump-ranges.test
new file mode 100644
index 0000000..c9e33dc
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-ranges.test
@@ -0,0 +1,10 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 | FileCheck %s
+
+CHECK: .debug_ranges contents:
+CHECK-NEXT: 00000000 000000000000062c 0000000000000637
+CHECK-NEXT: 00000000 0000000000000637 000000000000063d
+CHECK-NEXT: 00000000 <End of list>
+CHECK-NEXT: 00000030 0000000000000640 000000000000064b
+CHECK-NEXT: 00000030 0000000000000637 000000000000063d
+CHECK-NEXT: 00000030 <End of list>
+
diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test
deleted file mode 100644
index 058d6a3..0000000
--- a/test/DebugInfo/dwarfdump-test.test
+++ /dev/null
@@ -1,56 +0,0 @@
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN: --address=0x400559 --functions | FileCheck %s -check-prefix MAIN
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN: --address=0x400528 --functions | FileCheck %s -check-prefix FUNCTION
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN: --address=0x400586 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
-RUN: --address=0x4004e8 --functions | FileCheck %s -check-prefix MANY_CU_1
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
-RUN: --address=0x4004f4 --functions | FileCheck %s -check-prefix MANY_CU_2
-RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
-RUN: --address=0x640 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
-RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
-RUN: --address=0x633 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
-RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
-RUN: --address=0x62d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
-RUN: --address=0x62c --functions \
-RUN: | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix DEBUG_RANGES
-
-MAIN: main
-MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
-
-FUNCTION: _Z1fii
-FUNCTION-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11
-
-CTOR_WITH_SPEC: DummyClass
-CTOR_WITH_SPEC-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4
-
-MANY_CU_1: a
-MANY_CU_1-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-helper.cc:2
-
-MANY_CU_2: main
-MANY_CU_2-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-main.cc:4
-
-ABS_ORIGIN_1: C
-ABS_ORIGIN_1-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test3.cc:3
-
-INCLUDE_TEST_1: _Z3do1v
-INCLUDE_TEST_1-NEXT: /tmp/include{{[/\\]}}dwarfdump-test3-decl.h:7
-
-INCLUDE_TEST_2: _Z3do2v
-INCLUDE_TEST_2-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}dwarfdump-test3-decl2.h:1
-
-MANY_SEQ_IN_LINE_TABLE: _Z1cv
-MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
-
-DEBUG_RANGES: .debug_ranges contents:
-DEBUG_RANGES-NEXT: 00000000 000000000000062c 0000000000000637
-DEBUG_RANGES-NEXT: 00000000 0000000000000637 000000000000063d
-DEBUG_RANGES-NEXT: 00000000 <End of list>
-DEBUG_RANGES-NEXT: 00000030 0000000000000640 000000000000064b
-DEBUG_RANGES-NEXT: 00000030 0000000000000637 000000000000063d
-DEBUG_RANGES-NEXT: 00000030 <End of list>
diff --git a/test/DebugInfo/dwarfdump-zlib.test b/test/DebugInfo/dwarfdump-zlib.test
index 8ce2cf7..cbd85ca 100644
--- a/test/DebugInfo/dwarfdump-zlib.test
+++ b/test/DebugInfo/dwarfdump-zlib.test
@@ -1,12 +1,6 @@
REQUIRES: zlib
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-zlib.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix FULLDUMP
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-zlib.elf-x86-64 \
-RUN: --address=0x400559 --functions | FileCheck %s -check-prefix MAIN
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-zlib.elf-x86-64 | FileCheck %s
-FULLDUMP: .debug_abbrev contents
-FULLDUMP: .debug_info contents
-
-MAIN: main
-MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test-zlib.cc:16
+CHECK: .debug_abbrev contents
+CHECK: .debug_info contents
diff --git a/test/DebugInfo/inline-scopes.ll b/test/DebugInfo/inline-scopes.ll
new file mode 100644
index 0000000..36c0735
--- /dev/null
+++ b/test/DebugInfo/inline-scopes.ll
@@ -0,0 +1,130 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; bool f();
+; inline __attribute__((always_inline)) int f1() {
+; if (bool b = f())
+; return 1;
+; return 2;
+; }
+;
+; inline __attribute__((always_inline)) int f2() {
+; # 2 "y.cc"
+; if (bool b = f())
+; return 3;
+; return 4;
+; }
+;
+; int main() {
+; f1();
+; f2();
+; }
+
+; Ensure that lexical_blocks within inlined_subroutines are preserved/emitted.
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: DW_TAG
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_variable
+; Ensure that file changes don't interfere with creating inlined subroutines.
+; (see the line directive inside 'f2' in the source)
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+ %retval.i2 = alloca i32, align 4
+ %b.i3 = alloca i8, align 1
+ %retval.i = alloca i32, align 4
+ %b.i = alloca i8, align 1
+ call void @llvm.dbg.declare(metadata !{i8* %b.i}, metadata !16), !dbg !19
+ %call.i = call zeroext i1 @_Z1fv(), !dbg !19
+ %frombool.i = zext i1 %call.i to i8, !dbg !19
+ store i8 %frombool.i, i8* %b.i, align 1, !dbg !19
+ %0 = load i8* %b.i, align 1, !dbg !19
+ %tobool.i = trunc i8 %0 to i1, !dbg !19
+ br i1 %tobool.i, label %if.then.i, label %if.end.i, !dbg !19
+
+if.then.i: ; preds = %entry
+ store i32 1, i32* %retval.i, !dbg !21
+ br label %_Z2f1v.exit, !dbg !21
+
+if.end.i: ; preds = %entry
+ store i32 2, i32* %retval.i, !dbg !22
+ br label %_Z2f1v.exit, !dbg !22
+
+_Z2f1v.exit: ; preds = %if.then.i, %if.end.i
+ %1 = load i32* %retval.i, !dbg !23
+ call void @llvm.dbg.declare(metadata !{i8* %b.i3}, metadata !24), !dbg !27
+ %call.i4 = call zeroext i1 @_Z1fv(), !dbg !27
+ %frombool.i5 = zext i1 %call.i4 to i8, !dbg !27
+ store i8 %frombool.i5, i8* %b.i3, align 1, !dbg !27
+ %2 = load i8* %b.i3, align 1, !dbg !27
+ %tobool.i6 = trunc i8 %2 to i1, !dbg !27
+ br i1 %tobool.i6, label %if.then.i7, label %if.end.i8, !dbg !27
+
+if.then.i7: ; preds = %_Z2f1v.exit
+ store i32 3, i32* %retval.i2, !dbg !29
+ br label %_Z2f2v.exit, !dbg !29
+
+if.end.i8: ; preds = %_Z2f1v.exit
+ store i32 4, i32* %retval.i2, !dbg !30
+ br label %_Z2f2v.exit, !dbg !30
+
+_Z2f2v.exit: ; preds = %if.then.i7, %if.end.i8
+ %3 = load i32* %retval.i2, !dbg !31
+ ret i32 0, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare zeroext i1 @_Z1fv() #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline-scopes.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"inline-scopes.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !10, metadata !12}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!5 = metadata !{metadata !"y.cc", metadata !"/tmp/dbginfo"}
+!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/tmp/dbginfo/y.cc]
+!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f2", metadata !"f2", metadata !"_Z2f2v", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [f2]
+!11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline-scopes.cpp]
+!12 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [f1]
+!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!15 = metadata !{metadata !"clang version 3.5.0 "}
+!16 = metadata !{i32 786688, metadata !17, metadata !"b", metadata !11, i32 3, metadata !18, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 3]
+!17 = metadata !{i32 786443, metadata !1, metadata !12, i32 3, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/inline-scopes.cpp]
+!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = metadata !{i32 3, i32 0, metadata !17, metadata !20}
+!20 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!21 = metadata !{i32 4, i32 0, metadata !17, metadata !20}
+!22 = metadata !{i32 5, i32 0, metadata !12, metadata !20}
+!23 = metadata !{i32 6, i32 0, metadata !12, metadata !20}
+!24 = metadata !{i32 786688, metadata !25, metadata !"b", metadata !6, i32 2, metadata !18, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 2]
+!25 = metadata !{i32 786443, metadata !5, metadata !26, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/y.cc]
+!26 = metadata !{i32 786443, metadata !5, metadata !10} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/y.cc]
+!27 = metadata !{i32 2, i32 0, metadata !25, metadata !28}
+!28 = metadata !{i32 9, i32 0, metadata !4, null}
+!29 = metadata !{i32 3, i32 0, metadata !25, metadata !28}
+!30 = metadata !{i32 4, i32 0, metadata !26, metadata !28}
+!31 = metadata !{i32 5, i32 0, metadata !26, metadata !28}
+!32 = metadata !{i32 10, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index e9c439a..9cfde1f 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -30,8 +30,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!10 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!12 = metadata !{metadata !9, metadata !9}
-!13 = metadata !{metadata !14}
-!14 = metadata !{metadata !15, metadata !16}
+!13 = metadata !{metadata !15, metadata !16}
!15 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
; Two DW_TAG_formal_parameter: one abstract and one inlined.
diff --git a/test/DebugInfo/llvm-symbolizer-zlib.test b/test/DebugInfo/llvm-symbolizer-zlib.test
new file mode 100644
index 0000000..0aae7e6
--- /dev/null
+++ b/test/DebugInfo/llvm-symbolizer-zlib.test
@@ -0,0 +1,7 @@
+REQUIRES: zlib
+
+RUN: echo "%p/Inputs/dwarfdump-test-zlib.elf-x86-64 0x400559" > %t.input
+RUN: llvm-symbolizer < %t.input | FileCheck %s
+
+CHECK: main
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test-zlib.cc:16
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
index 4b532f3..6aa1287 100644
--- a/test/DebugInfo/llvm-symbolizer.test
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -1,15 +1,24 @@
RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400559" > %t.input
RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64.debuglink 0x400559" >> %t.input
RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400436" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400528" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400586" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-test2.elf-x86-64 0x4004e8" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-test2.elf-x86-64 0x4004f4" >> %t.input
RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input
RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x7d1" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x785" >> %t.input
RUN: echo "%p/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 0x568" >> %t.input
+RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x640" >> %t.input
RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input
+RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x62d" >> %t.input
RUN: echo "%p/Inputs/macho-universal 0x1f84" >> %t.input
RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input
RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input
+RUN: echo "%p/Inputs/llvm-symbolizer-dwo-test 0x400514" >> %t.input
-RUN: llvm-symbolizer --functions --inlining --demangle=false \
+RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \
RUN: --default-arch=i386 < %t.input | FileCheck %s
CHECK: main
@@ -20,6 +29,18 @@ CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
CHECK: _start
+CHECK: _Z1fii
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11
+
+CHECK: DummyClass
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4
+
+CHECK: a
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-helper.cc:2
+
+CHECK: main
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-main.cc:4
+
CHECK: _Z1cv
CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
@@ -30,7 +51,19 @@ CHECK-NEXT: dwarfdump-inl-test.h:7
CHECK-NEXT: inlined_f
CHECK-NEXT: dwarfdump-inl-test.cc:3
CHECK-NEXT: main
-CHECK-NEXT: dwarfdump-inl-test.cc:
+CHECK-NEXT: dwarfdump-inl-test.cc:8
+
+CHECK: inlined_g
+CHECK-NEXT: dwarfdump-inl-test.h:7
+CHECK-NEXT: inlined_f
+CHECK-NEXT: dwarfdump-inl-test.cc:3
+CHECK-NEXT: main
+CHECK-NEXT: dwarfdump-inl-test.cc:8
+
+CHECK: inlined_f
+CHECK-NEXT: dwarfdump-inl-test.cc:3
+CHECK-NEXT: main
+CHECK-NEXT: dwarfdump-inl-test.cc:8
CHECK: inlined_h
CHECK-NEXT: dwarfdump-inl-test.h:3
@@ -39,15 +72,24 @@ CHECK-NEXT: dwarfdump-inl-test.h:7
CHECK-NEXT: inlined_f
CHECK-NEXT: dwarfdump-inl-test.cc:3
CHECK-NEXT: main
-CHECK-NEXT: dwarfdump-inl-test.cc:
+CHECK-NEXT: dwarfdump-inl-test.cc:8
+
+CHECK: C
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test3.cc:3
-CHECK: _Z3do1v
-CHECK-NEXT: dwarfdump-test3-decl.h:7
+CHECK: _Z3do1v
+CHECK-NEXT: /tmp/include{{[/\\]}}dwarfdump-test3-decl.h:7
+
+CHECK: _Z3do2v
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}dwarfdump-test3-decl2.h:1
CHECK: main
CHECK: _Z3inci
CHECK: _Z3inci
+CHECK: main
+CHECK-NEXT: llvm-symbolizer-dwo-test.cc:11
+
RUN: echo "unexisting-file 0x1234" > %t.input2
RUN: llvm-symbolizer < %t.input2
@@ -83,3 +125,9 @@ RUN: llvm-symbolizer --obj %p/Inputs/shared-object-stripped.elf-i386 < %t.input6
RUN: | FileCheck %s --check-prefix=STRIPPED
STRIPPED: global_func
+
+RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" > %t.input7
+RUN: llvm-symbolizer --functions=short --use-symbol-table=false --demangle=false < %t.input7 \
+RUN: | FileCheck %s --check-prefix=SHORT_FUNCTION_NAME
+
+SHORT_FUNCTION_NAME-NOT: _Z1cv
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
index f36688d..a9de62c 100644
--- a/test/DebugInfo/namespace.ll
+++ b/test/DebugInfo/namespace.ll
@@ -16,14 +16,6 @@
; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
; CHECK-NEXT: DW_AT_name{{.*}}= "i"
; CHECK-NOT: NULL
-; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name
-; CHECK-NEXT: DW_AT_name{{.*}}= "f1"
-; CHECK: [[FUNC1:0x[0-9a-f]*]]:{{ *}}DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name
-; CHECK-NEXT: DW_AT_name{{.*}}= "f1"
-; CHECK: NULL
-; CHECK-NOT: NULL
; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
; CHECK-NEXT: DW_AT_name{{.*}}= "foo"
; CHECK-NEXT: DW_AT_declaration
@@ -31,7 +23,16 @@
; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
; CHECK-NEXT: DW_AT_name{{.*}}= "bar"
; CHECK: NULL
-; CHECK: NULL
+; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "f1"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "f1"
; CHECK: NULL
; CHECK-NOT: NULL
@@ -44,9 +45,18 @@
; CHECK: NULL
; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; Same bug as above, this should be F2, not F1
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NOT: NULL
+
; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name
-; CHECK-NEXT: DW_AT_name{{.*}}= "func"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "func"
; CHECK-NOT: NULL
; CHECK: DW_TAG_imported_module
; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
@@ -73,13 +83,13 @@
; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x16)
; CHECK-NEXT: DW_AT_import{{.*}}=> {[[I]]})
; CHECK-NOT: NULL
-; CHECK: [[X:0x[0-9a-f]*]]:{{ *}}DW_TAG_imported_module
+; CHECK: [[X:0x[0-9a-f]*]]:{{ *}}DW_TAG_imported_declaration
; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x18)
; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
; CHECK-NEXT: DW_AT_name{{.*}}"X"
; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_module
+; CHECK: DW_TAG_imported_declaration
; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x19)
; CHECK-NEXT: DW_AT_import{{.*}}=> {[[X]]})
@@ -93,13 +103,7 @@
; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
; CHECK: NULL
; CHECK: NULL
-; CHECK-NOT: NULL
-
-; CHECK: DW_TAG_imported_module
-; Same bug as above, this should be F2, not F1
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK: NULL
; CHECK: file_names[ [[F1]]]{{.*}}debug-info-namespace.cpp
; CHECK: file_names[ [[F2]]]{{.*}}foo.cpp
@@ -199,7 +203,7 @@ attributes #1 = { nounwind readnone }
!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !19, metadata !21, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug//usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
!1 = metadata !{metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"}
-!2 = metadata !{i32 0}
+!2 = metadata !{}
!3 = metadata !{metadata !4, metadata !10, metadata !14}
!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f1", metadata !"f1", metadata !"_ZN1A1B2f1Ev", i32 3, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN1A1B2f1Ev, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f1]
!5 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"}
@@ -236,8 +240,8 @@ attributes #1 = { nounwind readnone }
!36 = metadata !{i32 786468}
!37 = metadata !{i32 786440, metadata !14, metadata !10, i32 21} ; [ DW_TAG_imported_declaration ]
!38 = metadata !{i32 786440, metadata !14, metadata !20, i32 22} ; [ DW_TAG_imported_declaration ]
-!39 = metadata !{i32 786490, metadata !14, metadata !7, i32 24, metadata !"X"} ; [ DW_TAG_imported_module ]
-!40 = metadata !{i32 786490, metadata !14, metadata !39, i32 25, metadata !"Y"} ; [ DW_TAG_imported_module ]
+!39 = metadata !{i32 786440, metadata !14, metadata !7, i32 24, metadata !"X"} ; [ DW_TAG_imported_declaration ]
+!40 = metadata !{i32 786440, metadata !14, metadata !39, i32 25, metadata !"Y"} ; [ DW_TAG_imported_declaration ]
!41 = metadata !{i32 3, i32 0, metadata !4, null}
!42 = metadata !{i32 786689, metadata !10, metadata !"", metadata !15, i32 16777220, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 4]
!43 = metadata !{i32 4, i32 0, metadata !10, null}
diff --git a/test/DebugInfo/namespace_function_definition.ll b/test/DebugInfo/namespace_function_definition.ll
new file mode 100644
index 0000000..590f2b3
--- /dev/null
+++ b/test/DebugInfo/namespace_function_definition.ll
@@ -0,0 +1,44 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Generated from clang with the following source:
+; namespace ns {
+; void func() {
+; }
+; }
+
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name {{.*}} "ns"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_low_pc
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN2ns4funcEv"
+; CHECK: NULL
+; CHECK: NULL
+
+; Function Attrs: nounwind uwtable
+define void @_ZN2ns4funcEv() #0 {
+entry:
+ ret void, !dbg !11
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_function_definition.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"namespace_function_definition.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_ZN2ns4funcEv", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns4funcEv, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func]
+!5 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 1} ; [ DW_TAG_namespace ] [ns] [line 1]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5.0 "}
+!11 = metadata !{i32 3, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/namespace_inline_function_definition.ll b/test/DebugInfo/namespace_inline_function_definition.ll
new file mode 100644
index 0000000..65fa4a4
--- /dev/null
+++ b/test/DebugInfo/namespace_inline_function_definition.ll
@@ -0,0 +1,92 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Generated from clang with the following source. Note that the definition of
+; the inline function follows its use to work around another bug that should
+; be fixed soon.
+; namespace ns {
+; int func(int i);
+; }
+; extern int x;
+; int main() { return ns::func(x); }
+; int __attribute__((always_inline)) ns::func(int i) { return i * 2; }
+
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name {{.*}} "ns"
+; CHECK-NOT: DW_TAG
+; CHECK: [[ABS_DEF:0x.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN2ns4funcEi"
+; CHECK-NOT: DW_TAG
+; CHECK: [[ABS_PRM:0x.*]]: DW_TAG_formal_parameter
+; CHECK: NULL
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[ABS_DEF]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_abstract_origin {{.*}} {[[ABS_PRM]]}
+; CHECK: NULL
+; CHECK: NULL
+; CHECK: NULL
+
+@x = external global i32
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+ %i.addr.i = alloca i32, align 4
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @x, align 4, !dbg !16
+ store i32 %0, i32* %i.addr.i, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %i.addr.i}, metadata !17), !dbg !18
+ %1 = load i32* %i.addr.i, align 4, !dbg !18
+ %mul.i = mul nsw i32 %1, 2, !dbg !18
+ ret i32 %mul.i, !dbg !16
+}
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @_ZN2ns4funcEi(i32 %i) #1 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !17), !dbg !19
+ %0 = load i32* %i.addr, align 4, !dbg !19
+ %mul = mul nsw i32 %0, 2, !dbg !19
+ ret i32 %mul, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_inline_function_definition.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"namespace_inline_function_definition.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4, metadata !9}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/namespace_inline_function_definition.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"func", metadata !"func", metadata !"_ZN2ns4funcEi", i32 6, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_ZN2ns4funcEi, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!10 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 1} ; [ DW_TAG_namespace ] [ns] [line 1]
+!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !8, metadata !8}
+!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!14 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!15 = metadata !{metadata !"clang version 3.5.0 "}
+!16 = metadata !{i32 5, i32 0, metadata !4, null}
+!17 = metadata !{i32 786689, metadata !9, metadata !"i", metadata !5, i32 16777222, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 6]
+!18 = metadata !{i32 6, i32 0, metadata !9, metadata !16}
+!19 = metadata !{i32 6, i32 0, metadata !9, null}
diff --git a/test/DebugInfo/restrict.ll b/test/DebugInfo/restrict.ll
new file mode 100644
index 0000000..ceb844f
--- /dev/null
+++ b/test/DebugInfo/restrict.ll
@@ -0,0 +1,53 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -dwarf-version=2 -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=V2 %s
+; RUN: %llc_dwarf -dwarf-version=3 -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=V3 %s
+
+; CHECK: DW_AT_name {{.*}} "dst"
+; V2: DW_AT_type {{.*}} {[[PTR:0x.*]]}
+; V3: DW_AT_type {{.*}} {[[RESTRICT:0x.*]]}
+; V3: [[RESTRICT]]: {{.*}}DW_TAG_restrict_type
+; V3-NEXT: DW_AT_type {{.*}} {[[PTR:0x.*]]}
+; CHECK: [[PTR]]: {{.*}}DW_TAG_pointer_type
+; CHECK-NOT: DW_AT_type
+
+; Generated with clang from:
+; void foo(void* __restrict__ dst) {
+; }
+
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooPv(i8* noalias %dst) #0 {
+entry:
+ %dst.addr = alloca i8*, align 8
+ store i8* %dst, i8** %dst.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8** %dst.addr}, metadata !13), !dbg !14
+ ret void, !dbg !15
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/restrict.c] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"restrict.c", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"_Z3fooPv", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_Z3fooPv, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/restrict.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786487, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_restrict_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{metadata !"clang version 3.5.0 "}
+!13 = metadata !{i32 786689, metadata !4, metadata !"dst", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [dst] [line 1]
+!14 = metadata !{i32 1, i32 0, metadata !4, null}
+!15 = metadata !{i32 2, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/sugared-constants.ll b/test/DebugInfo/sugared-constants.ll
new file mode 100644
index 0000000..0d2ebe6
--- /dev/null
+++ b/test/DebugInfo/sugared-constants.ll
@@ -0,0 +1,82 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; Use correct signedness when emitting constants of derived (sugared) types.
+
+; Test compiled to IR from clang with -O1 and the following source:
+
+; void func(int);
+; void func(unsigned);
+; void func(char16_t);
+; int main() {
+; const int i = 42;
+; func(i);
+; const unsigned j = 117;
+; func(j);
+; char16_t c = 7;
+; func(c);
+; }
+
+; CHECK: DW_AT_const_value [DW_FORM_sdata] (42)
+; CHECK: DW_AT_const_value [DW_FORM_udata] (117)
+; CHECK: DW_AT_const_value [DW_FORM_udata] (7)
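+;
+; The form should track the signedness of the underlying type once the const
+; sugar is stripped: int emits sdata, while unsigned int and char16_t emit
+; udata, as the three patterns above pin down.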
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+ tail call void @llvm.dbg.value(metadata !20, i64 0, metadata !10), !dbg !21
+ tail call void @_Z4funci(i32 42), !dbg !22
+ tail call void @llvm.dbg.value(metadata !23, i64 0, metadata !12), !dbg !24
+ tail call void @_Z4funcj(i32 117), !dbg !25
+ tail call void @llvm.dbg.value(metadata !26, i64 0, metadata !15), !dbg !27
+ tail call void @_Z4funcDs(i16 zeroext 7), !dbg !28
+ ret i32 0, !dbg !29
+}
+
+declare void @_Z4funci(i32) #1
+
+declare void @_Z4funcj(i32) #1
+
+declare void @_Z4funcDs(i16 zeroext) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/const.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"const.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !9, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/const.cpp]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{metadata !10, metadata !12, metadata !15}
+!10 = metadata !{i32 786688, metadata !4, metadata !"i", metadata !5, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
+!11 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!12 = metadata !{i32 786688, metadata !4, metadata !"j", metadata !5, i32 7, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 7]
+!13 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from unsigned int]
+!14 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!15 = metadata !{i32 786688, metadata !4, metadata !"c", metadata !5, i32 9, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [c] [line 9]
+!16 = metadata !{i32 786468, null, null, metadata !"char16_t", i32 0, i64 16, i64 16, i64 0, i32 0, i32 16} ; [ DW_TAG_base_type ] [char16_t] [line 0, size 16, align 16, offset 0, enc DW_ATE_UTF]
+!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!19 = metadata !{metadata !"clang version 3.5.0 "}
+!20 = metadata !{i32 42}
+!21 = metadata !{i32 5, i32 0, metadata !4, null}
+!22 = metadata !{i32 6, i32 0, metadata !4, null}
+!23 = metadata !{i32 117}
+!24 = metadata !{i32 7, i32 0, metadata !4, null}
+!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!26 = metadata !{i16 7}
+!27 = metadata !{i32 9, i32 0, metadata !4, null}
+!28 = metadata !{i32 10, i32 0, metadata !4, null}
+!29 = metadata !{i32 11, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
index c4d663c..2ab82a9 100644
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -51,8 +51,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ]
!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ]
!18 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!19 = metadata !{metadata !20}
-!20 = metadata !{metadata !21, metadata !22}
+!19 = metadata !{metadata !21, metadata !22}
!21 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !6, i32 16777227, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!22 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !6, i32 33554443, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!23 = metadata !{i32 6, i32 3, metadata !24, null}
diff --git a/test/DebugInfo/typedef.ll b/test/DebugInfo/typedef.ll
new file mode 100644
index 0000000..40cecdf
--- /dev/null
+++ b/test/DebugInfo/typedef.ll
@@ -0,0 +1,32 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; From source:
+; typedef void x;
+; x *y;
+
+; Check that a typedef with no DW_AT_type is produced. The absence of a type is used to imply the 'void' type.
+
+; CHECK: DW_TAG_typedef
+; CHECK-NOT: DW_AT_type
+; CHECK: {{DW_TAG|NULL}}
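+;
+; (DWARF has no DIE for 'void' itself; consumers infer it from a missing
+; DW_AT_type, which is why the typedef for 'x' should carry no type
+; attribute.)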
+
+@y = global i8* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/typedef.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"typedef.cpp", metadata !"/tmp/dbginfo"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !5, i32 2, metadata !6, i32 0, i32 1, i8** @y, null} ; [ DW_TAG_variable ] [y] [line 2] [def]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/typedef.cpp]
+!6 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from x]
+!7 = metadata !{i32 786454, metadata !1, null, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [x] [line 1, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5.0 "}
+
diff --git a/test/DebugInfo/unconditional-branch.ll b/test/DebugInfo/unconditional-branch.ll
index 1e5dac6..6c31375 100644
--- a/test/DebugInfo/unconditional-branch.ll
+++ b/test/DebugInfo/unconditional-branch.ll
@@ -1,12 +1,12 @@
; REQUIRES: object-emission
; PR 19261
-; RUN: %llc_dwarf -fast-isel=false -O0 -filetype=obj < %s > %t
+; RUN: %llc_dwarf -fast-isel=false -O0 -filetype=obj %s -o %t
; RUN: llvm-dwarfdump %t | FileCheck %s
; CHECK: {{0x[0-9a-f]+}} 1 0 1 0 0 is_stmt
-; CHECK-NEXT: {{0x[0-9a-f]+}} 2 0 1 0 0 is_stmt
-; CHECK-NEXT: {{0x[0-9a-f]+}} 4 0 1 0 0 is_stmt
+; CHECK: {{0x[0-9a-f]+}} 2 0 1 0 0 is_stmt
+; CHECK: {{0x[0-9a-f]+}} 4 0 1 0 0 is_stmt
; IR generated from clang -O0 -g with the following source:
;void foo(int i){
diff --git a/test/DebugInfo/varargs.ll b/test/DebugInfo/varargs.ll
index a327414..ddfcd85 100644
--- a/test/DebugInfo/varargs.ll
+++ b/test/DebugInfo/varargs.ll
@@ -13,25 +13,25 @@
;
; CHECK: DW_TAG_subprogram
; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "b"
+; CHECK: DW_AT_name {{.*}} "a"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
; CHECK-NOT: DW_TAG
; CHECK: DW_TAG_formal_parameter
; CHECK-NOT: DW_TAG
; CHECK: DW_TAG_unspecified_parameters
;
-; Variadic C++ member function.
-; struct A { void a(int c, ...); }
-;
; CHECK: DW_TAG_subprogram
; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "a"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_name {{.*}} "b"
; CHECK-NOT: DW_TAG
; CHECK: DW_TAG_formal_parameter
; CHECK-NOT: DW_TAG
; CHECK: DW_TAG_unspecified_parameters
;
+; Variadic C++ member function.
+; struct A { void a(int c, ...); };
+;
; Variadic function pointer.
; void (*fptr)(int, ...);
;
diff --git a/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test b/test/ExecutionEngine/RuntimeDyld/macho_relocations.test
index 92e4dd7..92e4dd7 100644
--- a/test/ExecutionEngine/RuntimeDyld/arm_secdiff_reloc.test
+++ b/test/ExecutionEngine/RuntimeDyld/macho_relocations.test
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index a198439..7f0b69e 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -1,9 +1,10 @@
-if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']:
+if config.root.host_arch in ['PowerPC', 'AArch64', 'ARM64', 'SystemZ']:
config.unsupported = True
# CMake and autoconf diverge in naming of host_arch
-if 'aarch64' in config.root.target_triple:
- config.unsupported = True
+if 'aarch64' in config.root.target_triple \
+ or 'arm64' in config.root.target_triple:
+ config.unsupported = True
if 'hexagon' in config.root.target_triple:
config.unsupported = True
diff --git a/test/Feature/alias2.ll b/test/Feature/alias2.ll
new file mode 100644
index 0000000..693ef7c
--- /dev/null
+++ b/test/Feature/alias2.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+@v1 = global i32 0
+; CHECK: @v1 = global i32 0
+
+@v2 = global [1 x i32] zeroinitializer
+; CHECK: @v2 = global [1 x i32] zeroinitializer
+
+@v3 = alias i16, i32* @v1
+; CHECK: @v3 = alias i16, i32* @v1
+
+@v4 = alias i32, [1 x i32]* @v2
+; CHECK: @v4 = alias i32, [1 x i32]* @v2
+
+@v5 = alias addrspace(2) i32, i32* @v1
+; CHECK: @v5 = alias addrspace(2) i32, i32* @v1
+
+@v6 = alias i16, i32* @v1
+; CHECK: @v6 = alias i16, i32* @v1
diff --git a/test/Feature/aliases.ll b/test/Feature/aliases.ll
index 7fe9d0b..b2ce82a 100644
--- a/test/Feature/aliases.ll
+++ b/test/Feature/aliases.ll
@@ -7,7 +7,6 @@
@bar = global i32 0
@foo1 = alias i32* @bar
@foo2 = alias i32* @bar
-@foo3 = alias i32* @foo2
%FunTy = type i32()
@@ -15,11 +14,10 @@ define i32 @foo_f() {
ret i32 0
}
@bar_f = alias weak_odr %FunTy* @foo_f
-@bar_ff = alias i32()* @bar_f
@bar_i = alias internal i32* @bar
-@A = alias bitcast (i32* @bar to i64*)
+@A = alias i64, i32* @bar
define i32 @test() {
entry:
diff --git a/test/Feature/instructions.ll b/test/Feature/instructions.ll
index d0c303d..aa96294 100644
--- a/test/Feature/instructions.ll
+++ b/test/Feature/instructions.ll
@@ -4,11 +4,13 @@
define i32 @test_extractelement(<4 x i32> %V) {
%R = extractelement <4 x i32> %V, i32 1 ; <i32> [#uses=1]
+ %S = extractelement <4 x i32> %V, i64 1 ; <i32> [#uses=0]
ret i32 %R
}
define <4 x i32> @test_insertelement(<4 x i32> %V) {
%R = insertelement <4 x i32> %V, i32 0, i32 0 ; <<4 x i32>> [#uses=1]
+ %S = insertelement <4 x i32> %V, i32 0, i64 0 ; <<4 x i32>> [#uses=0]
ret <4 x i32> %R
}
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll b/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll
new file mode 100644
index 0000000..b83a7e9
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_attr.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: mov_no_attr
+; CHECK-NOT: callq __sanitizer_sanitize_load8@PLT
+; CHECK-NOT: callq __sanitizer_sanitize_store8@PLT
+define void @mov_no_attr(i64* %dst, i64* %src) {
+ tail call void asm sideeffect "movq ($1), %rax \0A\09movq %rax, ($0) \0A\09", "r,r,~{memory},~{rax},~{dirflag},~{fpsr},~{flags}"(i64* %dst, i64* %src)
+ ret void
+}
+
+; CHECK-LABEL: mov_sanitize
+; CHECK: callq __sanitizer_sanitize_load8@PLT
+; CHECK: callq __sanitizer_sanitize_store8@PLT
+define void @mov_sanitize(i64* %dst, i64* %src) sanitize_address {
+ tail call void asm sideeffect "movq ($1), %rax \0A\09movq %rax, ($0) \0A\09", "r,r,~{memory},~{rax},~{dirflag},~{fpsr},~{flags}"(i64* %dst, i64* %src)
+ ret void
+}
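+
+; Only the function marked sanitize_address should receive the
+; __sanitizer_sanitize_* probe calls; mov_no_attr is expected to stay
+; uninstrumented, which the CHECK-NOT patterns above enforce.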
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll b/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
index 7af8139..030af7e 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
@@ -1,22 +1,22 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asan-instrument-inline-assembly | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: mov1b
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_load1@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_store1@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
; CHECK: movb {{.*}}, {{.*}}
define void @mov1b(i8* %dst, i8* %src) #0 {
@@ -26,19 +26,19 @@ entry:
}
; CHECK-LABEL: mov2b
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_load2@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_store2@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
; CHECK: movw {{.*}}, {{.*}}
define void @mov2b(i16* %dst, i16* %src) #0 {
@@ -48,19 +48,19 @@ entry:
}
; CHECK-LABEL: mov4b
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_load4@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_store4@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
; CHECK: movl {{.*}}, {{.*}}
define void @mov4b(i32* %dst, i32* %src) #0 {
@@ -70,19 +70,19 @@ entry:
}
; CHECK-LABEL: mov8b
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_load8@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_store8@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
; CHECK: movq {{.*}}, {{.*}}
define void @mov8b(i64* %dst, i64* %src) #0 {
@@ -92,19 +92,19 @@ entry:
}
; CHECK-LABEL: mov16b
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_load16@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
-; CHECK: subq $128, %rsp
+; CHECK: leaq -128(%rsp), %rsp
; CHECK-NEXT: pushq %rdi
; CHECK-NEXT: leaq {{.*}}, %rdi
; CHECK-NEXT: callq __sanitizer_sanitize_store16@PLT
; CHECK-NEXT: popq %rdi
-; CHECK-NEXT: addq $128, %rsp
+; CHECK-NEXT: leaq 128(%rsp), %rsp
; CHECK: movaps {{.*}}, {{.*}}
define void @mov16b(<2 x i64>* %dst, <2 x i64>* %src) #0 {
@@ -113,7 +113,7 @@ entry:
ret void
}
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!0 = metadata !{i32 98, i32 122, i32 160}
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov.s b/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
index 9001067..df217c0 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
@@ -1,4 +1,4 @@
-# RUN: llvm-mc %s -triple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asan-instrument-inline-assembly | FileCheck %s
+# RUN: llvm-mc %s -triple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
.text
.globl mov1b
@@ -6,21 +6,21 @@
.type mov1b,@function
# CHECK-LABEL: mov1b:
#
-# CHECK: subq $128, %rsp
+# CHECK: leaq -128(%rsp), %rsp
# CHECK-NEXT: pushq %rdi
# CHECK-NEXT: leaq (%rsi), %rdi
# CHECK-NEXT: callq __sanitizer_sanitize_load1@PLT
# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: addq $128, %rsp
+# CHECK-NEXT: leaq 128(%rsp), %rsp
#
# CHECK-NEXT: movb (%rsi), %al
#
-# CHECK-NEXT: subq $128, %rsp
+# CHECK-NEXT: leaq -128(%rsp), %rsp
# CHECK-NEXT: pushq %rdi
# CHECK-NEXT: leaq (%rdi), %rdi
# CHECK-NEXT: callq __sanitizer_sanitize_store1@PLT
# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: addq $128, %rsp
+# CHECK-NEXT: leaq 128(%rsp), %rsp
#
# CHECK-NEXT: movb %al, (%rdi)
mov1b: # @mov1b
@@ -41,21 +41,21 @@ mov1b: # @mov1b
.type mov16b,@function
# CHECK-LABEL: mov16b:
#
-# CHECK: subq $128, %rsp
+# CHECK: leaq -128(%rsp), %rsp
# CHECK-NEXT: pushq %rdi
# CHECK-NEXT: leaq (%rsi), %rdi
# CHECK-NEXT: callq __sanitizer_sanitize_load16@PLT
# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: addq $128, %rsp
+# CHECK-NEXT: leaq 128(%rsp), %rsp
#
# CHECK-NEXT: movaps (%rsi), %xmm0
#
-# CHECK-NEXT: subq $128, %rsp
+# CHECK-NEXT: leaq -128(%rsp), %rsp
# CHECK-NEXT: pushq %rdi
# CHECK-NEXT: leaq (%rdi), %rdi
# CHECK-NEXT: callq __sanitizer_sanitize_store16@PLT
# CHECK-NEXT: popq %rdi
-# CHECK-NEXT: addq $128, %rsp
+# CHECK-NEXT: leaq 128(%rsp), %rsp
#
# CHECK-NEXT: movaps %xmm0, (%rdi)
mov16b: # @mov16b
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s b/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
index a9ef4df..cc05527 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
@@ -20,25 +20,5 @@ mov1b: # @mov1b
.size mov1b, .Ltmp0-mov1b
.cfi_endproc
- .globl mov16b
- .align 16, 0x90
- .type mov16b,@function
-# CHECK-LABEL: mov16b
-# CHECK-NOT: callq __sanitizer_sanitize_load16@PLT
-# CHECK-NOT: callq __sanitizer_sanitize_store16@PLT
-mov16b: # @mov16b
- .cfi_startproc
-# BB#0:
- #APP
- movaps (%rsi), %xmm0
- movaps %xmm0, (%rdi)
-
- #NO_APP
- retq
-.Ltmp1:
- .size mov16b, .Ltmp1-mov16b
- .cfi_endproc
-
-
.ident "clang version 3.5 "
.section ".note.GNU-stack","",@progbits
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s b/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
new file mode 100644
index 0000000..8a6a8d5
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
@@ -0,0 +1,71 @@
+# RUN: llvm-mc %s -x86-asm-syntax=intel -triple=x86_64-unknown-linux-gnu -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
+
+ .text
+ .globl swap
+ .align 16, 0x90
+ .type swap,@function
+# CHECK-LABEL: swap:
+#
+# CHECK: leaq -128(%rsp), %rsp
+# CHECK-NEXT: pushq %rdi
+# CHECK-NEXT: leaq (%rcx), %rdi
+# CHECK-NEXT: callq __sanitizer_sanitize_load8@PLT
+# CHECK-NEXT: popq %rdi
+# CHECK-NEXT: leaq 128(%rsp), %rsp
+#
+# CHECK-NEXT: movq (%rcx), %rax
+#
+# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK-NEXT: pushq %rdi
+# CHECK-NEXT: leaq (%rdx), %rdi
+# CHECK-NEXT: callq __sanitizer_sanitize_load8@PLT
+# CHECK-NEXT: popq %rdi
+# CHECK-NEXT: leaq 128(%rsp), %rsp
+#
+# CHECK-NEXT: movq (%rdx), %rbx
+#
+# CHECK: leaq -128(%rsp), %rsp
+# CHECK-NEXT: pushq %rdi
+# CHECK-NEXT: leaq (%rcx), %rdi
+# CHECK-NEXT: callq __sanitizer_sanitize_store8@PLT
+# CHECK-NEXT: popq %rdi
+# CHECK-NEXT: leaq 128(%rsp), %rsp
+#
+# CHECK-NEXT: movq %rbx, (%rcx)
+#
+# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK-NEXT: pushq %rdi
+# CHECK-NEXT: leaq (%rdx), %rdi
+# CHECK-NEXT: callq __sanitizer_sanitize_store8@PLT
+# CHECK-NEXT: popq %rdi
+# CHECK-NEXT: leaq 128(%rsp), %rsp
+#
+# CHECK-NEXT: movq %rax, (%rdx)
+swap: # @swap
+ .cfi_startproc
+# BB#0:
+ push rbx
+.Ltmp0:
+ .cfi_def_cfa_offset 16
+.Ltmp1:
+ .cfi_offset rbx, -16
+ mov rcx, rdi
+ mov rdx, rsi
+ #APP
+
+
+ mov rax, qword ptr [rcx]
+ mov rbx, qword ptr [rdx]
+ mov qword ptr [rcx], rbx
+ mov qword ptr [rdx], rax
+
+ #NO_APP
+ pop rbx
+ ret
+.Ltmp2:
+ .size swap, .Ltmp2-swap
+ .cfi_endproc
+
+
+ .ident "clang version 3.5.0 "
+ .section ".note.GNU-stack","",@progbits
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 4863a3d..7d1aa0b 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -34,7 +34,7 @@ define i32 @test_load(i32* %a) sanitize_address {
entry:
- %tmp1 = load i32* %a
+ %tmp1 = load i32* %a, align 4
ret i32 %tmp1
}
@@ -66,7 +66,7 @@ define void @test_store(i32* %a) sanitize_address {
;
entry:
- store i32 42, i32* %a
+ store i32 42, i32* %a, align 4
ret void
}
@@ -115,6 +115,18 @@ define void @i40test(i40* %a, i40* %b) nounwind uwtable sanitize_address {
; CHECK: __asan_report_store_n{{.*}}, i64 5)
; CHECK: ret void
+define void @i64test_align1(i64* %b) nounwind uwtable sanitize_address {
+ entry:
+ store i64 0, i64* %b, align 1
+ ret void
+}
+
+; CHECK-LABEL: i64test_align1
+; CHECK: __asan_report_store_n{{.*}}, i64 8)
+; CHECK: __asan_report_store_n{{.*}}, i64 8)
+; CHECK: ret void
+
+
define void @i80test(i80* %a, i80* %b) nounwind uwtable sanitize_address {
entry:
%t = load i80* %a
@@ -139,4 +151,21 @@ entry:
; CHECK-NOT: __asan_report
; CHECK: ret i32
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) nounwind
+
+define void @memintr_test(i8* %a, i8* %b) nounwind uwtable sanitize_address {
+ entry:
+ tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 100, i32 1, i1 false)
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %a, i8* %b, i64 100, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 100, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: memintr_test
+; CHECK: __asan_memset
+; CHECK: __asan_memmove
+; CHECK: __asan_memcpy
+; CHECK: ret void
diff --git a/test/Instrumentation/AddressSanitizer/coverage-dbg.ll b/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
new file mode 100644
index 0000000..77d7286
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
@@ -0,0 +1,33 @@
+; Test that coverage instrumentation does not lose debug location.
+
+; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone uwtable
+define void @_Z1fv() #0 {
+entry:
+ ret void, !dbg !11
+}
+
+; CHECK: call void @__sanitizer_cov(), !dbg !
+
+attributes #0 = { sanitize_address nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (208682)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp//tmp/1.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"/tmp/1.cc", metadata !"/tmp"}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"_Z1fv", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z1fv, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp//tmp/1.cc]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = metadata !{metadata !"clang version 3.5.0 (208682)"}
+!11 = metadata !{i32 2, i32 0, metadata !4, null}
diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll
index 0670132..5bc5103 100644
--- a/test/Instrumentation/AddressSanitizer/coverage.ll
+++ b/test/Instrumentation/AddressSanitizer/coverage.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s --check-prefix=CHECK1
; RUN: opt < %s -asan -asan-module -asan-coverage=2 -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=1 -S | FileCheck %s --check-prefix=CHECK1
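+; With -asan-coverage=2, a block threshold smaller than the function's block
+; count should fall back to function-level instrumentation (hence CHECK1 for
+; threshold=1), while a generous threshold keeps the per-block calls that
+; CHECK2 expects.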
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32* %a) sanitize_address {
@@ -14,6 +16,7 @@ entry:
if.end: ; preds = %entry, %if.then
ret void
}
+
; CHECK1-LABEL: define void @foo
; CHECK1: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1
; CHECK1: %1 = icmp eq i8 0, %0
@@ -22,9 +25,20 @@ entry:
; CHECK1-NOT: call void @__sanitizer_cov
; CHECK1: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1
+; CHECK1-LABEL: define internal void @asan.module_ctor
+; CHECK1-NOT: ret
+; CHECK1: call void @__sanitizer_cov_module_init(i64 1)
+; CHECK1: ret
+
+
; CHECK2-LABEL: define void @foo
; CHECK2: call void @__sanitizer_cov
; CHECK2: call void @__sanitizer_cov
; CHECK2: call void @__sanitizer_cov
; CHECK2-NOT: call void @__sanitizer_cov
; CHECK2: ret void
+
+; CHECK2-LABEL: define internal void @asan.module_ctor
+; CHECK2-NOT: ret
+; CHECK2: call void @__sanitizer_cov_module_init(i64 3)
+; CHECK2: ret
diff --git a/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll b/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll
new file mode 100644
index 0000000..adb4341
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll
@@ -0,0 +1,30 @@
+; Test asan internal compiler flags:
+; -asan-instrumentation-with-call-threshold
+; -asan-memory-access-callback-prefix
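+;
+; A reading of the RUN lines below: when a function contains more instrumented
+; accesses than the threshold, the checks are emitted as out-of-line
+; __asan_load*/__asan_store* calls (thresholds 0 and 1 here); otherwise they
+; stay inline (threshold 5 and the default).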
+
+; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=1 -S | FileCheck %s --check-prefix=CHECK-CALL
+; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL
+; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX
+; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefix=CHECK-INLINE
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test_load(i32* %a, i64* %b, i512* %c, i80* %d) sanitize_address {
+entry:
+; CHECK-CALL: call void @__asan_load4
+; CHECK-CALL: call void @__asan_load8
+; CHECK-CALL: call void @__asan_loadN{{.*}}i64 64)
+; CHECK-CALL: call void @__asan_loadN{{.*}}i64 10)
+; CHECK-CUSTOM-PREFIX: call void @__foo_load4
+; CHECK-CUSTOM-PREFIX: call void @__foo_load8
+; CHECK-CUSTOM-PREFIX: call void @__foo_loadN
+; CHECK-INLINE-NOT: call void @__asan_load
+ %tmp1 = load i32* %a, align 4
+ %tmp2 = load i64* %b, align 8
+ %tmp3 = load i512* %c, align 32
+ %tmp4 = load i80* %d, align 8
+ ret void
+}
+
+
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
index 4f3ed5b..fd93f45 100644
--- a/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -6,7 +6,7 @@ entry:
%tmp1 = load i32* %a, align 4
ret i32 %tmp1
}
-; CHECK: @read_4_bytes
+; CHECK-LABEL: @read_4_bytes
; CHECK-NOT: ret
; CHECK: lshr {{.*}} 3
; Check for ASAN's Offset for 64-bit (7fff8000)
@@ -19,8 +19,10 @@ entry:
ret void
}
-; CHECK: @example_atomicrmw
+; CHECK-LABEL: @example_atomicrmw
; CHECK: lshr {{.*}} 3
+; CHECK: __asan_report_store8
+; CHECK-NOT: __asan_report
; CHECK: atomicrmw
; CHECK: ret
@@ -30,7 +32,9 @@ entry:
ret void
}
-; CHECK: @example_cmpxchg
+; CHECK-LABEL: @example_cmpxchg
; CHECK: lshr {{.*}} 3
+; CHECK: __asan_report_store8
+; CHECK-NOT: __asan_report
; CHECK: cmpxchg
; CHECK: ret
diff --git a/test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll b/test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll
new file mode 100644
index 0000000..7d0a62a
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll
@@ -0,0 +1,21 @@
+; Test that MSan does not emit undefined symbol __executable_start when it is
+; not needed (i.e. without -msan-wrap-indirect-calls).
+
+; RUN: opt < %s -msan -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @_Z1fv() #0 {
+entry:
+ ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5.0 (208165)"}
+
+; CHECK-NOT: __executable_start
diff --git a/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll b/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
new file mode 100644
index 0000000..34988ef
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
@@ -0,0 +1,47 @@
+; Test -msan-instrumentation-with-call-threshold
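+;
+; With a threshold of 0, every shadow check should be emitted as a call to the
+; __msan_maybe_warning_* / __msan_maybe_store_origin_* helpers rather than as
+; inline compare-and-branch code, per the patterns below.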
+
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-instrumentation-with-call-threshold=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-instrumentation-with-call-threshold=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @LoadAndCmp(i32* nocapture %a) nounwind uwtable sanitize_memory {
+entry:
+ %0 = load i32* %a, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void (...)* @foo() nounwind
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare void @foo(...)
+
+; CHECK-LABEL: @LoadAndCmp
+; CHECK: = load
+; CHECK: = load
+; CHECK: = zext i1 {{.*}} to i8
+; CHECK: call void @__msan_maybe_warning_1(
+; CHECK-NOT: unreachable
+; CHECK: ret void
+
+
+define void @Store(i64* nocapture %p, i64 %x) nounwind uwtable sanitize_memory {
+entry:
+ store i64 %x, i64* %p, align 4
+ ret void
+}
+
+; CHECK-LABEL: @Store
+; CHECK: load {{.*}} @__msan_param_tls
+; CHECK-ORIGINS: load {{.*}} @__msan_param_origin_tls
+; CHECK: store
+; CHECK-ORIGINS: bitcast i64* {{.*}} to i8*
+; CHECK-ORIGINS: call void @__msan_maybe_store_origin_8(
+; CHECK: store i64
+; CHECK: ret void
diff --git a/test/LTO/attrs.ll b/test/LTO/attrs.ll
new file mode 100644
index 0000000..d196747
--- /dev/null
+++ b/test/LTO/attrs.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s >%t1
+; RUN: llvm-lto -exported-symbol=test_x86_aesni_aeskeygenassist -mattr=+aes -o %t2 %t1
+; RUN: llvm-objdump -d %t2 | FileCheck -check-prefix=WITH_AES %s
+; RUN: not llvm-lto -exported-symbol=test_x86_aesni_aeskeygenassist -mattr=-aes -o %t3 %t1 2>&1 | FileCheck -check-prefix=WITHOUT_AES %s
+
+target triple = "x86_64-unknown-linux-gnu"
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8)
+define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
+ ; WITH_AES: test_x86_aesni_aeskeygenassist
+ ; WITH_AES: aeskeygenassist
+ %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
+ ret <2 x i64> %res
+}
+
+; WITHOUT_AES: LLVM ERROR: Cannot select: intrinsic %llvm.x86.aesni.aeskeygenassist
diff --git a/test/LTO/keep-used-puts-during-instcombine.ll b/test/LTO/keep-used-puts-during-instcombine.ll
index 1dc63dd..69ce3ee 100644
--- a/test/LTO/keep-used-puts-during-instcombine.ll
+++ b/test/LTO/keep-used-puts-during-instcombine.ll
@@ -20,14 +20,14 @@ entry:
ret i32 0
}
-define internal hidden i32 @printf(i8* readonly nocapture %fmt, ...) {
+define internal i32 @printf(i8* readonly nocapture %fmt, ...) {
entry:
%ret = call i32 @bar(i8* %fmt)
ret i32 %ret
}
; CHECK: define {{.*}} @puts(
-define internal hidden i32 @puts(i8* %s) {
+define internal i32 @puts(i8* %s) {
entry:
%ret = call i32 @bar(i8* %s)
ret i32 %ret
diff --git a/test/Linker/Inputs/PR8300.b.ll b/test/Linker/Inputs/PR8300.b.ll
index 9e538f5..362d309 100644
--- a/test/Linker/Inputs/PR8300.b.ll
+++ b/test/Linker/Inputs/PR8300.b.ll
@@ -1,7 +1,7 @@
%foo = type { [8 x i8] }
%bar = type { [9 x i8] }
-@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*)
+@zed = alias void (%foo*), void (%bar*)* @xyz
define void @xyz(%bar* %this) {
entry:
diff --git a/test/Linker/Inputs/alias.ll b/test/Linker/Inputs/alias.ll
new file mode 100644
index 0000000..b869cae
--- /dev/null
+++ b/test/Linker/Inputs/alias.ll
@@ -0,0 +1,3 @@
+@zed = global i32 42
+@foo = alias i32* @zed
+@foo2 = alias i16, i32* @zed
diff --git a/test/Linker/Inputs/cycle.ll b/test/Linker/Inputs/cycle.ll
new file mode 100644
index 0000000..d0eddb6
--- /dev/null
+++ b/test/Linker/Inputs/cycle.ll
@@ -0,0 +1,2 @@
+@foo = alias i32* @bar
+@bar = weak global i32 0
diff --git a/test/Linker/Inputs/datalayout-b.ll b/test/Linker/Inputs/datalayout-b.ll
index 59cdb68..d76c1aa 100644
--- a/test/Linker/Inputs/datalayout-b.ll
+++ b/test/Linker/Inputs/datalayout-b.ll
@@ -1 +1 @@
-target datalayout = "E"
+target datalayout = "e-p:16:16"
diff --git a/test/Linker/Inputs/old_global_ctors.3.4.bc b/test/Linker/Inputs/old_global_ctors.3.4.bc
new file mode 100644
index 0000000..a24b1b4
--- /dev/null
+++ b/test/Linker/Inputs/old_global_ctors.3.4.bc
Binary files differ
diff --git a/test/Linker/alias.ll b/test/Linker/alias.ll
new file mode 100644
index 0000000..5809a15
--- /dev/null
+++ b/test/Linker/alias.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link %s %S/Inputs/alias.ll -S -o - | FileCheck %s
+; RUN: llvm-link %S/Inputs/alias.ll %s -S -o - | FileCheck %s
+
+@foo = weak global i32 0
+; CHECK-DAG: @foo = alias i32* @zed
+
+@bar = alias i32* @foo
+; CHECK-DAG: @bar = alias i32* @zed
+
+@foo2 = weak global i32 0
+; CHECK-DAG: @foo2 = alias i16, i32* @zed
+
+@bar2 = alias i32* @foo2
+; CHECK-DAG: @bar2 = alias i32* @zed
+
+; CHECK-DAG: @zed = global i32 42
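+
+; In either link order, the weak definitions of @foo and @foo2 above should be
+; replaced by the aliases from Inputs/alias.ll, so every alias ends up
+; resolving to @zed.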
diff --git a/test/Linker/cycle.ll b/test/Linker/cycle.ll
new file mode 100644
index 0000000..7d9ad2d
--- /dev/null
+++ b/test/Linker/cycle.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-link %s %S/Inputs/cycle.ll 2>&1 | FileCheck %s
+; RUN: not llvm-link %S/Inputs/cycle.ll %s 2>&1 | FileCheck %s
+
+; CHECK: Linking these modules creates an alias cycle
+
+@foo = weak global i32 0
+@bar = alias i32* @foo
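+
+; @bar aliases @foo here, while Inputs/cycle.ll aliases @foo to @bar. Once the
+; weak globals are replaced by the aliases during linking, the two aliases
+; would refer to each other, which the linker must reject.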
diff --git a/test/Linker/debug-info-version-a.ll b/test/Linker/debug-info-version-a.ll
new file mode 100644
index 0000000..c3d9c87
--- /dev/null
+++ b/test/Linker/debug-info-version-a.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link %s %p/debug-info-version-b.ll -S -o - | FileCheck %s
+
+; Test linking of incompatible debug info versions. The debug info
+; from the other file should be dropped.
+
+; CHECK-NOT: metadata !{metadata !"b.c", metadata !""}
+; CHECK: metadata !{metadata !"a.c", metadata !""}
+; CHECK-NOT: metadata !{metadata !"b.c", metadata !""}
+
+!llvm.module.flags = !{ !0 }
+!llvm.dbg.cu = !{!1}
+
+!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
+!1 = metadata !{i32 589841, metadata !2, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{metadata !"a.c", metadata !""}
+!3 = metadata !{}
diff --git a/test/Linker/debug-info-version-b.ll b/test/Linker/debug-info-version-b.ll
new file mode 100644
index 0000000..2b4f184
--- /dev/null
+++ b/test/Linker/debug-info-version-b.ll
@@ -0,0 +1,10 @@
+; RUN: true
+; Companion for debug-info-version-a.ll.
+
+!llvm.module.flags = !{ !0 }
+!llvm.dbg.cu = !{!1}
+
+!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 42}
+!1 = metadata !{i32 589841, metadata !2, i32 12, metadata !"clang", metadata !"I AM UNEXPECTED!"} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{metadata !"b.c", metadata !""}
+!3 = metadata !{}
diff --git a/test/Linker/global_ctors.ll b/test/Linker/global_ctors.ll
new file mode 100644
index 0000000..541f0d4
--- /dev/null
+++ b/test/Linker/global_ctors.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as %s -o %t.new.bc
+; RUN: llvm-link %t.new.bc %S/Inputs/old_global_ctors.3.4.bc | llvm-dis | FileCheck %s
+
+; old_global_ctors.3.4.bc contains the following LLVM IR, assembled into
+; bitcode by llvm-as from 3.4. It uses a two element @llvm.global_ctors array.
+; ---
+; declare void @a_global_ctor()
+; declare void @b_global_ctor()
+;
+; @llvm.global_ctors = appending global [2 x { i32, void ()* } ] [
+; { i32, void ()* } { i32 65535, void ()* @a_global_ctor },
+; { i32, void ()* } { i32 65535, void ()* @b_global_ctor }
+; ]
+; ---
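+;
+; When the modules are linked, each old two-element entry should be
+; auto-upgraded to the current three-element form with a null data pointer,
+; e.g. { i32, void ()*, i8* } { i32 65535, void ()* @a_global_ctor, i8* null },
+; which is the shape the CHECK-DAG patterns below look for.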
+
+declare void @c_global_ctor()
+declare void @d_global_ctor()
+
+@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* } ] [
+ { i32, void ()*, i8* } { i32 65535, void ()* @c_global_ctor, i8* null },
+ { i32, void ()*, i8* } { i32 65535, void ()* @d_global_ctor, i8* null }
+]
+
+; CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
+; CHECK-DAG: { i32, void ()*, i8* } { i32 65535, void ()* @a_global_ctor, i8* null }
+; CHECK-DAG: { i32, void ()*, i8* } { i32 65535, void ()* @b_global_ctor, i8* null }
+; CHECK-DAG: { i32, void ()*, i8* } { i32 65535, void ()* @c_global_ctor, i8* null }
+; CHECK-DAG: { i32, void ()*, i8* } { i32 65535, void ()* @d_global_ctor, i8* null }
+; CHECK: ]
diff --git a/test/Linker/type-unique-odr-a.ll b/test/Linker/type-unique-odr-a.ll
index a1b8d28..91c8033 100644
--- a/test/Linker/type-unique-odr-a.ll
+++ b/test/Linker/type-unique-odr-a.ll
@@ -22,10 +22,6 @@
; return A().getFoo();
; }
;
-; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_Z3bazv"
-; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv"
; CHECK: DW_TAG_class_type
; CHECK-NEXT: DW_AT_name {{.*}} "A"
; CHECK-NOT: DW_TAG
@@ -33,8 +29,16 @@
; CHECK-NEXT: DW_AT_name {{.*}} "data"
; CHECK-NOT: DW_TAG
; CHECK: DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A6getFooEv"
-; CHECK-NEXT: DW_AT_name {{.*}} "getFoo"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A6getFooEv"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "getFoo"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z3bazv"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv"
; getFoo and A may only appear once.
; CHECK-NOT: {{(getFoo)|("A")}}
diff --git a/test/MC/AArch64/arm64-adr.s b/test/MC/AArch64/arm64-adr.s
new file mode 100644
index 0000000..131e545
--- /dev/null
+++ b/test/MC/AArch64/arm64-adr.s
@@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple arm64 -show-encoding < %s 2>%t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+adr x0, #0
+adr x0, #1
+adr x0, 1f
+adr x0, foo
+// CHECK: adr x0, #0 // encoding: [0x00,0x00,0x00,0x10]
+// CHECK: adr x0, #1 // encoding: [0x00,0x00,0x00,0x30]
+// CHECK: adr x0, .Ltmp0 // encoding: [A,A,A,0x10'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_aarch64_pcrel_adr_imm21
+// CHECK: adr x0, foo // encoding: [A,A,A,0x10'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_aarch64_pcrel_adr_imm21
+
+adrp x0, #0
+adrp x0, #4096
+adrp x0, 1f
+adrp x0, foo
+// CHECK: adrp x0, #0 // encoding: [0x00,0x00,0x00,0x90]
+// CHECK: adrp x0, #4096 // encoding: [0x00,0x00,0x00,0xb0]
+// CHECK: adrp x0, .Ltmp0 // encoding: [A,A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: .Ltmp0, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK: adrp x0, foo // encoding: [A,A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: foo, kind: fixup_aarch64_pcrel_adrp_imm21
+
+adr x0, #0xffffffff
+adrp x0, #0xffffffff
+adrp x0, #1
+// CHECK-ERRORS: error: expected label or encodable integer pc offset
+// CHECK-ERRORS: error: expected label or encodable integer pc offset
+// CHECK-ERRORS: error: expected label or encodable integer pc offset
diff --git a/test/MC/AArch64/arm64-advsimd.s b/test/MC/AArch64/arm64-advsimd.s
new file mode 100644
index 0000000..c627de7
--- /dev/null
+++ b/test/MC/AArch64/arm64-advsimd.s
@@ -0,0 +1,1997 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+
+foo:
+
+ abs.8b v0, v0
+ abs.16b v0, v0
+ abs.4h v0, v0
+ abs.8h v0, v0
+ abs.2s v0, v0
+ abs.4s v0, v0
+
+; CHECK: abs.8b v0, v0 ; encoding: [0x00,0xb8,0x20,0x0e]
+; CHECK: abs.16b v0, v0 ; encoding: [0x00,0xb8,0x20,0x4e]
+; CHECK: abs.4h v0, v0 ; encoding: [0x00,0xb8,0x60,0x0e]
+; CHECK: abs.8h v0, v0 ; encoding: [0x00,0xb8,0x60,0x4e]
+; CHECK: abs.2s v0, v0 ; encoding: [0x00,0xb8,0xa0,0x0e]
+; CHECK: abs.4s v0, v0 ; encoding: [0x00,0xb8,0xa0,0x4e]
+
+ add.8b v0, v0, v0
+ add.16b v0, v0, v0
+ add.4h v0, v0, v0
+ add.8h v0, v0, v0
+ add.2s v0, v0, v0
+ add.4s v0, v0, v0
+ add.2d v0, v0, v0
+
+; CHECK: add.8b v0, v0, v0 ; encoding: [0x00,0x84,0x20,0x0e]
+; CHECK: add.16b v0, v0, v0 ; encoding: [0x00,0x84,0x20,0x4e]
+; CHECK: add.4h v0, v0, v0 ; encoding: [0x00,0x84,0x60,0x0e]
+; CHECK: add.8h v0, v0, v0 ; encoding: [0x00,0x84,0x60,0x4e]
+; CHECK: add.2s v0, v0, v0 ; encoding: [0x00,0x84,0xa0,0x0e]
+; CHECK: add.4s v0, v0, v0 ; encoding: [0x00,0x84,0xa0,0x4e]
+; CHECK: add.2d v0, v0, v0 ; encoding: [0x00,0x84,0xe0,0x4e]
+
+ add d1, d2, d3
+
+; CHECK: add d1, d2, d3 ; encoding: [0x41,0x84,0xe3,0x5e]
+
+ addhn.8b v0, v0, v0
+ addhn2.16b v0, v0, v0
+ addhn.4h v0, v0, v0
+ addhn2.8h v0, v0, v0
+ addhn.2s v0, v0, v0
+ addhn2.4s v0, v0, v0
+
+; CHECK: addhn.8b v0, v0, v0 ; encoding: [0x00,0x40,0x20,0x0e]
+; CHECK: addhn2.16b v0, v0, v0 ; encoding: [0x00,0x40,0x20,0x4e]
+; CHECK: addhn.4h v0, v0, v0 ; encoding: [0x00,0x40,0x60,0x0e]
+; CHECK: addhn2.8h v0, v0, v0 ; encoding: [0x00,0x40,0x60,0x4e]
+; CHECK: addhn.2s v0, v0, v0 ; encoding: [0x00,0x40,0xa0,0x0e]
+; CHECK: addhn2.4s v0, v0, v0 ; encoding: [0x00,0x40,0xa0,0x4e]
+
+ addp.8b v0, v0, v0
+ addp.16b v0, v0, v0
+ addp.4h v0, v0, v0
+ addp.8h v0, v0, v0
+ addp.2s v0, v0, v0
+ addp.4s v0, v0, v0
+ addp.2d v0, v0, v0
+
+; CHECK: addp.8b v0, v0, v0 ; encoding: [0x00,0xbc,0x20,0x0e]
+; CHECK: addp.16b v0, v0, v0 ; encoding: [0x00,0xbc,0x20,0x4e]
+; CHECK: addp.4h v0, v0, v0 ; encoding: [0x00,0xbc,0x60,0x0e]
+; CHECK: addp.8h v0, v0, v0 ; encoding: [0x00,0xbc,0x60,0x4e]
+; CHECK: addp.2s v0, v0, v0 ; encoding: [0x00,0xbc,0xa0,0x0e]
+; CHECK: addp.4s v0, v0, v0 ; encoding: [0x00,0xbc,0xa0,0x4e]
+; CHECK: addp.2d v0, v0, v0 ; encoding: [0x00,0xbc,0xe0,0x4e]
+
+ addp.2d d0, v0
+
+; CHECK: addp.2d d0, v0 ; encoding: [0x00,0xb8,0xf1,0x5e]
+
+ addv.8b b0, v0
+ addv.16b b0, v0
+ addv.4h h0, v0
+ addv.8h h0, v0
+ addv.4s s0, v0
+
+; CHECK: addv.8b b0, v0 ; encoding: [0x00,0xb8,0x31,0x0e]
+; CHECK: addv.16b b0, v0 ; encoding: [0x00,0xb8,0x31,0x4e]
+; CHECK: addv.4h h0, v0 ; encoding: [0x00,0xb8,0x71,0x0e]
+; CHECK: addv.8h h0, v0 ; encoding: [0x00,0xb8,0x71,0x4e]
+; CHECK: addv.4s s0, v0 ; encoding: [0x00,0xb8,0xb1,0x4e]
+
+
+; INS/DUP
+ dup.2d v0, x3
+ dup.4s v0, w3
+ dup.2s v0, w3
+ dup.8h v0, w3
+ dup.4h v0, w3
+ dup.16b v0, w3
+ dup.8b v0, w3
+
+ dup v1.2d, x3
+ dup v2.4s, w4
+ dup v3.2s, w5
+ dup v4.8h, w6
+ dup v5.4h, w7
+ dup v6.16b, w8
+ dup v7.8b, w9
+
+; CHECK: dup.2d v0, x3 ; encoding: [0x60,0x0c,0x08,0x4e]
+; CHECK: dup.4s v0, w3 ; encoding: [0x60,0x0c,0x04,0x4e]
+; CHECK: dup.2s v0, w3 ; encoding: [0x60,0x0c,0x04,0x0e]
+; CHECK: dup.8h v0, w3 ; encoding: [0x60,0x0c,0x02,0x4e]
+; CHECK: dup.4h v0, w3 ; encoding: [0x60,0x0c,0x02,0x0e]
+; CHECK: dup.16b v0, w3 ; encoding: [0x60,0x0c,0x01,0x4e]
+; CHECK: dup.8b v0, w3 ; encoding: [0x60,0x0c,0x01,0x0e]
+
+; CHECK: dup.2d v1, x3 ; encoding: [0x61,0x0c,0x08,0x4e]
+; CHECK: dup.4s v2, w4 ; encoding: [0x82,0x0c,0x04,0x4e]
+; CHECK: dup.2s v3, w5 ; encoding: [0xa3,0x0c,0x04,0x0e]
+; CHECK: dup.8h v4, w6 ; encoding: [0xc4,0x0c,0x02,0x4e]
+; CHECK: dup.4h v5, w7 ; encoding: [0xe5,0x0c,0x02,0x0e]
+; CHECK: dup.16b v6, w8 ; encoding: [0x06,0x0d,0x01,0x4e]
+; CHECK: dup.8b v7, w9 ; encoding: [0x27,0x0d,0x01,0x0e]
+
+ dup.2d v0, v3[1]
+ dup.2s v0, v3[1]
+ dup.4s v0, v3[1]
+ dup.4h v0, v3[1]
+ dup.8h v0, v3[1]
+ dup.8b v0, v3[1]
+ dup.16b v0, v3[1]
+
+ dup v7.2d, v9.d[1]
+ dup v6.2s, v8.s[1]
+ dup v5.4s, v7.s[2]
+ dup v4.4h, v6.h[3]
+ dup v3.8h, v5.h[4]
+ dup v2.8b, v4.b[5]
+ dup v1.16b, v3.b[6]
+
+; CHECK: dup.2d v0, v3[1] ; encoding: [0x60,0x04,0x18,0x4e]
+; CHECK: dup.2s v0, v3[1] ; encoding: [0x60,0x04,0x0c,0x0e]
+; CHECK: dup.4s v0, v3[1] ; encoding: [0x60,0x04,0x0c,0x4e]
+; CHECK: dup.4h v0, v3[1] ; encoding: [0x60,0x04,0x06,0x0e]
+; CHECK: dup.8h v0, v3[1] ; encoding: [0x60,0x04,0x06,0x4e]
+; CHECK: dup.8b v0, v3[1] ; encoding: [0x60,0x04,0x03,0x0e]
+; CHECK: dup.16b v0, v3[1] ; encoding: [0x60,0x04,0x03,0x4e]
+
+; CHECK: dup.2d v7, v9[1] ; encoding: [0x27,0x05,0x18,0x4e]
+; CHECK: dup.2s v6, v8[1] ; encoding: [0x06,0x05,0x0c,0x0e]
+; CHECK: dup.4s v5, v7[2] ; encoding: [0xe5,0x04,0x14,0x4e]
+; CHECK: dup.4h v4, v6[3] ; encoding: [0xc4,0x04,0x0e,0x0e]
+; CHECK: dup.8h v3, v5[4] ; encoding: [0xa3,0x04,0x12,0x4e]
+; CHECK: dup.8b v2, v4[5] ; encoding: [0x82,0x04,0x0b,0x0e]
+; CHECK: dup.16b v1, v3[6] ; encoding: [0x61,0x04,0x0d,0x4e]
+
+ dup b3, v4[1]
+ dup h3, v4[1]
+ dup s3, v4[1]
+ dup d3, v4[1]
+ dup b3, v4.b[1]
+ dup h3, v4.h[1]
+ dup s3, v4.s[1]
+ dup d3, v4.d[1]
+
+ mov b3, v4[1]
+ mov h3, v4[1]
+ mov s3, v4[1]
+ mov d3, v4[1]
+ mov b3, v4.b[1]
+ mov h3, v4.h[1]
+ mov s3, v4.s[1]
+ mov d3, v4.d[1]
+
+; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
+; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
+; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
+; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
+; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
+; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
+; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
+; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
+
+; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
+; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
+; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
+; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
+; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
+; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
+; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
+; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
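+; Note: DUP of a single element into a scalar register is a plain copy, so it
+; disassembles as MOV, the preferred alias, for both spellings checked above.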
+
+ smov.s x3, v2[2]
+ smov x3, v2.s[2]
+ umov.s w3, v2[2]
+ umov w3, v2.s[2]
+ umov.d x3, v2[1]
+ umov x3, v2.d[1]
+
+; CHECK: smov.s x3, v2[2] ; encoding: [0x43,0x2c,0x14,0x4e]
+; CHECK: smov.s x3, v2[2] ; encoding: [0x43,0x2c,0x14,0x4e]
+; CHECK: mov.s w3, v2[2] ; encoding: [0x43,0x3c,0x14,0x0e]
+; CHECK: mov.s w3, v2[2] ; encoding: [0x43,0x3c,0x14,0x0e]
+; CHECK: mov.d x3, v2[1] ; encoding: [0x43,0x3c,0x18,0x4e]
+; CHECK: mov.d x3, v2[1] ; encoding: [0x43,0x3c,0x18,0x4e]
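+; Note: UMOV of a full 32- or 64-bit element is a plain copy and therefore
+; prints as MOV, its preferred alias; SMOV, which sign-extends, keeps its own
+; mnemonic.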
+
+ ; MOV aliases for UMOV instructions above
+
+ mov.s w2, v3[3]
+ mov w5, v7.s[2]
+ mov.d x11, v13[1]
+ mov x17, v19.d[0]
+
+; CHECK: mov.s w2, v3[3] ; encoding: [0x62,0x3c,0x1c,0x0e]
+; CHECK: mov.s w5, v7[2] ; encoding: [0xe5,0x3c,0x14,0x0e]
+; CHECK: mov.d x11, v13[1] ; encoding: [0xab,0x3d,0x18,0x4e]
+; CHECK: mov.d x17, v19[0] ; encoding: [0x71,0x3e,0x08,0x4e]
+
+ ins.d v2[1], x5
+ ins.s v2[1], w5
+ ins.h v2[1], w5
+ ins.b v2[1], w5
+
+ ins v2.d[1], x5
+ ins v2.s[1], w5
+ ins v2.h[1], w5
+ ins v2.b[1], w5
+
+; CHECK: ins.d v2[1], x5 ; encoding: [0xa2,0x1c,0x18,0x4e]
+; CHECK: ins.s v2[1], w5 ; encoding: [0xa2,0x1c,0x0c,0x4e]
+; CHECK: ins.h v2[1], w5 ; encoding: [0xa2,0x1c,0x06,0x4e]
+; CHECK: ins.b v2[1], w5 ; encoding: [0xa2,0x1c,0x03,0x4e]
+
+; CHECK: ins.d v2[1], x5 ; encoding: [0xa2,0x1c,0x18,0x4e]
+; CHECK: ins.s v2[1], w5 ; encoding: [0xa2,0x1c,0x0c,0x4e]
+; CHECK: ins.h v2[1], w5 ; encoding: [0xa2,0x1c,0x06,0x4e]
+; CHECK: ins.b v2[1], w5 ; encoding: [0xa2,0x1c,0x03,0x4e]
+
+ ins.d v2[1], v15[1]
+ ins.s v2[1], v15[1]
+ ins.h v2[1], v15[1]
+ ins.b v2[1], v15[1]
+
+ ins v2.d[1], v15.d[0]
+ ins v2.s[3], v15.s[2]
+ ins v2.h[7], v15.h[3]
+ ins v2.b[10], v15.b[5]
+
+; CHECK: ins.d v2[1], v15[1] ; encoding: [0xe2,0x45,0x18,0x6e]
+; CHECK: ins.s v2[1], v15[1] ; encoding: [0xe2,0x25,0x0c,0x6e]
+; CHECK: ins.h v2[1], v15[1] ; encoding: [0xe2,0x15,0x06,0x6e]
+; CHECK: ins.b v2[1], v15[1] ; encoding: [0xe2,0x0d,0x03,0x6e]
+
+; CHECK: ins.d v2[1], v15[0] ; encoding: [0xe2,0x05,0x18,0x6e]
+; CHECK: ins.s v2[3], v15[2] ; encoding: [0xe2,0x45,0x1c,0x6e]
+; CHECK: ins.h v2[7], v15[3] ; encoding: [0xe2,0x35,0x1e,0x6e]
+; CHECK: ins.b v2[10], v15[5] ; encoding: [0xe2,0x2d,0x15,0x6e]
+
+; MOV aliases for the above INS instructions.
+ mov.d v2[1], x5
+ mov.s v3[1], w6
+ mov.h v4[1], w7
+ mov.b v5[1], w8
+
+ mov v9.d[1], x2
+ mov v8.s[1], w3
+ mov v7.h[1], w4
+ mov v6.b[1], w5
+
+ mov.d v1[1], v10[1]
+ mov.s v2[1], v11[1]
+ mov.h v7[1], v12[1]
+ mov.b v8[1], v15[1]
+
+ mov v2.d[1], v15.d[0]
+ mov v7.s[3], v16.s[2]
+ mov v8.h[7], v17.h[3]
+ mov v9.b[10], v18.b[5]
+
+; CHECK: ins.d v2[1], x5 ; encoding: [0xa2,0x1c,0x18,0x4e]
+; CHECK: ins.s v3[1], w6 ; encoding: [0xc3,0x1c,0x0c,0x4e]
+; CHECK: ins.h v4[1], w7 ; encoding: [0xe4,0x1c,0x06,0x4e]
+; CHECK: ins.b v5[1], w8 ; encoding: [0x05,0x1d,0x03,0x4e]
+; CHECK: ins.d v9[1], x2 ; encoding: [0x49,0x1c,0x18,0x4e]
+; CHECK: ins.s v8[1], w3 ; encoding: [0x68,0x1c,0x0c,0x4e]
+; CHECK: ins.h v7[1], w4 ; encoding: [0x87,0x1c,0x06,0x4e]
+; CHECK: ins.b v6[1], w5 ; encoding: [0xa6,0x1c,0x03,0x4e]
+; CHECK: ins.d v1[1], v10[1] ; encoding: [0x41,0x45,0x18,0x6e]
+; CHECK: ins.s v2[1], v11[1] ; encoding: [0x62,0x25,0x0c,0x6e]
+; CHECK: ins.h v7[1], v12[1] ; encoding: [0x87,0x15,0x06,0x6e]
+; CHECK: ins.b v8[1], v15[1] ; encoding: [0xe8,0x0d,0x03,0x6e]
+; CHECK: ins.d v2[1], v15[0] ; encoding: [0xe2,0x05,0x18,0x6e]
+; CHECK: ins.s v7[3], v16[2] ; encoding: [0x07,0x46,0x1c,0x6e]
+; CHECK: ins.h v8[7], v17[3] ; encoding: [0x28,0x36,0x1e,0x6e]
+; CHECK: ins.b v9[10], v18[5] ; encoding: [0x49,0x2e,0x15,0x6e]
+
+
+ and.8b v0, v0, v0
+ and.16b v0, v0, v0
+
+; CHECK: and.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x20,0x0e]
+; CHECK: and.16b v0, v0, v0 ; encoding: [0x00,0x1c,0x20,0x4e]
+
+ bic.8b v0, v0, v0
+
+; CHECK: bic.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x60,0x0e]
+
+ cmeq.8b v0, v0, v0
+ cmge.8b v0, v0, v0
+ cmgt.8b v0, v0, v0
+ cmhi.8b v0, v0, v0
+ cmhs.8b v0, v0, v0
+ cmtst.8b v0, v0, v0
+ fabd.2s v0, v0, v0
+ facge.2s v0, v0, v0
+ facgt.2s v0, v0, v0
+ faddp.2s v0, v0, v0
+ fadd.2s v0, v0, v0
+ fcmeq.2s v0, v0, v0
+ fcmge.2s v0, v0, v0
+ fcmgt.2s v0, v0, v0
+ fdiv.2s v0, v0, v0
+ fmaxnmp.2s v0, v0, v0
+ fmaxnm.2s v0, v0, v0
+ fmaxp.2s v0, v0, v0
+ fmax.2s v0, v0, v0
+ fminnmp.2s v0, v0, v0
+ fminnm.2s v0, v0, v0
+ fminp.2s v0, v0, v0
+ fmin.2s v0, v0, v0
+ fmla.2s v0, v0, v0
+ fmls.2s v0, v0, v0
+ fmulx.2s v0, v0, v0
+ fmul.2s v0, v0, v0
+ fmulx d2, d3, d1
+ fmulx s2, s3, s1
+ frecps.2s v0, v0, v0
+ frsqrts.2s v0, v0, v0
+ fsub.2s v0, v0, v0
+ mla.8b v0, v0, v0
+ mls.8b v0, v0, v0
+ mul.8b v0, v0, v0
+ pmul.8b v0, v0, v0
+ saba.8b v0, v0, v0
+ sabd.8b v0, v0, v0
+ shadd.8b v0, v0, v0
+ shsub.8b v0, v0, v0
+ smaxp.8b v0, v0, v0
+ smax.8b v0, v0, v0
+ sminp.8b v0, v0, v0
+ smin.8b v0, v0, v0
+ sqadd.8b v0, v0, v0
+ sqdmulh.4h v0, v0, v0
+ sqrdmulh.4h v0, v0, v0
+ sqrshl.8b v0, v0, v0
+ sqshl.8b v0, v0, v0
+ sqsub.8b v0, v0, v0
+ srhadd.8b v0, v0, v0
+ srshl.8b v0, v0, v0
+ sshl.8b v0, v0, v0
+ sub.8b v0, v0, v0
+ uaba.8b v0, v0, v0
+ uabd.8b v0, v0, v0
+ uhadd.8b v0, v0, v0
+ uhsub.8b v0, v0, v0
+ umaxp.8b v0, v0, v0
+ umax.8b v0, v0, v0
+ uminp.8b v0, v0, v0
+ umin.8b v0, v0, v0
+ uqadd.8b v0, v0, v0
+ uqrshl.8b v0, v0, v0
+ uqshl.8b v0, v0, v0
+ uqsub.8b v0, v0, v0
+ urhadd.8b v0, v0, v0
+ urshl.8b v0, v0, v0
+ ushl.8b v0, v0, v0
+
+; CHECK: cmeq.8b v0, v0, v0 ; encoding: [0x00,0x8c,0x20,0x2e]
+; CHECK: cmge.8b v0, v0, v0 ; encoding: [0x00,0x3c,0x20,0x0e]
+; CHECK: cmgt.8b v0, v0, v0 ; encoding: [0x00,0x34,0x20,0x0e]
+; CHECK: cmhi.8b v0, v0, v0 ; encoding: [0x00,0x34,0x20,0x2e]
+; CHECK: cmhs.8b v0, v0, v0 ; encoding: [0x00,0x3c,0x20,0x2e]
+; CHECK: cmtst.8b v0, v0, v0 ; encoding: [0x00,0x8c,0x20,0x0e]
+; CHECK: fabd.2s v0, v0, v0 ; encoding: [0x00,0xd4,0xa0,0x2e]
+; CHECK: facge.2s v0, v0, v0 ; encoding: [0x00,0xec,0x20,0x2e]
+; CHECK: facgt.2s v0, v0, v0 ; encoding: [0x00,0xec,0xa0,0x2e]
+; CHECK: faddp.2s v0, v0, v0 ; encoding: [0x00,0xd4,0x20,0x2e]
+; CHECK: fadd.2s v0, v0, v0 ; encoding: [0x00,0xd4,0x20,0x0e]
+; CHECK: fcmeq.2s v0, v0, v0 ; encoding: [0x00,0xe4,0x20,0x0e]
+; CHECK: fcmge.2s v0, v0, v0 ; encoding: [0x00,0xe4,0x20,0x2e]
+; CHECK: fcmgt.2s v0, v0, v0 ; encoding: [0x00,0xe4,0xa0,0x2e]
+; CHECK: fdiv.2s v0, v0, v0 ; encoding: [0x00,0xfc,0x20,0x2e]
+; CHECK: fmaxnmp.2s v0, v0, v0 ; encoding: [0x00,0xc4,0x20,0x2e]
+; CHECK: fmaxnm.2s v0, v0, v0 ; encoding: [0x00,0xc4,0x20,0x0e]
+; CHECK: fmaxp.2s v0, v0, v0 ; encoding: [0x00,0xf4,0x20,0x2e]
+; CHECK: fmax.2s v0, v0, v0 ; encoding: [0x00,0xf4,0x20,0x0e]
+; CHECK: fminnmp.2s v0, v0, v0 ; encoding: [0x00,0xc4,0xa0,0x2e]
+; CHECK: fminnm.2s v0, v0, v0 ; encoding: [0x00,0xc4,0xa0,0x0e]
+; CHECK: fminp.2s v0, v0, v0 ; encoding: [0x00,0xf4,0xa0,0x2e]
+; CHECK: fmin.2s v0, v0, v0 ; encoding: [0x00,0xf4,0xa0,0x0e]
+; CHECK: fmla.2s v0, v0, v0 ; encoding: [0x00,0xcc,0x20,0x0e]
+; CHECK: fmls.2s v0, v0, v0 ; encoding: [0x00,0xcc,0xa0,0x0e]
+; CHECK: fmulx.2s v0, v0, v0 ; encoding: [0x00,0xdc,0x20,0x0e]
+
+; CHECK: fmul.2s v0, v0, v0 ; encoding: [0x00,0xdc,0x20,0x2e]
+; CHECK: fmulx d2, d3, d1 ; encoding: [0x62,0xdc,0x61,0x5e]
+; CHECK: fmulx s2, s3, s1 ; encoding: [0x62,0xdc,0x21,0x5e]
+; CHECK: frecps.2s v0, v0, v0 ; encoding: [0x00,0xfc,0x20,0x0e]
+; CHECK: frsqrts.2s v0, v0, v0 ; encoding: [0x00,0xfc,0xa0,0x0e]
+; CHECK: fsub.2s v0, v0, v0 ; encoding: [0x00,0xd4,0xa0,0x0e]
+; CHECK: mla.8b v0, v0, v0 ; encoding: [0x00,0x94,0x20,0x0e]
+; CHECK: mls.8b v0, v0, v0 ; encoding: [0x00,0x94,0x20,0x2e]
+; CHECK: mul.8b v0, v0, v0 ; encoding: [0x00,0x9c,0x20,0x0e]
+; CHECK: pmul.8b v0, v0, v0 ; encoding: [0x00,0x9c,0x20,0x2e]
+; CHECK: saba.8b v0, v0, v0 ; encoding: [0x00,0x7c,0x20,0x0e]
+; CHECK: sabd.8b v0, v0, v0 ; encoding: [0x00,0x74,0x20,0x0e]
+; CHECK: shadd.8b v0, v0, v0 ; encoding: [0x00,0x04,0x20,0x0e]
+; CHECK: shsub.8b v0, v0, v0 ; encoding: [0x00,0x24,0x20,0x0e]
+; CHECK: smaxp.8b v0, v0, v0 ; encoding: [0x00,0xa4,0x20,0x0e]
+; CHECK: smax.8b v0, v0, v0 ; encoding: [0x00,0x64,0x20,0x0e]
+; CHECK: sminp.8b v0, v0, v0 ; encoding: [0x00,0xac,0x20,0x0e]
+; CHECK: smin.8b v0, v0, v0 ; encoding: [0x00,0x6c,0x20,0x0e]
+; CHECK: sqadd.8b v0, v0, v0 ; encoding: [0x00,0x0c,0x20,0x0e]
+; CHECK: sqdmulh.4h v0, v0, v0 ; encoding: [0x00,0xb4,0x60,0x0e]
+; CHECK: sqrdmulh.4h v0, v0, v0 ; encoding: [0x00,0xb4,0x60,0x2e]
+; CHECK: sqrshl.8b v0, v0, v0 ; encoding: [0x00,0x5c,0x20,0x0e]
+; CHECK: sqshl.8b v0, v0, v0 ; encoding: [0x00,0x4c,0x20,0x0e]
+; CHECK: sqsub.8b v0, v0, v0 ; encoding: [0x00,0x2c,0x20,0x0e]
+; CHECK: srhadd.8b v0, v0, v0 ; encoding: [0x00,0x14,0x20,0x0e]
+; CHECK: srshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x0e]
+; CHECK: sshl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x0e]
+; CHECK: sub.8b v0, v0, v0 ; encoding: [0x00,0x84,0x20,0x2e]
+; CHECK: uaba.8b v0, v0, v0 ; encoding: [0x00,0x7c,0x20,0x2e]
+; CHECK: uabd.8b v0, v0, v0 ; encoding: [0x00,0x74,0x20,0x2e]
+; CHECK: uhadd.8b v0, v0, v0 ; encoding: [0x00,0x04,0x20,0x2e]
+; CHECK: uhsub.8b v0, v0, v0 ; encoding: [0x00,0x24,0x20,0x2e]
+; CHECK: umaxp.8b v0, v0, v0 ; encoding: [0x00,0xa4,0x20,0x2e]
+; CHECK: umax.8b v0, v0, v0 ; encoding: [0x00,0x64,0x20,0x2e]
+; CHECK: uminp.8b v0, v0, v0 ; encoding: [0x00,0xac,0x20,0x2e]
+; CHECK: umin.8b v0, v0, v0 ; encoding: [0x00,0x6c,0x20,0x2e]
+; CHECK: uqadd.8b v0, v0, v0 ; encoding: [0x00,0x0c,0x20,0x2e]
+; CHECK: uqrshl.8b v0, v0, v0 ; encoding: [0x00,0x5c,0x20,0x2e]
+; CHECK: uqshl.8b v0, v0, v0 ; encoding: [0x00,0x4c,0x20,0x2e]
+; CHECK: uqsub.8b v0, v0, v0 ; encoding: [0x00,0x2c,0x20,0x2e]
+; CHECK: urhadd.8b v0, v0, v0 ; encoding: [0x00,0x14,0x20,0x2e]
+; CHECK: urshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x2e]
+; CHECK: ushl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x2e]
+
+ bif.8b v0, v0, v0
+ bit.8b v0, v0, v0
+ bsl.8b v0, v0, v0
+ eor.8b v0, v0, v0
+ orn.8b v0, v0, v0
+ orr.8b v0, v0, v1
+
+; CHECK: bif.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xe0,0x2e]
+; CHECK: bit.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xa0,0x2e]
+; CHECK: bsl.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x60,0x2e]
+; CHECK: eor.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x20,0x2e]
+; CHECK: orn.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xe0,0x0e]
+; CHECK: orr.8b v0, v0, v1 ; encoding: [0x00,0x1c,0xa1,0x0e]
+
+ sadalp.4h v0, v0
+ sadalp.8h v0, v0
+ sadalp.2s v0, v0
+ sadalp.4s v0, v0
+ sadalp.1d v0, v0
+ sadalp.2d v0, v0
+
+; CHECK: sadalp.4h v0, v0 ; encoding: [0x00,0x68,0x20,0x0e]
+; CHECK: sadalp.8h v0, v0 ; encoding: [0x00,0x68,0x20,0x4e]
+; CHECK: sadalp.2s v0, v0 ; encoding: [0x00,0x68,0x60,0x0e]
+; CHECK: sadalp.4s v0, v0 ; encoding: [0x00,0x68,0x60,0x4e]
+; CHECK: sadalp.1d v0, v0 ; encoding: [0x00,0x68,0xa0,0x0e]
+; CHECK: sadalp.2d v0, v0 ; encoding: [0x00,0x68,0xa0,0x4e]
+
+ cls.8b v0, v0
+ clz.8b v0, v0
+ cnt.8b v0, v0
+ fabs.2s v0, v0
+ fneg.2s v0, v0
+ frecpe.2s v0, v0
+ frinta.2s v0, v0
+ frintx.2s v0, v0
+ frinti.2s v0, v0
+ frintm.2s v0, v0
+ frintn.2s v0, v0
+ frintp.2s v0, v0
+ frintz.2s v0, v0
+ frsqrte.2s v0, v0
+ fsqrt.2s v0, v0
+ neg.8b v0, v0
+ not.8b v0, v0
+ rbit.8b v0, v0
+ rev16.8b v0, v0
+ rev32.8b v0, v0
+ rev64.8b v0, v0
+ sadalp.4h v0, v0
+ saddlp.4h v0, v0
+ scvtf.2s v0, v0
+ sqabs.8b v0, v0
+ sqneg.8b v0, v0
+ sqxtn.8b v0, v0
+ sqxtun.8b v0, v0
+ suqadd.8b v0, v0
+ uadalp.4h v0, v0
+ uaddlp.4h v0, v0
+ ucvtf.2s v0, v0
+ uqxtn.8b v0, v0
+ urecpe.2s v0, v0
+ ursqrte.2s v0, v0
+ usqadd.8b v0, v0
+ xtn.8b v0, v0
+ shll.8h v1, v2, #8
+ shll.4s v3, v4, #16
+ shll.2d v5, v6, #32
+ shll2.8h v7, v8, #8
+ shll2.4s v9, v10, #16
+ shll2.2d v11, v12, #32
+ shll v1.8h, v2.8b, #8
+ shll v1.4s, v2.4h, #16
+ shll v1.2d, v2.2s, #32
+ shll2 v1.8h, v2.16b, #8
+ shll2 v1.4s, v2.8h, #16
+ shll2 v1.2d, v2.4s, #32
+
+; CHECK: cls.8b v0, v0 ; encoding: [0x00,0x48,0x20,0x0e]
+; CHECK: clz.8b v0, v0 ; encoding: [0x00,0x48,0x20,0x2e]
+; CHECK: cnt.8b v0, v0 ; encoding: [0x00,0x58,0x20,0x0e]
+; CHECK: fabs.2s v0, v0 ; encoding: [0x00,0xf8,0xa0,0x0e]
+; CHECK: fneg.2s v0, v0 ; encoding: [0x00,0xf8,0xa0,0x2e]
+; CHECK: frecpe.2s v0, v0 ; encoding: [0x00,0xd8,0xa1,0x0e]
+; CHECK: frinta.2s v0, v0 ; encoding: [0x00,0x88,0x21,0x2e]
+; CHECK: frintx.2s v0, v0 ; encoding: [0x00,0x98,0x21,0x2e]
+; CHECK: frinti.2s v0, v0 ; encoding: [0x00,0x98,0xa1,0x2e]
+; CHECK: frintm.2s v0, v0 ; encoding: [0x00,0x98,0x21,0x0e]
+; CHECK: frintn.2s v0, v0 ; encoding: [0x00,0x88,0x21,0x0e]
+; CHECK: frintp.2s v0, v0 ; encoding: [0x00,0x88,0xa1,0x0e]
+; CHECK: frintz.2s v0, v0 ; encoding: [0x00,0x98,0xa1,0x0e]
+; CHECK: frsqrte.2s v0, v0 ; encoding: [0x00,0xd8,0xa1,0x2e]
+; CHECK: fsqrt.2s v0, v0 ; encoding: [0x00,0xf8,0xa1,0x2e]
+; CHECK: neg.8b v0, v0 ; encoding: [0x00,0xb8,0x20,0x2e]
+; CHECK: mvn.8b v0, v0 ; encoding: [0x00,0x58,0x20,0x2e]
+; CHECK: rbit.8b v0, v0 ; encoding: [0x00,0x58,0x60,0x2e]
+; CHECK: rev16.8b v0, v0 ; encoding: [0x00,0x18,0x20,0x0e]
+; CHECK: rev32.8b v0, v0 ; encoding: [0x00,0x08,0x20,0x2e]
+; CHECK: rev64.8b v0, v0 ; encoding: [0x00,0x08,0x20,0x0e]
+; CHECK: sadalp.4h v0, v0 ; encoding: [0x00,0x68,0x20,0x0e]
+; CHECK: saddlp.4h v0, v0 ; encoding: [0x00,0x28,0x20,0x0e]
+; CHECK: scvtf.2s v0, v0 ; encoding: [0x00,0xd8,0x21,0x0e]
+; CHECK: sqabs.8b v0, v0 ; encoding: [0x00,0x78,0x20,0x0e]
+; CHECK: sqneg.8b v0, v0 ; encoding: [0x00,0x78,0x20,0x2e]
+; CHECK: sqxtn.8b v0, v0 ; encoding: [0x00,0x48,0x21,0x0e]
+; CHECK: sqxtun.8b v0, v0 ; encoding: [0x00,0x28,0x21,0x2e]
+; CHECK: suqadd.8b v0, v0 ; encoding: [0x00,0x38,0x20,0x0e]
+; CHECK: uadalp.4h v0, v0 ; encoding: [0x00,0x68,0x20,0x2e]
+; CHECK: uaddlp.4h v0, v0 ; encoding: [0x00,0x28,0x20,0x2e]
+; CHECK: ucvtf.2s v0, v0 ; encoding: [0x00,0xd8,0x21,0x2e]
+; CHECK: uqxtn.8b v0, v0 ; encoding: [0x00,0x48,0x21,0x2e]
+; CHECK: urecpe.2s v0, v0 ; encoding: [0x00,0xc8,0xa1,0x0e]
+; CHECK: ursqrte.2s v0, v0 ; encoding: [0x00,0xc8,0xa1,0x2e]
+; CHECK: usqadd.8b v0, v0 ; encoding: [0x00,0x38,0x20,0x2e]
+; CHECK: xtn.8b v0, v0 ; encoding: [0x00,0x28,0x21,0x0e]
+; CHECK: shll.8h v1, v2, #8 ; encoding: [0x41,0x38,0x21,0x2e]
+; CHECK: shll.4s v3, v4, #16 ; encoding: [0x83,0x38,0x61,0x2e]
+; CHECK: shll.2d v5, v6, #32 ; encoding: [0xc5,0x38,0xa1,0x2e]
+; CHECK: shll2.8h v7, v8, #8 ; encoding: [0x07,0x39,0x21,0x6e]
+; CHECK: shll2.4s v9, v10, #16 ; encoding: [0x49,0x39,0x61,0x6e]
+; CHECK: shll2.2d v11, v12, #32 ; encoding: [0x8b,0x39,0xa1,0x6e]
+; CHECK: shll.8h v1, v2, #8 ; encoding: [0x41,0x38,0x21,0x2e]
+; CHECK: shll.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x2e]
+; CHECK: shll.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x2e]
+; CHECK: shll2.8h v1, v2, #8 ; encoding: [0x41,0x38,0x21,0x6e]
+; CHECK: shll2.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x6e]
+; CHECK: shll2.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x6e]
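+; Note: "not.8b" above prints as "mvn.8b", its preferred disassembly alias.
+; Also note that SHLL/SHLL2 only accept a shift equal to the source element
+; width (#8 for bytes, #16 for halfwords, #32 for words); other amounts are
+; rejected.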
+
+
+ cmeq.8b v0, v0, #0
+ cmeq.16b v0, v0, #0
+ cmeq.4h v0, v0, #0
+ cmeq.8h v0, v0, #0
+ cmeq.2s v0, v0, #0
+ cmeq.4s v0, v0, #0
+ cmeq.2d v0, v0, #0
+
+; CHECK: cmeq.8b v0, v0, #0 ; encoding: [0x00,0x98,0x20,0x0e]
+; CHECK: cmeq.16b v0, v0, #0 ; encoding: [0x00,0x98,0x20,0x4e]
+; CHECK: cmeq.4h v0, v0, #0 ; encoding: [0x00,0x98,0x60,0x0e]
+; CHECK: cmeq.8h v0, v0, #0 ; encoding: [0x00,0x98,0x60,0x4e]
+; CHECK: cmeq.2s v0, v0, #0 ; encoding: [0x00,0x98,0xa0,0x0e]
+; CHECK: cmeq.4s v0, v0, #0 ; encoding: [0x00,0x98,0xa0,0x4e]
+; CHECK: cmeq.2d v0, v0, #0 ; encoding: [0x00,0x98,0xe0,0x4e]
+
+ cmge.8b v0, v0, #0
+ cmgt.8b v0, v0, #0
+ cmle.8b v0, v0, #0
+ cmlt.8b v0, v0, #0
+ fcmeq.2s v0, v0, #0
+ fcmge.2s v0, v0, #0
+ fcmgt.2s v0, v0, #0
+ fcmle.2s v0, v0, #0
+ fcmlt.2s v0, v0, #0
+
+; ARM verbose mode aliases
+ cmlt v8.8b, v14.8b, #0
+ cmlt v8.16b, v14.16b, #0
+ cmlt v8.4h, v14.4h, #0
+ cmlt v8.8h, v14.8h, #0
+ cmlt v8.2s, v14.2s, #0
+ cmlt v8.4s, v14.4s, #0
+ cmlt v8.2d, v14.2d, #0
+
+; CHECK: cmge.8b v0, v0, #0 ; encoding: [0x00,0x88,0x20,0x2e]
+; CHECK: cmgt.8b v0, v0, #0 ; encoding: [0x00,0x88,0x20,0x0e]
+; CHECK: cmle.8b v0, v0, #0 ; encoding: [0x00,0x98,0x20,0x2e]
+; CHECK: cmlt.8b v0, v0, #0 ; encoding: [0x00,0xa8,0x20,0x0e]
+; CHECK: fcmeq.2s v0, v0, #0.0 ; encoding: [0x00,0xd8,0xa0,0x0e]
+; CHECK: fcmge.2s v0, v0, #0.0 ; encoding: [0x00,0xc8,0xa0,0x2e]
+; CHECK: fcmgt.2s v0, v0, #0.0 ; encoding: [0x00,0xc8,0xa0,0x0e]
+; CHECK: fcmle.2s v0, v0, #0.0 ; encoding: [0x00,0xd8,0xa0,0x2e]
+; CHECK: fcmlt.2s v0, v0, #0.0 ; encoding: [0x00,0xe8,0xa0,0x0e]
+; CHECK: cmlt.8b v8, v14, #0 ; encoding: [0xc8,0xa9,0x20,0x0e]
+; CHECK: cmlt.16b v8, v14, #0 ; encoding: [0xc8,0xa9,0x20,0x4e]
+; CHECK: cmlt.4h v8, v14, #0 ; encoding: [0xc8,0xa9,0x60,0x0e]
+; CHECK: cmlt.8h v8, v14, #0 ; encoding: [0xc8,0xa9,0x60,0x4e]
+; CHECK: cmlt.2s v8, v14, #0 ; encoding: [0xc8,0xa9,0xa0,0x0e]
+; CHECK: cmlt.4s v8, v14, #0 ; encoding: [0xc8,0xa9,0xa0,0x4e]
+; CHECK: cmlt.2d v8, v14, #0 ; encoding: [0xc8,0xa9,0xe0,0x4e]
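+; Note: the floating-point compares against zero accept an integer "#0" in the
+; source, but the immediate is always printed as "#0.0", as the fcmeq/fcmge/
+; fcmgt/fcmle/fcmlt lines above show.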
+
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD Floating-point <-> Integer Conversions
+;===-------------------------------------------------------------------------===
+
+ fcvtas.2s v0, v0
+ fcvtas.4s v0, v0
+ fcvtas.2d v0, v0
+ fcvtas s0, s0
+ fcvtas d0, d0
+
+; CHECK: fcvtas.2s v0, v0 ; encoding: [0x00,0xc8,0x21,0x0e]
+; CHECK: fcvtas.4s v0, v0 ; encoding: [0x00,0xc8,0x21,0x4e]
+; CHECK: fcvtas.2d v0, v0 ; encoding: [0x00,0xc8,0x61,0x4e]
+; CHECK: fcvtas s0, s0 ; encoding: [0x00,0xc8,0x21,0x5e]
+; CHECK: fcvtas d0, d0 ; encoding: [0x00,0xc8,0x61,0x5e]
+
+ fcvtau.2s v0, v0
+ fcvtau.4s v0, v0
+ fcvtau.2d v0, v0
+ fcvtau s0, s0
+ fcvtau d0, d0
+
+; CHECK: fcvtau.2s v0, v0 ; encoding: [0x00,0xc8,0x21,0x2e]
+; CHECK: fcvtau.4s v0, v0 ; encoding: [0x00,0xc8,0x21,0x6e]
+; CHECK: fcvtau.2d v0, v0 ; encoding: [0x00,0xc8,0x61,0x6e]
+; CHECK: fcvtau s0, s0 ; encoding: [0x00,0xc8,0x21,0x7e]
+; CHECK: fcvtau d0, d0 ; encoding: [0x00,0xc8,0x61,0x7e]
+
+ fcvtl v1.4s, v5.4h
+ fcvtl v2.2d, v6.2s
+ fcvtl2 v3.4s, v7.8h
+ fcvtl2 v4.2d, v8.4s
+
+; CHECK: fcvtl v1.4s, v5.4h ; encoding: [0xa1,0x78,0x21,0x0e]
+; CHECK: fcvtl v2.2d, v6.2s ; encoding: [0xc2,0x78,0x61,0x0e]
+; CHECK: fcvtl2 v3.4s, v7.8h ; encoding: [0xe3,0x78,0x21,0x4e]
+; CHECK: fcvtl2 v4.2d, v8.4s ; encoding: [0x04,0x79,0x61,0x4e]
+
+ fcvtms.2s v0, v0
+ fcvtms.4s v0, v0
+ fcvtms.2d v0, v0
+ fcvtms s0, s0
+ fcvtms d0, d0
+
+; CHECK: fcvtms.2s v0, v0 ; encoding: [0x00,0xb8,0x21,0x0e]
+; CHECK: fcvtms.4s v0, v0 ; encoding: [0x00,0xb8,0x21,0x4e]
+; CHECK: fcvtms.2d v0, v0 ; encoding: [0x00,0xb8,0x61,0x4e]
+; CHECK: fcvtms s0, s0 ; encoding: [0x00,0xb8,0x21,0x5e]
+; CHECK: fcvtms d0, d0 ; encoding: [0x00,0xb8,0x61,0x5e]
+
+ fcvtmu.2s v0, v0
+ fcvtmu.4s v0, v0
+ fcvtmu.2d v0, v0
+ fcvtmu s0, s0
+ fcvtmu d0, d0
+
+; CHECK: fcvtmu.2s v0, v0 ; encoding: [0x00,0xb8,0x21,0x2e]
+; CHECK: fcvtmu.4s v0, v0 ; encoding: [0x00,0xb8,0x21,0x6e]
+; CHECK: fcvtmu.2d v0, v0 ; encoding: [0x00,0xb8,0x61,0x6e]
+; CHECK: fcvtmu s0, s0 ; encoding: [0x00,0xb8,0x21,0x7e]
+; CHECK: fcvtmu d0, d0 ; encoding: [0x00,0xb8,0x61,0x7e]
+
+ fcvtns.2s v0, v0
+ fcvtns.4s v0, v0
+ fcvtns.2d v0, v0
+ fcvtns s0, s0
+ fcvtns d0, d0
+
+; CHECK: fcvtns.2s v0, v0 ; encoding: [0x00,0xa8,0x21,0x0e]
+; CHECK: fcvtns.4s v0, v0 ; encoding: [0x00,0xa8,0x21,0x4e]
+; CHECK: fcvtns.2d v0, v0 ; encoding: [0x00,0xa8,0x61,0x4e]
+; CHECK: fcvtns s0, s0 ; encoding: [0x00,0xa8,0x21,0x5e]
+; CHECK: fcvtns d0, d0 ; encoding: [0x00,0xa8,0x61,0x5e]
+
+ fcvtnu.2s v0, v0
+ fcvtnu.4s v0, v0
+ fcvtnu.2d v0, v0
+ fcvtnu s0, s0
+ fcvtnu d0, d0
+
+; CHECK: fcvtnu.2s v0, v0 ; encoding: [0x00,0xa8,0x21,0x2e]
+; CHECK: fcvtnu.4s v0, v0 ; encoding: [0x00,0xa8,0x21,0x6e]
+; CHECK: fcvtnu.2d v0, v0 ; encoding: [0x00,0xa8,0x61,0x6e]
+; CHECK: fcvtnu s0, s0 ; encoding: [0x00,0xa8,0x21,0x7e]
+; CHECK: fcvtnu d0, d0 ; encoding: [0x00,0xa8,0x61,0x7e]
+
+ fcvtn v2.4h, v4.4s
+ fcvtn v3.2s, v5.2d
+ fcvtn2 v4.8h, v6.4s
+ fcvtn2 v5.4s, v7.2d
+ fcvtxn v6.2s, v9.2d
+ fcvtxn2 v7.4s, v8.2d
+
+; CHECK: fcvtn v2.4h, v4.4s ; encoding: [0x82,0x68,0x21,0x0e]
+; CHECK: fcvtn v3.2s, v5.2d ; encoding: [0xa3,0x68,0x61,0x0e]
+; CHECK: fcvtn2 v4.8h, v6.4s ; encoding: [0xc4,0x68,0x21,0x4e]
+; CHECK: fcvtn2 v5.4s, v7.2d ; encoding: [0xe5,0x68,0x61,0x4e]
+; CHECK: fcvtxn v6.2s, v9.2d ; encoding: [0x26,0x69,0x61,0x2e]
+; CHECK: fcvtxn2 v7.4s, v8.2d ; encoding: [0x07,0x69,0x61,0x6e]
+
+ fcvtps.2s v0, v0
+ fcvtps.4s v0, v0
+ fcvtps.2d v0, v0
+ fcvtps s0, s0
+ fcvtps d0, d0
+
+; CHECK: fcvtps.2s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x0e]
+; CHECK: fcvtps.4s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x4e]
+; CHECK: fcvtps.2d v0, v0 ; encoding: [0x00,0xa8,0xe1,0x4e]
+; CHECK: fcvtps s0, s0 ; encoding: [0x00,0xa8,0xa1,0x5e]
+; CHECK: fcvtps d0, d0 ; encoding: [0x00,0xa8,0xe1,0x5e]
+
+ fcvtpu.2s v0, v0
+ fcvtpu.4s v0, v0
+ fcvtpu.2d v0, v0
+ fcvtpu s0, s0
+ fcvtpu d0, d0
+
+; CHECK: fcvtpu.2s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x2e]
+; CHECK: fcvtpu.4s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x6e]
+; CHECK: fcvtpu.2d v0, v0 ; encoding: [0x00,0xa8,0xe1,0x6e]
+; CHECK: fcvtpu s0, s0 ; encoding: [0x00,0xa8,0xa1,0x7e]
+; CHECK: fcvtpu d0, d0 ; encoding: [0x00,0xa8,0xe1,0x7e]
+
+ fcvtzs.2s v0, v0
+ fcvtzs.4s v0, v0
+ fcvtzs.2d v0, v0
+ fcvtzs s0, s0
+ fcvtzs d0, d0
+
+; CHECK: fcvtzs.2s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x0e]
+; CHECK: fcvtzs.4s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x4e]
+; CHECK: fcvtzs.2d v0, v0 ; encoding: [0x00,0xb8,0xe1,0x4e]
+; CHECK: fcvtzs s0, s0 ; encoding: [0x00,0xb8,0xa1,0x5e]
+; CHECK: fcvtzs d0, d0 ; encoding: [0x00,0xb8,0xe1,0x5e]
+
+ fcvtzu.2s v0, v0
+ fcvtzu.4s v0, v0
+ fcvtzu.2d v0, v0
+ fcvtzu s0, s0
+ fcvtzu d0, d0
+
+; CHECK: fcvtzu.2s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x2e]
+; CHECK: fcvtzu.4s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x6e]
+; CHECK: fcvtzu.2d v0, v0 ; encoding: [0x00,0xb8,0xe1,0x6e]
+; CHECK: fcvtzu s0, s0 ; encoding: [0x00,0xb8,0xa1,0x7e]
+; CHECK: fcvtzu d0, d0 ; encoding: [0x00,0xb8,0xe1,0x7e]
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD modified immediate instructions
+;===-------------------------------------------------------------------------===
+
+ bic.2s v0, #1
+ bic.2s v0, #1, lsl #0
+ bic.2s v0, #1, lsl #8
+ bic.2s v0, #1, lsl #16
+ bic.2s v0, #1, lsl #24
+
+; CHECK: bic.2s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x2f]
+; CHECK: bic.2s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x2f]
+; CHECK: bic.2s v0, #0x1, lsl #8 ; encoding: [0x20,0x34,0x00,0x2f]
+; CHECK: bic.2s v0, #0x1, lsl #16 ; encoding: [0x20,0x54,0x00,0x2f]
+; CHECK: bic.2s v0, #0x1, lsl #24 ; encoding: [0x20,0x74,0x00,0x2f]
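+; Note: "lsl #0" is the default shift and is dropped from the printed form,
+; which is why the first two lines above are identical.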
+
+ bic.4h v0, #1
+ bic.4h v0, #1, lsl #0
+ bic.4h v0, #1, lsl #8
+
+; CHECK: bic.4h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x2f]
+; CHECK: bic.4h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x2f]
+; CHECK: bic.4h v0, #0x1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x2f]
+
+ bic.4s v0, #1
+ bic.4s v0, #1, lsl #0
+ bic.4s v0, #1, lsl #8
+ bic.4s v0, #1, lsl #16
+ bic.4s v0, #1, lsl #24
+
+; CHECK: bic.4s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x6f]
+; CHECK: bic.4s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x6f]
+; CHECK: bic.4s v0, #0x1, lsl #8 ; encoding: [0x20,0x34,0x00,0x6f]
+; CHECK: bic.4s v0, #0x1, lsl #16 ; encoding: [0x20,0x54,0x00,0x6f]
+; CHECK: bic.4s v0, #0x1, lsl #24 ; encoding: [0x20,0x74,0x00,0x6f]
+
+ bic.8h v0, #1
+ bic.8h v0, #1, lsl #0
+ bic.8h v0, #1, lsl #8
+
+; CHECK: bic.8h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x6f]
+; CHECK: bic.8h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x6f]
+; CHECK: bic.8h v0, #0x1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x6f]
+
+ fmov.2d v0, #1.250000e-01
+
+; CHECK: fmov.2d v0, #0.12500000 ; encoding: [0x00,0xf4,0x02,0x6f]
+
+ fmov.2s v0, #1.250000e-01
+ fmov.4s v0, #1.250000e-01
+
+; CHECK: fmov.2s v0, #0.12500000 ; encoding: [0x00,0xf4,0x02,0x0f]
+; CHECK: fmov.4s v0, #0.12500000 ; encoding: [0x00,0xf4,0x02,0x4f]
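+; Note: FMOV (vector, immediate) only accepts values representable in the
+; 8-bit floating-point immediate format, i.e. +/-(1 + m/16) * 2^e with
+; 0 <= m <= 15 and -3 <= e <= 4; 0.125 = 1.0 * 2^-3 is one such value.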
+
+ orr.2s v0, #1
+ orr.2s v0, #1, lsl #0
+ orr.2s v0, #1, lsl #8
+ orr.2s v0, #1, lsl #16
+ orr.2s v0, #1, lsl #24
+
+; CHECK: orr.2s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x0f]
+; CHECK: orr.2s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x0f]
+; CHECK: orr.2s v0, #0x1, lsl #8 ; encoding: [0x20,0x34,0x00,0x0f]
+; CHECK: orr.2s v0, #0x1, lsl #16 ; encoding: [0x20,0x54,0x00,0x0f]
+; CHECK: orr.2s v0, #0x1, lsl #24 ; encoding: [0x20,0x74,0x00,0x0f]
+
+ orr.4h v0, #1
+ orr.4h v0, #1, lsl #0
+ orr.4h v0, #1, lsl #8
+
+; CHECK: orr.4h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x0f]
+; CHECK: orr.4h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x0f]
+; CHECK: orr.4h v0, #0x1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x0f]
+
+ orr.4s v0, #1
+ orr.4s v0, #1, lsl #0
+ orr.4s v0, #1, lsl #8
+ orr.4s v0, #1, lsl #16
+ orr.4s v0, #1, lsl #24
+
+; CHECK: orr.4s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x4f]
+; CHECK: orr.4s v0, #0x1 ; encoding: [0x20,0x14,0x00,0x4f]
+; CHECK: orr.4s v0, #0x1, lsl #8 ; encoding: [0x20,0x34,0x00,0x4f]
+; CHECK: orr.4s v0, #0x1, lsl #16 ; encoding: [0x20,0x54,0x00,0x4f]
+; CHECK: orr.4s v0, #0x1, lsl #24 ; encoding: [0x20,0x74,0x00,0x4f]
+
+ orr.8h v0, #1
+ orr.8h v0, #1, lsl #0
+ orr.8h v0, #1, lsl #8
+
+; CHECK: orr.8h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x4f]
+; CHECK: orr.8h v0, #0x1 ; encoding: [0x20,0x94,0x00,0x4f]
+; CHECK: orr.8h v0, #0x1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x4f]
+
+ movi d0, #0x000000000000ff
+ movi.2d v0, #0x000000000000ff
+
+; CHECK: movi d0, #0x000000000000ff ; encoding: [0x20,0xe4,0x00,0x2f]
+; CHECK: movi.2d v0, #0x000000000000ff ; encoding: [0x20,0xe4,0x00,0x6f]
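+; Note: the 64-bit MOVI forms encode a per-byte mask, so every byte of the
+; immediate must be either 0x00 or 0xff.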
+
+ movi.2s v0, #1
+ movi.2s v0, #1, lsl #0
+ movi.2s v0, #1, lsl #8
+ movi.2s v0, #1, lsl #16
+ movi.2s v0, #1, lsl #24
+
+; CHECK: movi.2s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x0f]
+; CHECK: movi.2s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x0f]
+; CHECK: movi.2s v0, #0x1, lsl #8 ; encoding: [0x20,0x24,0x00,0x0f]
+; CHECK: movi.2s v0, #0x1, lsl #16 ; encoding: [0x20,0x44,0x00,0x0f]
+; CHECK: movi.2s v0, #0x1, lsl #24 ; encoding: [0x20,0x64,0x00,0x0f]
+
+ movi.4s v0, #1
+ movi.4s v0, #1, lsl #0
+ movi.4s v0, #1, lsl #8
+ movi.4s v0, #1, lsl #16
+ movi.4s v0, #1, lsl #24
+
+; CHECK: movi.4s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x4f]
+; CHECK: movi.4s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x4f]
+; CHECK: movi.4s v0, #0x1, lsl #8 ; encoding: [0x20,0x24,0x00,0x4f]
+; CHECK: movi.4s v0, #0x1, lsl #16 ; encoding: [0x20,0x44,0x00,0x4f]
+; CHECK: movi.4s v0, #0x1, lsl #24 ; encoding: [0x20,0x64,0x00,0x4f]
+
+ movi.4h v0, #1
+ movi.4h v0, #1, lsl #0
+ movi.4h v0, #1, lsl #8
+
+; CHECK: movi.4h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x0f]
+; CHECK: movi.4h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x0f]
+; CHECK: movi.4h v0, #0x1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x0f]
+
+ movi.8h v0, #1
+ movi.8h v0, #1, lsl #0
+ movi.8h v0, #1, lsl #8
+
+; CHECK: movi.8h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x4f]
+; CHECK: movi.8h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x4f]
+; CHECK: movi.8h v0, #0x1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x4f]
+
+ movi.2s v0, #1, msl #8
+ movi.2s v0, #1, msl #16
+ movi.4s v0, #1, msl #8
+ movi.4s v0, #1, msl #16
+
+; CHECK: movi.2s v0, #0x1, msl #8 ; encoding: [0x20,0xc4,0x00,0x0f]
+; CHECK: movi.2s v0, #0x1, msl #16 ; encoding: [0x20,0xd4,0x00,0x0f]
+; CHECK: movi.4s v0, #0x1, msl #8 ; encoding: [0x20,0xc4,0x00,0x4f]
+; CHECK: movi.4s v0, #0x1, msl #16 ; encoding: [0x20,0xd4,0x00,0x4f]
+
+ movi.8b v0, #1
+ movi.16b v0, #1
+
+; CHECK: movi.8b v0, #0x1 ; encoding: [0x20,0xe4,0x00,0x0f]
+; CHECK: movi.16b v0, #0x1 ; encoding: [0x20,0xe4,0x00,0x4f]
+
+ mvni.2s v0, #1
+ mvni.2s v0, #1, lsl #0
+ mvni.2s v0, #1, lsl #8
+ mvni.2s v0, #1, lsl #16
+ mvni.2s v0, #1, lsl #24
+
+; CHECK: mvni.2s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x2f]
+; CHECK: mvni.2s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x2f]
+; CHECK: mvni.2s v0, #0x1, lsl #8 ; encoding: [0x20,0x24,0x00,0x2f]
+; CHECK: mvni.2s v0, #0x1, lsl #16 ; encoding: [0x20,0x44,0x00,0x2f]
+; CHECK: mvni.2s v0, #0x1, lsl #24 ; encoding: [0x20,0x64,0x00,0x2f]
+
+ mvni.4s v0, #1
+ mvni.4s v0, #1, lsl #0
+ mvni.4s v0, #1, lsl #8
+ mvni.4s v0, #1, lsl #16
+ mvni.4s v0, #1, lsl #24
+
+; CHECK: mvni.4s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x6f]
+; CHECK: mvni.4s v0, #0x1 ; encoding: [0x20,0x04,0x00,0x6f]
+; CHECK: mvni.4s v0, #0x1, lsl #8 ; encoding: [0x20,0x24,0x00,0x6f]
+; CHECK: mvni.4s v0, #0x1, lsl #16 ; encoding: [0x20,0x44,0x00,0x6f]
+; CHECK: mvni.4s v0, #0x1, lsl #24 ; encoding: [0x20,0x64,0x00,0x6f]
+
+ mvni.4h v0, #1
+ mvni.4h v0, #1, lsl #0
+ mvni.4h v0, #1, lsl #8
+
+; CHECK: mvni.4h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x2f]
+; CHECK: mvni.4h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x2f]
+; CHECK: mvni.4h v0, #0x1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x2f]
+
+ mvni.8h v0, #1
+ mvni.8h v0, #1, lsl #0
+ mvni.8h v0, #1, lsl #8
+
+; CHECK: mvni.8h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x6f]
+; CHECK: mvni.8h v0, #0x1 ; encoding: [0x20,0x84,0x00,0x6f]
+; CHECK: mvni.8h v0, #0x1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x6f]
+
+ mvni.2s v0, #1, msl #8
+ mvni.2s v0, #1, msl #16
+ mvni.4s v0, #1, msl #8
+ mvni.4s v0, #1, msl #16
+
+; CHECK: mvni.2s v0, #0x1, msl #8 ; encoding: [0x20,0xc4,0x00,0x2f]
+; CHECK: mvni.2s v0, #0x1, msl #16 ; encoding: [0x20,0xd4,0x00,0x2f]
+; CHECK: mvni.4s v0, #0x1, msl #8 ; encoding: [0x20,0xc4,0x00,0x6f]
+; CHECK: mvni.4s v0, #0x1, msl #16 ; encoding: [0x20,0xd4,0x00,0x6f]
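+; Note: MVNI writes the bitwise inverse of the (shifted) immediate; its
+; encodings differ from the corresponding MOVI forms only in bit 29 (compare
+; the final bytes, e.g. 0x0f vs. 0x2f for the .2s variants).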
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD scalar x index
+;===-------------------------------------------------------------------------===
+
+ fmla.s s0, s0, v0[3]
+ fmla.d d0, d0, v0[1]
+ fmls.s s0, s0, v0[3]
+ fmls.d d0, d0, v0[1]
+ fmulx.s s0, s0, v0[3]
+ fmulx.d d0, d0, v0[1]
+ fmul.s s0, s0, v0[3]
+ fmul.d d0, d0, v0[1]
+ sqdmlal.h s0, h0, v0[7]
+ sqdmlal.s d0, s0, v0[3]
+ sqdmlsl.h s0, h0, v0[7]
+ sqdmulh.h h0, h0, v0[7]
+ sqdmulh.s s0, s0, v0[3]
+ sqdmull.h s0, h0, v0[7]
+ sqdmull.s d0, s0, v0[3]
+ sqrdmulh.h h0, h0, v0[7]
+ sqrdmulh.s s0, s0, v0[3]
+
+; CHECK: fmla.s s0, s0, v0[3] ; encoding: [0x00,0x18,0xa0,0x5f]
+; CHECK: fmla.d d0, d0, v0[1] ; encoding: [0x00,0x18,0xc0,0x5f]
+; CHECK: fmls.s s0, s0, v0[3] ; encoding: [0x00,0x58,0xa0,0x5f]
+; CHECK: fmls.d d0, d0, v0[1] ; encoding: [0x00,0x58,0xc0,0x5f]
+; CHECK: fmulx.s s0, s0, v0[3] ; encoding: [0x00,0x98,0xa0,0x7f]
+; CHECK: fmulx.d d0, d0, v0[1] ; encoding: [0x00,0x98,0xc0,0x7f]
+; CHECK: fmul.s s0, s0, v0[3] ; encoding: [0x00,0x98,0xa0,0x5f]
+; CHECK: fmul.d d0, d0, v0[1] ; encoding: [0x00,0x98,0xc0,0x5f]
+; CHECK: sqdmlal.h s0, h0, v0[7] ; encoding: [0x00,0x38,0x70,0x5f]
+; CHECK: sqdmlal.s d0, s0, v0[3] ; encoding: [0x00,0x38,0xa0,0x5f]
+; CHECK: sqdmlsl.h s0, h0, v0[7] ; encoding: [0x00,0x78,0x70,0x5f]
+; CHECK: sqdmulh.h h0, h0, v0[7] ; encoding: [0x00,0xc8,0x70,0x5f]
+; CHECK: sqdmulh.s s0, s0, v0[3] ; encoding: [0x00,0xc8,0xa0,0x5f]
+; CHECK: sqdmull.h s0, h0, v0[7] ; encoding: [0x00,0xb8,0x70,0x5f]
+; CHECK: sqdmull.s d0, s0, v0[3] ; encoding: [0x00,0xb8,0xa0,0x5f]
+; CHECK: sqrdmulh.h h0, h0, v0[7] ; encoding: [0x00,0xd8,0x70,0x5f]
+; CHECK: sqrdmulh.s s0, s0, v0[3] ; encoding: [0x00,0xd8,0xa0,0x5f]
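+; Note: the legal lane index range depends on the element size (h: 0-7,
+; s: 0-3, d: 0-1), so the tests above use the maximum index for each size.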
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD SMLAL
+;===-------------------------------------------------------------------------===
+ smlal.8h v1, v2, v3
+ smlal.4s v1, v2, v3
+ smlal.2d v1, v2, v3
+ smlal2.8h v1, v2, v3
+ smlal2.4s v1, v2, v3
+ smlal2.2d v1, v2, v3
+
+ smlal v13.8h, v8.8b, v0.8b
+ smlal v13.4s, v8.4h, v0.4h
+ smlal v13.2d, v8.2s, v0.2s
+ smlal2 v13.8h, v8.16b, v0.16b
+ smlal2 v13.4s, v8.8h, v0.8h
+ smlal2 v13.2d, v8.4s, v0.4s
+
+; CHECK: smlal.8h v1, v2, v3 ; encoding: [0x41,0x80,0x23,0x0e]
+; CHECK: smlal.4s v1, v2, v3 ; encoding: [0x41,0x80,0x63,0x0e]
+; CHECK: smlal.2d v1, v2, v3 ; encoding: [0x41,0x80,0xa3,0x0e]
+; CHECK: smlal2.8h v1, v2, v3 ; encoding: [0x41,0x80,0x23,0x4e]
+; CHECK: smlal2.4s v1, v2, v3 ; encoding: [0x41,0x80,0x63,0x4e]
+; CHECK: smlal2.2d v1, v2, v3 ; encoding: [0x41,0x80,0xa3,0x4e]
+; CHECK: smlal.8h v13, v8, v0 ; encoding: [0x0d,0x81,0x20,0x0e]
+; CHECK: smlal.4s v13, v8, v0 ; encoding: [0x0d,0x81,0x60,0x0e]
+; CHECK: smlal.2d v13, v8, v0 ; encoding: [0x0d,0x81,0xa0,0x0e]
+; CHECK: smlal2.8h v13, v8, v0 ; encoding: [0x0d,0x81,0x20,0x4e]
+; CHECK: smlal2.4s v13, v8, v0 ; encoding: [0x0d,0x81,0x60,0x4e]
+; CHECK: smlal2.2d v13, v8, v0 ; encoding: [0x0d,0x81,0xa0,0x4e]
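+; Note: the non-"2" forms read the low half of the source vectors
+; (.8b/.4h/.2s) and the "2" forms read the high half (.16b/.8h/.4s); both
+; widen into the full destination (.8h/.4s/.2d).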
+
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD vector x index
+;===-------------------------------------------------------------------------===
+
+ fmla.2s v0, v0, v0[0]
+ fmla.4s v0, v0, v0[1]
+ fmla.2d v0, v0, v0[1]
+ fmls.2s v0, v0, v0[0]
+ fmls.4s v0, v0, v0[1]
+ fmls.2d v0, v0, v0[1]
+ fmulx.2s v0, v0, v0[0]
+ fmulx.4s v0, v0, v0[1]
+ fmulx.2d v0, v0, v0[1]
+ fmul.2s v0, v0, v0[0]
+ fmul.4s v0, v0, v0[1]
+ fmul.2d v0, v0, v0[1]
+ mla.4h v0, v0, v0[0]
+ mla.8h v0, v0, v0[1]
+ mla.2s v0, v0, v0[2]
+ mla.4s v0, v0, v0[3]
+ mls.4h v0, v0, v0[0]
+ mls.8h v0, v0, v0[1]
+ mls.2s v0, v0, v0[2]
+ mls.4s v0, v0, v0[3]
+ mul.4h v0, v0, v0[0]
+ mul.8h v0, v0, v0[1]
+ mul.2s v0, v0, v0[2]
+ mul.4s v0, v0, v0[3]
+ smlal.4s v0, v0, v0[0]
+ smlal2.4s v0, v0, v0[1]
+ smlal.2d v0, v0, v0[2]
+ smlal2.2d v0, v0, v0[3]
+ smlsl.4s v0, v0, v0[0]
+ smlsl2.4s v0, v0, v0[1]
+ smlsl.2d v0, v0, v0[2]
+ smlsl2.2d v0, v0, v0[3]
+ smull.4s v0, v0, v0[0]
+ smull2.4s v0, v0, v0[1]
+ smull.2d v0, v0, v0[2]
+ smull2.2d v0, v0, v0[3]
+ sqdmlal.4s v0, v0, v0[0]
+ sqdmlal2.4s v0, v0, v0[1]
+ sqdmlal.2d v0, v0, v0[2]
+ sqdmlal2.2d v0, v0, v0[3]
+ sqdmlsl.4s v0, v0, v0[0]
+ sqdmlsl2.4s v0, v0, v0[1]
+ sqdmlsl.2d v0, v0, v0[2]
+ sqdmlsl2.2d v0, v0, v0[3]
+ sqdmulh.4h v0, v0, v0[0]
+ sqdmulh.8h v0, v0, v0[1]
+ sqdmulh.2s v0, v0, v0[2]
+ sqdmulh.4s v0, v0, v0[3]
+ sqdmull.4s v0, v0, v0[0]
+ sqdmull2.4s v0, v0, v0[1]
+ sqdmull.2d v0, v0, v0[2]
+ sqdmull2.2d v0, v0, v0[3]
+ sqrdmulh.4h v0, v0, v0[0]
+ sqrdmulh.8h v0, v0, v0[1]
+ sqrdmulh.2s v0, v0, v0[2]
+ sqrdmulh.4s v0, v0, v0[3]
+ umlal.4s v0, v0, v0[0]
+ umlal2.4s v0, v0, v0[1]
+ umlal.2d v0, v0, v0[2]
+ umlal2.2d v0, v0, v0[3]
+ umlsl.4s v0, v0, v0[0]
+ umlsl2.4s v0, v0, v0[1]
+ umlsl.2d v0, v0, v0[2]
+ umlsl2.2d v0, v0, v0[3]
+ umull.4s v0, v0, v0[0]
+ umull2.4s v0, v0, v0[1]
+ umull.2d v0, v0, v0[2]
+ umull2.2d v0, v0, v0[3]
+
+; CHECK: fmla.2s v0, v0, v0[0] ; encoding: [0x00,0x10,0x80,0x0f]
+; CHECK: fmla.4s v0, v0, v0[1] ; encoding: [0x00,0x10,0xa0,0x4f]
+; CHECK: fmla.2d v0, v0, v0[1] ; encoding: [0x00,0x18,0xc0,0x4f]
+; CHECK: fmls.2s v0, v0, v0[0] ; encoding: [0x00,0x50,0x80,0x0f]
+; CHECK: fmls.4s v0, v0, v0[1] ; encoding: [0x00,0x50,0xa0,0x4f]
+; CHECK: fmls.2d v0, v0, v0[1] ; encoding: [0x00,0x58,0xc0,0x4f]
+; CHECK: fmulx.2s v0, v0, v0[0] ; encoding: [0x00,0x90,0x80,0x2f]
+; CHECK: fmulx.4s v0, v0, v0[1] ; encoding: [0x00,0x90,0xa0,0x6f]
+; CHECK: fmulx.2d v0, v0, v0[1] ; encoding: [0x00,0x98,0xc0,0x6f]
+; CHECK: fmul.2s v0, v0, v0[0] ; encoding: [0x00,0x90,0x80,0x0f]
+; CHECK: fmul.4s v0, v0, v0[1] ; encoding: [0x00,0x90,0xa0,0x4f]
+; CHECK: fmul.2d v0, v0, v0[1] ; encoding: [0x00,0x98,0xc0,0x4f]
+; CHECK: mla.4h v0, v0, v0[0] ; encoding: [0x00,0x00,0x40,0x2f]
+; CHECK: mla.8h v0, v0, v0[1] ; encoding: [0x00,0x00,0x50,0x6f]
+; CHECK: mla.2s v0, v0, v0[2] ; encoding: [0x00,0x08,0x80,0x2f]
+; CHECK: mla.4s v0, v0, v0[3] ; encoding: [0x00,0x08,0xa0,0x6f]
+; CHECK: mls.4h v0, v0, v0[0] ; encoding: [0x00,0x40,0x40,0x2f]
+; CHECK: mls.8h v0, v0, v0[1] ; encoding: [0x00,0x40,0x50,0x6f]
+; CHECK: mls.2s v0, v0, v0[2] ; encoding: [0x00,0x48,0x80,0x2f]
+; CHECK: mls.4s v0, v0, v0[3] ; encoding: [0x00,0x48,0xa0,0x6f]
+; CHECK: mul.4h v0, v0, v0[0] ; encoding: [0x00,0x80,0x40,0x0f]
+; CHECK: mul.8h v0, v0, v0[1] ; encoding: [0x00,0x80,0x50,0x4f]
+; CHECK: mul.2s v0, v0, v0[2] ; encoding: [0x00,0x88,0x80,0x0f]
+; CHECK: mul.4s v0, v0, v0[3] ; encoding: [0x00,0x88,0xa0,0x4f]
+; CHECK: smlal.4s v0, v0, v0[0] ; encoding: [0x00,0x20,0x40,0x0f]
+; CHECK: smlal2.4s v0, v0, v0[1] ; encoding: [0x00,0x20,0x50,0x4f]
+; CHECK: smlal.2d v0, v0, v0[2] ; encoding: [0x00,0x28,0x80,0x0f]
+; CHECK: smlal2.2d v0, v0, v0[3] ; encoding: [0x00,0x28,0xa0,0x4f]
+; CHECK: smlsl.4s v0, v0, v0[0] ; encoding: [0x00,0x60,0x40,0x0f]
+; CHECK: smlsl2.4s v0, v0, v0[1] ; encoding: [0x00,0x60,0x50,0x4f]
+; CHECK: smlsl.2d v0, v0, v0[2] ; encoding: [0x00,0x68,0x80,0x0f]
+; CHECK: smlsl2.2d v0, v0, v0[3] ; encoding: [0x00,0x68,0xa0,0x4f]
+; CHECK: smull.4s v0, v0, v0[0] ; encoding: [0x00,0xa0,0x40,0x0f]
+; CHECK: smull2.4s v0, v0, v0[1] ; encoding: [0x00,0xa0,0x50,0x4f]
+; CHECK: smull.2d v0, v0, v0[2] ; encoding: [0x00,0xa8,0x80,0x0f]
+; CHECK: smull2.2d v0, v0, v0[3] ; encoding: [0x00,0xa8,0xa0,0x4f]
+; CHECK: sqdmlal.4s v0, v0, v0[0] ; encoding: [0x00,0x30,0x40,0x0f]
+; CHECK: sqdmlal2.4s v0, v0, v0[1] ; encoding: [0x00,0x30,0x50,0x4f]
+; CHECK: sqdmlal.2d v0, v0, v0[2] ; encoding: [0x00,0x38,0x80,0x0f]
+; CHECK: sqdmlal2.2d v0, v0, v0[3] ; encoding: [0x00,0x38,0xa0,0x4f]
+; CHECK: sqdmlsl.4s v0, v0, v0[0] ; encoding: [0x00,0x70,0x40,0x0f]
+; CHECK: sqdmlsl2.4s v0, v0, v0[1] ; encoding: [0x00,0x70,0x50,0x4f]
+; CHECK: sqdmlsl.2d v0, v0, v0[2] ; encoding: [0x00,0x78,0x80,0x0f]
+; CHECK: sqdmlsl2.2d v0, v0, v0[3] ; encoding: [0x00,0x78,0xa0,0x4f]
+; CHECK: sqdmulh.4h v0, v0, v0[0] ; encoding: [0x00,0xc0,0x40,0x0f]
+; CHECK: sqdmulh.8h v0, v0, v0[1] ; encoding: [0x00,0xc0,0x50,0x4f]
+; CHECK: sqdmulh.2s v0, v0, v0[2] ; encoding: [0x00,0xc8,0x80,0x0f]
+; CHECK: sqdmulh.4s v0, v0, v0[3] ; encoding: [0x00,0xc8,0xa0,0x4f]
+; CHECK: sqdmull.4s v0, v0, v0[0] ; encoding: [0x00,0xb0,0x40,0x0f]
+; CHECK: sqdmull2.4s v0, v0, v0[1] ; encoding: [0x00,0xb0,0x50,0x4f]
+; CHECK: sqdmull.2d v0, v0, v0[2] ; encoding: [0x00,0xb8,0x80,0x0f]
+; CHECK: sqdmull2.2d v0, v0, v0[3] ; encoding: [0x00,0xb8,0xa0,0x4f]
+; CHECK: sqrdmulh.4h v0, v0, v0[0] ; encoding: [0x00,0xd0,0x40,0x0f]
+; CHECK: sqrdmulh.8h v0, v0, v0[1] ; encoding: [0x00,0xd0,0x50,0x4f]
+; CHECK: sqrdmulh.2s v0, v0, v0[2] ; encoding: [0x00,0xd8,0x80,0x0f]
+; CHECK: sqrdmulh.4s v0, v0, v0[3] ; encoding: [0x00,0xd8,0xa0,0x4f]
+; CHECK: umlal.4s v0, v0, v0[0] ; encoding: [0x00,0x20,0x40,0x2f]
+; CHECK: umlal2.4s v0, v0, v0[1] ; encoding: [0x00,0x20,0x50,0x6f]
+; CHECK: umlal.2d v0, v0, v0[2] ; encoding: [0x00,0x28,0x80,0x2f]
+; CHECK: umlal2.2d v0, v0, v0[3] ; encoding: [0x00,0x28,0xa0,0x6f]
+; CHECK: umlsl.4s v0, v0, v0[0] ; encoding: [0x00,0x60,0x40,0x2f]
+; CHECK: umlsl2.4s v0, v0, v0[1] ; encoding: [0x00,0x60,0x50,0x6f]
+; CHECK: umlsl.2d v0, v0, v0[2] ; encoding: [0x00,0x68,0x80,0x2f]
+; CHECK: umlsl2.2d v0, v0, v0[3] ; encoding: [0x00,0x68,0xa0,0x6f]
+; CHECK: umull.4s v0, v0, v0[0] ; encoding: [0x00,0xa0,0x40,0x2f]
+; CHECK: umull2.4s v0, v0, v0[1] ; encoding: [0x00,0xa0,0x50,0x6f]
+; CHECK: umull.2d v0, v0, v0[2] ; encoding: [0x00,0xa8,0x80,0x2f]
+; CHECK: umull2.2d v0, v0, v0[3] ; encoding: [0x00,0xa8,0xa0,0x6f]
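+; Note: in the by-element forms with 16-bit elements (the .4h/.8h multiplies
+; and the widening forms sourced from halfwords), the index register is
+; encoded in only four bits and is therefore restricted to v0-v15; the
+; .s-element forms may use any of v0-v31.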
+
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD scalar with shift
+;===-------------------------------------------------------------------------===
+
+ fcvtzs s0, s0, #1
+ fcvtzs d0, d0, #2
+ fcvtzu s0, s0, #1
+ fcvtzu d0, d0, #2
+ shl d0, d0, #1
+ sli d0, d0, #1
+ sqrshrn b0, h0, #1
+ sqrshrn h0, s0, #2
+ sqrshrn s0, d0, #3
+ sqrshrun b0, h0, #1
+ sqrshrun h0, s0, #2
+ sqrshrun s0, d0, #3
+ sqshlu b0, b0, #1
+ sqshlu h0, h0, #2
+ sqshlu s0, s0, #3
+ sqshlu d0, d0, #4
+ sqshl b0, b0, #1
+ sqshl h0, h0, #2
+ sqshl s0, s0, #3
+ sqshl d0, d0, #4
+ sqshrn b0, h0, #1
+ sqshrn h0, s0, #2
+ sqshrn s0, d0, #3
+ sqshrun b0, h0, #1
+ sqshrun h0, s0, #2
+ sqshrun s0, d0, #3
+ sri d0, d0, #1
+ srshr d0, d0, #1
+ srsra d0, d0, #1
+ sshr d0, d0, #1
+ ucvtf s0, s0, #1
+ ucvtf d0, d0, #2
+ scvtf s0, s0, #1
+ scvtf d0, d0, #2
+ uqrshrn b0, h0, #1
+ uqrshrn h0, s0, #2
+ uqrshrn s0, d0, #3
+ uqshl b0, b0, #1
+ uqshl h0, h0, #2
+ uqshl s0, s0, #3
+ uqshl d0, d0, #4
+ uqshrn b0, h0, #1
+ uqshrn h0, s0, #2
+ uqshrn s0, d0, #3
+ urshr d0, d0, #1
+ ursra d0, d0, #1
+ ushr d0, d0, #1
+ usra d0, d0, #1
+
+; CHECK: fcvtzs s0, s0, #1 ; encoding: [0x00,0xfc,0x3f,0x5f]
+; CHECK: fcvtzs d0, d0, #2 ; encoding: [0x00,0xfc,0x7e,0x5f]
+; CHECK: fcvtzu s0, s0, #1 ; encoding: [0x00,0xfc,0x3f,0x7f]
+; CHECK: fcvtzu d0, d0, #2 ; encoding: [0x00,0xfc,0x7e,0x7f]
+; CHECK: shl d0, d0, #1 ; encoding: [0x00,0x54,0x41,0x5f]
+; CHECK: sli d0, d0, #1 ; encoding: [0x00,0x54,0x41,0x7f]
+; CHECK: sqrshrn b0, h0, #1 ; encoding: [0x00,0x9c,0x0f,0x5f]
+; CHECK: sqrshrn h0, s0, #2 ; encoding: [0x00,0x9c,0x1e,0x5f]
+; CHECK: sqrshrn s0, d0, #3 ; encoding: [0x00,0x9c,0x3d,0x5f]
+; CHECK: sqrshrun b0, h0, #1 ; encoding: [0x00,0x8c,0x0f,0x7f]
+; CHECK: sqrshrun h0, s0, #2 ; encoding: [0x00,0x8c,0x1e,0x7f]
+; CHECK: sqrshrun s0, d0, #3 ; encoding: [0x00,0x8c,0x3d,0x7f]
+; CHECK: sqshlu b0, b0, #1 ; encoding: [0x00,0x64,0x09,0x7f]
+; CHECK: sqshlu h0, h0, #2 ; encoding: [0x00,0x64,0x12,0x7f]
+; CHECK: sqshlu s0, s0, #3 ; encoding: [0x00,0x64,0x23,0x7f]
+; CHECK: sqshlu d0, d0, #4 ; encoding: [0x00,0x64,0x44,0x7f]
+; CHECK: sqshl b0, b0, #1 ; encoding: [0x00,0x74,0x09,0x5f]
+; CHECK: sqshl h0, h0, #2 ; encoding: [0x00,0x74,0x12,0x5f]
+; CHECK: sqshl s0, s0, #3 ; encoding: [0x00,0x74,0x23,0x5f]
+; CHECK: sqshl d0, d0, #4 ; encoding: [0x00,0x74,0x44,0x5f]
+; CHECK: sqshrn b0, h0, #1 ; encoding: [0x00,0x94,0x0f,0x5f]
+; CHECK: sqshrn h0, s0, #2 ; encoding: [0x00,0x94,0x1e,0x5f]
+; CHECK: sqshrn s0, d0, #3 ; encoding: [0x00,0x94,0x3d,0x5f]
+; CHECK: sqshrun b0, h0, #1 ; encoding: [0x00,0x84,0x0f,0x7f]
+; CHECK: sqshrun h0, s0, #2 ; encoding: [0x00,0x84,0x1e,0x7f]
+; CHECK: sqshrun s0, d0, #3 ; encoding: [0x00,0x84,0x3d,0x7f]
+; CHECK: sri d0, d0, #1 ; encoding: [0x00,0x44,0x7f,0x7f]
+; CHECK: srshr d0, d0, #1 ; encoding: [0x00,0x24,0x7f,0x5f]
+; CHECK: srsra d0, d0, #1 ; encoding: [0x00,0x34,0x7f,0x5f]
+; CHECK: sshr d0, d0, #1 ; encoding: [0x00,0x04,0x7f,0x5f]
+; CHECK: ucvtf s0, s0, #1 ; encoding: [0x00,0xe4,0x3f,0x7f]
+; CHECK: ucvtf d0, d0, #2 ; encoding: [0x00,0xe4,0x7e,0x7f]
+; CHECK: scvtf s0, s0, #1           ; encoding: [0x00,0xe4,0x3f,0x5f]
+; CHECK: scvtf d0, d0, #2           ; encoding: [0x00,0xe4,0x7e,0x5f]
+; CHECK: uqrshrn b0, h0, #1 ; encoding: [0x00,0x9c,0x0f,0x7f]
+; CHECK: uqrshrn h0, s0, #2 ; encoding: [0x00,0x9c,0x1e,0x7f]
+; CHECK: uqrshrn s0, d0, #3 ; encoding: [0x00,0x9c,0x3d,0x7f]
+; CHECK: uqshl b0, b0, #1 ; encoding: [0x00,0x74,0x09,0x7f]
+; CHECK: uqshl h0, h0, #2 ; encoding: [0x00,0x74,0x12,0x7f]
+; CHECK: uqshl s0, s0, #3 ; encoding: [0x00,0x74,0x23,0x7f]
+; CHECK: uqshl d0, d0, #4 ; encoding: [0x00,0x74,0x44,0x7f]
+; CHECK: uqshrn b0, h0, #1 ; encoding: [0x00,0x94,0x0f,0x7f]
+; CHECK: uqshrn h0, s0, #2 ; encoding: [0x00,0x94,0x1e,0x7f]
+; CHECK: uqshrn s0, d0, #3 ; encoding: [0x00,0x94,0x3d,0x7f]
+; CHECK: urshr d0, d0, #1 ; encoding: [0x00,0x24,0x7f,0x7f]
+; CHECK: ursra d0, d0, #1 ; encoding: [0x00,0x34,0x7f,0x7f]
+; CHECK: ushr d0, d0, #1 ; encoding: [0x00,0x04,0x7f,0x7f]
+; CHECK: usra d0, d0, #1 ; encoding: [0x00,0x14,0x7f,0x7f]
+
+
+;===-------------------------------------------------------------------------===
+; AdvSIMD vector with shift
+;===-------------------------------------------------------------------------===
+
+ fcvtzs.2s v0, v0, #1
+ fcvtzs.4s v0, v0, #2
+ fcvtzs.2d v0, v0, #3
+ fcvtzu.2s v0, v0, #1
+ fcvtzu.4s v0, v0, #2
+ fcvtzu.2d v0, v0, #3
+ rshrn.8b v0, v0, #1
+ rshrn2.16b v0, v0, #2
+ rshrn.4h v0, v0, #3
+ rshrn2.8h v0, v0, #4
+ rshrn.2s v0, v0, #5
+ rshrn2.4s v0, v0, #6
+ scvtf.2s v0, v0, #1
+ scvtf.4s v0, v0, #2
+ scvtf.2d v0, v0, #3
+ shl.8b v0, v0, #1
+ shl.16b v0, v0, #2
+ shl.4h v0, v0, #3
+ shl.8h v0, v0, #4
+ shl.2s v0, v0, #5
+ shl.4s v0, v0, #6
+ shl.2d v0, v0, #7
+ shrn.8b v0, v0, #1
+ shrn2.16b v0, v0, #2
+ shrn.4h v0, v0, #3
+ shrn2.8h v0, v0, #4
+ shrn.2s v0, v0, #5
+ shrn2.4s v0, v0, #6
+ sli.8b v0, v0, #1
+ sli.16b v0, v0, #2
+ sli.4h v0, v0, #3
+ sli.8h v0, v0, #4
+ sli.2s v0, v0, #5
+ sli.4s v0, v0, #6
+ sli.2d v0, v0, #7
+ sqrshrn.8b v0, v0, #1
+ sqrshrn2.16b v0, v0, #2
+ sqrshrn.4h v0, v0, #3
+ sqrshrn2.8h v0, v0, #4
+ sqrshrn.2s v0, v0, #5
+ sqrshrn2.4s v0, v0, #6
+ sqrshrun.8b v0, v0, #1
+ sqrshrun2.16b v0, v0, #2
+ sqrshrun.4h v0, v0, #3
+ sqrshrun2.8h v0, v0, #4
+ sqrshrun.2s v0, v0, #5
+ sqrshrun2.4s v0, v0, #6
+ sqshlu.8b v0, v0, #1
+ sqshlu.16b v0, v0, #2
+ sqshlu.4h v0, v0, #3
+ sqshlu.8h v0, v0, #4
+ sqshlu.2s v0, v0, #5
+ sqshlu.4s v0, v0, #6
+ sqshlu.2d v0, v0, #7
+ sqshl.8b v0, v0, #1
+ sqshl.16b v0, v0, #2
+ sqshl.4h v0, v0, #3
+ sqshl.8h v0, v0, #4
+ sqshl.2s v0, v0, #5
+ sqshl.4s v0, v0, #6
+ sqshl.2d v0, v0, #7
+ sqshrn.8b v0, v0, #1
+ sqshrn2.16b v0, v0, #2
+ sqshrn.4h v0, v0, #3
+ sqshrn2.8h v0, v0, #4
+ sqshrn.2s v0, v0, #5
+ sqshrn2.4s v0, v0, #6
+ sqshrun.8b v0, v0, #1
+ sqshrun2.16b v0, v0, #2
+ sqshrun.4h v0, v0, #3
+ sqshrun2.8h v0, v0, #4
+ sqshrun.2s v0, v0, #5
+ sqshrun2.4s v0, v0, #6
+ sri.8b v0, v0, #1
+ sri.16b v0, v0, #2
+ sri.4h v0, v0, #3
+ sri.8h v0, v0, #4
+ sri.2s v0, v0, #5
+ sri.4s v0, v0, #6
+ sri.2d v0, v0, #7
+ srshr.8b v0, v0, #1
+ srshr.16b v0, v0, #2
+ srshr.4h v0, v0, #3
+ srshr.8h v0, v0, #4
+ srshr.2s v0, v0, #5
+ srshr.4s v0, v0, #6
+ srshr.2d v0, v0, #7
+ srsra.8b v0, v0, #1
+ srsra.16b v0, v0, #2
+ srsra.4h v0, v0, #3
+ srsra.8h v0, v0, #4
+ srsra.2s v0, v0, #5
+ srsra.4s v0, v0, #6
+ srsra.2d v0, v0, #7
+ sshll.8h v0, v0, #1
+ sshll2.8h v0, v0, #2
+ sshll.4s v0, v0, #3
+ sshll2.4s v0, v0, #4
+ sshll.2d v0, v0, #5
+ sshll2.2d v0, v0, #6
+ sshr.8b v0, v0, #1
+ sshr.16b v0, v0, #2
+ sshr.4h v0, v0, #3
+ sshr.8h v0, v0, #4
+ sshr.2s v0, v0, #5
+ sshr.4s v0, v0, #6
+ sshr.2d v0, v0, #7
+  ssra.8b v0, v0, #1
+ ssra.16b v0, v0, #2
+ ssra.4h v0, v0, #3
+ ssra.8h v0, v0, #4
+ ssra.2s v0, v0, #5
+ ssra.4s v0, v0, #6
+ ssra.2d v0, v0, #7
+ ssra d0, d0, #64
+ ucvtf.2s v0, v0, #1
+ ucvtf.4s v0, v0, #2
+ ucvtf.2d v0, v0, #3
+ uqrshrn.8b v0, v0, #1
+ uqrshrn2.16b v0, v0, #2
+ uqrshrn.4h v0, v0, #3
+ uqrshrn2.8h v0, v0, #4
+ uqrshrn.2s v0, v0, #5
+ uqrshrn2.4s v0, v0, #6
+ uqshl.8b v0, v0, #1
+ uqshl.16b v0, v0, #2
+ uqshl.4h v0, v0, #3
+ uqshl.8h v0, v0, #4
+ uqshl.2s v0, v0, #5
+ uqshl.4s v0, v0, #6
+ uqshl.2d v0, v0, #7
+ uqshrn.8b v0, v0, #1
+ uqshrn2.16b v0, v0, #2
+ uqshrn.4h v0, v0, #3
+ uqshrn2.8h v0, v0, #4
+ uqshrn.2s v0, v0, #5
+ uqshrn2.4s v0, v0, #6
+ urshr.8b v0, v0, #1
+ urshr.16b v0, v0, #2
+ urshr.4h v0, v0, #3
+ urshr.8h v0, v0, #4
+ urshr.2s v0, v0, #5
+ urshr.4s v0, v0, #6
+ urshr.2d v0, v0, #7
+ ursra.8b v0, v0, #1
+ ursra.16b v0, v0, #2
+ ursra.4h v0, v0, #3
+ ursra.8h v0, v0, #4
+ ursra.2s v0, v0, #5
+ ursra.4s v0, v0, #6
+ ursra.2d v0, v0, #7
+ ushll.8h v0, v0, #1
+ ushll2.8h v0, v0, #2
+ ushll.4s v0, v0, #3
+ ushll2.4s v0, v0, #4
+ ushll.2d v0, v0, #5
+ ushll2.2d v0, v0, #6
+ ushr.8b v0, v0, #1
+ ushr.16b v0, v0, #2
+ ushr.4h v0, v0, #3
+ ushr.8h v0, v0, #4
+ ushr.2s v0, v0, #5
+ ushr.4s v0, v0, #6
+ ushr.2d v0, v0, #7
+ usra.8b v0, v0, #1
+ usra.16b v0, v0, #2
+ usra.4h v0, v0, #3
+ usra.8h v0, v0, #4
+ usra.2s v0, v0, #5
+ usra.4s v0, v0, #6
+ usra.2d v0, v0, #7
+
+; CHECK: fcvtzs.2s v0, v0, #1 ; encoding: [0x00,0xfc,0x3f,0x0f]
+; CHECK: fcvtzs.4s v0, v0, #2 ; encoding: [0x00,0xfc,0x3e,0x4f]
+; CHECK: fcvtzs.2d v0, v0, #3 ; encoding: [0x00,0xfc,0x7d,0x4f]
+; CHECK: fcvtzu.2s v0, v0, #1 ; encoding: [0x00,0xfc,0x3f,0x2f]
+; CHECK: fcvtzu.4s v0, v0, #2 ; encoding: [0x00,0xfc,0x3e,0x6f]
+; CHECK: fcvtzu.2d v0, v0, #3 ; encoding: [0x00,0xfc,0x7d,0x6f]
+; CHECK: rshrn.8b v0, v0, #1 ; encoding: [0x00,0x8c,0x0f,0x0f]
+; CHECK: rshrn2.16b v0, v0, #2 ; encoding: [0x00,0x8c,0x0e,0x4f]
+; CHECK: rshrn.4h v0, v0, #3 ; encoding: [0x00,0x8c,0x1d,0x0f]
+; CHECK: rshrn2.8h v0, v0, #4 ; encoding: [0x00,0x8c,0x1c,0x4f]
+; CHECK: rshrn.2s v0, v0, #5 ; encoding: [0x00,0x8c,0x3b,0x0f]
+; CHECK: rshrn2.4s v0, v0, #6 ; encoding: [0x00,0x8c,0x3a,0x4f]
+; CHECK: scvtf.2s v0, v0, #1 ; encoding: [0x00,0xe4,0x3f,0x0f]
+; CHECK: scvtf.4s v0, v0, #2 ; encoding: [0x00,0xe4,0x3e,0x4f]
+; CHECK: scvtf.2d v0, v0, #3 ; encoding: [0x00,0xe4,0x7d,0x4f]
+; CHECK: shl.8b v0, v0, #1 ; encoding: [0x00,0x54,0x09,0x0f]
+; CHECK: shl.16b v0, v0, #2 ; encoding: [0x00,0x54,0x0a,0x4f]
+; CHECK: shl.4h v0, v0, #3 ; encoding: [0x00,0x54,0x13,0x0f]
+; CHECK: shl.8h v0, v0, #4 ; encoding: [0x00,0x54,0x14,0x4f]
+; CHECK: shl.2s v0, v0, #5 ; encoding: [0x00,0x54,0x25,0x0f]
+; CHECK: shl.4s v0, v0, #6 ; encoding: [0x00,0x54,0x26,0x4f]
+; CHECK: shl.2d v0, v0, #7 ; encoding: [0x00,0x54,0x47,0x4f]
+; CHECK: shrn.8b v0, v0, #1 ; encoding: [0x00,0x84,0x0f,0x0f]
+; CHECK: shrn2.16b v0, v0, #2 ; encoding: [0x00,0x84,0x0e,0x4f]
+; CHECK: shrn.4h v0, v0, #3 ; encoding: [0x00,0x84,0x1d,0x0f]
+; CHECK: shrn2.8h v0, v0, #4 ; encoding: [0x00,0x84,0x1c,0x4f]
+; CHECK: shrn.2s v0, v0, #5 ; encoding: [0x00,0x84,0x3b,0x0f]
+; CHECK: shrn2.4s v0, v0, #6 ; encoding: [0x00,0x84,0x3a,0x4f]
+; CHECK: sli.8b v0, v0, #1 ; encoding: [0x00,0x54,0x09,0x2f]
+; CHECK: sli.16b v0, v0, #2 ; encoding: [0x00,0x54,0x0a,0x6f]
+; CHECK: sli.4h v0, v0, #3 ; encoding: [0x00,0x54,0x13,0x2f]
+; CHECK: sli.8h v0, v0, #4 ; encoding: [0x00,0x54,0x14,0x6f]
+; CHECK: sli.2s v0, v0, #5 ; encoding: [0x00,0x54,0x25,0x2f]
+; CHECK: sli.4s v0, v0, #6 ; encoding: [0x00,0x54,0x26,0x6f]
+; CHECK: sli.2d v0, v0, #7 ; encoding: [0x00,0x54,0x47,0x6f]
+; CHECK: sqrshrn.8b v0, v0, #1 ; encoding: [0x00,0x9c,0x0f,0x0f]
+; CHECK: sqrshrn2.16b v0, v0, #2 ; encoding: [0x00,0x9c,0x0e,0x4f]
+; CHECK: sqrshrn.4h v0, v0, #3 ; encoding: [0x00,0x9c,0x1d,0x0f]
+; CHECK: sqrshrn2.8h v0, v0, #4 ; encoding: [0x00,0x9c,0x1c,0x4f]
+; CHECK: sqrshrn.2s v0, v0, #5 ; encoding: [0x00,0x9c,0x3b,0x0f]
+; CHECK: sqrshrn2.4s v0, v0, #6 ; encoding: [0x00,0x9c,0x3a,0x4f]
+; CHECK: sqrshrun.8b v0, v0, #1 ; encoding: [0x00,0x8c,0x0f,0x2f]
+; CHECK: sqrshrun2.16b v0, v0, #2 ; encoding: [0x00,0x8c,0x0e,0x6f]
+; CHECK: sqrshrun.4h v0, v0, #3 ; encoding: [0x00,0x8c,0x1d,0x2f]
+; CHECK: sqrshrun2.8h v0, v0, #4 ; encoding: [0x00,0x8c,0x1c,0x6f]
+; CHECK: sqrshrun.2s v0, v0, #5 ; encoding: [0x00,0x8c,0x3b,0x2f]
+; CHECK: sqrshrun2.4s v0, v0, #6 ; encoding: [0x00,0x8c,0x3a,0x6f]
+; CHECK: sqshlu.8b v0, v0, #1 ; encoding: [0x00,0x64,0x09,0x2f]
+; CHECK: sqshlu.16b v0, v0, #2 ; encoding: [0x00,0x64,0x0a,0x6f]
+; CHECK: sqshlu.4h v0, v0, #3 ; encoding: [0x00,0x64,0x13,0x2f]
+; CHECK: sqshlu.8h v0, v0, #4 ; encoding: [0x00,0x64,0x14,0x6f]
+; CHECK: sqshlu.2s v0, v0, #5 ; encoding: [0x00,0x64,0x25,0x2f]
+; CHECK: sqshlu.4s v0, v0, #6 ; encoding: [0x00,0x64,0x26,0x6f]
+; CHECK: sqshlu.2d v0, v0, #7 ; encoding: [0x00,0x64,0x47,0x6f]
+; CHECK: sqshl.8b v0, v0, #1 ; encoding: [0x00,0x74,0x09,0x0f]
+; CHECK: sqshl.16b v0, v0, #2 ; encoding: [0x00,0x74,0x0a,0x4f]
+; CHECK: sqshl.4h v0, v0, #3 ; encoding: [0x00,0x74,0x13,0x0f]
+; CHECK: sqshl.8h v0, v0, #4 ; encoding: [0x00,0x74,0x14,0x4f]
+; CHECK: sqshl.2s v0, v0, #5 ; encoding: [0x00,0x74,0x25,0x0f]
+; CHECK: sqshl.4s v0, v0, #6 ; encoding: [0x00,0x74,0x26,0x4f]
+; CHECK: sqshl.2d v0, v0, #7 ; encoding: [0x00,0x74,0x47,0x4f]
+; CHECK: sqshrn.8b v0, v0, #1 ; encoding: [0x00,0x94,0x0f,0x0f]
+; CHECK: sqshrn2.16b v0, v0, #2 ; encoding: [0x00,0x94,0x0e,0x4f]
+; CHECK: sqshrn.4h v0, v0, #3 ; encoding: [0x00,0x94,0x1d,0x0f]
+; CHECK: sqshrn2.8h v0, v0, #4 ; encoding: [0x00,0x94,0x1c,0x4f]
+; CHECK: sqshrn.2s v0, v0, #5 ; encoding: [0x00,0x94,0x3b,0x0f]
+; CHECK: sqshrn2.4s v0, v0, #6 ; encoding: [0x00,0x94,0x3a,0x4f]
+; CHECK: sqshrun.8b v0, v0, #1 ; encoding: [0x00,0x84,0x0f,0x2f]
+; CHECK: sqshrun2.16b v0, v0, #2 ; encoding: [0x00,0x84,0x0e,0x6f]
+; CHECK: sqshrun.4h v0, v0, #3 ; encoding: [0x00,0x84,0x1d,0x2f]
+; CHECK: sqshrun2.8h v0, v0, #4 ; encoding: [0x00,0x84,0x1c,0x6f]
+; CHECK: sqshrun.2s v0, v0, #5 ; encoding: [0x00,0x84,0x3b,0x2f]
+; CHECK: sqshrun2.4s v0, v0, #6 ; encoding: [0x00,0x84,0x3a,0x6f]
+; CHECK: sri.8b v0, v0, #1 ; encoding: [0x00,0x44,0x0f,0x2f]
+; CHECK: sri.16b v0, v0, #2 ; encoding: [0x00,0x44,0x0e,0x6f]
+; CHECK: sri.4h v0, v0, #3 ; encoding: [0x00,0x44,0x1d,0x2f]
+; CHECK: sri.8h v0, v0, #4 ; encoding: [0x00,0x44,0x1c,0x6f]
+; CHECK: sri.2s v0, v0, #5 ; encoding: [0x00,0x44,0x3b,0x2f]
+; CHECK: sri.4s v0, v0, #6 ; encoding: [0x00,0x44,0x3a,0x6f]
+; CHECK: sri.2d v0, v0, #7 ; encoding: [0x00,0x44,0x79,0x6f]
+; CHECK: srshr.8b v0, v0, #1 ; encoding: [0x00,0x24,0x0f,0x0f]
+; CHECK: srshr.16b v0, v0, #2 ; encoding: [0x00,0x24,0x0e,0x4f]
+; CHECK: srshr.4h v0, v0, #3 ; encoding: [0x00,0x24,0x1d,0x0f]
+; CHECK: srshr.8h v0, v0, #4 ; encoding: [0x00,0x24,0x1c,0x4f]
+; CHECK: srshr.2s v0, v0, #5 ; encoding: [0x00,0x24,0x3b,0x0f]
+; CHECK: srshr.4s v0, v0, #6 ; encoding: [0x00,0x24,0x3a,0x4f]
+; CHECK: srshr.2d v0, v0, #7 ; encoding: [0x00,0x24,0x79,0x4f]
+; CHECK: srsra.8b v0, v0, #1 ; encoding: [0x00,0x34,0x0f,0x0f]
+; CHECK: srsra.16b v0, v0, #2 ; encoding: [0x00,0x34,0x0e,0x4f]
+; CHECK: srsra.4h v0, v0, #3 ; encoding: [0x00,0x34,0x1d,0x0f]
+; CHECK: srsra.8h v0, v0, #4 ; encoding: [0x00,0x34,0x1c,0x4f]
+; CHECK: srsra.2s v0, v0, #5 ; encoding: [0x00,0x34,0x3b,0x0f]
+; CHECK: srsra.4s v0, v0, #6 ; encoding: [0x00,0x34,0x3a,0x4f]
+; CHECK: srsra.2d v0, v0, #7 ; encoding: [0x00,0x34,0x79,0x4f]
+; CHECK: sshll.8h v0, v0, #1 ; encoding: [0x00,0xa4,0x09,0x0f]
+; CHECK: sshll2.8h v0, v0, #2 ; encoding: [0x00,0xa4,0x0a,0x4f]
+; CHECK: sshll.4s v0, v0, #3 ; encoding: [0x00,0xa4,0x13,0x0f]
+; CHECK: sshll2.4s v0, v0, #4 ; encoding: [0x00,0xa4,0x14,0x4f]
+; CHECK: sshll.2d v0, v0, #5 ; encoding: [0x00,0xa4,0x25,0x0f]
+; CHECK: sshll2.2d v0, v0, #6 ; encoding: [0x00,0xa4,0x26,0x4f]
+; CHECK: sshr.8b v0, v0, #1 ; encoding: [0x00,0x04,0x0f,0x0f]
+; CHECK: sshr.16b v0, v0, #2 ; encoding: [0x00,0x04,0x0e,0x4f]
+; CHECK: sshr.4h v0, v0, #3 ; encoding: [0x00,0x04,0x1d,0x0f]
+; CHECK: sshr.8h v0, v0, #4 ; encoding: [0x00,0x04,0x1c,0x4f]
+; CHECK: sshr.2s v0, v0, #5 ; encoding: [0x00,0x04,0x3b,0x0f]
+; CHECK: sshr.4s v0, v0, #6 ; encoding: [0x00,0x04,0x3a,0x4f]
+; CHECK: sshr.2d v0, v0, #7 ; encoding: [0x00,0x04,0x79,0x4f]
+; CHECK: ssra.8b v0, v0, #1         ; encoding: [0x00,0x14,0x0f,0x0f]
+; CHECK: ssra.16b v0, v0, #2 ; encoding: [0x00,0x14,0x0e,0x4f]
+; CHECK: ssra.4h v0, v0, #3 ; encoding: [0x00,0x14,0x1d,0x0f]
+; CHECK: ssra.8h v0, v0, #4 ; encoding: [0x00,0x14,0x1c,0x4f]
+; CHECK: ssra.2s v0, v0, #5 ; encoding: [0x00,0x14,0x3b,0x0f]
+; CHECK: ssra.4s v0, v0, #6 ; encoding: [0x00,0x14,0x3a,0x4f]
+; CHECK: ssra.2d v0, v0, #7 ; encoding: [0x00,0x14,0x79,0x4f]
+; CHECK: ssra d0, d0, #64 ; encoding: [0x00,0x14,0x40,0x5f]
+; CHECK: ucvtf.2s v0, v0, #1 ; encoding: [0x00,0xe4,0x3f,0x2f]
+; CHECK: ucvtf.4s v0, v0, #2 ; encoding: [0x00,0xe4,0x3e,0x6f]
+; CHECK: ucvtf.2d v0, v0, #3 ; encoding: [0x00,0xe4,0x7d,0x6f]
+; CHECK: uqrshrn.8b v0, v0, #1 ; encoding: [0x00,0x9c,0x0f,0x2f]
+; CHECK: uqrshrn2.16b v0, v0, #2 ; encoding: [0x00,0x9c,0x0e,0x6f]
+; CHECK: uqrshrn.4h v0, v0, #3 ; encoding: [0x00,0x9c,0x1d,0x2f]
+; CHECK: uqrshrn2.8h v0, v0, #4 ; encoding: [0x00,0x9c,0x1c,0x6f]
+; CHECK: uqrshrn.2s v0, v0, #5 ; encoding: [0x00,0x9c,0x3b,0x2f]
+; CHECK: uqrshrn2.4s v0, v0, #6 ; encoding: [0x00,0x9c,0x3a,0x6f]
+; CHECK: uqshl.8b v0, v0, #1 ; encoding: [0x00,0x74,0x09,0x2f]
+; CHECK: uqshl.16b v0, v0, #2 ; encoding: [0x00,0x74,0x0a,0x6f]
+; CHECK: uqshl.4h v0, v0, #3 ; encoding: [0x00,0x74,0x13,0x2f]
+; CHECK: uqshl.8h v0, v0, #4 ; encoding: [0x00,0x74,0x14,0x6f]
+; CHECK: uqshl.2s v0, v0, #5 ; encoding: [0x00,0x74,0x25,0x2f]
+; CHECK: uqshl.4s v0, v0, #6 ; encoding: [0x00,0x74,0x26,0x6f]
+; CHECK: uqshl.2d v0, v0, #7 ; encoding: [0x00,0x74,0x47,0x6f]
+; CHECK: uqshrn.8b v0, v0, #1 ; encoding: [0x00,0x94,0x0f,0x2f]
+; CHECK: uqshrn2.16b v0, v0, #2 ; encoding: [0x00,0x94,0x0e,0x6f]
+; CHECK: uqshrn.4h v0, v0, #3 ; encoding: [0x00,0x94,0x1d,0x2f]
+; CHECK: uqshrn2.8h v0, v0, #4 ; encoding: [0x00,0x94,0x1c,0x6f]
+; CHECK: uqshrn.2s v0, v0, #5 ; encoding: [0x00,0x94,0x3b,0x2f]
+; CHECK: uqshrn2.4s v0, v0, #6 ; encoding: [0x00,0x94,0x3a,0x6f]
+; CHECK: urshr.8b v0, v0, #1 ; encoding: [0x00,0x24,0x0f,0x2f]
+; CHECK: urshr.16b v0, v0, #2 ; encoding: [0x00,0x24,0x0e,0x6f]
+; CHECK: urshr.4h v0, v0, #3 ; encoding: [0x00,0x24,0x1d,0x2f]
+; CHECK: urshr.8h v0, v0, #4 ; encoding: [0x00,0x24,0x1c,0x6f]
+; CHECK: urshr.2s v0, v0, #5 ; encoding: [0x00,0x24,0x3b,0x2f]
+; CHECK: urshr.4s v0, v0, #6 ; encoding: [0x00,0x24,0x3a,0x6f]
+; CHECK: urshr.2d v0, v0, #7 ; encoding: [0x00,0x24,0x79,0x6f]
+; CHECK: ursra.8b v0, v0, #1 ; encoding: [0x00,0x34,0x0f,0x2f]
+; CHECK: ursra.16b v0, v0, #2 ; encoding: [0x00,0x34,0x0e,0x6f]
+; CHECK: ursra.4h v0, v0, #3 ; encoding: [0x00,0x34,0x1d,0x2f]
+; CHECK: ursra.8h v0, v0, #4 ; encoding: [0x00,0x34,0x1c,0x6f]
+; CHECK: ursra.2s v0, v0, #5 ; encoding: [0x00,0x34,0x3b,0x2f]
+; CHECK: ursra.4s v0, v0, #6 ; encoding: [0x00,0x34,0x3a,0x6f]
+; CHECK: ursra.2d v0, v0, #7 ; encoding: [0x00,0x34,0x79,0x6f]
+; CHECK: ushll.8h v0, v0, #1 ; encoding: [0x00,0xa4,0x09,0x2f]
+; CHECK: ushll2.8h v0, v0, #2 ; encoding: [0x00,0xa4,0x0a,0x6f]
+; CHECK: ushll.4s v0, v0, #3 ; encoding: [0x00,0xa4,0x13,0x2f]
+; CHECK: ushll2.4s v0, v0, #4 ; encoding: [0x00,0xa4,0x14,0x6f]
+; CHECK: ushll.2d v0, v0, #5 ; encoding: [0x00,0xa4,0x25,0x2f]
+; CHECK: ushll2.2d v0, v0, #6 ; encoding: [0x00,0xa4,0x26,0x6f]
+; CHECK: ushr.8b v0, v0, #1 ; encoding: [0x00,0x04,0x0f,0x2f]
+; CHECK: ushr.16b v0, v0, #2 ; encoding: [0x00,0x04,0x0e,0x6f]
+; CHECK: ushr.4h v0, v0, #3 ; encoding: [0x00,0x04,0x1d,0x2f]
+; CHECK: ushr.8h v0, v0, #4 ; encoding: [0x00,0x04,0x1c,0x6f]
+; CHECK: ushr.2s v0, v0, #5 ; encoding: [0x00,0x04,0x3b,0x2f]
+; CHECK: ushr.4s v0, v0, #6 ; encoding: [0x00,0x04,0x3a,0x6f]
+; CHECK: ushr.2d v0, v0, #7 ; encoding: [0x00,0x04,0x79,0x6f]
+; CHECK: usra.8b v0, v0, #1 ; encoding: [0x00,0x14,0x0f,0x2f]
+; CHECK: usra.16b v0, v0, #2 ; encoding: [0x00,0x14,0x0e,0x6f]
+; CHECK: usra.4h v0, v0, #3 ; encoding: [0x00,0x14,0x1d,0x2f]
+; CHECK: usra.8h v0, v0, #4 ; encoding: [0x00,0x14,0x1c,0x6f]
+; CHECK: usra.2s v0, v0, #5 ; encoding: [0x00,0x14,0x3b,0x2f]
+; CHECK: usra.4s v0, v0, #6 ; encoding: [0x00,0x14,0x3a,0x6f]
+; CHECK: usra.2d v0, v0, #7 ; encoding: [0x00,0x14,0x79,0x6f]
+
+
+; ARM Verbose syntax variants.
+
+ rshrn v9.8b, v11.8h, #1
+ rshrn2 v8.16b, v9.8h, #2
+ rshrn v7.4h, v8.4s, #3
+ rshrn2 v6.8h, v7.4s, #4
+ rshrn v5.2s, v6.2d, #5
+ rshrn2 v4.4s, v5.2d, #6
+
+ shrn v9.8b, v11.8h, #1
+ shrn2 v8.16b, v9.8h, #2
+ shrn v7.4h, v8.4s, #3
+ shrn2 v6.8h, v7.4s, #4
+ shrn v5.2s, v6.2d, #5
+ shrn2 v4.4s, v5.2d, #6
+
+ sqrshrn v9.8b, v11.8h, #1
+ sqrshrn2 v8.16b, v9.8h, #2
+ sqrshrn v7.4h, v8.4s, #3
+ sqrshrn2 v6.8h, v7.4s, #4
+ sqrshrn v5.2s, v6.2d, #5
+ sqrshrn2 v4.4s, v5.2d, #6
+
+ sqshrn v9.8b, v11.8h, #1
+ sqshrn2 v8.16b, v9.8h, #2
+ sqshrn v7.4h, v8.4s, #3
+ sqshrn2 v6.8h, v7.4s, #4
+ sqshrn v5.2s, v6.2d, #5
+ sqshrn2 v4.4s, v5.2d, #6
+
+ sqrshrun v9.8b, v11.8h, #1
+ sqrshrun2 v8.16b, v9.8h, #2
+ sqrshrun v7.4h, v8.4s, #3
+ sqrshrun2 v6.8h, v7.4s, #4
+ sqrshrun v5.2s, v6.2d, #5
+ sqrshrun2 v4.4s, v5.2d, #6
+
+ sqshrun v9.8b, v11.8h, #1
+ sqshrun2 v8.16b, v9.8h, #2
+ sqshrun v7.4h, v8.4s, #3
+ sqshrun2 v6.8h, v7.4s, #4
+ sqshrun v5.2s, v6.2d, #5
+ sqshrun2 v4.4s, v5.2d, #6
+
+ uqrshrn v9.8b, v11.8h, #1
+ uqrshrn2 v8.16b, v9.8h, #2
+ uqrshrn v7.4h, v8.4s, #3
+ uqrshrn2 v6.8h, v7.4s, #4
+ uqrshrn v5.2s, v6.2d, #5
+ uqrshrn2 v4.4s, v5.2d, #6
+
+ uqshrn v9.8b, v11.8h, #1
+ uqshrn2 v8.16b, v9.8h, #2
+ uqshrn v7.4h, v8.4s, #3
+ uqshrn2 v6.8h, v7.4s, #4
+ uqshrn v5.2s, v6.2d, #5
+ uqshrn2 v4.4s, v5.2d, #6
+
+ sshll2 v10.8h, v3.16b, #6
+ sshll2 v11.4s, v4.8h, #5
+ sshll2 v12.2d, v5.4s, #4
+ sshll v13.8h, v6.8b, #3
+ sshll v14.4s, v7.4h, #2
+ sshll v15.2d, v8.2s, #7
+
+ ushll2 v10.8h, v3.16b, #6
+ ushll2 v11.4s, v4.8h, #5
+ ushll2 v12.2d, v5.4s, #4
+ ushll v13.8h, v6.8b, #3
+ ushll v14.4s, v7.4h, #2
+ ushll v15.2d, v8.2s, #7
+
+
+; CHECK: rshrn.8b v9, v11, #1 ; encoding: [0x69,0x8d,0x0f,0x0f]
+; CHECK: rshrn2.16b v8, v9, #2 ; encoding: [0x28,0x8d,0x0e,0x4f]
+; CHECK: rshrn.4h v7, v8, #3 ; encoding: [0x07,0x8d,0x1d,0x0f]
+; CHECK: rshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x8c,0x1c,0x4f]
+; CHECK: rshrn.2s v5, v6, #5 ; encoding: [0xc5,0x8c,0x3b,0x0f]
+; CHECK: rshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x8c,0x3a,0x4f]
+; CHECK: shrn.8b v9, v11, #1 ; encoding: [0x69,0x85,0x0f,0x0f]
+; CHECK: shrn2.16b v8, v9, #2 ; encoding: [0x28,0x85,0x0e,0x4f]
+; CHECK: shrn.4h v7, v8, #3 ; encoding: [0x07,0x85,0x1d,0x0f]
+; CHECK: shrn2.8h v6, v7, #4 ; encoding: [0xe6,0x84,0x1c,0x4f]
+; CHECK: shrn.2s v5, v6, #5 ; encoding: [0xc5,0x84,0x3b,0x0f]
+; CHECK: shrn2.4s v4, v5, #6 ; encoding: [0xa4,0x84,0x3a,0x4f]
+; CHECK: sqrshrn.8b v9, v11, #1 ; encoding: [0x69,0x9d,0x0f,0x0f]
+; CHECK: sqrshrn2.16b v8, v9, #2 ; encoding: [0x28,0x9d,0x0e,0x4f]
+; CHECK: sqrshrn.4h v7, v8, #3 ; encoding: [0x07,0x9d,0x1d,0x0f]
+; CHECK: sqrshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x9c,0x1c,0x4f]
+; CHECK: sqrshrn.2s v5, v6, #5 ; encoding: [0xc5,0x9c,0x3b,0x0f]
+; CHECK: sqrshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x9c,0x3a,0x4f]
+; CHECK: sqshrn.8b v9, v11, #1 ; encoding: [0x69,0x95,0x0f,0x0f]
+; CHECK: sqshrn2.16b v8, v9, #2 ; encoding: [0x28,0x95,0x0e,0x4f]
+; CHECK: sqshrn.4h v7, v8, #3 ; encoding: [0x07,0x95,0x1d,0x0f]
+; CHECK: sqshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x94,0x1c,0x4f]
+; CHECK: sqshrn.2s v5, v6, #5 ; encoding: [0xc5,0x94,0x3b,0x0f]
+; CHECK: sqshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x94,0x3a,0x4f]
+; CHECK: sqrshrun.8b v9, v11, #1 ; encoding: [0x69,0x8d,0x0f,0x2f]
+; CHECK: sqrshrun2.16b v8, v9, #2 ; encoding: [0x28,0x8d,0x0e,0x6f]
+; CHECK: sqrshrun.4h v7, v8, #3 ; encoding: [0x07,0x8d,0x1d,0x2f]
+; CHECK: sqrshrun2.8h v6, v7, #4 ; encoding: [0xe6,0x8c,0x1c,0x6f]
+; CHECK: sqrshrun.2s v5, v6, #5 ; encoding: [0xc5,0x8c,0x3b,0x2f]
+; CHECK: sqrshrun2.4s v4, v5, #6 ; encoding: [0xa4,0x8c,0x3a,0x6f]
+; CHECK: sqshrun.8b v9, v11, #1 ; encoding: [0x69,0x85,0x0f,0x2f]
+; CHECK: sqshrun2.16b v8, v9, #2 ; encoding: [0x28,0x85,0x0e,0x6f]
+; CHECK: sqshrun.4h v7, v8, #3 ; encoding: [0x07,0x85,0x1d,0x2f]
+; CHECK: sqshrun2.8h v6, v7, #4 ; encoding: [0xe6,0x84,0x1c,0x6f]
+; CHECK: sqshrun.2s v5, v6, #5 ; encoding: [0xc5,0x84,0x3b,0x2f]
+; CHECK: sqshrun2.4s v4, v5, #6 ; encoding: [0xa4,0x84,0x3a,0x6f]
+; CHECK: uqrshrn.8b v9, v11, #1 ; encoding: [0x69,0x9d,0x0f,0x2f]
+; CHECK: uqrshrn2.16b v8, v9, #2 ; encoding: [0x28,0x9d,0x0e,0x6f]
+; CHECK: uqrshrn.4h v7, v8, #3 ; encoding: [0x07,0x9d,0x1d,0x2f]
+; CHECK: uqrshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x9c,0x1c,0x6f]
+; CHECK: uqrshrn.2s v5, v6, #5 ; encoding: [0xc5,0x9c,0x3b,0x2f]
+; CHECK: uqrshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x9c,0x3a,0x6f]
+; CHECK: uqshrn.8b v9, v11, #1 ; encoding: [0x69,0x95,0x0f,0x2f]
+; CHECK: uqshrn2.16b v8, v9, #2 ; encoding: [0x28,0x95,0x0e,0x6f]
+; CHECK: uqshrn.4h v7, v8, #3 ; encoding: [0x07,0x95,0x1d,0x2f]
+; CHECK: uqshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x94,0x1c,0x6f]
+; CHECK: uqshrn.2s v5, v6, #5 ; encoding: [0xc5,0x94,0x3b,0x2f]
+; CHECK: uqshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x94,0x3a,0x6f]
+; CHECK: sshll2.8h v10, v3, #6 ; encoding: [0x6a,0xa4,0x0e,0x4f]
+; CHECK: sshll2.4s v11, v4, #5 ; encoding: [0x8b,0xa4,0x15,0x4f]
+; CHECK: sshll2.2d v12, v5, #4 ; encoding: [0xac,0xa4,0x24,0x4f]
+; CHECK: sshll.8h v13, v6, #3 ; encoding: [0xcd,0xa4,0x0b,0x0f]
+; CHECK: sshll.4s v14, v7, #2 ; encoding: [0xee,0xa4,0x12,0x0f]
+; CHECK: sshll.2d v15, v8, #7 ; encoding: [0x0f,0xa5,0x27,0x0f]
+; CHECK: ushll2.8h v10, v3, #6 ; encoding: [0x6a,0xa4,0x0e,0x6f]
+; CHECK: ushll2.4s v11, v4, #5 ; encoding: [0x8b,0xa4,0x15,0x6f]
+; CHECK: ushll2.2d v12, v5, #4 ; encoding: [0xac,0xa4,0x24,0x6f]
+; CHECK: ushll.8h v13, v6, #3 ; encoding: [0xcd,0xa4,0x0b,0x2f]
+; CHECK: ushll.4s v14, v7, #2 ; encoding: [0xee,0xa4,0x12,0x2f]
+; CHECK: ushll.2d v15, v8, #7 ; encoding: [0x0f,0xa5,0x27,0x2f]
+
+
+ pmull.8h v0, v0, v0
+ pmull2.8h v0, v0, v0
+ pmull.1q v2, v3, v4
+ pmull2.1q v2, v3, v4
+ pmull v2.1q, v3.1d, v4.1d
+ pmull2 v2.1q, v3.2d, v4.2d
+
+; CHECK: pmull.8h v0, v0, v0 ; encoding: [0x00,0xe0,0x20,0x0e]
+; CHECK: pmull2.8h v0, v0, v0 ; encoding: [0x00,0xe0,0x20,0x4e]
+; CHECK: pmull.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x0e]
+; CHECK: pmull2.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x4e]
+; CHECK: pmull.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x0e]
+; CHECK: pmull2.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x4e]
+
+
+ faddp.2d d1, v2
+ faddp.2s s3, v4
+; CHECK: faddp.2d d1, v2 ; encoding: [0x41,0xd8,0x70,0x7e]
+; CHECK: faddp.2s s3, v4 ; encoding: [0x83,0xd8,0x30,0x7e]
+
+ tbl.16b v2, {v4,v5,v6,v7}, v1
+ tbl.8b v0, {v4,v5,v6,v7}, v1
+ tbl.16b v2, {v5}, v1
+ tbl.8b v0, {v5}, v1
+ tbl.16b v2, {v5,v6,v7}, v1
+ tbl.8b v0, {v5,v6,v7}, v1
+ tbl.16b v2, {v6,v7}, v1
+ tbl.8b v0, {v6,v7}, v1
+; CHECK: tbl.16b v2, { v4, v5, v6, v7 }, v1 ; encoding: [0x82,0x60,0x01,0x4e]
+; CHECK: tbl.8b v0, { v4, v5, v6, v7 }, v1 ; encoding: [0x80,0x60,0x01,0x0e]
+; CHECK: tbl.16b v2, { v5 }, v1 ; encoding: [0xa2,0x00,0x01,0x4e]
+; CHECK: tbl.8b v0, { v5 }, v1 ; encoding: [0xa0,0x00,0x01,0x0e]
+; CHECK: tbl.16b v2, { v5, v6, v7 }, v1 ; encoding: [0xa2,0x40,0x01,0x4e]
+; CHECK: tbl.8b v0, { v5, v6, v7 }, v1 ; encoding: [0xa0,0x40,0x01,0x0e]
+; CHECK: tbl.16b v2, { v6, v7 }, v1 ; encoding: [0xc2,0x20,0x01,0x4e]
+; CHECK: tbl.8b v0, { v6, v7 }, v1 ; encoding: [0xc0,0x20,0x01,0x0e]
+
+ tbl v2.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v1.16b
+ tbl v0.8b, {v4.16b,v5.16b,v6.16b,v7.16b}, v1.8b
+ tbl v2.16b, {v5.16b}, v1.16b
+ tbl v0.8b, {v5.16b}, v1.8b
+ tbl v2.16b, {v5.16b,v6.16b,v7.16b}, v1.16b
+ tbl v0.8b, {v5.16b,v6.16b,v7.16b}, v1.8b
+ tbl v2.16b, {v6.16b,v7.16b}, v1.16b
+ tbl v0.8b, {v6.16b,v7.16b}, v1.8b
+; CHECK: tbl.16b v2, { v4, v5, v6, v7 }, v1 ; encoding: [0x82,0x60,0x01,0x4e]
+; CHECK: tbl.8b v0, { v4, v5, v6, v7 }, v1 ; encoding: [0x80,0x60,0x01,0x0e]
+; CHECK: tbl.16b v2, { v5 }, v1 ; encoding: [0xa2,0x00,0x01,0x4e]
+; CHECK: tbl.8b v0, { v5 }, v1 ; encoding: [0xa0,0x00,0x01,0x0e]
+; CHECK: tbl.16b v2, { v5, v6, v7 }, v1 ; encoding: [0xa2,0x40,0x01,0x4e]
+; CHECK: tbl.8b v0, { v5, v6, v7 }, v1 ; encoding: [0xa0,0x40,0x01,0x0e]
+; CHECK: tbl.16b v2, { v6, v7 }, v1 ; encoding: [0xc2,0x20,0x01,0x4e]
+; CHECK: tbl.8b v0, { v6, v7 }, v1 ; encoding: [0xc0,0x20,0x01,0x0e]
+
+ sqdmull s0, h0, h0
+ sqdmull d0, s0, s0
+; CHECK: sqdmull s0, h0, h0 ; encoding: [0x00,0xd0,0x60,0x5e]
+; CHECK: sqdmull d0, s0, s0 ; encoding: [0x00,0xd0,0xa0,0x5e]
+
+ frsqrte s0, s0
+ frsqrte d0, d0
+; CHECK: frsqrte s0, s0 ; encoding: [0x00,0xd8,0xa1,0x7e]
+; CHECK: frsqrte d0, d0 ; encoding: [0x00,0xd8,0xe1,0x7e]
+
+ mov.16b v0, v0
+ mov.2s v0, v0
+; CHECK: mov.16b v0, v0 ; encoding: [0x00,0x1c,0xa0,0x4e]
+; CHECK: mov.8b v0, v0 ; encoding: [0x00,0x1c,0xa0,0x0e]
+
+
+; uadalp/sadalp verbose mode aliases.
+ uadalp v14.4h, v25.8b
+ uadalp v15.8h, v24.16b
+ uadalp v16.2s, v23.4h
+ uadalp v17.4s, v22.8h
+ uadalp v18.1d, v21.2s
+ uadalp v19.2d, v20.4s
+
+ sadalp v1.4h, v11.8b
+ sadalp v2.8h, v12.16b
+ sadalp v3.2s, v13.4h
+ sadalp v4.4s, v14.8h
+ sadalp v5.1d, v15.2s
+ sadalp v6.2d, v16.4s
+
+; CHECK: uadalp.4h v14, v25 ; encoding: [0x2e,0x6b,0x20,0x2e]
+; CHECK: uadalp.8h v15, v24 ; encoding: [0x0f,0x6b,0x20,0x6e]
+; CHECK: uadalp.2s v16, v23 ; encoding: [0xf0,0x6a,0x60,0x2e]
+; CHECK: uadalp.4s v17, v22 ; encoding: [0xd1,0x6a,0x60,0x6e]
+; CHECK: uadalp.1d v18, v21 ; encoding: [0xb2,0x6a,0xa0,0x2e]
+; CHECK: uadalp.2d v19, v20 ; encoding: [0x93,0x6a,0xa0,0x6e]
+; CHECK: sadalp.4h v1, v11 ; encoding: [0x61,0x69,0x20,0x0e]
+; CHECK: sadalp.8h v2, v12 ; encoding: [0x82,0x69,0x20,0x4e]
+; CHECK: sadalp.2s v3, v13 ; encoding: [0xa3,0x69,0x60,0x0e]
+; CHECK: sadalp.4s v4, v14 ; encoding: [0xc4,0x69,0x60,0x4e]
+; CHECK: sadalp.1d v5, v15 ; encoding: [0xe5,0x69,0xa0,0x0e]
+; CHECK: sadalp.2d v6, v16 ; encoding: [0x06,0x6a,0xa0,0x4e]
+
+; MVN is an alias for 'not'.
+ mvn v1.8b, v4.8b
+ mvn v19.16b, v17.16b
+ mvn.8b v10, v6
+ mvn.16b v11, v7
+
+; CHECK: mvn.8b v1, v4 ; encoding: [0x81,0x58,0x20,0x2e]
+; CHECK: mvn.16b v19, v17 ; encoding: [0x33,0x5a,0x20,0x6e]
+; CHECK: mvn.8b v10, v6 ; encoding: [0xca,0x58,0x20,0x2e]
+; CHECK: mvn.16b v11, v7 ; encoding: [0xeb,0x58,0x20,0x6e]
+
+; sqdmull verbose mode aliases
+ sqdmull v10.4s, v12.4h, v12.4h
+ sqdmull2 v10.4s, v13.8h, v13.8h
+ sqdmull v10.2d, v13.2s, v13.2s
+ sqdmull2 v10.2d, v13.4s, v13.4s
+; CHECK: sqdmull.4s v10, v12, v12 ; encoding: [0x8a,0xd1,0x6c,0x0e]
+; CHECK: sqdmull2.4s v10, v13, v13 ; encoding: [0xaa,0xd1,0x6d,0x4e]
+; CHECK: sqdmull.2d v10, v13, v13 ; encoding: [0xaa,0xd1,0xad,0x0e]
+; CHECK: sqdmull2.2d v10, v13, v13 ; encoding: [0xaa,0xd1,0xad,0x4e]
+
+; xtn verbose mode aliases
+ xtn v14.8b, v14.8h
+ xtn2 v14.16b, v14.8h
+ xtn v14.4h, v14.4s
+ xtn2 v14.8h, v14.4s
+ xtn v14.2s, v14.2d
+ xtn2 v14.4s, v14.2d
+; CHECK: xtn.8b v14, v14 ; encoding: [0xce,0x29,0x21,0x0e]
+; CHECK: xtn2.16b v14, v14 ; encoding: [0xce,0x29,0x21,0x4e]
+; CHECK: xtn.4h v14, v14 ; encoding: [0xce,0x29,0x61,0x0e]
+; CHECK: xtn2.8h v14, v14 ; encoding: [0xce,0x29,0x61,0x4e]
+; CHECK: xtn.2s v14, v14 ; encoding: [0xce,0x29,0xa1,0x0e]
+; CHECK: xtn2.4s v14, v14 ; encoding: [0xce,0x29,0xa1,0x4e]
+
+; uaddl verbose mode aliases
+ uaddl v9.8h, v13.8b, v14.8b
+ uaddl2 v9.8h, v13.16b, v14.16b
+ uaddl v9.4s, v13.4h, v14.4h
+ uaddl2 v9.4s, v13.8h, v14.8h
+ uaddl v9.2d, v13.2s, v14.2s
+ uaddl2 v9.2d, v13.4s, v14.4s
+; CHECK: uaddl.8h v9, v13, v14 ; encoding: [0xa9,0x01,0x2e,0x2e]
+; CHECK: uaddl2.8h v9, v13, v14 ; encoding: [0xa9,0x01,0x2e,0x6e]
+; CHECK: uaddl.4s v9, v13, v14 ; encoding: [0xa9,0x01,0x6e,0x2e]
+; CHECK: uaddl2.4s v9, v13, v14 ; encoding: [0xa9,0x01,0x6e,0x6e]
+; CHECK: uaddl.2d v9, v13, v14 ; encoding: [0xa9,0x01,0xae,0x2e]
+; CHECK: uaddl2.2d v9, v13, v14 ; encoding: [0xa9,0x01,0xae,0x6e]
+
+; bit verbose mode aliases
+ bit v9.16b, v10.16b, v10.16b
+ bit v9.8b, v10.8b, v10.8b
+; CHECK: bit.16b v9, v10, v10 ; encoding: [0x49,0x1d,0xaa,0x6e]
+; CHECK: bit.8b v9, v10, v10 ; encoding: [0x49,0x1d,0xaa,0x2e]
+
+; pmull verbose mode aliases
+ pmull v8.8h, v8.8b, v8.8b
+ pmull2 v8.8h, v8.16b, v8.16b
+ pmull v8.1q, v8.1d, v8.1d
+ pmull2 v8.1q, v8.2d, v8.2d
+; CHECK: pmull.8h v8, v8, v8 ; encoding: [0x08,0xe1,0x28,0x0e]
+; CHECK: pmull2.8h v8, v8, v8 ; encoding: [0x08,0xe1,0x28,0x4e]
+; CHECK: pmull.1q v8, v8, v8 ; encoding: [0x08,0xe1,0xe8,0x0e]
+; CHECK: pmull2.1q v8, v8, v8 ; encoding: [0x08,0xe1,0xe8,0x4e]
+
+; usubl verbose mode aliases
+ usubl v9.8h, v13.8b, v14.8b
+ usubl2 v9.8h, v13.16b, v14.16b
+ usubl v9.4s, v13.4h, v14.4h
+ usubl2 v9.4s, v13.8h, v14.8h
+ usubl v9.2d, v13.2s, v14.2s
+ usubl2 v9.2d, v13.4s, v14.4s
+; CHECK: usubl.8h v9, v13, v14 ; encoding: [0xa9,0x21,0x2e,0x2e]
+; CHECK: usubl2.8h v9, v13, v14 ; encoding: [0xa9,0x21,0x2e,0x6e]
+; CHECK: usubl.4s v9, v13, v14 ; encoding: [0xa9,0x21,0x6e,0x2e]
+; CHECK: usubl2.4s v9, v13, v14 ; encoding: [0xa9,0x21,0x6e,0x6e]
+; CHECK: usubl.2d v9, v13, v14 ; encoding: [0xa9,0x21,0xae,0x2e]
+; CHECK: usubl2.2d v9, v13, v14 ; encoding: [0xa9,0x21,0xae,0x6e]
+
+; uabdl verbose mode aliases
+ uabdl v9.8h, v13.8b, v14.8b
+ uabdl2 v9.8h, v13.16b, v14.16b
+ uabdl v9.4s, v13.4h, v14.4h
+ uabdl2 v9.4s, v13.8h, v14.8h
+ uabdl v9.2d, v13.2s, v14.2s
+ uabdl2 v9.2d, v13.4s, v14.4s
+; CHECK: uabdl.8h v9, v13, v14 ; encoding: [0xa9,0x71,0x2e,0x2e]
+; CHECK: uabdl2.8h v9, v13, v14 ; encoding: [0xa9,0x71,0x2e,0x6e]
+; CHECK: uabdl.4s v9, v13, v14 ; encoding: [0xa9,0x71,0x6e,0x2e]
+; CHECK: uabdl2.4s v9, v13, v14 ; encoding: [0xa9,0x71,0x6e,0x6e]
+; CHECK: uabdl.2d v9, v13, v14 ; encoding: [0xa9,0x71,0xae,0x2e]
+; CHECK: uabdl2.2d v9, v13, v14 ; encoding: [0xa9,0x71,0xae,0x6e]
+
+; umull verbose mode aliases
+ umull v9.8h, v13.8b, v14.8b
+ umull2 v9.8h, v13.16b, v14.16b
+ umull v9.4s, v13.4h, v14.4h
+ umull2 v9.4s, v13.8h, v14.8h
+ umull v9.2d, v13.2s, v14.2s
+ umull2 v9.2d, v13.4s, v14.4s
+; CHECK: umull.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x2e]
+; CHECK: umull2.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x6e]
+; CHECK: umull.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x2e]
+; CHECK: umull2.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x6e]
+; CHECK: umull.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x2e]
+; CHECK: umull2.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x6e]
+
+; smull verbose mode aliases
+ smull v9.8h, v13.8b, v14.8b
+ smull2 v9.8h, v13.16b, v14.16b
+ smull v9.4s, v13.4h, v14.4h
+ smull2 v9.4s, v13.8h, v14.8h
+ smull v9.2d, v13.2s, v14.2s
+ smull2 v9.2d, v13.4s, v14.4s
+; CHECK: smull.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x0e]
+; CHECK: smull2.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x4e]
+; CHECK: smull.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x0e]
+; CHECK: smull2.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x4e]
+; CHECK: smull.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x0e]
+; CHECK: smull2.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x4e]
diff --git a/test/MC/AArch64/arm64-aliases.s b/test/MC/AArch64/arm64-aliases.s
new file mode 100644
index 0000000..c3affe3
--- /dev/null
+++ b/test/MC/AArch64/arm64-aliases.s
@@ -0,0 +1,753 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+
+foo:
+;-----------------------------------------------------------------------------
+; ADD #0 to/from SP/WSP is a MOV
+;-----------------------------------------------------------------------------
+ add x1, sp, #0
+; CHECK: mov x1, sp
+ add sp, x2, #0
+; CHECK: mov sp, x2
+ add w3, wsp, #0
+; CHECK: mov w3, wsp
+ add wsp, w4, #0
+; CHECK: mov wsp, w4
+ mov x5, sp
+; CHECK: mov x5, sp
+ mov sp, x6
+; CHECK: mov sp, x6
+ mov w7, wsp
+; CHECK: mov w7, wsp
+ mov wsp, w8
+; CHECK: mov wsp, w8
+
+;-----------------------------------------------------------------------------
+; ORR Rd, Rn, Rn is a MOV
+;-----------------------------------------------------------------------------
+ orr x2, xzr, x9
+; CHECK: mov x2, x9
+ orr w2, wzr, w9
+; CHECK: mov w2, w9
+ mov x3, x4
+; CHECK: mov x3, x4
+ mov w5, w6
+; CHECK: mov w5, w6
+
+;-----------------------------------------------------------------------------
+; TST Xn, #<imm>
+;-----------------------------------------------------------------------------
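+; TST is an alias for ANDS with the zero register as destination; the explicit
+; 'ands wzr'/'ands xzr' forms below print back as TST.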
+ tst w1, #3
+ tst x1, #3
+ tst w1, w2
+ tst x1, x2
+ ands wzr, w1, w2, lsl #2
+ ands xzr, x1, x2, lsl #3
+ tst w3, w7, lsl #31
+ tst x2, x20, asr #0
+
+; CHECK: tst w1, #0x3 ; encoding: [0x3f,0x04,0x00,0x72]
+; CHECK: tst x1, #0x3 ; encoding: [0x3f,0x04,0x40,0xf2]
+; CHECK: tst w1, w2 ; encoding: [0x3f,0x00,0x02,0x6a]
+; CHECK: tst x1, x2 ; encoding: [0x3f,0x00,0x02,0xea]
+; CHECK: tst w1, w2, lsl #2 ; encoding: [0x3f,0x08,0x02,0x6a]
+; CHECK: tst x1, x2, lsl #3 ; encoding: [0x3f,0x0c,0x02,0xea]
+; CHECK: tst w3, w7, lsl #31 ; encoding: [0x7f,0x7c,0x07,0x6a]
+; CHECK: tst x2, x20, asr #0 ; encoding: [0x5f,0x00,0x94,0xea]
+
+;-----------------------------------------------------------------------------
+; ADDS to WZR/XZR is a CMN
+;-----------------------------------------------------------------------------
+ cmn w1, #3, lsl #0
+ cmn x2, #4194304
+ cmn w4, w5
+ cmn x6, x7
+ cmn w8, w9, asr #3
+ cmn x2, x3, lsr #4
+ cmn x2, w3, uxtb #1
+ cmn x4, x5, uxtx #1
+
+; CHECK: cmn w1, #3 ; encoding: [0x3f,0x0c,0x00,0x31]
+; CHECK: cmn x2, #1024, lsl #12 ; encoding: [0x5f,0x00,0x50,0xb1]
+; CHECK: cmn w4, w5 ; encoding: [0x9f,0x00,0x05,0x2b]
+; CHECK: cmn x6, x7 ; encoding: [0xdf,0x00,0x07,0xab]
+; CHECK: cmn w8, w9, asr #3 ; encoding: [0x1f,0x0d,0x89,0x2b]
+; CHECK: cmn x2, x3, lsr #4 ; encoding: [0x5f,0x10,0x43,0xab]
+; CHECK: cmn x2, w3, uxtb #1 ; encoding: [0x5f,0x04,0x23,0xab]
+; CHECK: cmn x4, x5, uxtx #1 ; encoding: [0x9f,0x64,0x25,0xab]
+
+
+;-----------------------------------------------------------------------------
+; SUBS to WZR/XZR is a CMP
+;-----------------------------------------------------------------------------
+ cmp w1, #1024, lsl #12
+ cmp x2, #1024
+ cmp w4, w5
+ cmp x6, x7
+ cmp w8, w9, asr #3
+ cmp x2, x3, lsr #4
+ cmp x2, w3, uxth #2
+ cmp x4, x5, uxtx
+ cmp wzr, w1
+ cmp x8, w8, uxtw
+ cmp w9, w8, uxtw
+ cmp wsp, w9, lsl #0
+
+; CHECK: cmp w1, #1024, lsl #12 ; encoding: [0x3f,0x00,0x50,0x71]
+; CHECK: cmp x2, #1024 ; encoding: [0x5f,0x00,0x10,0xf1]
+; CHECK: cmp w4, w5 ; encoding: [0x9f,0x00,0x05,0x6b]
+; CHECK: cmp x6, x7 ; encoding: [0xdf,0x00,0x07,0xeb]
+; CHECK: cmp w8, w9, asr #3 ; encoding: [0x1f,0x0d,0x89,0x6b]
+; CHECK: cmp x2, x3, lsr #4 ; encoding: [0x5f,0x10,0x43,0xeb]
+; CHECK: cmp x2, w3, uxth #2 ; encoding: [0x5f,0x28,0x23,0xeb]
+; CHECK: cmp x4, x5, uxtx ; encoding: [0x9f,0x60,0x25,0xeb]
+; CHECK: cmp wzr, w1 ; encoding: [0xff,0x03,0x01,0x6b]
+; CHECK: cmp x8, w8, uxtw ; encoding: [0x1f,0x41,0x28,0xeb]
+; CHECK: cmp w9, w8, uxtw ; encoding: [0x3f,0x41,0x28,0x6b]
+; CHECK: cmp wsp, w9 ; encoding: [0xff,0x43,0x29,0x6b]
+
+
+;-----------------------------------------------------------------------------
+; SUB/SUBS from WZR/XZR is a NEG
+;-----------------------------------------------------------------------------
+
+ neg w0, w1
+; CHECK: neg w0, w1
+ neg w0, w1, lsl #1
+; CHECK: neg w0, w1, lsl #1
+ neg x0, x1
+; CHECK: neg x0, x1
+ neg x0, x1, asr #1
+; CHECK: neg x0, x1, asr #1
+ negs w0, w1
+; CHECK: negs w0, w1
+ negs w0, w1, lsl #1
+; CHECK: negs w0, w1, lsl #1
+ negs x0, x1
+; CHECK: negs x0, x1
+ negs x0, x1, asr #1
+; CHECK: negs x0, x1, asr #1
+
+;-----------------------------------------------------------------------------
+; MOV aliases
+;-----------------------------------------------------------------------------
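+; Wide move immediates canonicalize to MOVZ or MOVN with a 16-bit-aligned
+; shift, whichever can represent the value.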
+
+ mov x0, #281470681743360
+ mov x0, #18446744073709486080
+
+; CHECK: movz x0, #0xffff, lsl #32
+; CHECK: movn x0, #0xffff
+
+ mov w0, #0xffffffff
+ mov w0, #0xffffff00
+ mov wzr, #0xffffffff
+ mov wzr, #0xffffff00
+
+; CHECK: movn w0, #0
+; CHECK: movn w0, #0xff
+; CHECK: movn wzr, #0
+; CHECK: movn wzr, #0xff
+
+;-----------------------------------------------------------------------------
+; MVN aliases
+;-----------------------------------------------------------------------------
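+; MVN Rd, Rm is ORN Rd, ZR, Rm; the explicit 'orn ... wzr' forms below print
+; back as MVN.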
+
+ mvn w4, w9
+ mvn x2, x3
+ orn w4, wzr, w9
+
+; CHECK: mvn w4, w9 ; encoding: [0xe4,0x03,0x29,0x2a]
+; CHECK: mvn x2, x3 ; encoding: [0xe2,0x03,0x23,0xaa]
+; CHECK: mvn w4, w9 ; encoding: [0xe4,0x03,0x29,0x2a]
+
+ mvn w4, w9, lsl #1
+ mvn x2, x3, lsl #1
+ orn w4, wzr, w9, lsl #1
+
+; CHECK: mvn w4, w9, lsl #1 ; encoding: [0xe4,0x07,0x29,0x2a]
+; CHECK: mvn x2, x3, lsl #1 ; encoding: [0xe2,0x07,0x23,0xaa]
+; CHECK: mvn w4, w9, lsl #1 ; encoding: [0xe4,0x07,0x29,0x2a]
+
+;-----------------------------------------------------------------------------
+; Bitfield aliases
+;-----------------------------------------------------------------------------
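+; BFI and BFXIL are BFM aliases; SBFIZ/SBFX map to SBFM and UBFIZ/UBFX to
+; UBFM. A BFI with a zero LSB prints as the preferred BFXIL form.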
+
+ bfi w0, w0, #1, #4
+ bfi x0, x0, #1, #4
+ bfi w0, w0, #0, #2
+ bfi x0, x0, #0, #2
+ bfxil w0, w0, #2, #3
+ bfxil x0, x0, #2, #3
+ sbfiz w0, w0, #1, #4
+ sbfiz x0, x0, #1, #4
+ sbfx w0, w0, #2, #3
+ sbfx x0, x0, #2, #3
+ ubfiz w0, w0, #1, #4
+ ubfiz x0, x0, #1, #4
+ ubfx w0, w0, #2, #3
+ ubfx x0, x0, #2, #3
+
+; CHECK: bfi w0, w0, #1, #4
+; CHECK: bfi x0, x0, #1, #4
+; CHECK: bfxil w0, w0, #0, #2
+; CHECK: bfxil x0, x0, #0, #2
+; CHECK: bfxil w0, w0, #2, #3
+; CHECK: bfxil x0, x0, #2, #3
+; CHECK: sbfiz w0, w0, #1, #4
+; CHECK: sbfiz x0, x0, #1, #4
+; CHECK: sbfx w0, w0, #2, #3
+; CHECK: sbfx x0, x0, #2, #3
+; CHECK: ubfiz w0, w0, #1, #4
+; CHECK: ubfiz x0, x0, #1, #4
+; CHECK: ubfx w0, w0, #2, #3
+; CHECK: ubfx x0, x0, #2, #3
+
+;-----------------------------------------------------------------------------
+; Shift (immediate) aliases
+;-----------------------------------------------------------------------------
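+; ASR, LSR, and LSL (immediate) are SBFM/UBFM aliases, and ROR (immediate) is
+; an EXTR with both sources the same; the raw SBFM/UBFM forms below print back
+; as shifts.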
+
+; CHECK: asr w1, w3, #13
+; CHECK: asr x1, x3, #13
+; CHECK: lsl w0, w0, #1
+; CHECK: lsl x0, x0, #1
+; CHECK: lsr w0, w0, #4
+; CHECK: lsr x0, x0, #4
+
+ sbfm w1, w3, #13, #31
+ sbfm x1, x3, #13, #63
+ ubfm w0, w0, #31, #30
+ ubfm x0, x0, #63, #62
+ ubfm w0, w0, #4, #31
+ ubfm x0, x0, #4, #63
+; CHECK: ror w1, w3, #5
+; CHECK: ror x1, x3, #5
+ ror w1, w3, #5
+ ror x1, x3, #5
+; CHECK: lsl w1, wzr, #3
+ lsl w1, wzr, #3
+
+;-----------------------------------------------------------------------------
+; Sign/Zero extend aliases
+;-----------------------------------------------------------------------------
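+; SXTB/SXTH/SXTW are SBFM aliases and UXTB/UXTH are UBFM aliases, all with an
+; immr of 0. The unsigned byte/halfword forms always print with W registers,
+; and UXTW prints as UBFX.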
+
+ sxtb w1, w2
+ sxth w1, w2
+ uxtb w1, w2
+ uxth w1, w2
+
+; CHECK: sxtb w1, w2
+; CHECK: sxth w1, w2
+; CHECK: uxtb w1, w2
+; CHECK: uxth w1, w2
+
+ sxtb x1, w2
+ sxth x1, w2
+ sxtw x1, w2
+ uxtb x1, w2
+ uxth x1, w2
+ uxtw x1, w2
+
+; CHECK: sxtb x1, w2
+; CHECK: sxth x1, w2
+; CHECK: sxtw x1, w2
+; CHECK: uxtb w1, w2
+; CHECK: uxth w1, w2
+; CHECK: ubfx x1, x2, #0, #32
+
+;-----------------------------------------------------------------------------
+; Negate with carry
+;-----------------------------------------------------------------------------
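+; NGC and NGCS are SBC and SBCS with the zero register as the first source.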
+
+ ngc w1, w2
+ ngc x1, x2
+ ngcs w1, w2
+ ngcs x1, x2
+
+; CHECK: ngc w1, w2
+; CHECK: ngc x1, x2
+; CHECK: ngcs w1, w2
+; CHECK: ngcs x1, x2
+
+;-----------------------------------------------------------------------------
+; 6.6.1 Multiply aliases
+;-----------------------------------------------------------------------------
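+; MUL and MNEG are MADD and MSUB with the zero register as addend; the long
+; forms (SMULL, UMULL, SMNEGL, UMNEGL) use XZR the same way.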
+
+ mneg w1, w2, w3
+ mneg x1, x2, x3
+ mul w1, w2, w3
+ mul x1, x2, x3
+ smnegl x1, w2, w3
+ umnegl x1, w2, w3
+ smull x1, w2, w3
+ umull x1, w2, w3
+
+; CHECK: mneg w1, w2, w3
+; CHECK: mneg x1, x2, x3
+; CHECK: mul w1, w2, w3
+; CHECK: mul x1, x2, x3
+; CHECK: smnegl x1, w2, w3
+; CHECK: umnegl x1, w2, w3
+; CHECK: smull x1, w2, w3
+; CHECK: umull x1, w2, w3
+
+;-----------------------------------------------------------------------------
+; Conditional select aliases
+;-----------------------------------------------------------------------------
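+; CSET/CSETM and CINC/CINV are CSINC/CSINV with the condition inverted and
+; the zero register (or a repeated source) as operands.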
+
+ cset w1, eq
+ cset x1, eq
+ csetm w1, ne
+ csetm x1, ne
+ cinc w1, w2, lt
+ cinc x1, x2, lt
+ cinv w1, w2, mi
+ cinv x1, x2, mi
+
+; CHECK: cset w1, eq
+; CHECK: cset x1, eq
+; CHECK: csetm w1, ne
+; CHECK: csetm x1, ne
+; CHECK: cinc w1, w2, lt
+; CHECK: cinc x1, x2, lt
+; CHECK: cinv w1, w2, mi
+; CHECK: cinv x1, x2, mi
+
+;-----------------------------------------------------------------------------
+; SYS aliases
+;-----------------------------------------------------------------------------
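+; A raw SYS with #op1, Cn, Cm, #op2 operands is matched against the IC, DC,
+; AT, and TLBI operation tables and prints as the named operation.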
+
+ sys #0, c7, c1, #0
+; CHECK: ic ialluis
+ sys #0, c7, c5, #0
+; CHECK: ic iallu
+ sys #3, c7, c5, #1
+; CHECK: ic ivau
+
+ sys #3, c7, c4, #1
+; CHECK: dc zva
+ sys #0, c7, c6, #1
+; CHECK: dc ivac
+ sys #0, c7, c6, #2
+; CHECK: dc isw
+ sys #3, c7, c10, #1
+; CHECK: dc cvac
+ sys #0, c7, c10, #2
+; CHECK: dc csw
+ sys #3, c7, c11, #1
+; CHECK: dc cvau
+ sys #3, c7, c14, #1
+; CHECK: dc civac
+ sys #0, c7, c14, #2
+; CHECK: dc cisw
+
+ sys #0, c7, c8, #0
+; CHECK: at s1e1r
+ sys #4, c7, c8, #0
+; CHECK: at s1e2r
+ sys #6, c7, c8, #0
+; CHECK: at s1e3r
+ sys #0, c7, c8, #1
+; CHECK: at s1e1w
+ sys #4, c7, c8, #1
+; CHECK: at s1e2w
+ sys #6, c7, c8, #1
+; CHECK: at s1e3w
+ sys #0, c7, c8, #2
+; CHECK: at s1e0r
+ sys #0, c7, c8, #3
+; CHECK: at s1e0w
+ sys #4, c7, c8, #4
+; CHECK: at s12e1r
+ sys #4, c7, c8, #5
+; CHECK: at s12e1w
+ sys #4, c7, c8, #6
+; CHECK: at s12e0r
+ sys #4, c7, c8, #7
+; CHECK: at s12e0w
+
+ sys #0, c8, c3, #0
+; CHECK: tlbi vmalle1is
+ sys #4, c8, c3, #0
+; CHECK: tlbi alle2is
+ sys #6, c8, c3, #0
+; CHECK: tlbi alle3is
+ sys #0, c8, c3, #1
+; CHECK: tlbi vae1is
+ sys #4, c8, c3, #1
+; CHECK: tlbi vae2is
+ sys #6, c8, c3, #1
+; CHECK: tlbi vae3is
+ sys #0, c8, c3, #2
+; CHECK: tlbi aside1is
+ sys #0, c8, c3, #3
+; CHECK: tlbi vaae1is
+ sys #4, c8, c3, #4
+; CHECK: tlbi alle1is
+ sys #0, c8, c3, #5
+; CHECK: tlbi vale1is
+ sys #0, c8, c3, #7
+; CHECK: tlbi vaale1is
+ sys #0, c8, c7, #0
+; CHECK: tlbi vmalle1
+ sys #4, c8, c7, #0
+; CHECK: tlbi alle2
+ sys #4, c8, c3, #5
+; CHECK: tlbi vale2is
+ sys #6, c8, c3, #5
+; CHECK: tlbi vale3is
+ sys #6, c8, c7, #0
+; CHECK: tlbi alle3
+ sys #0, c8, c7, #1
+; CHECK: tlbi vae1
+ sys #4, c8, c7, #1
+; CHECK: tlbi vae2
+ sys #6, c8, c7, #1
+; CHECK: tlbi vae3
+ sys #0, c8, c7, #2
+; CHECK: tlbi aside1
+ sys #0, c8, c7, #3
+; CHECK: tlbi vaae1
+ sys #4, c8, c7, #4
+; CHECK: tlbi alle1
+ sys #0, c8, c7, #5
+; CHECK: tlbi vale1
+ sys #4, c8, c7, #5
+; CHECK: tlbi vale2
+ sys #6, c8, c7, #5
+; CHECK: tlbi vale3
+ sys #0, c8, c7, #7
+; CHECK: tlbi vaale1
+ sys #4, c8, c4, #1
+; CHECK: tlbi ipas2e1
+ sys #4, c8, c4, #5
+; CHECK: tlbi ipas2le1
+ sys #4, c8, c0, #1
+; CHECK: tlbi ipas2e1is
+ sys #4, c8, c0, #5
+; CHECK: tlbi ipas2le1is
+ sys #4, c8, c7, #6
+; CHECK: tlbi vmalls12e1
+ sys #4, c8, c3, #6
+; CHECK: tlbi vmalls12e1is
+
+ ic ialluis
+; CHECK: ic ialluis ; encoding: [0x1f,0x71,0x08,0xd5]
+ ic iallu
+; CHECK: ic iallu ; encoding: [0x1f,0x75,0x08,0xd5]
+ ic ivau, x0
+; CHECK: ic ivau, x0 ; encoding: [0x20,0x75,0x0b,0xd5]
+
+ dc zva, x0
+; CHECK: dc zva, x0 ; encoding: [0x20,0x74,0x0b,0xd5]
+ dc ivac, x0
+; CHECK: dc ivac, x0 ; encoding: [0x20,0x76,0x08,0xd5]
+ dc isw, x0
+; CHECK: dc isw, x0 ; encoding: [0x40,0x76,0x08,0xd5]
+ dc cvac, x0
+; CHECK: dc cvac, x0 ; encoding: [0x20,0x7a,0x0b,0xd5]
+ dc csw, x0
+; CHECK: dc csw, x0 ; encoding: [0x40,0x7a,0x08,0xd5]
+ dc cvau, x0
+; CHECK: dc cvau, x0 ; encoding: [0x20,0x7b,0x0b,0xd5]
+ dc civac, x0
+; CHECK: dc civac, x0 ; encoding: [0x20,0x7e,0x0b,0xd5]
+ dc cisw, x0
+; CHECK: dc cisw, x0 ; encoding: [0x40,0x7e,0x08,0xd5]
+
+ at s1e1r, x0
+; CHECK: at s1e1r, x0 ; encoding: [0x00,0x78,0x08,0xd5]
+ at s1e2r, x0
+; CHECK: at s1e2r, x0 ; encoding: [0x00,0x78,0x0c,0xd5]
+ at s1e3r, x0
+; CHECK: at s1e3r, x0 ; encoding: [0x00,0x78,0x0e,0xd5]
+ at s1e1w, x0
+; CHECK: at s1e1w, x0 ; encoding: [0x20,0x78,0x08,0xd5]
+ at s1e2w, x0
+; CHECK: at s1e2w, x0 ; encoding: [0x20,0x78,0x0c,0xd5]
+ at s1e3w, x0
+; CHECK: at s1e3w, x0 ; encoding: [0x20,0x78,0x0e,0xd5]
+ at s1e0r, x0
+; CHECK: at s1e0r, x0 ; encoding: [0x40,0x78,0x08,0xd5]
+ at s1e0w, x0
+; CHECK: at s1e0w, x0 ; encoding: [0x60,0x78,0x08,0xd5]
+ at s12e1r, x0
+; CHECK: at s12e1r, x0 ; encoding: [0x80,0x78,0x0c,0xd5]
+ at s12e1w, x0
+; CHECK: at s12e1w, x0 ; encoding: [0xa0,0x78,0x0c,0xd5]
+ at s12e0r, x0
+; CHECK: at s12e0r, x0 ; encoding: [0xc0,0x78,0x0c,0xd5]
+ at s12e0w, x0
+; CHECK: at s12e0w, x0 ; encoding: [0xe0,0x78,0x0c,0xd5]
+
+ tlbi vmalle1is
+; CHECK: tlbi vmalle1is ; encoding: [0x1f,0x83,0x08,0xd5]
+ tlbi alle2is
+; CHECK: tlbi alle2is ; encoding: [0x1f,0x83,0x0c,0xd5]
+ tlbi alle3is
+; CHECK: tlbi alle3is ; encoding: [0x1f,0x83,0x0e,0xd5]
+ tlbi vae1is, x0
+; CHECK: tlbi vae1is, x0 ; encoding: [0x20,0x83,0x08,0xd5]
+ tlbi vae2is, x0
+; CHECK: tlbi vae2is, x0 ; encoding: [0x20,0x83,0x0c,0xd5]
+ tlbi vae3is, x0
+; CHECK: tlbi vae3is, x0 ; encoding: [0x20,0x83,0x0e,0xd5]
+ tlbi aside1is, x0
+; CHECK: tlbi aside1is, x0 ; encoding: [0x40,0x83,0x08,0xd5]
+ tlbi vaae1is, x0
+; CHECK: tlbi vaae1is, x0 ; encoding: [0x60,0x83,0x08,0xd5]
+ tlbi alle1is
+; CHECK: tlbi alle1is ; encoding: [0x9f,0x83,0x0c,0xd5]
+ tlbi vale1is, x0
+; CHECK: tlbi vale1is, x0 ; encoding: [0xa0,0x83,0x08,0xd5]
+ tlbi vaale1is, x0
+; CHECK: tlbi vaale1is, x0 ; encoding: [0xe0,0x83,0x08,0xd5]
+ tlbi vmalle1
+; CHECK: tlbi vmalle1 ; encoding: [0x1f,0x87,0x08,0xd5]
+ tlbi alle2
+; CHECK: tlbi alle2 ; encoding: [0x1f,0x87,0x0c,0xd5]
+ tlbi vale2is, x0
+; CHECK: tlbi vale2is, x0 ; encoding: [0xa0,0x83,0x0c,0xd5]
+ tlbi vale3is, x0
+; CHECK: tlbi vale3is, x0 ; encoding: [0xa0,0x83,0x0e,0xd5]
+ tlbi alle3
+; CHECK: tlbi alle3 ; encoding: [0x1f,0x87,0x0e,0xd5]
+ tlbi vae1, x0
+; CHECK: tlbi vae1, x0 ; encoding: [0x20,0x87,0x08,0xd5]
+ tlbi vae2, x0
+; CHECK: tlbi vae2, x0 ; encoding: [0x20,0x87,0x0c,0xd5]
+ tlbi vae3, x0
+; CHECK: tlbi vae3, x0 ; encoding: [0x20,0x87,0x0e,0xd5]
+ tlbi aside1, x0
+; CHECK: tlbi aside1, x0 ; encoding: [0x40,0x87,0x08,0xd5]
+ tlbi vaae1, x0
+; CHECK: tlbi vaae1, x0 ; encoding: [0x60,0x87,0x08,0xd5]
+ tlbi alle1
+; CHECK: tlbi alle1 ; encoding: [0x9f,0x87,0x0c,0xd5]
+ tlbi vale1, x0
+; CHECK: tlbi vale1, x0 ; encoding: [0xa0,0x87,0x08,0xd5]
+ tlbi vale2, x0
+; CHECK: tlbi vale2, x0 ; encoding: [0xa0,0x87,0x0c,0xd5]
+ tlbi vale3, x0
+; CHECK: tlbi vale3, x0 ; encoding: [0xa0,0x87,0x0e,0xd5]
+ tlbi vaale1, x0
+; CHECK: tlbi vaale1, x0 ; encoding: [0xe0,0x87,0x08,0xd5]
+ tlbi ipas2e1, x0
+; CHECK: tlbi ipas2e1, x0 ; encoding: [0x20,0x84,0x0c,0xd5]
+ tlbi ipas2le1, x0
+; CHECK: tlbi ipas2le1, x0 ; encoding: [0xa0,0x84,0x0c,0xd5]
+ tlbi ipas2e1is, x0
+; CHECK: tlbi ipas2e1is, x0 ; encoding: [0x20,0x80,0x0c,0xd5]
+ tlbi ipas2le1is, x0
+; CHECK: tlbi ipas2le1is, x0 ; encoding: [0xa0,0x80,0x0c,0xd5]
+ tlbi vmalls12e1
+; CHECK: tlbi vmalls12e1 ; encoding: [0xdf,0x87,0x0c,0xd5]
+ tlbi vmalls12e1is
+; CHECK: tlbi vmalls12e1is ; encoding: [0xdf,0x83,0x0c,0xd5]
+
+;-----------------------------------------------------------------------------
+; 5.8.5 Vector Arithmetic aliases
+;-----------------------------------------------------------------------------
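+; The less-than comparisons (CMLS/CMLO/CMLE/CMLT and the FCM/FAC forms) have
+; no two-register encodings of their own; they print as the greater-than
+; variants with the source operands swapped.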
+
+ cmls.8b v0, v2, v1
+ cmls.16b v0, v2, v1
+ cmls.4h v0, v2, v1
+ cmls.8h v0, v2, v1
+ cmls.2s v0, v2, v1
+ cmls.4s v0, v2, v1
+ cmls.2d v0, v2, v1
+; CHECK: cmhs.8b v0, v1, v2
+; CHECK: cmhs.16b v0, v1, v2
+; CHECK: cmhs.4h v0, v1, v2
+; CHECK: cmhs.8h v0, v1, v2
+; CHECK: cmhs.2s v0, v1, v2
+; CHECK: cmhs.4s v0, v1, v2
+; CHECK: cmhs.2d v0, v1, v2
+
+ cmlo.8b v0, v2, v1
+ cmlo.16b v0, v2, v1
+ cmlo.4h v0, v2, v1
+ cmlo.8h v0, v2, v1
+ cmlo.2s v0, v2, v1
+ cmlo.4s v0, v2, v1
+ cmlo.2d v0, v2, v1
+; CHECK: cmhi.8b v0, v1, v2
+; CHECK: cmhi.16b v0, v1, v2
+; CHECK: cmhi.4h v0, v1, v2
+; CHECK: cmhi.8h v0, v1, v2
+; CHECK: cmhi.2s v0, v1, v2
+; CHECK: cmhi.4s v0, v1, v2
+; CHECK: cmhi.2d v0, v1, v2
+
+ cmle.8b v0, v2, v1
+ cmle.16b v0, v2, v1
+ cmle.4h v0, v2, v1
+ cmle.8h v0, v2, v1
+ cmle.2s v0, v2, v1
+ cmle.4s v0, v2, v1
+ cmle.2d v0, v2, v1
+; CHECK: cmge.8b v0, v1, v2
+; CHECK: cmge.16b v0, v1, v2
+; CHECK: cmge.4h v0, v1, v2
+; CHECK: cmge.8h v0, v1, v2
+; CHECK: cmge.2s v0, v1, v2
+; CHECK: cmge.4s v0, v1, v2
+; CHECK: cmge.2d v0, v1, v2
+
+ cmlt.8b v0, v2, v1
+ cmlt.16b v0, v2, v1
+ cmlt.4h v0, v2, v1
+ cmlt.8h v0, v2, v1
+ cmlt.2s v0, v2, v1
+ cmlt.4s v0, v2, v1
+ cmlt.2d v0, v2, v1
+; CHECK: cmgt.8b v0, v1, v2
+; CHECK: cmgt.16b v0, v1, v2
+; CHECK: cmgt.4h v0, v1, v2
+; CHECK: cmgt.8h v0, v1, v2
+; CHECK: cmgt.2s v0, v1, v2
+; CHECK: cmgt.4s v0, v1, v2
+; CHECK: cmgt.2d v0, v1, v2
+
+ fcmle.2s v0, v2, v1
+ fcmle.4s v0, v2, v1
+ fcmle.2d v0, v2, v1
+; CHECK: fcmge.2s v0, v1, v2
+; CHECK: fcmge.4s v0, v1, v2
+; CHECK: fcmge.2d v0, v1, v2
+
+ fcmlt.2s v0, v2, v1
+ fcmlt.4s v0, v2, v1
+ fcmlt.2d v0, v2, v1
+; CHECK: fcmgt.2s v0, v1, v2
+; CHECK: fcmgt.4s v0, v1, v2
+; CHECK: fcmgt.2d v0, v1, v2
+
+ facle.2s v0, v2, v1
+ facle.4s v0, v2, v1
+ facle.2d v0, v2, v1
+; CHECK: facge.2s v0, v1, v2
+; CHECK: facge.4s v0, v1, v2
+; CHECK: facge.2d v0, v1, v2
+
+ faclt.2s v0, v2, v1
+ faclt.4s v0, v2, v1
+ faclt.2d v0, v2, v1
+; CHECK: facgt.2s v0, v1, v2
+; CHECK: facgt.4s v0, v1, v2
+; CHECK: facgt.2d v0, v1, v2
+
+;-----------------------------------------------------------------------------
+; 5.8.6 Scalar Arithmetic aliases
+;-----------------------------------------------------------------------------
+
+ cmls d0, d2, d1
+; CHECK: cmhs d0, d1, d2
+
+ cmle d0, d2, d1
+; CHECK: cmge d0, d1, d2
+
+ cmlo d0, d2, d1
+; CHECK: cmhi d0, d1, d2
+
+ cmlt d0, d2, d1
+; CHECK: cmgt d0, d1, d2
+
+ fcmle s0, s2, s1
+ fcmle d0, d2, d1
+; CHECK: fcmge s0, s1, s2
+; CHECK: fcmge d0, d1, d2
+
+ fcmlt s0, s2, s1
+ fcmlt d0, d2, d1
+; CHECK: fcmgt s0, s1, s2
+; CHECK: fcmgt d0, d1, d2
+
+ facle s0, s2, s1
+ facle d0, d2, d1
+; CHECK: facge s0, s1, s2
+; CHECK: facge d0, d1, d2
+
+ faclt s0, s2, s1
+ faclt d0, d2, d1
+; CHECK: facgt s0, s1, s2
+; CHECK: facgt d0, d1, d2
+
+;-----------------------------------------------------------------------------
+; 5.8.14 Vector Shift (immediate)
+;-----------------------------------------------------------------------------
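+; SXTL and UXTL are SSHLL and USHLL with a shift of #0.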
+ sxtl v1.8h, v2.8b
+; CHECK: sshll.8h v1, v2, #0
+ sxtl.8h v1, v2
+; CHECK: sshll.8h v1, v2, #0
+
+ sxtl v1.4s, v2.4h
+; CHECK: sshll.4s v1, v2, #0
+ sxtl.4s v1, v2
+; CHECK: sshll.4s v1, v2, #0
+
+ sxtl v1.2d, v2.2s
+; CHECK: sshll.2d v1, v2, #0
+ sxtl.2d v1, v2
+; CHECK: sshll.2d v1, v2, #0
+
+ sxtl2 v1.8h, v2.16b
+; CHECK: sshll2.8h v1, v2, #0
+ sxtl2.8h v1, v2
+; CHECK: sshll2.8h v1, v2, #0
+
+ sxtl2 v1.4s, v2.8h
+; CHECK: sshll2.4s v1, v2, #0
+ sxtl2.4s v1, v2
+; CHECK: sshll2.4s v1, v2, #0
+
+ sxtl2 v1.2d, v2.4s
+; CHECK: sshll2.2d v1, v2, #0
+ sxtl2.2d v1, v2
+; CHECK: sshll2.2d v1, v2, #0
+
+ uxtl v1.8h, v2.8b
+; CHECK: ushll.8h v1, v2, #0
+ uxtl.8h v1, v2
+; CHECK: ushll.8h v1, v2, #0
+
+ uxtl v1.4s, v2.4h
+; CHECK: ushll.4s v1, v2, #0
+ uxtl.4s v1, v2
+; CHECK: ushll.4s v1, v2, #0
+
+ uxtl v1.2d, v2.2s
+; CHECK: ushll.2d v1, v2, #0
+ uxtl.2d v1, v2
+; CHECK: ushll.2d v1, v2, #0
+
+ uxtl2 v1.8h, v2.16b
+; CHECK: ushll2.8h v1, v2, #0
+ uxtl2.8h v1, v2
+; CHECK: ushll2.8h v1, v2, #0
+
+ uxtl2 v1.4s, v2.8h
+; CHECK: ushll2.4s v1, v2, #0
+ uxtl2.4s v1, v2
+; CHECK: ushll2.4s v1, v2, #0
+
+ uxtl2 v1.2d, v2.4s
+; CHECK: ushll2.2d v1, v2, #0
+ uxtl2.2d v1, v2
+; CHECK: ushll2.2d v1, v2, #0
+
+
+;-----------------------------------------------------------------------------
+; MOVI verbose syntax with shift operand omitted.
+;-----------------------------------------------------------------------------
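+; Arrangement specifiers are case-insensitive, and an omitted shift operand
+; defaults to LSL #0.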
+ movi v4.16b, #0x00
+ movi v4.16B, #0x01
+ movi v4.8b, #0x02
+ movi v4.8B, #0x03
+ movi v1.2d, #0x000000000000ff
+ movi v2.2D, #0x000000000000ff
+
+; CHECK: movi.16b v4, #0 ; encoding: [0x04,0xe4,0x00,0x4f]
+; CHECK: movi.16b v4, #0x1 ; encoding: [0x24,0xe4,0x00,0x4f]
+; CHECK: movi.8b v4, #0x2 ; encoding: [0x44,0xe4,0x00,0x0f]
+; CHECK: movi.8b v4, #0x3 ; encoding: [0x64,0xe4,0x00,0x0f]
+; CHECK: movi.2d v1, #0x000000000000ff ; encoding: [0x21,0xe4,0x00,0x6f]
+; CHECK: movi.2d v2, #0x000000000000ff ; encoding: [0x22,0xe4,0x00,0x6f]
diff --git a/test/MC/AArch64/arm64-arithmetic-encoding.s b/test/MC/AArch64/arm64-arithmetic-encoding.s
new file mode 100644
index 0000000..5fd5912
--- /dev/null
+++ b/test/MC/AArch64/arm64-arithmetic-encoding.s
@@ -0,0 +1,615 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -show-encoding < %s | FileCheck %s
+
+foo:
+;==---------------------------------------------------------------------------==
+; Add/Subtract with carry/borrow
+;==---------------------------------------------------------------------------==
+
+ adc w1, w2, w3
+ adc x1, x2, x3
+ adcs w5, w4, w3
+ adcs x5, x4, x3
+
+; CHECK: adc w1, w2, w3 ; encoding: [0x41,0x00,0x03,0x1a]
+; CHECK: adc x1, x2, x3 ; encoding: [0x41,0x00,0x03,0x9a]
+; CHECK: adcs w5, w4, w3 ; encoding: [0x85,0x00,0x03,0x3a]
+; CHECK: adcs x5, x4, x3 ; encoding: [0x85,0x00,0x03,0xba]
+
+ sbc w1, w2, w3
+ sbc x1, x2, x3
+ sbcs w1, w2, w3
+ sbcs x1, x2, x3
+
+; CHECK: sbc w1, w2, w3 ; encoding: [0x41,0x00,0x03,0x5a]
+; CHECK: sbc x1, x2, x3 ; encoding: [0x41,0x00,0x03,0xda]
+; CHECK: sbcs w1, w2, w3 ; encoding: [0x41,0x00,0x03,0x7a]
+; CHECK: sbcs x1, x2, x3 ; encoding: [0x41,0x00,0x03,0xfa]
+
+;==---------------------------------------------------------------------------==
+; Add/Subtract with (optionally shifted) immediate
+;==---------------------------------------------------------------------------==
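+; The immediate may take an optional LSL of #0 or #12; the LSL #0 form prints
+; without the shift.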
+
+ add w3, w4, #1024
+ add w3, w4, #1024, lsl #0
+ add x3, x4, #1024
+ add x3, x4, #1024, lsl #0
+
+; CHECK: add w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x11]
+; CHECK: add w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x11]
+; CHECK: add x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0x91]
+; CHECK: add x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0x91]
+
+ add w3, w4, #1024, lsl #12
+ add w3, w4, #4194304
+ add w3, w4, #0, lsl #12
+ add x3, x4, #1024, lsl #12
+ add x3, x4, #4194304
+ add x3, x4, #0, lsl #12
+ add sp, sp, #32
+
+; CHECK: add w3, w4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x11]
+; CHECK: add w3, w4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x11]
+; CHECK: add w3, w4, #0, lsl #12 ; encoding: [0x83,0x00,0x40,0x11]
+; CHECK: add x3, x4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x91]
+; CHECK: add x3, x4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x91]
+; CHECK: add x3, x4, #0, lsl #12 ; encoding: [0x83,0x00,0x40,0x91]
+; CHECK: add sp, sp, #32 ; encoding: [0xff,0x83,0x00,0x91]
+
+ adds w3, w4, #1024
+ adds w3, w4, #1024, lsl #0
+ adds w3, w4, #1024, lsl #12
+ adds x3, x4, #1024
+ adds x3, x4, #1024, lsl #0
+ adds x3, x4, #1024, lsl #12
+
+; CHECK: adds w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x31]
+; CHECK: adds w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x31]
+; CHECK: adds w3, w4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x31]
+; CHECK: adds x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xb1]
+; CHECK: adds x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xb1]
+; CHECK: adds x3, x4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0xb1]
+
+ sub w3, w4, #1024
+ sub w3, w4, #1024, lsl #0
+ sub w3, w4, #1024, lsl #12
+ sub x3, x4, #1024
+ sub x3, x4, #1024, lsl #0
+ sub x3, x4, #1024, lsl #12
+ sub sp, sp, #32
+
+; CHECK: sub w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x51]
+; CHECK: sub w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x51]
+; CHECK: sub w3, w4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x51]
+; CHECK: sub x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xd1]
+; CHECK: sub x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xd1]
+; CHECK: sub x3, x4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0xd1]
+; CHECK: sub sp, sp, #32 ; encoding: [0xff,0x83,0x00,0xd1]
+
+ subs w3, w4, #1024
+ subs w3, w4, #1024, lsl #0
+ subs w3, w4, #1024, lsl #12
+ subs x3, x4, #1024
+ subs x3, x4, #1024, lsl #0
+ subs x3, x4, #1024, lsl #12
+
+; CHECK: subs w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x71]
+; CHECK: subs w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x71]
+; CHECK: subs w3, w4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0x71]
+; CHECK: subs x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xf1]
+; CHECK: subs x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xf1]
+; CHECK: subs x3, x4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0xf1]
+
+;==---------------------------------------------------------------------------==
+; Add/Subtract register with (optional) shift
+;==---------------------------------------------------------------------------==
+
+ add w12, w13, w14
+ add x12, x13, x14
+ add w12, w13, w14, lsl #12
+ add x12, x13, x14, lsl #12
+ add x12, x13, x14, lsr #42
+ add x12, x13, x14, asr #39
+
+; CHECK: add w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x0b]
+; CHECK: add x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0x8b]
+; CHECK: add w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x0b]
+; CHECK: add x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x8b]
+; CHECK: add x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0x8b]
+; CHECK: add x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0x8b]
+
+ sub w12, w13, w14
+ sub x12, x13, x14
+ sub w12, w13, w14, lsl #12
+ sub x12, x13, x14, lsl #12
+ sub x12, x13, x14, lsr #42
+ sub x12, x13, x14, asr #39
+
+; CHECK: sub w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x4b]
+; CHECK: sub x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0xcb]
+; CHECK: sub w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x4b]
+; CHECK: sub x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0xcb]
+; CHECK: sub x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0xcb]
+; CHECK: sub x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0xcb]
+
+ adds w12, w13, w14
+ adds x12, x13, x14
+ adds w12, w13, w14, lsl #12
+ adds x12, x13, x14, lsl #12
+ adds x12, x13, x14, lsr #42
+ adds x12, x13, x14, asr #39
+
+; CHECK: adds w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x2b]
+; CHECK: adds x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0xab]
+; CHECK: adds w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x2b]
+; CHECK: adds x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0xab]
+; CHECK: adds x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0xab]
+; CHECK: adds x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0xab]
+
+ subs w12, w13, w14
+ subs x12, x13, x14
+ subs w12, w13, w14, lsl #12
+ subs x12, x13, x14, lsl #12
+ subs x12, x13, x14, lsr #42
+ subs x12, x13, x14, asr #39
+
+; CHECK: subs w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x6b]
+; CHECK: subs x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0xeb]
+; CHECK: subs w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x6b]
+; CHECK: subs x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0xeb]
+; CHECK: subs x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0xeb]
+; CHECK: subs x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0xeb]
+
+; Check use of upper case register names rdar://14354073
+ add X2, X2, X2
+; CHECK: add x2, x2, x2 ; encoding: [0x42,0x00,0x02,0x8b]
+
+;==---------------------------------------------------------------------------==
+; Add/Subtract with (optional) extend
+;==---------------------------------------------------------------------------==
+
+ add w1, w2, w3, uxtb
+ add w1, w2, w3, uxth
+ add w1, w2, w3, uxtw
+ add w1, w2, w3, uxtx
+ add w1, w2, w3, sxtb
+ add w1, w2, w3, sxth
+ add w1, w2, w3, sxtw
+ add w1, w2, w3, sxtx
+
+; CHECK: add w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x0b]
+; CHECK: add w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x0b]
+; CHECK: add w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x0b]
+; CHECK: add w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x0b]
+; CHECK: add w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x0b]
+; CHECK: add w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x0b]
+; CHECK: add w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x0b]
+; CHECK: add w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x0b]
+
+ add x1, x2, w3, uxtb
+ add x1, x2, w3, uxth
+ add x1, x2, w3, uxtw
+ add x1, x2, w3, sxtb
+ add x1, x2, w3, sxth
+ add x1, x2, w3, sxtw
+
+; CHECK: add x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x8b]
+; CHECK: add x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0x8b]
+; CHECK: add x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x8b]
+; CHECK: add x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x8b]
+; CHECK: add x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x8b]
+; CHECK: add x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x8b]
+
+ add w1, wsp, w3
+ add w1, wsp, w3, uxtw #0
+ add w2, wsp, w3, lsl #1
+ add sp, x2, x3
+ add sp, x2, x3, uxtx #0
+
+; CHECK: add w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x0b]
+; CHECK: add w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x0b]
+; CHECK: add w2, wsp, w3, lsl #1 ; encoding: [0xe2,0x47,0x23,0x0b]
+; CHECK: add sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0x8b]
+; CHECK: add sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0x8b]
+
+ sub w1, w2, w3, uxtb
+ sub w1, w2, w3, uxth
+ sub w1, w2, w3, uxtw
+ sub w1, w2, w3, uxtx
+ sub w1, w2, w3, sxtb
+ sub w1, w2, w3, sxth
+ sub w1, w2, w3, sxtw
+ sub w1, w2, w3, sxtx
+
+; CHECK: sub w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x4b]
+; CHECK: sub w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x4b]
+; CHECK: sub w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x4b]
+; CHECK: sub w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x4b]
+; CHECK: sub w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x4b]
+; CHECK: sub w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x4b]
+; CHECK: sub w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x4b]
+; CHECK: sub w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x4b]
+
+ sub x1, x2, w3, uxtb
+ sub x1, x2, w3, uxth
+ sub x1, x2, w3, uxtw
+ sub x1, x2, w3, sxtb
+ sub x1, x2, w3, sxth
+ sub x1, x2, w3, sxtw
+
+; CHECK: sub x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0xcb]
+; CHECK: sub x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0xcb]
+; CHECK: sub x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0xcb]
+; CHECK: sub x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0xcb]
+; CHECK: sub x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0xcb]
+; CHECK: sub x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0xcb]
+
+ sub w1, wsp, w3
+ sub w1, wsp, w3, uxtw #0
+ sub sp, x2, x3
+ sub sp, x2, x3, uxtx #0
+ sub sp, x3, x7, lsl #4
+
+; CHECK: sub w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x4b]
+; CHECK: sub w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x4b]
+; CHECK: sub sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0xcb]
+; CHECK: sub sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0xcb]
+; CHECK: sub sp, x3, x7, lsl #4 ; encoding: [0x7f,0x70,0x27,0xcb]
+
+ adds w1, w2, w3, uxtb
+ adds w1, w2, w3, uxth
+ adds w1, w2, w3, uxtw
+ adds w1, w2, w3, uxtx
+ adds w1, w2, w3, sxtb
+ adds w1, w2, w3, sxth
+ adds w1, w2, w3, sxtw
+ adds w1, w2, w3, sxtx
+
+; CHECK: adds w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x2b]
+; CHECK: adds w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x2b]
+; CHECK: adds w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x2b]
+; CHECK: adds w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x2b]
+; CHECK: adds w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x2b]
+; CHECK: adds w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x2b]
+; CHECK: adds w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x2b]
+; CHECK: adds w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x2b]
+
+ adds x1, x2, w3, uxtb
+ adds x1, x2, w3, uxth
+ adds x1, x2, w3, uxtw
+ adds x1, x2, w3, uxtx
+ adds x1, x2, w3, sxtb
+ adds x1, x2, w3, sxth
+ adds x1, x2, w3, sxtw
+ adds x1, x2, w3, sxtx
+
+; CHECK: adds x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0xab]
+; CHECK: adds x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0xab]
+; CHECK: adds x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0xab]
+; CHECK: adds x1, x2, w3, uxtx ; encoding: [0x41,0x60,0x23,0xab]
+; CHECK: adds x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0xab]
+; CHECK: adds x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0xab]
+; CHECK: adds x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0xab]
+; CHECK: adds x1, x2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0xab]
+
+ adds w1, wsp, w3
+ adds w1, wsp, w3, uxtw #0
+ adds wzr, wsp, w3, lsl #4
+
+; CHECK: adds w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x2b]
+; CHECK: adds w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x2b]
+; CHECK: cmn wsp, w3, lsl #4 ; encoding: [0xff,0x53,0x23,0x2b]
+
+ subs w1, w2, w3, uxtb
+ subs w1, w2, w3, uxth
+ subs w1, w2, w3, uxtw
+ subs w1, w2, w3, uxtx
+ subs w1, w2, w3, sxtb
+ subs w1, w2, w3, sxth
+ subs w1, w2, w3, sxtw
+ subs w1, w2, w3, sxtx
+
+; CHECK: subs w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x6b]
+; CHECK: subs w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x6b]
+; CHECK: subs w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x6b]
+; CHECK: subs w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x6b]
+; CHECK: subs w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x6b]
+; CHECK: subs w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x6b]
+; CHECK: subs w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x6b]
+; CHECK: subs w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x6b]
+
+ subs x1, x2, w3, uxtb
+ subs x1, x2, w3, uxth
+ subs x1, x2, w3, uxtw
+ subs x1, x2, w3, uxtx
+ subs x1, x2, w3, sxtb
+ subs x1, x2, w3, sxth
+ subs x1, x2, w3, sxtw
+ subs x1, x2, w3, sxtx
+
+; CHECK: subs x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0xeb]
+; CHECK: subs x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0xeb]
+; CHECK: subs x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0xeb]
+; CHECK: subs x1, x2, w3, uxtx ; encoding: [0x41,0x60,0x23,0xeb]
+; CHECK: subs x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0xeb]
+; CHECK: subs x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0xeb]
+; CHECK: subs x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0xeb]
+; CHECK: subs x1, x2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0xeb]
+
+ subs w1, wsp, w3
+ subs w1, wsp, w3, uxtw #0
+
+; CHECK: subs w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x6b]
+; CHECK: subs w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x6b]
+
+ cmp wsp, w9, lsl #0
+ subs x3, sp, x9, lsl #2
+ cmp wsp, w8, uxtw
+ subs wzr, wsp, w8, uxtw
+ cmp sp, w8, uxtw
+ subs xzr, sp, w8, uxtw
+
+; CHECK: cmp wsp, w9 ; encoding: [0xff,0x43,0x29,0x6b]
+; CHECK: subs x3, sp, x9, lsl #2 ; encoding: [0xe3,0x6b,0x29,0xeb]
+; CHECK: cmp wsp, w8 ; encoding: [0xff,0x43,0x28,0x6b]
+; CHECK: cmp wsp, w8 ; encoding: [0xff,0x43,0x28,0x6b]
+; CHECK: cmp sp, w8, uxtw ; encoding: [0xff,0x43,0x28,0xeb]
+; CHECK: cmp sp, w8, uxtw ; encoding: [0xff,0x43,0x28,0xeb]
+
+ sub wsp, w9, w8, uxtw
+ sub w1, wsp, w8, uxtw
+ sub wsp, wsp, w8, uxtw
+ sub sp, x9, w8, uxtw
+ sub x1, sp, w8, uxtw
+ sub sp, sp, w8, uxtw
+ subs w1, wsp, w8, uxtw
+ subs x1, sp, w8, uxtw
+
+; CHECK: sub wsp, w9, w8 ; encoding: [0x3f,0x41,0x28,0x4b]
+; CHECK: sub w1, wsp, w8 ; encoding: [0xe1,0x43,0x28,0x4b]
+; CHECK: sub wsp, wsp, w8 ; encoding: [0xff,0x43,0x28,0x4b]
+; CHECK: sub sp, x9, w8, uxtw ; encoding: [0x3f,0x41,0x28,0xcb]
+; CHECK: sub x1, sp, w8, uxtw ; encoding: [0xe1,0x43,0x28,0xcb]
+; CHECK: sub sp, sp, w8, uxtw ; encoding: [0xff,0x43,0x28,0xcb]
+; CHECK: subs w1, wsp, w8 ; encoding: [0xe1,0x43,0x28,0x6b]
+; CHECK: subs x1, sp, w8, uxtw ; encoding: [0xe1,0x43,0x28,0xeb]
+
+;==---------------------------------------------------------------------------==
+; Signed/Unsigned divide
+;==---------------------------------------------------------------------------==
+
+ sdiv w1, w2, w3
+ sdiv x1, x2, x3
+ udiv w1, w2, w3
+ udiv x1, x2, x3
+
+; CHECK: sdiv w1, w2, w3 ; encoding: [0x41,0x0c,0xc3,0x1a]
+; CHECK: sdiv x1, x2, x3 ; encoding: [0x41,0x0c,0xc3,0x9a]
+; CHECK: udiv w1, w2, w3 ; encoding: [0x41,0x08,0xc3,0x1a]
+; CHECK: udiv x1, x2, x3 ; encoding: [0x41,0x08,0xc3,0x9a]
+
+;==---------------------------------------------------------------------------==
+; Variable shifts
+;==---------------------------------------------------------------------------==
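+; ASR, LSL, LSR, and ROR (register) are aliases for ASRV, LSLV, LSRV, and
+; RORV, so both spellings below produce identical encodings.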
+
+ asrv w1, w2, w3
+ asrv x1, x2, x3
+ asr w1, w2, w3
+ asr x1, x2, x3
+ lslv w1, w2, w3
+ lslv x1, x2, x3
+ lsl w1, w2, w3
+ lsl x1, x2, x3
+ lsrv w1, w2, w3
+ lsrv x1, x2, x3
+ lsr w1, w2, w3
+ lsr x1, x2, x3
+ rorv w1, w2, w3
+ rorv x1, x2, x3
+ ror w1, w2, w3
+ ror x1, x2, x3
+
+; CHECK: encoding: [0x41,0x28,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x28,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x28,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x28,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x20,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x20,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x20,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x20,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x24,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x24,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x24,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x24,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x2c,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x2c,0xc3,0x9a]
+; CHECK: encoding: [0x41,0x2c,0xc3,0x1a]
+; CHECK: encoding: [0x41,0x2c,0xc3,0x9a]
+
+;==---------------------------------------------------------------------------==
+; One operand instructions
+;==---------------------------------------------------------------------------==
+
+ cls w1, w2
+ cls x1, x2
+ clz w1, w2
+ clz x1, x2
+ rbit w1, w2
+ rbit x1, x2
+ rev w1, w2
+ rev x1, x2
+ rev16 w1, w2
+ rev16 x1, x2
+ rev32 x1, x2
+
+; CHECK: encoding: [0x41,0x14,0xc0,0x5a]
+; CHECK: encoding: [0x41,0x14,0xc0,0xda]
+; CHECK: encoding: [0x41,0x10,0xc0,0x5a]
+; CHECK: encoding: [0x41,0x10,0xc0,0xda]
+; CHECK: encoding: [0x41,0x00,0xc0,0x5a]
+; CHECK: encoding: [0x41,0x00,0xc0,0xda]
+; CHECK: encoding: [0x41,0x08,0xc0,0x5a]
+; CHECK: encoding: [0x41,0x0c,0xc0,0xda]
+; CHECK: encoding: [0x41,0x04,0xc0,0x5a]
+; CHECK: encoding: [0x41,0x04,0xc0,0xda]
+; CHECK: encoding: [0x41,0x08,0xc0,0xda]
+
+;==---------------------------------------------------------------------------==
+; 6.6.1 Multiply-add instructions
+;==---------------------------------------------------------------------------==
+
+ madd w1, w2, w3, w4
+ madd x1, x2, x3, x4
+ msub w1, w2, w3, w4
+ msub x1, x2, x3, x4
+ smaddl x1, w2, w3, x4
+ smsubl x1, w2, w3, x4
+ umaddl x1, w2, w3, x4
+ umsubl x1, w2, w3, x4
+
+; CHECK: madd w1, w2, w3, w4 ; encoding: [0x41,0x10,0x03,0x1b]
+; CHECK: madd x1, x2, x3, x4 ; encoding: [0x41,0x10,0x03,0x9b]
+; CHECK: msub w1, w2, w3, w4 ; encoding: [0x41,0x90,0x03,0x1b]
+; CHECK: msub x1, x2, x3, x4 ; encoding: [0x41,0x90,0x03,0x9b]
+; CHECK: smaddl x1, w2, w3, x4 ; encoding: [0x41,0x10,0x23,0x9b]
+; CHECK: smsubl x1, w2, w3, x4 ; encoding: [0x41,0x90,0x23,0x9b]
+; CHECK: umaddl x1, w2, w3, x4 ; encoding: [0x41,0x10,0xa3,0x9b]
+; CHECK: umsubl x1, w2, w3, x4 ; encoding: [0x41,0x90,0xa3,0x9b]
+
+;==---------------------------------------------------------------------------==
+; Multiply-high instructions
+;==---------------------------------------------------------------------------==
+
+ smulh x1, x2, x3
+ umulh x1, x2, x3
+
+; CHECK: smulh x1, x2, x3 ; encoding: [0x41,0x7c,0x43,0x9b]
+; CHECK: umulh x1, x2, x3 ; encoding: [0x41,0x7c,0xc3,0x9b]
+
+;==---------------------------------------------------------------------------==
+; Move immediate instructions
+;==---------------------------------------------------------------------------==
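+; The optional shift on MOVZ, MOVN, and MOVK must be a multiple of 16.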
+
+ movz w0, #1
+ movz x0, #1
+ movz w0, #1, lsl #16
+ movz x0, #1, lsl #16
+
+; CHECK: movz w0, #0x1 ; encoding: [0x20,0x00,0x80,0x52]
+; CHECK: movz x0, #0x1 ; encoding: [0x20,0x00,0x80,0xd2]
+; CHECK: movz w0, #0x1, lsl #16 ; encoding: [0x20,0x00,0xa0,0x52]
+; CHECK: movz x0, #0x1, lsl #16 ; encoding: [0x20,0x00,0xa0,0xd2]
+
+ movn w0, #2
+ movn x0, #2
+ movn w0, #2, lsl #16
+ movn x0, #2, lsl #16
+
+; CHECK: movn w0, #0x2 ; encoding: [0x40,0x00,0x80,0x12]
+; CHECK: movn x0, #0x2 ; encoding: [0x40,0x00,0x80,0x92]
+; CHECK: movn w0, #0x2, lsl #16 ; encoding: [0x40,0x00,0xa0,0x12]
+; CHECK: movn x0, #0x2, lsl #16 ; encoding: [0x40,0x00,0xa0,0x92]
+
+ movk w0, #1
+ movk x0, #1
+ movk w0, #1, lsl #16
+ movk x0, #1, lsl #16
+
+; CHECK: movk w0, #0x1 ; encoding: [0x20,0x00,0x80,0x72]
+; CHECK: movk x0, #0x1 ; encoding: [0x20,0x00,0x80,0xf2]
+; CHECK: movk w0, #0x1, lsl #16 ; encoding: [0x20,0x00,0xa0,0x72]
+; CHECK: movk x0, #0x1, lsl #16 ; encoding: [0x20,0x00,0xa0,0xf2]
+
+;==---------------------------------------------------------------------------==
+; Conditionally set flags instructions
+;==---------------------------------------------------------------------------==
+
+ ccmn w1, #2, #3, eq
+ ccmn x1, #2, #3, eq
+ ccmp w1, #2, #3, eq
+ ccmp x1, #2, #3, eq
+
+; CHECK: encoding: [0x23,0x08,0x42,0x3a]
+; CHECK: encoding: [0x23,0x08,0x42,0xba]
+; CHECK: encoding: [0x23,0x08,0x42,0x7a]
+; CHECK: encoding: [0x23,0x08,0x42,0xfa]
+
+ ccmn w1, w2, #3, eq
+ ccmn x1, x2, #3, eq
+ ccmp w1, w2, #3, eq
+ ccmp x1, x2, #3, eq
+
+; CHECK: encoding: [0x23,0x00,0x42,0x3a]
+; CHECK: encoding: [0x23,0x00,0x42,0xba]
+; CHECK: encoding: [0x23,0x00,0x42,0x7a]
+; CHECK: encoding: [0x23,0x00,0x42,0xfa]
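+
+; Note (illustrative): ccmp/ccmn perform a conditional compare. For
+; "ccmp w1, #2, #3, eq": if the eq condition holds, NZCV is set from the
+; flags of (w1 - 2); otherwise NZCV is loaded directly from the immediate
+; nibble #3 (here N=0 Z=0 C=1 V=1).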
+
+;==---------------------------------------------------------------------------==
+; Conditional select instructions
+;==---------------------------------------------------------------------------==
+
+ csel w1, w2, w3, eq
+ csel x1, x2, x3, eq
+ csinc w1, w2, w3, eq
+ csinc x1, x2, x3, eq
+ csinv w1, w2, w3, eq
+ csinv x1, x2, x3, eq
+ csneg w1, w2, w3, eq
+ csneg x1, x2, x3, eq
+
+; CHECK: encoding: [0x41,0x00,0x83,0x1a]
+; CHECK: encoding: [0x41,0x00,0x83,0x9a]
+; CHECK: encoding: [0x41,0x04,0x83,0x1a]
+; CHECK: encoding: [0x41,0x04,0x83,0x9a]
+; CHECK: encoding: [0x41,0x00,0x83,0x5a]
+; CHECK: encoding: [0x41,0x00,0x83,0xda]
+; CHECK: encoding: [0x41,0x04,0x83,0x5a]
+; CHECK: encoding: [0x41,0x04,0x83,0xda]
+
+; Make sure we handle upper case, too. In particular, condition codes.
+ CSEL W16, W7, W27, EQ
+ CSEL W15, W6, W26, NE
+ CSEL W14, W5, W25, CS
+ CSEL W13, W4, W24, HS
+ csel w12, w3, w23, CC
+ csel w11, w2, w22, LO
+ csel w10, w1, w21, MI
+ csel x9, x9, x1, PL
+ csel x8, x8, x2, VS
+ CSEL X7, X7, X3, VC
+ CSEL X6, X7, X4, HI
+ CSEL X5, X6, X5, LS
+ CSEL X4, X5, X6, GE
+ csel x3, x4, x7, LT
+ csel x2, x3, x8, GT
+ csel x1, x2, x9, LE
+ csel x10, x1, x20, AL
+
+; CHECK: csel w16, w7, w27, eq ; encoding: [0xf0,0x00,0x9b,0x1a]
+; CHECK: csel w15, w6, w26, ne ; encoding: [0xcf,0x10,0x9a,0x1a]
+; CHECK: csel w14, w5, w25, hs ; encoding: [0xae,0x20,0x99,0x1a]
+; CHECK: csel w13, w4, w24, hs ; encoding: [0x8d,0x20,0x98,0x1a]
+; CHECK: csel w12, w3, w23, lo ; encoding: [0x6c,0x30,0x97,0x1a]
+; CHECK: csel w11, w2, w22, lo ; encoding: [0x4b,0x30,0x96,0x1a]
+; CHECK: csel w10, w1, w21, mi ; encoding: [0x2a,0x40,0x95,0x1a]
+; CHECK: csel x9, x9, x1, pl ; encoding: [0x29,0x51,0x81,0x9a]
+; CHECK: csel x8, x8, x2, vs ; encoding: [0x08,0x61,0x82,0x9a]
+; CHECK: csel x7, x7, x3, vc ; encoding: [0xe7,0x70,0x83,0x9a]
+; CHECK: csel x6, x7, x4, hi ; encoding: [0xe6,0x80,0x84,0x9a]
+; CHECK: csel x5, x6, x5, ls ; encoding: [0xc5,0x90,0x85,0x9a]
+; CHECK: csel x4, x5, x6, ge ; encoding: [0xa4,0xa0,0x86,0x9a]
+; CHECK: csel x3, x4, x7, lt ; encoding: [0x83,0xb0,0x87,0x9a]
+; CHECK: csel x2, x3, x8, gt ; encoding: [0x62,0xc0,0x88,0x9a]
+; CHECK: csel x1, x2, x9, le ; encoding: [0x41,0xd0,0x89,0x9a]
+; CHECK: csel x10, x1, x20, al ; encoding: [0x2a,0xe0,0x94,0x9a]
+
+
+;==---------------------------------------------------------------------------==
+; Scalar saturating arithmetic
+;==---------------------------------------------------------------------------==
+ uqxtn b4, h2
+ uqxtn h2, s3
+ uqxtn s9, d2
+
+; CHECK: uqxtn b4, h2 ; encoding: [0x44,0x48,0x21,0x7e]
+; CHECK: uqxtn h2, s3 ; encoding: [0x62,0x48,0x61,0x7e]
+; CHECK: uqxtn s9, d2 ; encoding: [0x49,0x48,0xa1,0x7e]
diff --git a/test/MC/AArch64/arm64-arm64-fixup.s b/test/MC/AArch64/arm64-arm64-fixup.s
new file mode 100644
index 0000000..81306fb
--- /dev/null
+++ b/test/MC/AArch64/arm64-arm64-fixup.s
@@ -0,0 +1,10 @@
+; RUN: llvm-mc < %s -triple arm64-apple-darwin --show-encoding | FileCheck %s
+
+foo:
+ adr x3, Lbar
+; CHECK: adr x3, Lbar ; encoding: [0x03'A',A,A,0x10'A']
+; CHECK: fixup A - offset: 0, value: Lbar, kind: fixup_aarch64_pcrel_adr_imm21
+Lbar:
+ adrp x3, _printf@page
+; CHECK: adrp x3, _printf@PAGE ; encoding: [0x03'A',A,A,0x90'A']
+; CHECK: fixup A - offset: 0, value: _printf@PAGE, kind: fixup_aarch64_pcrel_adrp_imm21
diff --git a/test/MC/AArch64/arm64-basic-a64-instructions.s b/test/MC/AArch64/arm64-basic-a64-instructions.s
new file mode 100644
index 0000000..2f58ead
--- /dev/null
+++ b/test/MC/AArch64/arm64-basic-a64-instructions.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -triple arm64 -mattr=+crc -show-encoding < %s | FileCheck %s
+
+ crc32b w5, w7, w20
+ crc32h w28, wzr, w30
+ crc32w w0, w1, w2
+ crc32x w7, w9, x20
+ crc32cb w9, w5, w4
+ crc32ch w13, w17, w25
+ crc32cw wzr, w3, w5
+ crc32cx w18, w16, xzr
+// CHECK: crc32b w5, w7, w20 // encoding: [0xe5,0x40,0xd4,0x1a]
+// CHECK: crc32h w28, wzr, w30 // encoding: [0xfc,0x47,0xde,0x1a]
+// CHECK: crc32w w0, w1, w2 // encoding: [0x20,0x48,0xc2,0x1a]
+// CHECK: crc32x w7, w9, x20 // encoding: [0x27,0x4d,0xd4,0x9a]
+// CHECK: crc32cb w9, w5, w4 // encoding: [0xa9,0x50,0xc4,0x1a]
+// CHECK: crc32ch w13, w17, w25 // encoding: [0x2d,0x56,0xd9,0x1a]
+// CHECK: crc32cw wzr, w3, w5 // encoding: [0x7f,0x58,0xc5,0x1a]
+// CHECK: crc32cx w18, w16, xzr // encoding: [0x12,0x5e,0xdf,0x9a]
diff --git a/test/MC/AArch64/arm64-be-datalayout.s b/test/MC/AArch64/arm64-be-datalayout.s
new file mode 100644
index 0000000..f448a4b
--- /dev/null
+++ b/test/MC/AArch64/arm64-be-datalayout.s
@@ -0,0 +1,4 @@
+// RUN: llvm-mc -filetype=obj -triple arm64_be %s | llvm-readobj -section-data -sections | FileCheck %s
+
+// CHECK: 0000: 00123456 789ABCDE
+foo: .xword 0x123456789abcde
diff --git a/test/MC/AArch64/arm64-bitfield-encoding.s b/test/MC/AArch64/arm64-bitfield-encoding.s
new file mode 100644
index 0000000..1589aa7
--- /dev/null
+++ b/test/MC/AArch64/arm64-bitfield-encoding.s
@@ -0,0 +1,38 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
+
+foo:
+;==---------------------------------------------------------------------------==
+; 5.4.4 Bitfield Operations
+;==---------------------------------------------------------------------------==
+
+ bfm w1, w2, #1, #15
+ bfm x1, x2, #1, #15
+ sbfm w1, w2, #1, #15
+ sbfm x1, x2, #1, #15
+ ubfm w1, w2, #1, #15
+ ubfm x1, x2, #1, #15
+ sbfiz wzr, w0, #31, #1
+ sbfiz xzr, x0, #31, #1
+ ubfiz wzr, w0, #31, #1
+ ubfiz xzr, x0, #31, #1
+
+; CHECK: bfxil w1, w2, #1, #15 ; encoding: [0x41,0x3c,0x01,0x33]
+; CHECK: bfxil x1, x2, #1, #15 ; encoding: [0x41,0x3c,0x41,0xb3]
+; CHECK: sbfx w1, w2, #1, #15 ; encoding: [0x41,0x3c,0x01,0x13]
+; CHECK: sbfx x1, x2, #1, #15 ; encoding: [0x41,0x3c,0x41,0x93]
+; CHECK: ubfx w1, w2, #1, #15 ; encoding: [0x41,0x3c,0x01,0x53]
+; CHECK: ubfx x1, x2, #1, #15 ; encoding: [0x41,0x3c,0x41,0xd3]
+; CHECK: sbfiz wzr, w0, #31, #1 ; encoding: [0x1f,0x00,0x01,0x13]
+; CHECK: sbfiz xzr, x0, #31, #1 ; encoding: [0x1f,0x00,0x61,0x93]
+; CHECK: lsl wzr, w0, #31 ; encoding: [0x1f,0x00,0x01,0x53]
+; CHECK: ubfiz xzr, x0, #31, #1 ; encoding: [0x1f,0x00,0x61,0xd3]
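+
+; Note (illustrative): the assembler canonicalizes the BFM family to its
+; preferred aliases, which is why the checks above expect bfxil/sbfx/ubfx
+; (and even lsl, for the ubfiz that reduces to a plain shift) rather than
+; the raw bfm/sbfm/ubfm mnemonics. For example, "ubfm w0, w1, #28, #31"
+; would print as "lsr w0, w1, #28".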
+
+;==---------------------------------------------------------------------------==
+; 5.4.5 Extract (immediate)
+;==---------------------------------------------------------------------------==
+
+ extr w1, w2, w3, #15
+ extr x2, x3, x4, #1
+
+; CHECK: extr w1, w2, w3, #15 ; encoding: [0x41,0x3c,0x83,0x13]
+; CHECK: extr x2, x3, x4, #1 ; encoding: [0x62,0x04,0xc4,0x93]
diff --git a/test/MC/AArch64/arm64-branch-encoding.s b/test/MC/AArch64/arm64-branch-encoding.s
new file mode 100644
index 0000000..48c2099
--- /dev/null
+++ b/test/MC/AArch64/arm64-branch-encoding.s
@@ -0,0 +1,159 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
+
+foo:
+
+;-----------------------------------------------------------------------------
+; Unconditional branch (register) instructions.
+;-----------------------------------------------------------------------------
+
+ ret
+; CHECK: encoding: [0xc0,0x03,0x5f,0xd6]
+ ret x1
+; CHECK: encoding: [0x20,0x00,0x5f,0xd6]
+ drps
+; CHECK: encoding: [0xe0,0x03,0xbf,0xd6]
+ eret
+; CHECK: encoding: [0xe0,0x03,0x9f,0xd6]
+ br x5
+; CHECK: encoding: [0xa0,0x00,0x1f,0xd6]
+ blr x9
+; CHECK: encoding: [0x20,0x01,0x3f,0xd6]
+ bl L1
+; CHECK: bl L1 ; encoding: [A,A,A,0b100101AA]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_call26
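+
+; Note (illustrative): in -show-encoding output the 'A' characters mark bits
+; that belong to an unresolved fixup. In [A,A,A,0b100101AA] above, 0b100101
+; is the BL opcode and the remaining 26 'A' bits are the branch displacement
+; to be filled in when the fixup is resolved.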
+
+;-----------------------------------------------------------------------------
+; Conditional branch instructions.
+;-----------------------------------------------------------------------------
+
+ b L1
+; CHECK: b L1 ; encoding: [A,A,A,0b000101AA]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch26
+ b.eq L1
+; CHECK: b.eq L1 ; encoding: [0bAAA00000,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.ne L1
+; CHECK: b.ne L1 ; encoding: [0bAAA00001,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.cs L1
+; CHECK: b.hs L1 ; encoding: [0bAAA00010,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.cc L1
+; CHECK: b.lo L1 ; encoding: [0bAAA00011,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.mi L1
+; CHECK: b.mi L1 ; encoding: [0bAAA00100,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.pl L1
+; CHECK: b.pl L1 ; encoding: [0bAAA00101,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.vs L1
+; CHECK: b.vs L1 ; encoding: [0bAAA00110,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.vc L1
+; CHECK: b.vc L1 ; encoding: [0bAAA00111,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.hi L1
+; CHECK: b.hi L1 ; encoding: [0bAAA01000,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.ls L1
+; CHECK: b.ls L1 ; encoding: [0bAAA01001,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.ge L1
+; CHECK: b.ge L1 ; encoding: [0bAAA01010,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.lt L1
+; CHECK: b.lt L1 ; encoding: [0bAAA01011,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.gt L1
+; CHECK: b.gt L1 ; encoding: [0bAAA01100,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.le L1
+; CHECK: b.le L1 ; encoding: [0bAAA01101,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+ b.al L1
+; CHECK: b.al L1 ; encoding: [0bAAA01110,A,A,0x54]
+; CHECK: fixup A - offset: 0, value: L1, kind: fixup_aarch64_pcrel_branch19
+L1:
+ b #28
+; CHECK: b #28
+ b.lt #28
+; CHECK: b.lt #28
+ b.cc #1048572
+; CHECK: b.lo #1048572 ; encoding: [0xe3,0xff,0x7f,0x54]
+ b #134217724
+; CHECK: b #134217724 ; encoding: [0xff,0xff,0xff,0x15]
+ b #-134217728
+; CHECK: b #-134217728 ; encoding: [0x00,0x00,0x00,0x16]
+
+;-----------------------------------------------------------------------------
+; Compare-and-branch instructions.
+;-----------------------------------------------------------------------------
+
+ cbz w1, foo
+; CHECK: encoding: [0bAAA00001,A,A,0x34]
+ cbz x1, foo
+; CHECK: encoding: [0bAAA00001,A,A,0xb4]
+ cbnz w2, foo
+; CHECK: encoding: [0bAAA00010,A,A,0x35]
+ cbnz x2, foo
+; CHECK: encoding: [0bAAA00010,A,A,0xb5]
+ cbz w1, #28
+; CHECK: cbz w1, #28
+ cbz w20, #1048572
+; CHECK: cbz w20, #1048572 ; encoding: [0xf4,0xff,0x7f,0x34]
+ cbnz x2, #-1048576
+; CHECK: cbnz x2, #-1048576 ; encoding: [0x02,0x00,0x80,0xb5]
+
+
+;-----------------------------------------------------------------------------
+; Bit-test-and-branch instructions.
+;-----------------------------------------------------------------------------
+
+ tbz x1, #3, foo
+; CHECK: encoding: [0bAAA00001,A,0b00011AAA,0x36]
+ tbnz x1, #63, foo
+; CHECK: encoding: [0bAAA00001,A,0b11111AAA,0xb7]
+
+ tbz w1, #3, foo
+; CHECK: encoding: [0bAAA00001,A,0b00011AAA,0x36]
+ tbnz w1, #31, foo
+; CHECK: encoding: [0bAAA00001,A,0b11111AAA,0x37]
+
+ tbz w1, #3, #28
+; CHECK: tbz w1, #3, #28
+ tbz w3, #5, #32764
+; CHECK: tbz w3, #5, #32764 ; encoding: [0xe3,0xff,0x2b,0x36]
+ tbnz x3, #8, #-32768
+; CHECK: tbnz w3, #8, #-32768 ; encoding: [0x03,0x00,0x44,0x37]
+
+;-----------------------------------------------------------------------------
+; Exception generation instructions.
+;-----------------------------------------------------------------------------
+
+ brk #1
+; CHECK: encoding: [0x20,0x00,0x20,0xd4]
+ dcps1 #2
+; CHECK: encoding: [0x41,0x00,0xa0,0xd4]
+ dcps2 #3
+; CHECK: encoding: [0x62,0x00,0xa0,0xd4]
+ dcps3 #4
+; CHECK: encoding: [0x83,0x00,0xa0,0xd4]
+ hlt #5
+; CHECK: encoding: [0xa0,0x00,0x40,0xd4]
+ hvc #6
+; CHECK: encoding: [0xc2,0x00,0x00,0xd4]
+ smc #7
+; CHECK: encoding: [0xe3,0x00,0x00,0xd4]
+ svc #8
+; CHECK: encoding: [0x01,0x01,0x00,0xd4]
+
+; The immediate defaults to zero for DCPSn
+ dcps1
+ dcps2
+ dcps3
+
+; CHECK: dcps1 ; encoding: [0x01,0x00,0xa0,0xd4]
+; CHECK: dcps2 ; encoding: [0x02,0x00,0xa0,0xd4]
+; CHECK: dcps3 ; encoding: [0x03,0x00,0xa0,0xd4]
+
diff --git a/test/MC/AArch64/arm64-condbr-without-dots.s b/test/MC/AArch64/arm64-condbr-without-dots.s
new file mode 100644
index 0000000..2a9f7a7
--- /dev/null
+++ b/test/MC/AArch64/arm64-condbr-without-dots.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple arm64-apple-ios -o - %s | FileCheck %s
+
+ beq lbl
+ bne lbl
+ bcs lbl
+ bhs lbl
+ blo lbl
+ bcc lbl
+ bmi lbl
+ bpl lbl
+ bvs lbl
+ bvc lbl
+ bhi lbl
+ bls lbl
+ bge lbl
+ blt lbl
+ bgt lbl
+ ble lbl
+ bal lbl
+
+// CHECK: b.eq lbl
+// CHECK: b.ne lbl
+// CHECK: b.hs lbl
+// CHECK: b.hs lbl
+// CHECK: b.lo lbl
+// CHECK: b.lo lbl
+// CHECK: b.mi lbl
+// CHECK: b.pl lbl
+// CHECK: b.vs lbl
+// CHECK: b.vc lbl
+// CHECK: b.hi lbl
+// CHECK: b.ls lbl
+// CHECK: b.ge lbl
+// CHECK: b.lt lbl
+// CHECK: b.gt lbl
+// CHECK: b.le lbl
+// CHECK: b.al lbl
diff --git a/test/MC/AArch64/arm64-crypto.s b/test/MC/AArch64/arm64-crypto.s
new file mode 100644
index 0000000..51efd21
--- /dev/null
+++ b/test/MC/AArch64/arm64-crypto.s
@@ -0,0 +1,66 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -show-encoding -output-asm-variant=1 < %s | FileCheck %s
+
+foo:
+ aese.16b v0, v1
+ aesd.16b v0, v1
+ aesmc.16b v0, v1
+ aesimc.16b v0, v1
+
+ sha1c.4s q0, s1, v2
+ sha1p.4s q0, s1, v2
+ sha1m.4s q0, s1, v2
+ sha1su0.4s v0, v1, v2
+ sha256h.4s q0, q1, v2
+ sha256h2.4s q0, q1, v2
+ sha256su1.4s v0, v1, v2
+ sha1h s0, s1
+ sha1su1.4s v0, v1
+ sha256su0.4s v0, v1
+
+; CHECK: aese.16b v0, v1 ; encoding: [0x20,0x48,0x28,0x4e]
+; CHECK: aesd.16b v0, v1 ; encoding: [0x20,0x58,0x28,0x4e]
+; CHECK: aesmc.16b v0, v1 ; encoding: [0x20,0x68,0x28,0x4e]
+; CHECK: aesimc.16b v0, v1 ; encoding: [0x20,0x78,0x28,0x4e]
+
+; CHECK: sha1c.4s q0, s1, v2 ; encoding: [0x20,0x00,0x02,0x5e]
+; CHECK: sha1p.4s q0, s1, v2 ; encoding: [0x20,0x10,0x02,0x5e]
+; CHECK: sha1m.4s q0, s1, v2 ; encoding: [0x20,0x20,0x02,0x5e]
+; CHECK: sha1su0.4s v0, v1, v2 ; encoding: [0x20,0x30,0x02,0x5e]
+; CHECK: sha256h.4s q0, q1, v2 ; encoding: [0x20,0x40,0x02,0x5e]
+; CHECK: sha256h2.4s q0, q1, v2 ; encoding: [0x20,0x50,0x02,0x5e]
+; CHECK: sha256su1.4s v0, v1, v2 ; encoding: [0x20,0x60,0x02,0x5e]
+; CHECK: sha1h s0, s1 ; encoding: [0x20,0x08,0x28,0x5e]
+; CHECK: sha1su1.4s v0, v1 ; encoding: [0x20,0x18,0x28,0x5e]
+; CHECK: sha256su0.4s v0, v1 ; encoding: [0x20,0x28,0x28,0x5e]
+
+ aese v2.16b, v3.16b
+ aesd v5.16b, v7.16b
+ aesmc v11.16b, v13.16b
+ aesimc v17.16b, v19.16b
+
+; CHECK: aese.16b v2, v3 ; encoding: [0x62,0x48,0x28,0x4e]
+; CHECK: aesd.16b v5, v7 ; encoding: [0xe5,0x58,0x28,0x4e]
+; CHECK: aesmc.16b v11, v13 ; encoding: [0xab,0x69,0x28,0x4e]
+; CHECK: aesimc.16b v17, v19 ; encoding: [0x71,0x7a,0x28,0x4e]
+
+ sha1c q23, s29, v3.4s
+ sha1p q14, s15, v9.4s
+ sha1m q2, s6, v5.4s
+ sha1su0 v3.4s, v5.4s, v9.4s
+ sha256h q2, q7, v18.4s
+ sha256h2 q28, q18, v28.4s
+ sha256su1 v4.4s, v5.4s, v9.4s
+ sha1h s30, s0
+ sha1su1 v10.4s, v21.4s
+ sha256su0 v2.4s, v31.4s
+
+; CHECK: sha1c.4s q23, s29, v3 ; encoding: [0xb7,0x03,0x03,0x5e]
+; CHECK: sha1p.4s q14, s15, v9 ; encoding: [0xee,0x11,0x09,0x5e]
+; CHECK: sha1m.4s q2, s6, v5 ; encoding: [0xc2,0x20,0x05,0x5e]
+; CHECK: sha1su0.4s v3, v5, v9 ; encoding: [0xa3,0x30,0x09,0x5e]
+; CHECK: sha256h.4s q2, q7, v18 ; encoding: [0xe2,0x40,0x12,0x5e]
+; CHECK: sha256h2.4s q28, q18, v28 ; encoding: [0x5c,0x52,0x1c,0x5e]
+; CHECK: sha256su1.4s v4, v5, v9 ; encoding: [0xa4,0x60,0x09,0x5e]
+; CHECK: sha1h s30, s0 ; encoding: [0x1e,0x08,0x28,0x5e]
+; CHECK: sha1su1.4s v10, v21 ; encoding: [0xaa,0x1a,0x28,0x5e]
+; CHECK: sha256su0.4s v2, v31 ; encoding: [0xe2,0x2b,0x28,0x5e]
diff --git a/test/MC/AArch64/arm64-diagno-predicate.s b/test/MC/AArch64/arm64-diagno-predicate.s
new file mode 100644
index 0000000..3b757e8
--- /dev/null
+++ b/test/MC/AArch64/arm64-diagno-predicate.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple arm64-linux-gnu -mattr=-fp-armv8,-crc < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+
+ fcvt d0, s0
+// CHECK-ERROR: error: instruction requires: fp-armv8
+// CHECK-ERROR-NEXT: fcvt d0, s0
+// CHECK-ERROR-NEXT: ^
+
+ fmla v9.2s, v9.2s, v0.2s
+// CHECK-ERROR: error: instruction requires: neon
+// CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s
+// CHECK-ERROR-NEXT: ^
+
+ pmull v0.1q, v1.1d, v2.1d
+// CHECK-ERROR: error: instruction requires: crypto
+// CHECK-ERROR-NEXT: pmull v0.1q, v1.1d, v2.1d
+// CHECK-ERROR-NEXT: ^
+
+ crc32b w5, w7, w20
+// CHECK-ERROR: error: instruction requires: crc
+// CHECK-ERROR-NEXT: crc32b w5, w7, w20
+// CHECK-ERROR-NEXT: ^
+
diff --git a/test/MC/AArch64/arm64-diags.s b/test/MC/AArch64/arm64-diags.s
new file mode 100644
index 0000000..cf00e98
--- /dev/null
+++ b/test/MC/AArch64/arm64-diags.s
@@ -0,0 +1,392 @@
+; RUN: not llvm-mc -triple arm64-apple-darwin -show-encoding < %s 2> %t | FileCheck %s
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+foo:
+
+; The first should encode as an expression. The second should be rejected
+; with an error, since a register is expected.
+ ldr x3, (foo + 4)
+ ldr x3, [foo + 4]
+; CHECK: ldr x3, foo+4 ; encoding: [0bAAA00011,A,A,0x58]
+; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_aarch64_ldr_pcrel_imm19
+; CHECK-ERRORS: error: invalid operand for instruction
+
+; The last argument should be flagged as an error. rdar://9576009
+ ld4.8b {v0, v1, v2, v3}, [x0], #33
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: ld4.8b {v0, v1, v2, v3}, [x0], #33
+
+
+ ldr x0, [x0, #804]
+ ldr w0, [x0, #802]
+ ldr x0, [x0, #804]!
+ ldr w0, [w0, #301]!
+ ldr x0, [x0], #804
+ ldr w0, [w0], #301
+
+ ldp w3, w4, [x5, #11]!
+ ldp x3, x4, [x5, #12]!
+ ldp q3, q4, [x5, #12]!
+ ldp w3, w4, [x5], #11
+ ldp x3, x4, [x5], #12
+ ldp q3, q4, [x5], #12
+
+ ldur x0, [x1, #-257]
+
+; CHECK-ERRORS: error: index must be an integer in range [-256, 255].
+; CHECK-ERRORS: ldr x0, [x0, #804]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be an integer in range [-256, 255].
+; CHECK-ERRORS: ldr w0, [x0, #802]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be an integer in range [-256, 255].
+; CHECK-ERRORS: ldr x0, [x0, #804]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: ldr w0, [w0, #301]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be an integer in range [-256, 255].
+; CHECK-ERRORS: ldr x0, [x0], #804
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: ldr w0, [w0], #301
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be a multiple of 4 in range [-256, 252].
+; CHECK-ERRORS: ldp w3, w4, [x5, #11]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512, 504].
+; CHECK-ERRORS: ldp x3, x4, [x5, #12]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be a multiple of 16 in range [-1024, 1008].
+; CHECK-ERRORS: ldp q3, q4, [x5, #12]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be a multiple of 4 in range [-256, 252].
+; CHECK-ERRORS: ldp w3, w4, [x5], #11
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512, 504].
+; CHECK-ERRORS: ldp x3, x4, [x5], #12
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be a multiple of 16 in range [-1024, 1008].
+; CHECK-ERRORS: ldp q3, q4, [x5], #12
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be an integer in range [-256, 255].
+; CHECK-ERRORS: ldur x0, [x1, #-257]
+; CHECK-ERRORS: ^
+
+
+ldrb w1, [x3, w3, sxtw #4]
+ldrh w1, [x3, w3, sxtw #4]
+ldr w1, [x3, w3, sxtw #4]
+ldr x1, [x3, w3, sxtw #4]
+ldr b1, [x3, w3, sxtw #4]
+ldr h1, [x3, w3, sxtw #4]
+ldr s1, [x3, w3, sxtw #4]
+ldr d1, [x3, w3, sxtw #4]
+ldr q1, [x3, w3, sxtw #1]
+
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0
+; CHECK-ERRORS:ldrb w1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1
+; CHECK-ERRORS:ldrh w1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+; CHECK-ERRORS:ldr w1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3
+; CHECK-ERRORS:ldr x1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0
+; CHECK-ERRORS:ldr b1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1
+; CHECK-ERRORS:ldr h1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+; CHECK-ERRORS:ldr s1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3
+; CHECK-ERRORS:ldr d1, [x3, w3, sxtw #4]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #4
+; CHECK-ERRORS:ldr q1, [x3, w3, sxtw #1]
+; CHECK-ERRORS: ^
+
+; Check that register offset addressing modes only accept 32-bit offset
+; registers when using uxtw/sxtw extends. Everything else requires a 64-bit
+; register.
+ str d1, [x3, w3, sxtx #3]
+ ldr s1, [x3, d3, sxtx #2]
+
+; CHECK-ERRORS: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3
+; CHECK-ERRORS: str d1, [x3, w3, sxtx #3]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: index must be an integer in range [-256, 255].
+; CHECK-ERRORS: ldr s1, [x3, d3, sxtx #2]
+; CHECK-ERRORS: ^
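+
+; Note (illustrative): valid forms use a 32-bit offset register only with
+; uxtw/sxtw, and a 64-bit offset register with lsl/sxtx, e.g.:
+;   ldr x1, [x3, w4, sxtw #3]   ; 32-bit offset register, sxtw extend
+;   ldr x1, [x3, x4, lsl #3]    ; 64-bit offset register, plain shift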
+
+; Shift immediates range checking.
+ sqrshrn b4, h9, #10
+ rshrn v9.8b, v11.8h, #17
+ sqrshrn v7.4h, v8.4s, #39
+ uqshrn2 v4.4s, v5.2d, #67
+
+; CHECK-ERRORS: error: immediate must be an integer in range [1, 8].
+; CHECK-ERRORS: sqrshrn b4, h9, #10
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: immediate must be an integer in range [1, 8].
+; CHECK-ERRORS: rshrn v9.8b, v11.8h, #17
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: immediate must be an integer in range [1, 16].
+; CHECK-ERRORS: sqrshrn v7.4h, v8.4s, #39
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: immediate must be an integer in range [1, 32].
+; CHECK-ERRORS: uqshrn2 v4.4s, v5.2d, #67
+; CHECK-ERRORS: ^
+
+
+ st1.s4 {v14, v15}, [x2], #32
+; CHECK-ERRORS: error: invalid type suffix for instruction
+; CHECK-ERRORS: st1.s4 {v14, v15}, [x2], #32
+; CHECK-ERRORS: ^
+
+
+
+; Load pair instructions where Rt==Rt2, and writeback load/store instructions
+; where Rt==Rn or Rt2==Rn, are unpredictable.
+ ldp x1, x2, [x2], #16
+ ldp x2, x2, [x2], #16
+ ldp w1, w2, [x2], #16
+ ldp w2, w2, [x2], #16
+ ldp x1, x1, [x2]
+
+ ldr x2, [x2], #8
+ ldr x2, [x2, #8]!
+ ldr w2, [x2], #8
+ ldr w2, [x2, #8]!
+
+ str x2, [x2], #8
+ str x2, [x2, #8]!
+ str w2, [x2], #8
+ str w2, [x2, #8]!
+
+; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
+; CHECK-ERRORS: ldp x1, x2, [x2], #16
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
+; CHECK-ERRORS: ldp x2, x2, [x2], #16
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
+; CHECK-ERRORS: ldp w1, w2, [x2], #16
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
+; CHECK-ERRORS: ldp w2, w2, [x2], #16
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDP instruction, Rt2==Rt
+; CHECK-ERRORS: ldp x1, x1, [x2]
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
+; CHECK-ERRORS: ldr x2, [x2], #8
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
+; CHECK-ERRORS: ldr x2, [x2, #8]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
+; CHECK-ERRORS: ldr w2, [x2], #8
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
+; CHECK-ERRORS: ldr w2, [x2, #8]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
+; CHECK-ERRORS: str x2, [x2], #8
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
+; CHECK-ERRORS: str x2, [x2, #8]!
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
+; CHECK-ERRORS: str w2, [x2], #8
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
+; CHECK-ERRORS: str w2, [x2, #8]!
+; CHECK-ERRORS: ^
+
+; Validity checking for shifted-immediate operands. rdar://13174476
+; Here the immediate is out of range.
+ add w1, w2, w3, lsr #75
+
+; CHECK-ERRORS: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+; CHECK-ERRORS: add w1, w2, w3, lsr #75
+; CHECK-ERRORS: ^
+
+; Logical instructions on 32-bit regs with a shift > 31 are not legal.
+orr w0, w0, w0, lsl #32
+; CHECK-ERRORS: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+; CHECK-ERRORS: orr w0, w0, w0, lsl #32
+; CHECK-ERRORS: ^
+eor w0, w0, w0, lsl #32
+; CHECK-ERRORS: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+; CHECK-ERRORS: eor w0, w0, w0, lsl #32
+; CHECK-ERRORS: ^
+and w0, w0, w0, lsl #32
+; CHECK-ERRORS: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+; CHECK-ERRORS: and w0, w0, w0, lsl #32
+; CHECK-ERRORS: ^
+ands w0, w0, w0, lsl #32
+; CHECK-ERRORS: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+; CHECK-ERRORS: ands w0, w0, w0, lsl #32
+; CHECK-ERRORS: ^
+
+; Relocated expressions should not be accepted for 32-bit adds or sub (imm)
+add w3, w5, sym@PAGEOFF
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: add w3, w5, sym@PAGEOFF
+; CHECK-ERRORS: ^
+
+adds w3, w5, sym@PAGEOFF
+adds x9, x12, sym@PAGEOFF
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: adds w3, w5, sym@PAGEOFF
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: adds x9, x12, sym@PAGEOFF
+; CHECK-ERRORS: ^
+
+sub x3, x5, sym@PAGEOFF
+sub w20, w30, sym@PAGEOFF
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: sub x3, x5, sym@PAGEOFF
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: sub w20, w30, sym@PAGEOFF
+; CHECK-ERRORS: ^
+
+subs w9, w10, sym@PAGEOFF
+subs x20, x30, sym@PAGEOFF
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: subs w9, w10, sym@PAGEOFF
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid immediate expression
+; CHECK-ERRORS: subs x20, x30, sym@PAGEOFF
+; CHECK-ERRORS: ^
+
+tbl v0.8b, { v1 }, v0.8b
+tbl v0.16b, { v1.8b, v2.8b, v3.8b }, v0.16b
+tbx v3.16b, { v12.8b, v13.8b, v14.8b }, v6.8b
+tbx v2.8b, { v0 }, v6.8b
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: tbl v0.8b, { v1 }, v0.8b
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: tbl v0.16b, { v1.8b, v2.8b, v3.8b }, v0.16b
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: tbx v3.16b, { v12.8b, v13.8b, v14.8b }, v6.8b
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: tbx v2.8b, { v0 }, v6.8b
+; CHECK-ERRORS: ^
+
+b.c #0x4
+; CHECK-ERRORS: error: invalid condition code
+; CHECK-ERRORS: b.c #0x4
+; CHECK-ERRORS: ^
+
+ic ialluis, x0
+; CHECK-ERRORS: error: specified ic op does not use a register
+ic iallu, x0
+; CHECK-ERRORS: error: specified ic op does not use a register
+ic ivau
+; CHECK-ERRORS: error: specified ic op requires a register
+
+dc zva
+; CHECK-ERRORS: error: specified dc op requires a register
+dc ivac
+; CHECK-ERRORS: error: specified dc op requires a register
+dc isw
+; CHECK-ERRORS: error: specified dc op requires a register
+dc cvac
+; CHECK-ERRORS: error: specified dc op requires a register
+dc csw
+; CHECK-ERRORS: error: specified dc op requires a register
+dc cvau
+; CHECK-ERRORS: error: specified dc op requires a register
+dc civac
+; CHECK-ERRORS: error: specified dc op requires a register
+dc cisw
+; CHECK-ERRORS: error: specified dc op requires a register
+
+at s1e1r
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e2r
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e3r
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e1w
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e2w
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e3w
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e0r
+; CHECK-ERRORS: error: specified at op requires a register
+at s1e0w
+; CHECK-ERRORS: error: specified at op requires a register
+at s12e1r
+; CHECK-ERRORS: error: specified at op requires a register
+at s12e1w
+; CHECK-ERRORS: error: specified at op requires a register
+at s12e0r
+; CHECK-ERRORS: error: specified at op requires a register
+at s12e0w
+; CHECK-ERRORS: error: specified at op requires a register
+
+tlbi vmalle1is, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi vmalle1, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi alle1is, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi alle2is, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi alle3is, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi alle1, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi alle2, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi alle3, x0
+; CHECK-ERRORS: error: specified tlbi op does not use a register
+tlbi vae1is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vae2is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vae3is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi aside1is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vaae1is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vale1is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vaale1is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vale2is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vale3is
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vae1
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vae2
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vae3
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi aside1
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vaae1
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vale1
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vale2
+; CHECK-ERRORS: error: specified tlbi op requires a register
+tlbi vale3
+; CHECK-ERRORS: error: specified tlbi op requires a register
diff --git a/test/MC/ARM64/directive_loh.s b/test/MC/AArch64/arm64-directive_loh.s
index 76d2d7f..76d2d7f 100644
--- a/test/MC/ARM64/directive_loh.s
+++ b/test/MC/AArch64/arm64-directive_loh.s
diff --git a/test/MC/AArch64/arm64-elf-reloc-condbr.s b/test/MC/AArch64/arm64-elf-reloc-condbr.s
new file mode 100644
index 0000000..9b70a20
--- /dev/null
+++ b/test/MC/AArch64/arm64-elf-reloc-condbr.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s
+
+ b.eq somewhere
+
+// OBJ: Relocations [
+// OBJ-NEXT: Section (2) .rela.text {
+// OBJ-NEXT: 0x0 R_AARCH64_CONDBR19 somewhere 0x0
+// OBJ-NEXT: }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/arm64-elf-relocs.s b/test/MC/AArch64/arm64-elf-relocs.s
new file mode 100644
index 0000000..eb22cc2
--- /dev/null
+++ b/test/MC/AArch64/arm64-elf-relocs.s
@@ -0,0 +1,249 @@
+// RUN: llvm-mc -triple=arm64-linux-gnu -o - < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj < %s | llvm-objdump -triple=arm64-linux-gnu - -r | FileCheck %s --check-prefix=CHECK-OBJ
+
+ add x0, x2, #:lo12:sym
+// CHECK: add x0, x2, :lo12:sym
+// CHECK-OBJ: 0 R_AARCH64_ADD_ABS_LO12_NC sym
+
+ add x5, x7, #:dtprel_lo12:sym
+// CHECK: add x5, x7, :dtprel_lo12:sym
+// CHECK-OBJ: 4 R_AARCH64_TLSLD_ADD_DTPREL_LO12 sym
+
+ add x9, x12, #:dtprel_lo12_nc:sym
+// CHECK: add x9, x12, :dtprel_lo12_nc:sym
+// CHECK-OBJ: 8 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC sym
+
+ add x20, x30, #:tprel_lo12:sym
+// CHECK: add x20, x30, :tprel_lo12:sym
+// CHECK-OBJ: c R_AARCH64_TLSLE_ADD_TPREL_LO12 sym
+
+ add x9, x12, #:tprel_lo12_nc:sym
+// CHECK: add x9, x12, :tprel_lo12_nc:sym
+// CHECK-OBJ: 10 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC sym
+
+ add x5, x0, #:tlsdesc_lo12:sym
+// CHECK: add x5, x0, :tlsdesc_lo12:sym
+// CHECK-OBJ: 14 R_AARCH64_TLSDESC_ADD_LO12_NC sym
+
+ add x0, x2, #:lo12:sym+8
+// CHECK: add x0, x2, :lo12:sym
+// CHECK-OBJ: 18 R_AARCH64_ADD_ABS_LO12_NC sym+8
+
+ add x5, x7, #:dtprel_lo12:sym+1
+// CHECK: add x5, x7, :dtprel_lo12:sym+1
+// CHECK-OBJ: 1c R_AARCH64_TLSLD_ADD_DTPREL_LO12 sym+1
+
+ add x9, x12, #:dtprel_lo12_nc:sym+2
+// CHECK: add x9, x12, :dtprel_lo12_nc:sym+2
+// CHECK-OBJ: 20 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC sym+2
+
+ add x20, x30, #:tprel_lo12:sym+12
+// CHECK: add x20, x30, :tprel_lo12:sym+12
+// CHECK-OBJ: 24 R_AARCH64_TLSLE_ADD_TPREL_LO12 sym+12
+
+ add x9, x12, #:tprel_lo12_nc:sym+54
+// CHECK: add x9, x12, :tprel_lo12_nc:sym+54
+// CHECK-OBJ: 28 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC sym+54
+
+ add x5, x0, #:tlsdesc_lo12:sym+70
+// CHECK: add x5, x0, :tlsdesc_lo12:sym+70
+// CHECK-OBJ: 2c R_AARCH64_TLSDESC_ADD_LO12_NC sym+70
+
+ .hword sym + 4 - .
+// CHECK-OBJ: 30 R_AARCH64_PREL16 sym+4
+ .word sym - . + 8
+// CHECK-OBJ: 32 R_AARCH64_PREL32 sym+8
+ .xword sym-.
+// CHECK-OBJ: 36 R_AARCH64_PREL64 sym{{$}}
+
+ .hword sym
+// CHECK-OBJ: 3e R_AARCH64_ABS16 sym
+ .word sym+1
+// CHECK-OBJ: 40 R_AARCH64_ABS32 sym+1
+ .xword sym+16
+// CHECK-OBJ: 44 R_AARCH64_ABS64 sym+16
+
+ adrp x0, sym
+// CHECK: adrp x0, sym
+// CHECK-OBJ: 4c R_AARCH64_ADR_PREL_PG_HI21 sym
+
+ adrp x15, :got:sym
+// CHECK: adrp x15, :got:sym
+// CHECK-OBJ: 50 R_AARCH64_ADR_GOT_PAGE sym
+
+ adrp x29, :gottprel:sym
+// CHECK: adrp x29, :gottprel:sym
+// CHECK-OBJ: 54 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
+
+ adrp x2, :tlsdesc:sym
+// CHECK: adrp x2, :tlsdesc:sym
+// CHECK-OBJ: 58 R_AARCH64_TLSDESC_ADR_PAGE sym
+
+ // LLVM cannot resolve this adrp at assembly time, because the page
+ // boundary could fall anywhere after linking. A relocation
+ // is needed.
+ adrp x3, trickQuestion
+ .global trickQuestion
+trickQuestion:
+// CHECK: adrp x3, trickQuestion
+// CHECK-OBJ: 5c R_AARCH64_ADR_PREL_PG_HI21 trickQuestion
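+
+// Note (illustrative): :lo12: is normally paired with adrp to form a full
+// address, e.g.:
+//   adrp x0, sym              // 4KB page containing sym
+//   add  x0, x0, :lo12:sym    // plus the low 12 bits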
+
+ ldrb w2, [x3, :lo12:sym]
+ ldrsb w5, [x7, #:lo12:sym]
+ ldrsb x11, [x13, :lo12:sym]
+ ldr b17, [x19, #:lo12:sym]
+// CHECK: ldrb w2, [x3, :lo12:sym]
+// CHECK: ldrsb w5, [x7, :lo12:sym]
+// CHECK: ldrsb x11, [x13, :lo12:sym]
+// CHECK: ldr b17, [x19, :lo12:sym]
+// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
+
+ ldrb w23, [x29, #:dtprel_lo12_nc:sym]
+ ldrsb w23, [x19, #:dtprel_lo12:sym]
+ ldrsb x17, [x13, :dtprel_lo12_nc:sym]
+ ldr b11, [x7, #:dtprel_lo12:sym]
+// CHECK: ldrb w23, [x29, :dtprel_lo12_nc:sym]
+// CHECK: ldrsb w23, [x19, :dtprel_lo12:sym]
+// CHECK: ldrsb x17, [x13, :dtprel_lo12_nc:sym]
+// CHECK: ldr b11, [x7, :dtprel_lo12:sym]
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
+
+ ldrb w1, [x2, :tprel_lo12:sym]
+ ldrsb w3, [x4, #:tprel_lo12_nc:sym]
+ ldrsb x5, [x6, :tprel_lo12:sym]
+ ldr b7, [x8, #:tprel_lo12_nc:sym]
+// CHECK: ldrb w1, [x2, :tprel_lo12:sym]
+// CHECK: ldrsb w3, [x4, :tprel_lo12_nc:sym]
+// CHECK: ldrsb x5, [x6, :tprel_lo12:sym]
+// CHECK: ldr b7, [x8, :tprel_lo12_nc:sym]
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
+
+ ldrh w2, [x3, #:lo12:sym]
+ ldrsh w5, [x7, :lo12:sym]
+ ldrsh x11, [x13, #:lo12:sym]
+ ldr h17, [x19, :lo12:sym]
+// CHECK: ldrh w2, [x3, :lo12:sym]
+// CHECK: ldrsh w5, [x7, :lo12:sym]
+// CHECK: ldrsh x11, [x13, :lo12:sym]
+// CHECK: ldr h17, [x19, :lo12:sym]
+// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
+
+ ldrh w23, [x29, #:dtprel_lo12_nc:sym]
+ ldrsh w23, [x19, :dtprel_lo12:sym]
+ ldrsh x17, [x13, :dtprel_lo12_nc:sym]
+ ldr h11, [x7, #:dtprel_lo12:sym]
+// CHECK: ldrh w23, [x29, :dtprel_lo12_nc:sym]
+// CHECK: ldrsh w23, [x19, :dtprel_lo12:sym]
+// CHECK: ldrsh x17, [x13, :dtprel_lo12_nc:sym]
+// CHECK: ldr h11, [x7, :dtprel_lo12:sym]
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
+
+ ldrh w1, [x2, :tprel_lo12:sym]
+ ldrsh w3, [x4, #:tprel_lo12_nc:sym]
+ ldrsh x5, [x6, :tprel_lo12:sym]
+ ldr h7, [x8, #:tprel_lo12_nc:sym]
+// CHECK: ldrh w1, [x2, :tprel_lo12:sym]
+// CHECK: ldrsh w3, [x4, :tprel_lo12_nc:sym]
+// CHECK: ldrsh x5, [x6, :tprel_lo12:sym]
+// CHECK: ldr h7, [x8, :tprel_lo12_nc:sym]
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
+
+ ldr w1, [x2, #:lo12:sym]
+ ldrsw x3, [x4, #:lo12:sym]
+ ldr s4, [x5, :lo12:sym]
+// CHECK: ldr w1, [x2, :lo12:sym]
+// CHECK: ldrsw x3, [x4, :lo12:sym]
+// CHECK: ldr s4, [x5, :lo12:sym]
+// CHECK-OBJ: R_AARCH64_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST32_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST32_ABS_LO12_NC sym
+
+ ldr w1, [x2, :dtprel_lo12:sym]
+ ldrsw x3, [x4, #:dtprel_lo12_nc:sym]
+ ldr s4, [x5, #:dtprel_lo12_nc:sym]
+// CHECK: ldr w1, [x2, :dtprel_lo12:sym]
+// CHECK: ldrsw x3, [x4, :dtprel_lo12_nc:sym]
+// CHECK: ldr s4, [x5, :dtprel_lo12_nc:sym]
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
+
+
+ ldr w1, [x2, #:tprel_lo12:sym]
+ ldrsw x3, [x4, :tprel_lo12_nc:sym]
+ ldr s4, [x5, :tprel_lo12_nc:sym]
+// CHECK: ldr w1, [x2, :tprel_lo12:sym]
+// CHECK: ldrsw x3, [x4, :tprel_lo12_nc:sym]
+// CHECK: ldr s4, [x5, :tprel_lo12_nc:sym]
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST32_TPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
+
+ ldr x28, [x27, :lo12:sym]
+ ldr d26, [x25, #:lo12:sym]
+// CHECK: ldr x28, [x27, :lo12:sym]
+// CHECK: ldr d26, [x25, :lo12:sym]
+// CHECK-OBJ: R_AARCH64_LDST64_ABS_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LDST64_ABS_LO12_NC sym
+
+ ldr x24, [x23, #:got_lo12:sym]
+ ldr d22, [x21, :got_lo12:sym]
+// CHECK: ldr x24, [x23, :got_lo12:sym]
+// CHECK: ldr d22, [x21, :got_lo12:sym]
+// CHECK-OBJ: R_AARCH64_LD64_GOT_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_LD64_GOT_LO12_NC sym
+
+ ldr x24, [x23, :dtprel_lo12_nc:sym]
+ ldr d22, [x21, #:dtprel_lo12:sym]
+// CHECK: ldr x24, [x23, :dtprel_lo12_nc:sym]
+// CHECK: ldr d22, [x21, :dtprel_lo12:sym]
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 sym
+
+ ldr x24, [x23, #:tprel_lo12:sym]
+ ldr d22, [x21, :tprel_lo12_nc:sym]
+// CHECK: ldr x24, [x23, :tprel_lo12:sym]
+// CHECK: ldr d22, [x21, :tprel_lo12_nc:sym]
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST64_TPREL_LO12 sym
+// CHECK-OBJ: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC sym
+
+ ldr x24, [x23, :gottprel_lo12:sym]
+ ldr d22, [x21, #:gottprel_lo12:sym]
+// CHECK: ldr x24, [x23, :gottprel_lo12:sym]
+// CHECK: ldr d22, [x21, :gottprel_lo12:sym]
+// CHECK-OBJ: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
+
+ ldr x24, [x23, #:tlsdesc_lo12:sym]
+ ldr d22, [x21, :tlsdesc_lo12:sym]
+// CHECK: ldr x24, [x23, :tlsdesc_lo12:sym]
+// CHECK: ldr d22, [x21, :tlsdesc_lo12:sym]
+// CHECK-OBJ: R_AARCH64_TLSDESC_LD64_LO12_NC sym
+// CHECK-OBJ: R_AARCH64_TLSDESC_LD64_LO12_NC sym
+
+ ldr q20, [x19, #:lo12:sym]
+// CHECK: ldr q20, [x19, :lo12:sym]
+// CHECK-OBJ: R_AARCH64_LDST128_ABS_LO12_NC sym
+
+// Since relocated instructions print without a '#', that syntax must also
+// be accepted when assembling.
+ add x3, x5, :lo12:imm
+// CHECK: add x3, x5, :lo12:imm
diff --git a/test/MC/AArch64/arm64-fp-encoding.s b/test/MC/AArch64/arm64-fp-encoding.s
new file mode 100644
index 0000000..684d988
--- /dev/null
+++ b/test/MC/AArch64/arm64-fp-encoding.s
@@ -0,0 +1,443 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -show-encoding -output-asm-variant=1 < %s | FileCheck %s
+
+foo:
+;-----------------------------------------------------------------------------
+; Floating-point arithmetic
+;-----------------------------------------------------------------------------
+
+ fabs s1, s2
+ fabs d1, d2
+
+; CHECK: fabs s1, s2 ; encoding: [0x41,0xc0,0x20,0x1e]
+; CHECK: fabs d1, d2 ; encoding: [0x41,0xc0,0x60,0x1e]
+
+ fadd s1, s2, s3
+ fadd d1, d2, d3
+
+; CHECK: fadd s1, s2, s3 ; encoding: [0x41,0x28,0x23,0x1e]
+; CHECK: fadd d1, d2, d3 ; encoding: [0x41,0x28,0x63,0x1e]
+
+ fdiv s1, s2, s3
+ fdiv d1, d2, d3
+
+; CHECK: fdiv s1, s2, s3 ; encoding: [0x41,0x18,0x23,0x1e]
+; CHECK: fdiv d1, d2, d3 ; encoding: [0x41,0x18,0x63,0x1e]
+
+ fmadd s1, s2, s3, s4
+ fmadd d1, d2, d3, d4
+
+; CHECK: fmadd s1, s2, s3, s4 ; encoding: [0x41,0x10,0x03,0x1f]
+; CHECK: fmadd d1, d2, d3, d4 ; encoding: [0x41,0x10,0x43,0x1f]
+
+ fmax s1, s2, s3
+ fmax d1, d2, d3
+ fmaxnm s1, s2, s3
+ fmaxnm d1, d2, d3
+
+; CHECK: fmax s1, s2, s3 ; encoding: [0x41,0x48,0x23,0x1e]
+; CHECK: fmax d1, d2, d3 ; encoding: [0x41,0x48,0x63,0x1e]
+; CHECK: fmaxnm s1, s2, s3 ; encoding: [0x41,0x68,0x23,0x1e]
+; CHECK: fmaxnm d1, d2, d3 ; encoding: [0x41,0x68,0x63,0x1e]
+
+ fmin s1, s2, s3
+ fmin d1, d2, d3
+ fminnm s1, s2, s3
+ fminnm d1, d2, d3
+
+; CHECK: fmin s1, s2, s3 ; encoding: [0x41,0x58,0x23,0x1e]
+; CHECK: fmin d1, d2, d3 ; encoding: [0x41,0x58,0x63,0x1e]
+; CHECK: fminnm s1, s2, s3 ; encoding: [0x41,0x78,0x23,0x1e]
+; CHECK: fminnm d1, d2, d3 ; encoding: [0x41,0x78,0x63,0x1e]
+
+ fmsub s1, s2, s3, s4
+ fmsub d1, d2, d3, d4
+
+; CHECK: fmsub s1, s2, s3, s4 ; encoding: [0x41,0x90,0x03,0x1f]
+; CHECK: fmsub d1, d2, d3, d4 ; encoding: [0x41,0x90,0x43,0x1f]
+
+ fmul s1, s2, s3
+ fmul d1, d2, d3
+
+; CHECK: fmul s1, s2, s3 ; encoding: [0x41,0x08,0x23,0x1e]
+; CHECK: fmul d1, d2, d3 ; encoding: [0x41,0x08,0x63,0x1e]
+
+ fneg s1, s2
+ fneg d1, d2
+
+; CHECK: fneg s1, s2 ; encoding: [0x41,0x40,0x21,0x1e]
+; CHECK: fneg d1, d2 ; encoding: [0x41,0x40,0x61,0x1e]
+
+ fnmadd s1, s2, s3, s4
+ fnmadd d1, d2, d3, d4
+
+; CHECK: fnmadd s1, s2, s3, s4 ; encoding: [0x41,0x10,0x23,0x1f]
+; CHECK: fnmadd d1, d2, d3, d4 ; encoding: [0x41,0x10,0x63,0x1f]
+
+ fnmsub s1, s2, s3, s4
+ fnmsub d1, d2, d3, d4
+
+; CHECK: fnmsub s1, s2, s3, s4 ; encoding: [0x41,0x90,0x23,0x1f]
+; CHECK: fnmsub d1, d2, d3, d4 ; encoding: [0x41,0x90,0x63,0x1f]
+
+ fnmul s1, s2, s3
+ fnmul d1, d2, d3
+
+; CHECK: fnmul s1, s2, s3 ; encoding: [0x41,0x88,0x23,0x1e]
+; CHECK: fnmul d1, d2, d3 ; encoding: [0x41,0x88,0x63,0x1e]
+
+ fsqrt s1, s2
+ fsqrt d1, d2
+
+; CHECK: fsqrt s1, s2 ; encoding: [0x41,0xc0,0x21,0x1e]
+; CHECK: fsqrt d1, d2 ; encoding: [0x41,0xc0,0x61,0x1e]
+
+ fsub s1, s2, s3
+ fsub d1, d2, d3
+
+; CHECK: fsub s1, s2, s3 ; encoding: [0x41,0x38,0x23,0x1e]
+; CHECK: fsub d1, d2, d3 ; encoding: [0x41,0x38,0x63,0x1e]
+
+;-----------------------------------------------------------------------------
+; Floating-point comparison
+;-----------------------------------------------------------------------------
+
+ fccmp s1, s2, #0, eq
+ fccmp d1, d2, #0, eq
+ fccmpe s1, s2, #0, eq
+ fccmpe d1, d2, #0, eq
+
+; CHECK: fccmp s1, s2, #0, eq ; encoding: [0x20,0x04,0x22,0x1e]
+; CHECK: fccmp d1, d2, #0, eq ; encoding: [0x20,0x04,0x62,0x1e]
+; CHECK: fccmpe s1, s2, #0, eq ; encoding: [0x30,0x04,0x22,0x1e]
+; CHECK: fccmpe d1, d2, #0, eq ; encoding: [0x30,0x04,0x62,0x1e]
+
+ fcmp s1, s2
+ fcmp d1, d2
+ fcmp s1, #0.0
+ fcmp d1, #0.0
+ fcmpe s1, s2
+ fcmpe d1, d2
+ fcmpe s1, #0.0
+ fcmpe d1, #0.0
+
+; CHECK: fcmp s1, s2 ; encoding: [0x20,0x20,0x22,0x1e]
+; CHECK: fcmp d1, d2 ; encoding: [0x20,0x20,0x62,0x1e]
+; CHECK: fcmp s1, #0.0 ; encoding: [0x28,0x20,0x20,0x1e]
+; CHECK: fcmp d1, #0.0 ; encoding: [0x28,0x20,0x60,0x1e]
+; CHECK: fcmpe s1, s2 ; encoding: [0x30,0x20,0x22,0x1e]
+; CHECK: fcmpe d1, d2 ; encoding: [0x30,0x20,0x62,0x1e]
+; CHECK: fcmpe s1, #0.0 ; encoding: [0x38,0x20,0x20,0x1e]
+; CHECK: fcmpe d1, #0.0 ; encoding: [0x38,0x20,0x60,0x1e]
+
+;-----------------------------------------------------------------------------
+; Floating-point conditional select
+;-----------------------------------------------------------------------------
+
+ fcsel s1, s2, s3, eq
+ fcsel d1, d2, d3, eq
+
+; CHECK: fcsel s1, s2, s3, eq ; encoding: [0x41,0x0c,0x23,0x1e]
+; CHECK: fcsel d1, d2, d3, eq ; encoding: [0x41,0x0c,0x63,0x1e]
+
+;-----------------------------------------------------------------------------
+; Floating-point convert
+;-----------------------------------------------------------------------------
+
+ fcvt h1, d2
+ fcvt s1, d2
+ fcvt d1, h2
+ fcvt s1, h2
+ fcvt d1, s2
+ fcvt h1, s2
+
+; CHECK: fcvt h1, d2 ; encoding: [0x41,0xc0,0x63,0x1e]
+; CHECK: fcvt s1, d2 ; encoding: [0x41,0x40,0x62,0x1e]
+; CHECK: fcvt d1, h2 ; encoding: [0x41,0xc0,0xe2,0x1e]
+; CHECK: fcvt s1, h2 ; encoding: [0x41,0x40,0xe2,0x1e]
+; CHECK: fcvt d1, s2 ; encoding: [0x41,0xc0,0x22,0x1e]
+; CHECK: fcvt h1, s2 ; encoding: [0x41,0xc0,0x23,0x1e]
+
+ fcvtas w1, d2
+ fcvtas x1, d2
+ fcvtas w1, s2
+ fcvtas x1, s2
+
+; CHECK: fcvtas w1, d2 ; encoding: [0x41,0x00,0x64,0x1e]
+; CHECK: fcvtas x1, d2 ; encoding: [0x41,0x00,0x64,0x9e]
+; CHECK: fcvtas w1, s2 ; encoding: [0x41,0x00,0x24,0x1e]
+; CHECK: fcvtas x1, s2 ; encoding: [0x41,0x00,0x24,0x9e]
+
+ fcvtau w1, s2
+ fcvtau w1, d2
+ fcvtau x1, s2
+ fcvtau x1, d2
+
+; CHECK: fcvtau w1, s2 ; encoding: [0x41,0x00,0x25,0x1e]
+; CHECK: fcvtau w1, d2 ; encoding: [0x41,0x00,0x65,0x1e]
+; CHECK: fcvtau x1, s2 ; encoding: [0x41,0x00,0x25,0x9e]
+; CHECK: fcvtau x1, d2 ; encoding: [0x41,0x00,0x65,0x9e]
+
+ fcvtms w1, s2
+ fcvtms w1, d2
+ fcvtms x1, s2
+ fcvtms x1, d2
+
+; CHECK: fcvtms w1, s2 ; encoding: [0x41,0x00,0x30,0x1e]
+; CHECK: fcvtms w1, d2 ; encoding: [0x41,0x00,0x70,0x1e]
+; CHECK: fcvtms x1, s2 ; encoding: [0x41,0x00,0x30,0x9e]
+; CHECK: fcvtms x1, d2 ; encoding: [0x41,0x00,0x70,0x9e]
+
+ fcvtmu w1, s2
+ fcvtmu w1, d2
+ fcvtmu x1, s2
+ fcvtmu x1, d2
+
+; CHECK: fcvtmu w1, s2 ; encoding: [0x41,0x00,0x31,0x1e]
+; CHECK: fcvtmu w1, d2 ; encoding: [0x41,0x00,0x71,0x1e]
+; CHECK: fcvtmu x1, s2 ; encoding: [0x41,0x00,0x31,0x9e]
+; CHECK: fcvtmu x1, d2 ; encoding: [0x41,0x00,0x71,0x9e]
+
+ fcvtns w1, s2
+ fcvtns w1, d2
+ fcvtns x1, s2
+ fcvtns x1, d2
+
+; CHECK: fcvtns w1, s2 ; encoding: [0x41,0x00,0x20,0x1e]
+; CHECK: fcvtns w1, d2 ; encoding: [0x41,0x00,0x60,0x1e]
+; CHECK: fcvtns x1, s2 ; encoding: [0x41,0x00,0x20,0x9e]
+; CHECK: fcvtns x1, d2 ; encoding: [0x41,0x00,0x60,0x9e]
+
+ fcvtnu w1, s2
+ fcvtnu w1, d2
+ fcvtnu x1, s2
+ fcvtnu x1, d2
+
+; CHECK: fcvtnu w1, s2 ; encoding: [0x41,0x00,0x21,0x1e]
+; CHECK: fcvtnu w1, d2 ; encoding: [0x41,0x00,0x61,0x1e]
+; CHECK: fcvtnu x1, s2 ; encoding: [0x41,0x00,0x21,0x9e]
+; CHECK: fcvtnu x1, d2 ; encoding: [0x41,0x00,0x61,0x9e]
+
+ fcvtps w1, s2
+ fcvtps w1, d2
+ fcvtps x1, s2
+ fcvtps x1, d2
+
+; CHECK: fcvtps w1, s2 ; encoding: [0x41,0x00,0x28,0x1e]
+; CHECK: fcvtps w1, d2 ; encoding: [0x41,0x00,0x68,0x1e]
+; CHECK: fcvtps x1, s2 ; encoding: [0x41,0x00,0x28,0x9e]
+; CHECK: fcvtps x1, d2 ; encoding: [0x41,0x00,0x68,0x9e]
+
+ fcvtpu w1, s2
+ fcvtpu w1, d2
+ fcvtpu x1, s2
+ fcvtpu x1, d2
+
+; CHECK: fcvtpu w1, s2 ; encoding: [0x41,0x00,0x29,0x1e]
+; CHECK: fcvtpu w1, d2 ; encoding: [0x41,0x00,0x69,0x1e]
+; CHECK: fcvtpu x1, s2 ; encoding: [0x41,0x00,0x29,0x9e]
+; CHECK: fcvtpu x1, d2 ; encoding: [0x41,0x00,0x69,0x9e]
+
+ fcvtzs w1, s2
+ fcvtzs w1, s2, #1
+ fcvtzs w1, d2
+ fcvtzs w1, d2, #1
+ fcvtzs x1, s2
+ fcvtzs x1, s2, #1
+ fcvtzs x1, d2
+ fcvtzs x1, d2, #1
+
+; CHECK: fcvtzs w1, s2 ; encoding: [0x41,0x00,0x38,0x1e]
+; CHECK: fcvtzs w1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x1e]
+; CHECK: fcvtzs w1, d2 ; encoding: [0x41,0x00,0x78,0x1e]
+; CHECK: fcvtzs w1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x1e]
+; CHECK: fcvtzs x1, s2 ; encoding: [0x41,0x00,0x38,0x9e]
+; CHECK: fcvtzs x1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x9e]
+; CHECK: fcvtzs x1, d2 ; encoding: [0x41,0x00,0x78,0x9e]
+; CHECK: fcvtzs x1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x9e]
+
+ fcvtzu w1, s2
+ fcvtzu w1, s2, #1
+ fcvtzu w1, d2
+ fcvtzu w1, d2, #1
+ fcvtzu x1, s2
+ fcvtzu x1, s2, #1
+ fcvtzu x1, d2
+ fcvtzu x1, d2, #1
+
+; CHECK: fcvtzu w1, s2 ; encoding: [0x41,0x00,0x39,0x1e]
+; CHECK: fcvtzu w1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x1e]
+; CHECK: fcvtzu w1, d2 ; encoding: [0x41,0x00,0x79,0x1e]
+; CHECK: fcvtzu w1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x1e]
+; CHECK: fcvtzu x1, s2 ; encoding: [0x41,0x00,0x39,0x9e]
+; CHECK: fcvtzu x1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x9e]
+; CHECK: fcvtzu x1, d2 ; encoding: [0x41,0x00,0x79,0x9e]
+; CHECK: fcvtzu x1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x9e]
+
+ scvtf s1, w2
+ scvtf s1, w2, #1
+ scvtf d1, w2
+ scvtf d1, w2, #1
+ scvtf s1, x2
+ scvtf s1, x2, #1
+ scvtf d1, x2
+ scvtf d1, x2, #1
+
+; CHECK: scvtf s1, w2 ; encoding: [0x41,0x00,0x22,0x1e]
+; CHECK: scvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x02,0x1e]
+; CHECK: scvtf d1, w2 ; encoding: [0x41,0x00,0x62,0x1e]
+; CHECK: scvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x42,0x1e]
+; CHECK: scvtf s1, x2 ; encoding: [0x41,0x00,0x22,0x9e]
+; CHECK: scvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x02,0x9e]
+; CHECK: scvtf d1, x2 ; encoding: [0x41,0x00,0x62,0x9e]
+; CHECK: scvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x42,0x9e]
+
+ ucvtf s1, w2
+ ucvtf s1, w2, #1
+ ucvtf d1, w2
+ ucvtf d1, w2, #1
+ ucvtf s1, x2
+ ucvtf s1, x2, #1
+ ucvtf d1, x2
+ ucvtf d1, x2, #1
+
+; CHECK: ucvtf s1, w2 ; encoding: [0x41,0x00,0x23,0x1e]
+; CHECK: ucvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x03,0x1e]
+; CHECK: ucvtf d1, w2 ; encoding: [0x41,0x00,0x63,0x1e]
+; CHECK: ucvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x43,0x1e]
+; CHECK: ucvtf s1, x2 ; encoding: [0x41,0x00,0x23,0x9e]
+; CHECK: ucvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x03,0x9e]
+; CHECK: ucvtf d1, x2 ; encoding: [0x41,0x00,0x63,0x9e]
+; CHECK: ucvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x43,0x9e]
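+
+; Note (illustrative): the trailing immediate on these conversions is a
+; fixed-point fractional-bit count, not a shift: "fcvtzs w1, s2, #1" scales
+; by 2^1 before truncating, so e.g. "fcvtzs w0, s0, #16" would produce a
+; hypothetical Q16.16 fixed-point result.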
+
+;-----------------------------------------------------------------------------
+; Floating-point move
+;-----------------------------------------------------------------------------
+
+ fmov s1, w2
+ fmov w1, s2
+ fmov d1, x2
+ fmov x1, d2
+
+; CHECK: fmov s1, w2 ; encoding: [0x41,0x00,0x27,0x1e]
+; CHECK: fmov w1, s2 ; encoding: [0x41,0x00,0x26,0x1e]
+; CHECK: fmov d1, x2 ; encoding: [0x41,0x00,0x67,0x9e]
+; CHECK: fmov x1, d2 ; encoding: [0x41,0x00,0x66,0x9e]
+
+ fmov s1, #0.125
+ fmov s1, #0x40
+ fmov d1, #0.125
+ fmov d1, #0x40
+ fmov d1, #-4.843750e-01
+ fmov d1, #4.843750e-01
+ fmov d3, #3
+ fmov s2, #0.0
+ fmov d2, #0.0
+
+; CHECK: fmov s1, #0.12500000 ; encoding: [0x01,0x10,0x28,0x1e]
+; CHECK: fmov s1, #0.12500000 ; encoding: [0x01,0x10,0x28,0x1e]
+; CHECK: fmov d1, #0.12500000 ; encoding: [0x01,0x10,0x68,0x1e]
+; CHECK: fmov d1, #0.12500000 ; encoding: [0x01,0x10,0x68,0x1e]
+; CHECK: fmov d1, #-0.48437500 ; encoding: [0x01,0xf0,0x7b,0x1e]
+; CHECK: fmov d1, #0.48437500 ; encoding: [0x01,0xf0,0x6b,0x1e]
+; CHECK: fmov d3, #3.00000000 ; encoding: [0x03,0x10,0x61,0x1e]
+; CHECK: fmov s2, wzr ; encoding: [0xe2,0x03,0x27,0x1e]
+; CHECK: fmov d2, xzr ; encoding: [0xe2,0x03,0x67,0x9e]
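+
+; Note (illustrative): FMOV (immediate) packs the value into an 8-bit field
+; that can only express +/-(16..31)/16 * 2^n for n in [-3, 4]; #0x40 above is
+; the raw imm8 for 0.125, and 0.0, which is not encodable, is assembled as a
+; move from wzr/xzr instead.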
+
+ fmov s1, s2
+ fmov d1, d2
+
+; CHECK: fmov s1, s2 ; encoding: [0x41,0x40,0x20,0x1e]
+; CHECK: fmov d1, d2 ; encoding: [0x41,0x40,0x60,0x1e]
+
+
+ fmov x2, v5.d[1]
+ fmov.d x9, v7[1]
+ fmov v1.d[1], x1
+ fmov.d v8[1], x6
+
+; CHECK: fmov.d x2, v5[1] ; encoding: [0xa2,0x00,0xae,0x9e]
+; CHECK: fmov.d x9, v7[1] ; encoding: [0xe9,0x00,0xae,0x9e]
+; CHECK: fmov.d v1[1], x1 ; encoding: [0x21,0x00,0xaf,0x9e]
+; CHECK: fmov.d v8[1], x6 ; encoding: [0xc8,0x00,0xaf,0x9e]
+
+
+;-----------------------------------------------------------------------------
+; Floating-point round to integral
+;-----------------------------------------------------------------------------
+
+ frinta s1, s2
+ frinta d1, d2
+
+; CHECK: frinta s1, s2 ; encoding: [0x41,0x40,0x26,0x1e]
+; CHECK: frinta d1, d2 ; encoding: [0x41,0x40,0x66,0x1e]
+
+ frinti s1, s2
+ frinti d1, d2
+
+; CHECK: frinti s1, s2 ; encoding: [0x41,0xc0,0x27,0x1e]
+; CHECK: frinti d1, d2 ; encoding: [0x41,0xc0,0x67,0x1e]
+
+ frintm s1, s2
+ frintm d1, d2
+
+; CHECK: frintm s1, s2 ; encoding: [0x41,0x40,0x25,0x1e]
+; CHECK: frintm d1, d2 ; encoding: [0x41,0x40,0x65,0x1e]
+
+ frintn s1, s2
+ frintn d1, d2
+
+; CHECK: frintn s1, s2 ; encoding: [0x41,0x40,0x24,0x1e]
+; CHECK: frintn d1, d2 ; encoding: [0x41,0x40,0x64,0x1e]
+
+ frintp s1, s2
+ frintp d1, d2
+
+; CHECK: frintp s1, s2 ; encoding: [0x41,0xc0,0x24,0x1e]
+; CHECK: frintp d1, d2 ; encoding: [0x41,0xc0,0x64,0x1e]
+
+ frintx s1, s2
+ frintx d1, d2
+
+; CHECK: frintx s1, s2 ; encoding: [0x41,0x40,0x27,0x1e]
+; CHECK: frintx d1, d2 ; encoding: [0x41,0x40,0x67,0x1e]
+
+ frintz s1, s2
+ frintz d1, d2
+
+; CHECK: frintz s1, s2 ; encoding: [0x41,0xc0,0x25,0x1e]
+; CHECK: frintz d1, d2 ; encoding: [0x41,0xc0,0x65,0x1e]
+
+ cmhs d0, d0, d0
+ cmtst d0, d0, d0
+
+; CHECK: cmhs d0, d0, d0 ; encoding: [0x00,0x3c,0xe0,0x7e]
+; CHECK: cmtst d0, d0, d0 ; encoding: [0x00,0x8c,0xe0,0x5e]
+
+
+
+;-----------------------------------------------------------------------------
+; Floating-point extract and narrow
+;-----------------------------------------------------------------------------
+ sqxtn b4, h2
+ sqxtn h2, s3
+ sqxtn s9, d2
+
+; CHECK: sqxtn b4, h2 ; encoding: [0x44,0x48,0x21,0x5e]
+; CHECK: sqxtn h2, s3 ; encoding: [0x62,0x48,0x61,0x5e]
+; CHECK: sqxtn s9, d2 ; encoding: [0x49,0x48,0xa1,0x5e]
+
+ sqxtun b4, h2
+ sqxtun h2, s3
+ sqxtun s9, d2
+
+; CHECK: sqxtun b4, h2 ; encoding: [0x44,0x28,0x21,0x7e]
+; CHECK: sqxtun h2, s3 ; encoding: [0x62,0x28,0x61,0x7e]
+; CHECK: sqxtun s9, d2 ; encoding: [0x49,0x28,0xa1,0x7e]
+
+ uqxtn b4, h2
+ uqxtn h2, s3
+ uqxtn s9, d2
+
+; CHECK: uqxtn b4, h2 ; encoding: [0x44,0x48,0x21,0x7e]
+; CHECK: uqxtn h2, s3 ; encoding: [0x62,0x48,0x61,0x7e]
+; CHECK: uqxtn s9, d2 ; encoding: [0x49,0x48,0xa1,0x7e]
diff --git a/test/MC/AArch64/arm64-large-relocs.s b/test/MC/AArch64/arm64-large-relocs.s
new file mode 100644
index 0000000..2a0cfa2
--- /dev/null
+++ b/test/MC/AArch64/arm64-large-relocs.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -triple=arm64-linux-gnu -show-encoding -o - %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64-linux-gnu -show-encoding -filetype=obj -o - %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-OBJ %s
+
+ movz x2, #:abs_g0:sym
+ movk w3, #:abs_g0_nc:sym
+// CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw
+// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw
+
+// CHECK-OBJ: 0 R_AARCH64_MOVW_UABS_G0 sym
+// CHECK-OBJ: 4 R_AARCH64_MOVW_UABS_G0_NC sym
+
+ movz x4, #:abs_g1:sym
+ movk w5, #:abs_g1_nc:sym
+// CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw
+// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_aarch64_movw
+
+// CHECK-OBJ: 8 R_AARCH64_MOVW_UABS_G1 sym
+// CHECK-OBJ: c R_AARCH64_MOVW_UABS_G1_NC sym
+
+ movz x6, #:abs_g2:sym
+ movk x7, #:abs_g2_nc:sym
+// CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_aarch64_movw
+// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_aarch64_movw
+
+// CHECK-OBJ: 10 R_AARCH64_MOVW_UABS_G2 sym
+// CHECK-OBJ: 14 R_AARCH64_MOVW_UABS_G2_NC sym
+
+ movz x8, #:abs_g3:sym
+// CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw
+
+// CHECK-OBJ: 18 R_AARCH64_MOVW_UABS_G3 sym
diff --git a/test/MC/AArch64/arm64-leaf-compact-unwind.s b/test/MC/AArch64/arm64-leaf-compact-unwind.s
new file mode 100644
index 0000000..d699813
--- /dev/null
+++ b/test/MC/AArch64/arm64-leaf-compact-unwind.s
@@ -0,0 +1,208 @@
+// RUN: llvm-mc -triple=arm64-apple-ios -filetype=obj < %s | \
+// RUN: llvm-readobj -sections -section-relocations -section-data | \
+// RUN: FileCheck %s
+//
+// rdar://13070556
+
+// FIXME: we should add compact unwind support to llvm-objdump -unwind-info
+
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK-NEXT: Name: __compact_unwind
+// CHECK-NEXT: Segment: __LD
+// CHECK-NEXT: Address:
+// CHECK-NEXT: Size:
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Alignment:
+// CHECK-NEXT: RelocationOffset:
+// CHECK-NEXT: RelocationCount:
+// CHECK-NEXT: Type:
+// CHECK-NEXT: Attributes [
+// CHECK-NEXT: Debug
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1:
+// CHECK-NEXT: Reserved2:
+// CHECK-NEXT: Relocations [
+// CHECK-NEXT: 0x60 0 3 0 ARM64_RELOC_UNSIGNED 0 -
+// CHECK-NEXT: 0x40 0 3 0 ARM64_RELOC_UNSIGNED 0 -
+// CHECK-NEXT: 0x20 0 3 0 ARM64_RELOC_UNSIGNED 0 -
+// CHECK-NEXT: 0x0 0 3 0 ARM64_RELOC_UNSIGNED 0 -
+// CHECK-NEXT: ]
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT: 0000: 00000000 00000000 08000000 00000002
+// CHECK-NEXT: 0010: 00000000 00000000 00000000 00000000
+// CHECK-NEXT: 0020: 08000000 00000000 40000000 00900002
+// CHECK-NEXT: 0030: 00000000 00000000 00000000 00000000
+// CHECK-NEXT: 0040: 48000000 00000000 D4000000 0F400002
+// CHECK-NEXT: 0050: 00000000 00000000 00000000 00000000
+// CHECK-NEXT: 0060: 1C010000 00000000 54000000 10100202
+// CHECK-NEXT: 0070: 00000000 00000000 00000000 00000000
+// CHECK-NEXT: )
+// CHECK-NEXT: }
+
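+// Each 32-byte __compact_unwind entry above is (function address, length,
+// encoding, personality, LSDA). The 0x02xxxxxx encodings are the arm64
+// "frameless" mode, with the stack size in 16-byte units in bits 12-23:
+// e.g. 0x02009000 for _foo2 gives 9 * 16 = 144 bytes, matching its
+// "sub sp, sp, #144".
+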
+ .section __TEXT,__text,regular,pure_instructions
+ .globl _foo1
+ .align 2
+_foo1: ; @foo1
+ .cfi_startproc
+; BB#0: ; %entry
+ add w0, w0, #42 ; =#42
+ ret
+ .cfi_endproc
+
+ .globl _foo2
+ .align 2
+_foo2: ; @foo2
+ .cfi_startproc
+; BB#0: ; %entry
+ sub sp, sp, #144 ; =#144
+Ltmp2:
+ .cfi_def_cfa_offset 144
+ mov x9, xzr
+ mov x8, sp
+LBB1_1: ; %for.body
+ ; =>This Inner Loop Header: Depth=1
+ str w9, [x8, x9, lsl #2]
+ add x9, x9, #1 ; =#1
+ cmp w9, #36 ; =#36
+ b.ne LBB1_1
+; BB#2:
+ mov x9, xzr
+ mov w0, wzr
+LBB1_3: ; %for.body4
+ ; =>This Inner Loop Header: Depth=1
+ ldr w10, [x8, x9]
+ add x9, x9, #4 ; =#4
+ cmp w9, #144 ; =#144
+ add w0, w10, w0
+ b.ne LBB1_3
+; BB#4: ; %for.end9
+ add sp, sp, #144 ; =#144
+ ret
+ .cfi_endproc
+
+ .globl _foo3
+ .align 2
+_foo3: ; @foo3
+ .cfi_startproc
+; BB#0: ; %entry
+ stp x26, x25, [sp, #-64]!
+ stp x24, x23, [sp, #16]
+ stp x22, x21, [sp, #32]
+ stp x20, x19, [sp, #48]
+Ltmp3:
+ .cfi_def_cfa_offset 64
+Ltmp4:
+ .cfi_offset w19, -16
+Ltmp5:
+ .cfi_offset w20, -24
+Ltmp6:
+ .cfi_offset w21, -32
+Ltmp7:
+ .cfi_offset w22, -40
+Ltmp8:
+ .cfi_offset w23, -48
+Ltmp9:
+ .cfi_offset w24, -56
+Ltmp10:
+ .cfi_offset w25, -64
+Ltmp11:
+ .cfi_offset w26, -72
+Lloh0:
+ adrp x8, _bar@GOTPAGE
+Lloh1:
+ ldr x8, [x8, _bar@GOTPAGEOFF]
+ ldr w9, [x8]
+ ldr w10, [x8]
+ ldr w11, [x8]
+ ldr w12, [x8]
+ ldr w13, [x8]
+ ldr w14, [x8]
+ ldr w15, [x8]
+ ldr w16, [x8]
+ ldr w17, [x8]
+ ldr w0, [x8]
+ ldr w19, [x8]
+ ldr w20, [x8]
+ ldr w21, [x8]
+ ldr w22, [x8]
+ ldr w23, [x8]
+ ldr w24, [x8]
+ ldr w25, [x8]
+ ldr w8, [x8]
+ add w9, w10, w9
+ add w9, w9, w11
+ add w9, w9, w12
+ add w9, w9, w13
+ add w9, w9, w14
+ add w9, w9, w15
+ add w9, w9, w16
+ add w9, w9, w17
+ add w9, w9, w0
+ add w9, w9, w19
+ add w9, w9, w20
+ add w9, w9, w21
+ add w9, w9, w22
+ add w9, w9, w23
+ add w9, w9, w24
+ add w9, w9, w25
+ sub w8, w8, w9
+ sub w8, w8, w7, lsl #1
+ sub w8, w8, w6, lsl #1
+ sub w8, w8, w5, lsl #1
+ sub w8, w8, w4, lsl #1
+ sub w8, w8, w3, lsl #1
+ sub w8, w8, w2, lsl #1
+ sub w0, w8, w1, lsl #1
+ ldp x20, x19, [sp, #48]
+ ldp x22, x21, [sp, #32]
+ ldp x24, x23, [sp, #16]
+ ldp x26, x25, [sp], #64
+ ret
+ .loh AdrpLdrGot Lloh0, Lloh1
+ .cfi_endproc
+
+ .globl _foo4
+ .align 2
+_foo4: ; @foo4
+ .cfi_startproc
+; BB#0: ; %entry
+ stp x28, x27, [sp, #-16]!
+ sub sp, sp, #512 ; =#512
+Ltmp12:
+ .cfi_def_cfa_offset 528
+Ltmp13:
+ .cfi_offset w27, -16
+Ltmp14:
+ .cfi_offset w28, -24
+ ; kill: W0<def> W0<kill> X0<def>
+ mov x9, xzr
+ ubfx x10, x0, #0, #32
+ mov x8, sp
+LBB3_1: ; %for.body
+ ; =>This Inner Loop Header: Depth=1
+ add w11, w10, w9
+ str w11, [x8, x9, lsl #2]
+ add x9, x9, #1 ; =#1
+ cmp w9, #128 ; =#128
+ b.ne LBB3_1
+; BB#2: ; %for.cond2.preheader
+ mov x9, xzr
+ mov w0, wzr
+ add x8, x8, w5, sxtw #2
+LBB3_3: ; %for.body4
+ ; =>This Inner Loop Header: Depth=1
+ ldr w10, [x8, x9]
+ add x9, x9, #4 ; =#4
+ cmp w9, #512 ; =#512
+ add w0, w10, w0
+ b.ne LBB3_3
+; BB#4: ; %for.end11
+ add sp, sp, #512 ; =#512
+ ldp x28, x27, [sp], #16
+ ret
+ .cfi_endproc
+
+ .comm _bar,4,2 ; @bar
+
+.subsections_via_symbols
diff --git a/test/MC/ARM64/logical-encoding.s b/test/MC/AArch64/arm64-logical-encoding.s
index e5f1436..e5f1436 100644
--- a/test/MC/ARM64/logical-encoding.s
+++ b/test/MC/AArch64/arm64-logical-encoding.s
diff --git a/test/MC/ARM64/mapping-across-sections.s b/test/MC/AArch64/arm64-mapping-across-sections.s
index 00b324c..00b324c 100644
--- a/test/MC/ARM64/mapping-across-sections.s
+++ b/test/MC/AArch64/arm64-mapping-across-sections.s
diff --git a/test/MC/ARM64/mapping-within-section.s b/test/MC/AArch64/arm64-mapping-within-section.s
index f515cb9..f515cb9 100644
--- a/test/MC/ARM64/mapping-within-section.s
+++ b/test/MC/AArch64/arm64-mapping-within-section.s
diff --git a/test/MC/AArch64/arm64-memory.s b/test/MC/AArch64/arm64-memory.s
new file mode 100644
index 0000000..5798596
--- /dev/null
+++ b/test/MC/AArch64/arm64-memory.s
@@ -0,0 +1,634 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
+
+foo:
+;-----------------------------------------------------------------------------
+; Indexed loads
+;-----------------------------------------------------------------------------
+
+ ldr w5, [x4, #20]
+ ldr x4, [x3]
+ ldr x2, [sp, #32]
+ ldr b5, [sp, #1]
+ ldr h6, [sp, #2]
+ ldr s7, [sp, #4]
+ ldr d8, [sp, #8]
+ ldr q9, [sp, #16]
+ ldrb w4, [x3]
+ ldrb w5, [x4, #20]
+ ldrb w2, [x3, _foo@pageoff]
+ ldrb w3, [x2, "+[Test method].var"@PAGEOFF]
+ ldrsb w9, [x3]
+ ldrsb x2, [sp, #128]
+ ldrh w2, [sp, #32]
+ ldrsh w3, [sp, #32]
+ ldrsh x5, [x9, #24]
+ ldrsw x9, [sp, #512]
+
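+; The 5-bit prfm operand packs type in bits 4:3, target in bits 2:1, and
+; policy in bit 0: #5 = 0b00101 = PLD, L3, STRM, so it should print via its
+; alias pldl3strm; #31 uses an unallocated type, has no alias, and stays
+; numeric.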
+ prfm #5, [sp, #32]
+ prfm #31, [sp, #32]
+ prfm pldl1keep, [x2]
+ prfm pldl1strm, [x2]
+ prfm pldl2keep, [x2]
+ prfm pldl2strm, [x2]
+ prfm pldl3keep, [x2]
+ prfm pldl3strm, [x2]
+ prfm pstl1keep, [x2]
+ prfm pstl1strm, [x2]
+ prfm pstl2keep, [x2]
+ prfm pstl2strm, [x2]
+ prfm pstl3keep, [x2]
+ prfm pstl3strm, [x2]
+ prfm pstl3strm, [x4, x5, lsl #3]
+
+; CHECK: ldr w5, [x4, #20] ; encoding: [0x85,0x14,0x40,0xb9]
+; CHECK: ldr x4, [x3] ; encoding: [0x64,0x00,0x40,0xf9]
+; CHECK: ldr x2, [sp, #32] ; encoding: [0xe2,0x13,0x40,0xf9]
+; CHECK: ldr b5, [sp, #1] ; encoding: [0xe5,0x07,0x40,0x3d]
+; CHECK: ldr h6, [sp, #2] ; encoding: [0xe6,0x07,0x40,0x7d]
+; CHECK: ldr s7, [sp, #4] ; encoding: [0xe7,0x07,0x40,0xbd]
+; CHECK: ldr d8, [sp, #8] ; encoding: [0xe8,0x07,0x40,0xfd]
+; CHECK: ldr q9, [sp, #16] ; encoding: [0xe9,0x07,0xc0,0x3d]
+; CHECK: ldrb w4, [x3] ; encoding: [0x64,0x00,0x40,0x39]
+; CHECK: ldrb w5, [x4, #20] ; encoding: [0x85,0x50,0x40,0x39]
+; CHECK: ldrb w2, [x3, _foo@PAGEOFF] ; encoding: [0x62,0bAAAAAA00,0b01AAAAAA,0x39]
+; CHECK: ldrb w3, [x2, "+[Test method].var"@PAGEOFF] ; encoding: [0x43,0bAAAAAA00,0b01AAAAAA,0x39]
+; CHECK: ldrsb w9, [x3] ; encoding: [0x69,0x00,0xc0,0x39]
+; CHECK: ldrsb x2, [sp, #128] ; encoding: [0xe2,0x03,0x82,0x39]
+; CHECK: ldrh w2, [sp, #32] ; encoding: [0xe2,0x43,0x40,0x79]
+; CHECK: ldrsh w3, [sp, #32] ; encoding: [0xe3,0x43,0xc0,0x79]
+; CHECK: ldrsh x5, [x9, #24] ; encoding: [0x25,0x31,0x80,0x79]
+; CHECK: ldrsw x9, [sp, #512] ; encoding: [0xe9,0x03,0x82,0xb9]
+; CHECK: prfm pldl3strm, [sp, #32] ; encoding: [0xe5,0x13,0x80,0xf9]
+; CHECK: prfm #31, [sp, #32] ; encoding: [0xff,0x13,0x80,0xf9]
+; CHECK: prfm pldl1keep, [x2] ; encoding: [0x40,0x00,0x80,0xf9]
+; CHECK: prfm pldl1strm, [x2] ; encoding: [0x41,0x00,0x80,0xf9]
+; CHECK: prfm pldl2keep, [x2] ; encoding: [0x42,0x00,0x80,0xf9]
+; CHECK: prfm pldl2strm, [x2] ; encoding: [0x43,0x00,0x80,0xf9]
+; CHECK: prfm pldl3keep, [x2] ; encoding: [0x44,0x00,0x80,0xf9]
+; CHECK: prfm pldl3strm, [x2] ; encoding: [0x45,0x00,0x80,0xf9]
+; CHECK: prfm pstl1keep, [x2] ; encoding: [0x50,0x00,0x80,0xf9]
+; CHECK: prfm pstl1strm, [x2] ; encoding: [0x51,0x00,0x80,0xf9]
+; CHECK: prfm pstl2keep, [x2] ; encoding: [0x52,0x00,0x80,0xf9]
+; CHECK: prfm pstl2strm, [x2] ; encoding: [0x53,0x00,0x80,0xf9]
+; CHECK: prfm pstl3keep, [x2] ; encoding: [0x54,0x00,0x80,0xf9]
+; CHECK: prfm pstl3strm, [x2] ; encoding: [0x55,0x00,0x80,0xf9]
+; CHECK: prfm pstl3strm, [x4, x5, lsl #3] ; encoding: [0x95,0x78,0xa5,0xf8]
+
+;-----------------------------------------------------------------------------
+; Indexed stores
+;-----------------------------------------------------------------------------
+
+ str x4, [x3]
+ str x2, [sp, #32]
+ str w5, [x4, #20]
+ str b5, [sp, #1]
+ str h6, [sp, #2]
+ str s7, [sp, #4]
+ str d8, [sp, #8]
+ str q9, [sp, #16]
+ strb w4, [x3]
+ strb w5, [x4, #20]
+ strh w2, [sp, #32]
+
+; CHECK: str x4, [x3] ; encoding: [0x64,0x00,0x00,0xf9]
+; CHECK: str x2, [sp, #32] ; encoding: [0xe2,0x13,0x00,0xf9]
+; CHECK: str w5, [x4, #20] ; encoding: [0x85,0x14,0x00,0xb9]
+; CHECK: str b5, [sp, #1] ; encoding: [0xe5,0x07,0x00,0x3d]
+; CHECK: str h6, [sp, #2] ; encoding: [0xe6,0x07,0x00,0x7d]
+; CHECK: str s7, [sp, #4] ; encoding: [0xe7,0x07,0x00,0xbd]
+; CHECK: str d8, [sp, #8] ; encoding: [0xe8,0x07,0x00,0xfd]
+; CHECK: str q9, [sp, #16] ; encoding: [0xe9,0x07,0x80,0x3d]
+; CHECK: strb w4, [x3] ; encoding: [0x64,0x00,0x00,0x39]
+; CHECK: strb w5, [x4, #20] ; encoding: [0x85,0x50,0x00,0x39]
+; CHECK: strh w2, [sp, #32] ; encoding: [0xe2,0x43,0x00,0x79]
+
+;-----------------------------------------------------------------------------
+; Unscaled immediate loads and stores
+;-----------------------------------------------------------------------------
+
+ ldur w2, [x3]
+ ldur w2, [sp, #24]
+ ldur x2, [x3]
+ ldur x2, [sp, #24]
+ ldur b5, [sp, #1]
+ ldur h6, [sp, #2]
+ ldur s7, [sp, #4]
+ ldur d8, [sp, #8]
+ ldur q9, [sp, #16]
+ ldursb w9, [x3]
+ ldursb x2, [sp, #128]
+ ldursh w3, [sp, #32]
+ ldursh x5, [x9, #24]
+ ldursw x9, [sp, #-128]
+
+; CHECK: ldur w2, [x3] ; encoding: [0x62,0x00,0x40,0xb8]
+; CHECK: ldur w2, [sp, #24] ; encoding: [0xe2,0x83,0x41,0xb8]
+; CHECK: ldur x2, [x3] ; encoding: [0x62,0x00,0x40,0xf8]
+; CHECK: ldur x2, [sp, #24] ; encoding: [0xe2,0x83,0x41,0xf8]
+; CHECK: ldur b5, [sp, #1] ; encoding: [0xe5,0x13,0x40,0x3c]
+; CHECK: ldur h6, [sp, #2] ; encoding: [0xe6,0x23,0x40,0x7c]
+; CHECK: ldur s7, [sp, #4] ; encoding: [0xe7,0x43,0x40,0xbc]
+; CHECK: ldur d8, [sp, #8] ; encoding: [0xe8,0x83,0x40,0xfc]
+; CHECK: ldur q9, [sp, #16] ; encoding: [0xe9,0x03,0xc1,0x3c]
+; CHECK: ldursb w9, [x3] ; encoding: [0x69,0x00,0xc0,0x38]
+; CHECK: ldursb x2, [sp, #128] ; encoding: [0xe2,0x03,0x88,0x38]
+; CHECK: ldursh w3, [sp, #32] ; encoding: [0xe3,0x03,0xc2,0x78]
+; CHECK: ldursh x5, [x9, #24] ; encoding: [0x25,0x81,0x81,0x78]
+; CHECK: ldursw x9, [sp, #-128] ; encoding: [0xe9,0x03,0x98,0xb8]
+
+ stur w4, [x3]
+ stur w2, [sp, #32]
+ stur x4, [x3]
+ stur x2, [sp, #32]
+ stur w5, [x4, #20]
+ stur b5, [sp, #1]
+ stur h6, [sp, #2]
+ stur s7, [sp, #4]
+ stur d8, [sp, #8]
+ stur q9, [sp, #16]
+ sturb w4, [x3]
+ sturb w5, [x4, #20]
+ sturh w2, [sp, #32]
+ prfum #5, [sp, #32]
+
+; CHECK: stur w4, [x3] ; encoding: [0x64,0x00,0x00,0xb8]
+; CHECK: stur w2, [sp, #32] ; encoding: [0xe2,0x03,0x02,0xb8]
+; CHECK: stur x4, [x3] ; encoding: [0x64,0x00,0x00,0xf8]
+; CHECK: stur x2, [sp, #32] ; encoding: [0xe2,0x03,0x02,0xf8]
+; CHECK: stur w5, [x4, #20] ; encoding: [0x85,0x40,0x01,0xb8]
+; CHECK: stur b5, [sp, #1] ; encoding: [0xe5,0x13,0x00,0x3c]
+; CHECK: stur h6, [sp, #2] ; encoding: [0xe6,0x23,0x00,0x7c]
+; CHECK: stur s7, [sp, #4] ; encoding: [0xe7,0x43,0x00,0xbc]
+; CHECK: stur d8, [sp, #8] ; encoding: [0xe8,0x83,0x00,0xfc]
+; CHECK: stur q9, [sp, #16] ; encoding: [0xe9,0x03,0x81,0x3c]
+; CHECK: sturb w4, [x3] ; encoding: [0x64,0x00,0x00,0x38]
+; CHECK: sturb w5, [x4, #20] ; encoding: [0x85,0x40,0x01,0x38]
+; CHECK: sturh w2, [sp, #32] ; encoding: [0xe2,0x03,0x02,0x78]
+; CHECK: prfum pldl3strm, [sp, #32] ; encoding: [0xe5,0x03,0x82,0xf8]
+
+;-----------------------------------------------------------------------------
+; Unprivileged loads and stores
+;-----------------------------------------------------------------------------
+
+ ldtr w3, [x4, #16]
+ ldtr x3, [x4, #16]
+ ldtrb w3, [x4, #16]
+ ldtrsb w9, [x3]
+ ldtrsb x2, [sp, #128]
+ ldtrh w3, [x4, #16]
+ ldtrsh w3, [sp, #32]
+ ldtrsh x5, [x9, #24]
+ ldtrsw x9, [sp, #-128]
+
+; CHECK: ldtr w3, [x4, #16] ; encoding: [0x83,0x08,0x41,0xb8]
+; CHECK: ldtr x3, [x4, #16] ; encoding: [0x83,0x08,0x41,0xf8]
+; CHECK: ldtrb w3, [x4, #16] ; encoding: [0x83,0x08,0x41,0x38]
+; CHECK: ldtrsb w9, [x3] ; encoding: [0x69,0x08,0xc0,0x38]
+; CHECK: ldtrsb x2, [sp, #128] ; encoding: [0xe2,0x0b,0x88,0x38]
+; CHECK: ldtrh w3, [x4, #16] ; encoding: [0x83,0x08,0x41,0x78]
+; CHECK: ldtrsh w3, [sp, #32] ; encoding: [0xe3,0x0b,0xc2,0x78]
+; CHECK: ldtrsh x5, [x9, #24] ; encoding: [0x25,0x89,0x81,0x78]
+; CHECK: ldtrsw x9, [sp, #-128] ; encoding: [0xe9,0x0b,0x98,0xb8]
+
+ sttr w5, [x4, #20]
+ sttr x4, [x3]
+ sttr x2, [sp, #32]
+ sttrb w4, [x3]
+ sttrb w5, [x4, #20]
+ sttrh w2, [sp, #32]
+
+; CHECK: sttr w5, [x4, #20] ; encoding: [0x85,0x48,0x01,0xb8]
+; CHECK: sttr x4, [x3] ; encoding: [0x64,0x08,0x00,0xf8]
+; CHECK: sttr x2, [sp, #32] ; encoding: [0xe2,0x0b,0x02,0xf8]
+; CHECK: sttrb w4, [x3] ; encoding: [0x64,0x08,0x00,0x38]
+; CHECK: sttrb w5, [x4, #20] ; encoding: [0x85,0x48,0x01,0x38]
+; CHECK: sttrh w2, [sp, #32] ; encoding: [0xe2,0x0b,0x02,0x78]
+
+;-----------------------------------------------------------------------------
+; Pre-indexed loads and stores
+;-----------------------------------------------------------------------------
+
+ ldr x29, [x7, #8]!
+ ldr x30, [x7, #8]!
+ ldr b5, [x0, #1]!
+ ldr h6, [x0, #2]!
+ ldr s7, [x0, #4]!
+ ldr d8, [x0, #8]!
+ ldr q9, [x0, #16]!
+
+ str x30, [x7, #-8]!
+ str x29, [x7, #-8]!
+ str b5, [x0, #-1]!
+ str h6, [x0, #-2]!
+ str s7, [x0, #-4]!
+ str d8, [x0, #-8]!
+ str q9, [x0, #-16]!
+
+; CHECK: ldr x29, [x7, #8]! ; encoding: [0xfd,0x8c,0x40,0xf8]
+; CHECK: ldr x30, [x7, #8]! ; encoding: [0xfe,0x8c,0x40,0xf8]
+; CHECK: ldr b5, [x0, #1]! ; encoding: [0x05,0x1c,0x40,0x3c]
+; CHECK: ldr h6, [x0, #2]! ; encoding: [0x06,0x2c,0x40,0x7c]
+; CHECK: ldr s7, [x0, #4]! ; encoding: [0x07,0x4c,0x40,0xbc]
+; CHECK: ldr d8, [x0, #8]! ; encoding: [0x08,0x8c,0x40,0xfc]
+; CHECK: ldr q9, [x0, #16]! ; encoding: [0x09,0x0c,0xc1,0x3c]
+
+; CHECK: str x30, [x7, #-8]! ; encoding: [0xfe,0x8c,0x1f,0xf8]
+; CHECK: str x29, [x7, #-8]! ; encoding: [0xfd,0x8c,0x1f,0xf8]
+; CHECK: str b5, [x0, #-1]! ; encoding: [0x05,0xfc,0x1f,0x3c]
+; CHECK: str h6, [x0, #-2]! ; encoding: [0x06,0xec,0x1f,0x7c]
+; CHECK: str s7, [x0, #-4]! ; encoding: [0x07,0xcc,0x1f,0xbc]
+; CHECK: str d8, [x0, #-8]! ; encoding: [0x08,0x8c,0x1f,0xfc]
+; CHECK: str q9, [x0, #-16]! ; encoding: [0x09,0x0c,0x9f,0x3c]
+
+;-----------------------------------------------------------------------------
+; post-indexed loads and stores
+;-----------------------------------------------------------------------------
+ str x30, [x7], #-8
+ str x29, [x7], #-8
+ str b5, [x0], #-1
+ str h6, [x0], #-2
+ str s7, [x0], #-4
+ str d8, [x0], #-8
+ str q9, [x0], #-16
+
+ ldr x29, [x7], #8
+ ldr x30, [x7], #8
+ ldr b5, [x0], #1
+ ldr h6, [x0], #2
+ ldr s7, [x0], #4
+ ldr d8, [x0], #8
+ ldr q9, [x0], #16
+
+; CHECK: str x30, [x7], #-8 ; encoding: [0xfe,0x84,0x1f,0xf8]
+; CHECK: str x29, [x7], #-8 ; encoding: [0xfd,0x84,0x1f,0xf8]
+; CHECK: str b5, [x0], #-1 ; encoding: [0x05,0xf4,0x1f,0x3c]
+; CHECK: str h6, [x0], #-2 ; encoding: [0x06,0xe4,0x1f,0x7c]
+; CHECK: str s7, [x0], #-4 ; encoding: [0x07,0xc4,0x1f,0xbc]
+; CHECK: str d8, [x0], #-8 ; encoding: [0x08,0x84,0x1f,0xfc]
+; CHECK: str q9, [x0], #-16 ; encoding: [0x09,0x04,0x9f,0x3c]
+
+; CHECK: ldr x29, [x7], #8 ; encoding: [0xfd,0x84,0x40,0xf8]
+; CHECK: ldr x30, [x7], #8 ; encoding: [0xfe,0x84,0x40,0xf8]
+; CHECK: ldr b5, [x0], #1 ; encoding: [0x05,0x14,0x40,0x3c]
+; CHECK: ldr h6, [x0], #2 ; encoding: [0x06,0x24,0x40,0x7c]
+; CHECK: ldr s7, [x0], #4 ; encoding: [0x07,0x44,0x40,0xbc]
+; CHECK: ldr d8, [x0], #8 ; encoding: [0x08,0x84,0x40,0xfc]
+; CHECK: ldr q9, [x0], #16 ; encoding: [0x09,0x04,0xc1,0x3c]
+
+;-----------------------------------------------------------------------------
+; Load/Store pair (indexed, offset)
+;-----------------------------------------------------------------------------
+
+ ldp w3, w2, [x15, #16]
+ ldp x4, x9, [sp, #-16]
+ ldpsw x2, x3, [x14, #16]
+ ldpsw x2, x3, [sp, #-16]
+ ldp s10, s1, [x2, #64]
+ ldp d10, d1, [x2]
+ ldp q2, q3, [x0, #32]
+
+; CHECK: ldp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x42,0x29]
+; CHECK: ldp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x7f,0xa9]
+; CHECK: ldpsw x2, x3, [x14, #16] ; encoding: [0xc2,0x0d,0x42,0x69]
+; CHECK: ldpsw x2, x3, [sp, #-16] ; encoding: [0xe2,0x0f,0x7e,0x69]
+; CHECK: ldp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x48,0x2d]
+; CHECK: ldp d10, d1, [x2] ; encoding: [0x4a,0x04,0x40,0x6d]
+; CHECK: ldp q2, q3, [x0, #32] ; encoding: [0x02,0x0c,0x41,0xad]
+
+ stp w3, w2, [x15, #16]
+ stp x4, x9, [sp, #-16]
+ stp s10, s1, [x2, #64]
+ stp d10, d1, [x2]
+ stp q2, q3, [x0, #32]
+
+; CHECK: stp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x02,0x29]
+; CHECK: stp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x3f,0xa9]
+; CHECK: stp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x08,0x2d]
+; CHECK: stp d10, d1, [x2] ; encoding: [0x4a,0x04,0x00,0x6d]
+; CHECK: stp q2, q3, [x0, #32] ; encoding: [0x02,0x0c,0x01,0xad]
+
+;-----------------------------------------------------------------------------
+; Load/Store pair (pre-indexed)
+;-----------------------------------------------------------------------------
+
+ ldp w3, w2, [x15, #16]!
+ ldp x4, x9, [sp, #-16]!
+ ldpsw x2, x3, [x14, #16]!
+ ldpsw x2, x3, [sp, #-16]!
+ ldp s10, s1, [x2, #64]!
+ ldp d10, d1, [x2, #16]!
+
+; CHECK: ldp w3, w2, [x15, #16]! ; encoding: [0xe3,0x09,0xc2,0x29]
+; CHECK: ldp x4, x9, [sp, #-16]! ; encoding: [0xe4,0x27,0xff,0xa9]
+; CHECK: ldpsw x2, x3, [x14, #16]! ; encoding: [0xc2,0x0d,0xc2,0x69]
+; CHECK: ldpsw x2, x3, [sp, #-16]! ; encoding: [0xe2,0x0f,0xfe,0x69]
+; CHECK: ldp s10, s1, [x2, #64]! ; encoding: [0x4a,0x04,0xc8,0x2d]
+; CHECK: ldp d10, d1, [x2, #16]! ; encoding: [0x4a,0x04,0xc1,0x6d]
+
+ stp w3, w2, [x15, #16]!
+ stp x4, x9, [sp, #-16]!
+ stp s10, s1, [x2, #64]!
+ stp d10, d1, [x2, #16]!
+
+; CHECK: stp w3, w2, [x15, #16]! ; encoding: [0xe3,0x09,0x82,0x29]
+; CHECK: stp x4, x9, [sp, #-16]! ; encoding: [0xe4,0x27,0xbf,0xa9]
+; CHECK: stp s10, s1, [x2, #64]! ; encoding: [0x4a,0x04,0x88,0x2d]
+; CHECK: stp d10, d1, [x2, #16]! ; encoding: [0x4a,0x04,0x81,0x6d]
+
+;-----------------------------------------------------------------------------
+; Load/Store pair (post-indexed)
+;-----------------------------------------------------------------------------
+
+ ldp w3, w2, [x15], #16
+ ldp x4, x9, [sp], #-16
+ ldpsw x2, x3, [x14], #16
+ ldpsw x2, x3, [sp], #-16
+ ldp s10, s1, [x2], #64
+ ldp d10, d1, [x2], #16
+
+; CHECK: ldp w3, w2, [x15], #16 ; encoding: [0xe3,0x09,0xc2,0x28]
+; CHECK: ldp x4, x9, [sp], #-16 ; encoding: [0xe4,0x27,0xff,0xa8]
+; CHECK: ldpsw x2, x3, [x14], #16 ; encoding: [0xc2,0x0d,0xc2,0x68]
+; CHECK: ldpsw x2, x3, [sp], #-16 ; encoding: [0xe2,0x0f,0xfe,0x68]
+; CHECK: ldp s10, s1, [x2], #64 ; encoding: [0x4a,0x04,0xc8,0x2c]
+; CHECK: ldp d10, d1, [x2], #16 ; encoding: [0x4a,0x04,0xc1,0x6c]
+
+ stp w3, w2, [x15], #16
+ stp x4, x9, [sp], #-16
+ stp s10, s1, [x2], #64
+ stp d10, d1, [x2], #16
+
+; CHECK: stp w3, w2, [x15], #16 ; encoding: [0xe3,0x09,0x82,0x28]
+; CHECK: stp x4, x9, [sp], #-16 ; encoding: [0xe4,0x27,0xbf,0xa8]
+; CHECK: stp s10, s1, [x2], #64 ; encoding: [0x4a,0x04,0x88,0x2c]
+; CHECK: stp d10, d1, [x2], #16 ; encoding: [0x4a,0x04,0x81,0x6c]
+
+;-----------------------------------------------------------------------------
+; Load/Store pair (no-allocate)
+;-----------------------------------------------------------------------------
+
+ ldnp w3, w2, [x15, #16]
+ ldnp x4, x9, [sp, #-16]
+ ldnp s10, s1, [x2, #64]
+ ldnp d10, d1, [x2]
+
+; CHECK: ldnp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x42,0x28]
+; CHECK: ldnp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x7f,0xa8]
+; CHECK: ldnp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x48,0x2c]
+; CHECK: ldnp d10, d1, [x2] ; encoding: [0x4a,0x04,0x40,0x6c]
+
+ stnp w3, w2, [x15, #16]
+ stnp x4, x9, [sp, #-16]
+ stnp s10, s1, [x2, #64]
+ stnp d10, d1, [x2]
+
+; CHECK: stnp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x02,0x28]
+; CHECK: stnp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x3f,0xa8]
+; CHECK: stnp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x08,0x2c]
+; CHECK: stnp d10, d1, [x2] ; encoding: [0x4a,0x04,0x00,0x6c]
+
+;-----------------------------------------------------------------------------
+; Load/Store register offset
+;-----------------------------------------------------------------------------
+
+ ldr w0, [x0, x0]
+ ldr w0, [x0, x0, lsl #2]
+ ldr x0, [x0, x0]
+ ldr x0, [x0, x0, lsl #3]
+ ldr x0, [x0, x0, sxtx]
+
+; CHECK: ldr w0, [x0, x0] ; encoding: [0x00,0x68,0x60,0xb8]
+; CHECK: ldr w0, [x0, x0, lsl #2] ; encoding: [0x00,0x78,0x60,0xb8]
+; CHECK: ldr x0, [x0, x0] ; encoding: [0x00,0x68,0x60,0xf8]
+; CHECK: ldr x0, [x0, x0, lsl #3] ; encoding: [0x00,0x78,0x60,0xf8]
+; CHECK: ldr x0, [x0, x0, sxtx] ; encoding: [0x00,0xe8,0x60,0xf8]
+
+ ldr b1, [x1, x2]
+ ldr b1, [x1, x2, lsl #0]
+ ldr h1, [x1, x2]
+ ldr h1, [x1, x2, lsl #1]
+ ldr s1, [x1, x2]
+ ldr s1, [x1, x2, lsl #2]
+ ldr d1, [x1, x2]
+ ldr d1, [x1, x2, lsl #3]
+ ldr q1, [x1, x2]
+ ldr q1, [x1, x2, lsl #4]
+
+; CHECK: ldr b1, [x1, x2] ; encoding: [0x21,0x68,0x62,0x3c]
+; CHECK: ldr b1, [x1, x2, lsl #0] ; encoding: [0x21,0x78,0x62,0x3c]
+; CHECK: ldr h1, [x1, x2] ; encoding: [0x21,0x68,0x62,0x7c]
+; CHECK: ldr h1, [x1, x2, lsl #1] ; encoding: [0x21,0x78,0x62,0x7c]
+; CHECK: ldr s1, [x1, x2] ; encoding: [0x21,0x68,0x62,0xbc]
+; CHECK: ldr s1, [x1, x2, lsl #2] ; encoding: [0x21,0x78,0x62,0xbc]
+; CHECK: ldr d1, [x1, x2] ; encoding: [0x21,0x68,0x62,0xfc]
+; CHECK: ldr d1, [x1, x2, lsl #3] ; encoding: [0x21,0x78,0x62,0xfc]
+; CHECK: ldr q1, [x1, x2] ; encoding: [0x21,0x68,0xe2,0x3c]
+; CHECK: ldr q1, [x1, x2, lsl #4] ; encoding: [0x21,0x78,0xe2,0x3c]
+
+ str d1, [sp, x3]
+ str d1, [sp, w3, uxtw #3]
+ str q1, [sp, x3]
+ str q1, [sp, w3, uxtw #4]
+
+; CHECK: str d1, [sp, x3] ; encoding: [0xe1,0x6b,0x23,0xfc]
+; CHECK: str d1, [sp, w3, uxtw #3] ; encoding: [0xe1,0x5b,0x23,0xfc]
+; CHECK: str q1, [sp, x3] ; encoding: [0xe1,0x6b,0xa3,0x3c]
+; CHECK: str q1, [sp, w3, uxtw #4] ; encoding: [0xe1,0x5b,0xa3,0x3c]
+
+;-----------------------------------------------------------------------------
+; Load literal
+;-----------------------------------------------------------------------------
+
+ ldr w5, foo
+ ldr x4, foo
+ ldrsw x9, foo
+ prfm #5, foo
+
+; CHECK: ldr w5, foo ; encoding: [0bAAA00101,A,A,0x18]
+; CHECK: ldr x4, foo ; encoding: [0bAAA00100,A,A,0x58]
+; CHECK: ldrsw x9, foo ; encoding: [0bAAA01001,A,A,0x98]
+; CHECK: prfm pldl3strm, foo ; encoding: [0bAAA00101,A,A,0xd8]
+
+;-----------------------------------------------------------------------------
+; Load/Store exclusive
+;-----------------------------------------------------------------------------
+
+ ldxr w6, [x1]
+ ldxr x6, [x1]
+ ldxrb w6, [x1]
+ ldxrh w6, [x1]
+ ldxp w7, w3, [x9]
+ ldxp x7, x3, [x9]
+
+; CHECK: ldxrb w6, [x1] ; encoding: [0x26,0x7c,0x5f,0x08]
+; CHECK: ldxrh w6, [x1] ; encoding: [0x26,0x7c,0x5f,0x48]
+; CHECK: ldxp w7, w3, [x9] ; encoding: [0x27,0x0d,0x7f,0x88]
+; CHECK: ldxp x7, x3, [x9] ; encoding: [0x27,0x0d,0x7f,0xc8]
+
+ stxr w1, x4, [x3]
+ stxr w1, w4, [x3]
+ stxrb w1, w4, [x3]
+ stxrh w1, w4, [x3]
+ stxp w1, x2, x6, [x1]
+ stxp w1, w2, w6, [x1]
+
+; CHECK: stxr w1, x4, [x3] ; encoding: [0x64,0x7c,0x01,0xc8]
+; CHECK: stxr w1, w4, [x3] ; encoding: [0x64,0x7c,0x01,0x88]
+; CHECK: stxrb w1, w4, [x3] ; encoding: [0x64,0x7c,0x01,0x08]
+; CHECK: stxrh w1, w4, [x3] ; encoding: [0x64,0x7c,0x01,0x48]
+; CHECK: stxp w1, x2, x6, [x1] ; encoding: [0x22,0x18,0x21,0xc8]
+; CHECK: stxp w1, w2, w6, [x1] ; encoding: [0x22,0x18,0x21,0x88]
+
+;-----------------------------------------------------------------------------
+; Load-acquire/Store-release non-exclusive
+;-----------------------------------------------------------------------------
+
+ ldar w4, [sp]
+ ldar x4, [sp, #0]
+ ldarb w4, [sp]
+ ldarh w4, [sp]
+
+; CHECK: ldar w4, [sp] ; encoding: [0xe4,0xff,0xdf,0x88]
+; CHECK: ldar x4, [sp] ; encoding: [0xe4,0xff,0xdf,0xc8]
+; CHECK: ldarb w4, [sp] ; encoding: [0xe4,0xff,0xdf,0x08]
+; CHECK: ldarh w4, [sp] ; encoding: [0xe4,0xff,0xdf,0x48]
+
+ stlr w3, [x6]
+ stlr x3, [x6]
+ stlrb w3, [x6]
+ stlrh w3, [x6]
+
+; CHECK: stlr w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x88]
+; CHECK: stlr x3, [x6] ; encoding: [0xc3,0xfc,0x9f,0xc8]
+; CHECK: stlrb w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x08]
+; CHECK: stlrh w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x48]
+
+;-----------------------------------------------------------------------------
+; Load-acquire/Store-release exclusive
+;-----------------------------------------------------------------------------
+
+ ldaxr w2, [x4]
+ ldaxr x2, [x4]
+ ldaxrb w2, [x4, #0]
+ ldaxrh w2, [x4]
+ ldaxp w2, w6, [x1]
+ ldaxp x2, x6, [x1]
+
+; CHECK: ldaxr w2, [x4] ; encoding: [0x82,0xfc,0x5f,0x88]
+; CHECK: ldaxr x2, [x4] ; encoding: [0x82,0xfc,0x5f,0xc8]
+; CHECK: ldaxrb w2, [x4] ; encoding: [0x82,0xfc,0x5f,0x08]
+; CHECK: ldaxrh w2, [x4] ; encoding: [0x82,0xfc,0x5f,0x48]
+; CHECK: ldaxp w2, w6, [x1] ; encoding: [0x22,0x98,0x7f,0x88]
+; CHECK: ldaxp x2, x6, [x1] ; encoding: [0x22,0x98,0x7f,0xc8]
+
+ stlxr w8, x7, [x1]
+ stlxr w8, w7, [x1]
+ stlxrb w8, w7, [x1]
+ stlxrh w8, w7, [x1]
+ stlxp w1, x2, x6, [x1]
+ stlxp w1, w2, w6, [x1]
+
+; CHECK: stlxr w8, x7, [x1] ; encoding: [0x27,0xfc,0x08,0xc8]
+; CHECK: stlxr w8, w7, [x1] ; encoding: [0x27,0xfc,0x08,0x88]
+; CHECK: stlxrb w8, w7, [x1] ; encoding: [0x27,0xfc,0x08,0x08]
+; CHECK: stlxrh w8, w7, [x1] ; encoding: [0x27,0xfc,0x08,0x48]
+; CHECK: stlxp w1, x2, x6, [x1] ; encoding: [0x22,0x98,0x21,0xc8]
+; CHECK: stlxp w1, w2, w6, [x1] ; encoding: [0x22,0x98,0x21,0x88]
+
+
+;-----------------------------------------------------------------------------
+; LDUR/STUR aliases for negative and unaligned LDR/STR instructions.
+;
+; According to the ARM ISA documentation:
+; "A programmer-friendly assembler should also generate these instructions
+; in response to the standard LDR/STR mnemonics when the immediate offset is
+; unambiguous, i.e. negative or unaligned."
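+;
+; For example, "ldr x11, [x29, #-8]" below cannot use the scaled
+; unsigned-offset LDR form (which only accepts non-negative multiples of the
+; transfer size), so it should be emitted as the equivalent
+; "ldur x11, [x29, #-8]".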
+;-----------------------------------------------------------------------------
+
+ ldr x11, [x29, #-8]
+ ldr x11, [x29, #7]
+ ldr w0, [x0, #2]
+ ldr w0, [x0, #-256]
+ ldr b2, [x1, #-2]
+ ldr h3, [x2, #3]
+ ldr h3, [x3, #-4]
+ ldr s3, [x4, #3]
+ ldr s3, [x5, #-4]
+ ldr d4, [x6, #4]
+ ldr d4, [x7, #-8]
+ ldr q5, [x8, #8]
+ ldr q5, [x9, #-16]
+
+; CHECK: ldur x11, [x29, #-8] ; encoding: [0xab,0x83,0x5f,0xf8]
+; CHECK: ldur x11, [x29, #7] ; encoding: [0xab,0x73,0x40,0xf8]
+; CHECK: ldur w0, [x0, #2] ; encoding: [0x00,0x20,0x40,0xb8]
+; CHECK: ldur w0, [x0, #-256] ; encoding: [0x00,0x00,0x50,0xb8]
+; CHECK: ldur b2, [x1, #-2] ; encoding: [0x22,0xe0,0x5f,0x3c]
+; CHECK: ldur h3, [x2, #3] ; encoding: [0x43,0x30,0x40,0x7c]
+; CHECK: ldur h3, [x3, #-4] ; encoding: [0x63,0xc0,0x5f,0x7c]
+; CHECK: ldur s3, [x4, #3] ; encoding: [0x83,0x30,0x40,0xbc]
+; CHECK: ldur s3, [x5, #-4] ; encoding: [0xa3,0xc0,0x5f,0xbc]
+; CHECK: ldur d4, [x6, #4] ; encoding: [0xc4,0x40,0x40,0xfc]
+; CHECK: ldur d4, [x7, #-8] ; encoding: [0xe4,0x80,0x5f,0xfc]
+; CHECK: ldur q5, [x8, #8] ; encoding: [0x05,0x81,0xc0,0x3c]
+; CHECK: ldur q5, [x9, #-16] ; encoding: [0x25,0x01,0xdf,0x3c]
+
+ str x11, [x29, #-8]
+ str x11, [x29, #7]
+ str w0, [x0, #2]
+ str w0, [x0, #-256]
+ str b2, [x1, #-2]
+ str h3, [x2, #3]
+ str h3, [x3, #-4]
+ str s3, [x4, #3]
+ str s3, [x5, #-4]
+ str d4, [x6, #4]
+ str d4, [x7, #-8]
+ str q5, [x8, #8]
+ str q5, [x9, #-16]
+
+; CHECK: stur x11, [x29, #-8] ; encoding: [0xab,0x83,0x1f,0xf8]
+; CHECK: stur x11, [x29, #7] ; encoding: [0xab,0x73,0x00,0xf8]
+; CHECK: stur w0, [x0, #2] ; encoding: [0x00,0x20,0x00,0xb8]
+; CHECK: stur w0, [x0, #-256] ; encoding: [0x00,0x00,0x10,0xb8]
+; CHECK: stur b2, [x1, #-2] ; encoding: [0x22,0xe0,0x1f,0x3c]
+; CHECK: stur h3, [x2, #3] ; encoding: [0x43,0x30,0x00,0x7c]
+; CHECK: stur h3, [x3, #-4] ; encoding: [0x63,0xc0,0x1f,0x7c]
+; CHECK: stur s3, [x4, #3] ; encoding: [0x83,0x30,0x00,0xbc]
+; CHECK: stur s3, [x5, #-4] ; encoding: [0xa3,0xc0,0x1f,0xbc]
+; CHECK: stur d4, [x6, #4] ; encoding: [0xc4,0x40,0x00,0xfc]
+; CHECK: stur d4, [x7, #-8] ; encoding: [0xe4,0x80,0x1f,0xfc]
+; CHECK: stur q5, [x8, #8] ; encoding: [0x05,0x81,0x80,0x3c]
+; CHECK: stur q5, [x9, #-16] ; encoding: [0x25,0x01,0x9f,0x3c]
+
+ ldrb w3, [x1, #-1]
+ ldrh w4, [x2, #1]
+ ldrh w5, [x3, #-1]
+ ldrsb w6, [x4, #-1]
+ ldrsb x7, [x5, #-1]
+ ldrsh w8, [x6, #1]
+ ldrsh w9, [x7, #-1]
+ ldrsh x1, [x8, #1]
+ ldrsh x2, [x9, #-1]
+ ldrsw x3, [x10, #10]
+ ldrsw x4, [x11, #-1]
+
+; CHECK: ldurb w3, [x1, #-1] ; encoding: [0x23,0xf0,0x5f,0x38]
+; CHECK: ldurh w4, [x2, #1] ; encoding: [0x44,0x10,0x40,0x78]
+; CHECK: ldurh w5, [x3, #-1] ; encoding: [0x65,0xf0,0x5f,0x78]
+; CHECK: ldursb w6, [x4, #-1] ; encoding: [0x86,0xf0,0xdf,0x38]
+; CHECK: ldursb x7, [x5, #-1] ; encoding: [0xa7,0xf0,0x9f,0x38]
+; CHECK: ldursh w8, [x6, #1] ; encoding: [0xc8,0x10,0xc0,0x78]
+; CHECK: ldursh w9, [x7, #-1] ; encoding: [0xe9,0xf0,0xdf,0x78]
+; CHECK: ldursh x1, [x8, #1] ; encoding: [0x01,0x11,0x80,0x78]
+; CHECK: ldursh x2, [x9, #-1] ; encoding: [0x22,0xf1,0x9f,0x78]
+; CHECK: ldursw x3, [x10, #10] ; encoding: [0x43,0xa1,0x80,0xb8]
+; CHECK: ldursw x4, [x11, #-1] ; encoding: [0x64,0xf1,0x9f,0xb8]
+
+ strb w3, [x1, #-1]
+ strh w4, [x2, #1]
+ strh w5, [x3, #-1]
+
+; CHECK: sturb w3, [x1, #-1] ; encoding: [0x23,0xf0,0x1f,0x38]
+; CHECK: sturh w4, [x2, #1] ; encoding: [0x44,0x10,0x00,0x78]
+; CHECK: sturh w5, [x3, #-1] ; encoding: [0x65,0xf0,0x1f,0x78]
diff --git a/test/MC/AArch64/arm64-nv-cond.s b/test/MC/AArch64/arm64-nv-cond.s
new file mode 100644
index 0000000..1b4d054
--- /dev/null
+++ b/test/MC/AArch64/arm64-nv-cond.s
@@ -0,0 +1,11 @@
+// RUN: llvm-mc < %s -triple arm64 -mattr=neon -show-encoding | FileCheck %s
+
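+// "nv" is condition code 0b1111; in AArch64 it behaves as "always" (like
+// "al") rather than "never", but it remains a valid condition name, so all
+// of these should assemble.
+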
+fcsel d28,d31,d31,nv
+csel x0,x0,x0,nv
+ccmp x0,x0,#0,nv
+b.nv #0
+
+// CHECK: fcsel d28, d31, d31, nv // encoding: [0xfc,0xff,0x7f,0x1e]
+// CHECK: csel x0, x0, x0, nv // encoding: [0x00,0xf0,0x80,0x9a]
+// CHECK: ccmp x0, x0, #0, nv // encoding: [0x00,0xf0,0x40,0xfa]
+// CHECK: b.nv #0 // encoding: [0x0f,0x00,0x00,0x54]
diff --git a/test/MC/AArch64/arm64-optional-hash.s b/test/MC/AArch64/arm64-optional-hash.s
new file mode 100644
index 0000000..71e2fda
--- /dev/null
+++ b/test/MC/AArch64/arm64-optional-hash.s
@@ -0,0 +1,31 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
+.text
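+;
+; The leading '#' on immediate operands is optional; each instruction below
+; omits it, and the CHECK lines verify the canonical form (with '#') that the
+; assembler should print.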
+; parseOperand check
+; CHECK: add sp, sp, #32 ; encoding: [0xff,0x83,0x00,0x91]
+ add sp, sp, 32
+
+; Optional shift
+; CHECK: adds x3, x4, #1024, lsl #12 ; encoding: [0x83,0x00,0x50,0xb1]
+adds x3, x4, 1024, lsl 12
+
+; Optional extend
+; CHECK: add sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0x8b]
+add sp, x2, x3, uxtx 0
+
+; FP immediates
+; CHECK: fmov s1, #0.12500000 ; encoding: [0x01,0x10,0x28,0x1e]
+fmov s1, 0.125
+
+; Barrier operand
+; CHECK: dmb osh ; encoding: [0xbf,0x33,0x03,0xd5]
+dmb 3
+
+; Prefetch and memory
+
+; Single register inside []
+; CHECK: ldnp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x42,0x28]
+ldnp w3, w2, [x15, 16]
+
+; Memory, two registers inside []
+; CHECK: prfm pstl3strm, [x4, x5, lsl #3] ; encoding: [0x95,0x78,0xa5,0xf8]
+prfm pstl3strm, [x4, x5, lsl 3]
diff --git a/test/MC/AArch64/arm64-separator.s b/test/MC/AArch64/arm64-separator.s
new file mode 100644
index 0000000..e67deba
--- /dev/null
+++ b/test/MC/AArch64/arm64-separator.s
@@ -0,0 +1,20 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
+
+; ARM64 uses a multi-character statement separator, "%%". Check that we lex
+; it properly and recognize the multiple assembly statements on the line.
+
+; To make sure the output assembly correctly handles the instructions, we
+; tell the assembler to show encodings. That puts the two 'mov' instructions
+; on separate lines in the output. We look for the "; encoding" string
+; to verify that. For this test, we don't care what the encoding is, just that
+; there is one for each 'mov' instruction.
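+;
+; For example, "mov x0, x1 %% mov x1, x0" should assemble exactly as if the
+; two 'mov' instructions had been written on separate lines.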
+
+
+_foo:
+; CHECK: foo
+; CHECK: mov x0, x1 ; encoding
+; CHECK: mov x1, x0 ; encoding
+ mov x0, x1 %% mov x1, x0
+ ret lr
+
+
diff --git a/test/MC/AArch64/arm64-simd-ldst.s b/test/MC/AArch64/arm64-simd-ldst.s
new file mode 100644
index 0000000..3085485
--- /dev/null
+++ b/test/MC/AArch64/arm64-simd-ldst.s
@@ -0,0 +1,2404 @@
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+
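+; Note: -output-asm-variant=1 selects the Apple assembly syntax, in which the
+; vector arrangement is a suffix on the mnemonic (e.g. "ld1.8b { v0 }")
+; rather than on each register (e.g. "ld1 { v0.8b }").
+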
+_ld1st1_multiple:
+ ld1.8b {v0}, [x1]
+ ld1.8b {v0, v1}, [x1]
+ ld1.8b {v0, v1, v2}, [x1]
+ ld1.8b {v0, v1, v2, v3}, [x1]
+
+ ld1.8b {v3}, [x1]
+ ld1.8b {v3, v4}, [x2]
+ ld1.8b {v4, v5, v6}, [x3]
+ ld1.8b {v7, v8, v9, v10}, [x4]
+
+ ld1.16b {v0}, [x1]
+ ld1.16b {v0, v1}, [x1]
+ ld1.16b {v0, v1, v2}, [x1]
+ ld1.16b {v0, v1, v2, v3}, [x1]
+
+ ld1.4h {v0}, [x1]
+ ld1.4h {v0, v1}, [x1]
+ ld1.4h {v0, v1, v2}, [x1]
+ ld1.4h {v0, v1, v2, v3}, [x1]
+
+ ld1.8h {v0}, [x1]
+ ld1.8h {v0, v1}, [x1]
+ ld1.8h {v0, v1, v2}, [x1]
+ ld1.8h {v0, v1, v2, v3}, [x1]
+
+ ld1.2s {v0}, [x1]
+ ld1.2s {v0, v1}, [x1]
+ ld1.2s {v0, v1, v2}, [x1]
+ ld1.2s {v0, v1, v2, v3}, [x1]
+
+ ld1.4s {v0}, [x1]
+ ld1.4s {v0, v1}, [x1]
+ ld1.4s {v0, v1, v2}, [x1]
+ ld1.4s {v0, v1, v2, v3}, [x1]
+
+ ld1.1d {v0}, [x1]
+ ld1.1d {v0, v1}, [x1]
+ ld1.1d {v0, v1, v2}, [x1]
+ ld1.1d {v0, v1, v2, v3}, [x1]
+
+ ld1.2d {v0}, [x1]
+ ld1.2d {v0, v1}, [x1]
+ ld1.2d {v0, v1, v2}, [x1]
+ ld1.2d {v0, v1, v2, v3}, [x1]
+
+ st1.8b {v0}, [x1]
+ st1.8b {v0, v1}, [x1]
+ st1.8b {v0, v1, v2}, [x1]
+ st1.8b {v0, v1, v2, v3}, [x1]
+
+ st1.16b {v0}, [x1]
+ st1.16b {v0, v1}, [x1]
+ st1.16b {v0, v1, v2}, [x1]
+ st1.16b {v0, v1, v2, v3}, [x1]
+
+ st1.4h {v0}, [x1]
+ st1.4h {v0, v1}, [x1]
+ st1.4h {v0, v1, v2}, [x1]
+ st1.4h {v0, v1, v2, v3}, [x1]
+
+ st1.8h {v0}, [x1]
+ st1.8h {v0, v1}, [x1]
+ st1.8h {v0, v1, v2}, [x1]
+ st1.8h {v0, v1, v2, v3}, [x1]
+
+ st1.2s {v0}, [x1]
+ st1.2s {v0, v1}, [x1]
+ st1.2s {v0, v1, v2}, [x1]
+ st1.2s {v0, v1, v2, v3}, [x1]
+
+ st1.4s {v0}, [x1]
+ st1.4s {v0, v1}, [x1]
+ st1.4s {v0, v1, v2}, [x1]
+ st1.4s {v0, v1, v2, v3}, [x1]
+
+ st1.1d {v0}, [x1]
+ st1.1d {v0, v1}, [x1]
+ st1.1d {v0, v1, v2}, [x1]
+ st1.1d {v0, v1, v2, v3}, [x1]
+
+ st1.2d {v0}, [x1]
+ st1.2d {v0, v1}, [x1]
+ st1.2d {v0, v1, v2}, [x1]
+ st1.2d {v0, v1, v2, v3}, [x1]
+
+ st1.2d {v5}, [x1]
+ st1.2d {v7, v8}, [x10]
+ st1.2d {v11, v12, v13}, [x1]
+ st1.2d {v28, v29, v30, v31}, [x13]
+
+; CHECK: _ld1st1_multiple:
+; CHECK: ld1.8b { v0 }, [x1] ; encoding: [0x20,0x70,0x40,0x0c]
+; CHECK: ld1.8b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x40,0x0c]
+; CHECK: ld1.8b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x40,0x0c]
+; CHECK: ld1.8b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x40,0x0c]
+
+; CHECK: ld1.8b { v3 }, [x1] ; encoding: [0x23,0x70,0x40,0x0c]
+; CHECK: ld1.8b { v3, v4 }, [x2] ; encoding: [0x43,0xa0,0x40,0x0c]
+; CHECK: ld1.8b { v4, v5, v6 }, [x3] ; encoding: [0x64,0x60,0x40,0x0c]
+; CHECK: ld1.8b { v7, v8, v9, v10 }, [x4] ; encoding: [0x87,0x20,0x40,0x0c]
+
+; CHECK: ld1.16b { v0 }, [x1] ; encoding: [0x20,0x70,0x40,0x4c]
+; CHECK: ld1.16b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x40,0x4c]
+; CHECK: ld1.16b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x40,0x4c]
+; CHECK: ld1.16b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x40,0x4c]
+
+; CHECK: ld1.4h { v0 }, [x1] ; encoding: [0x20,0x74,0x40,0x0c]
+; CHECK: ld1.4h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x40,0x0c]
+; CHECK: ld1.4h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x40,0x0c]
+; CHECK: ld1.4h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x40,0x0c]
+
+; CHECK: ld1.8h { v0 }, [x1] ; encoding: [0x20,0x74,0x40,0x4c]
+; CHECK: ld1.8h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x40,0x4c]
+; CHECK: ld1.8h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x40,0x4c]
+; CHECK: ld1.8h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x40,0x4c]
+
+; CHECK: ld1.2s { v0 }, [x1] ; encoding: [0x20,0x78,0x40,0x0c]
+; CHECK: ld1.2s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x40,0x0c]
+; CHECK: ld1.2s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x40,0x0c]
+; CHECK: ld1.2s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x40,0x0c]
+
+; CHECK: ld1.4s { v0 }, [x1] ; encoding: [0x20,0x78,0x40,0x4c]
+; CHECK: ld1.4s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x40,0x4c]
+; CHECK: ld1.4s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x40,0x4c]
+; CHECK: ld1.4s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x40,0x4c]
+
+; CHECK: ld1.1d { v0 }, [x1] ; encoding: [0x20,0x7c,0x40,0x0c]
+; CHECK: ld1.1d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x40,0x0c]
+; CHECK: ld1.1d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x40,0x0c]
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x40,0x0c]
+
+; CHECK: ld1.2d { v0 }, [x1] ; encoding: [0x20,0x7c,0x40,0x4c]
+; CHECK: ld1.2d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x40,0x4c]
+; CHECK: ld1.2d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x40,0x4c]
+; CHECK: ld1.2d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x40,0x4c]
+
+
+; CHECK: st1.8b { v0 }, [x1] ; encoding: [0x20,0x70,0x00,0x0c]
+; CHECK: st1.8b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x00,0x0c]
+; CHECK: st1.8b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x00,0x0c]
+; CHECK: st1.8b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x00,0x0c]
+
+; CHECK: st1.16b { v0 }, [x1] ; encoding: [0x20,0x70,0x00,0x4c]
+; CHECK: st1.16b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x00,0x4c]
+; CHECK: st1.16b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x00,0x4c]
+; CHECK: st1.16b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x00,0x4c]
+
+; CHECK: st1.4h { v0 }, [x1] ; encoding: [0x20,0x74,0x00,0x0c]
+; CHECK: st1.4h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x00,0x0c]
+; CHECK: st1.4h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x00,0x0c]
+; CHECK: st1.4h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x00,0x0c]
+
+; CHECK: st1.8h { v0 }, [x1] ; encoding: [0x20,0x74,0x00,0x4c]
+; CHECK: st1.8h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x00,0x4c]
+; CHECK: st1.8h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x00,0x4c]
+; CHECK: st1.8h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x00,0x4c]
+
+; CHECK: st1.2s { v0 }, [x1] ; encoding: [0x20,0x78,0x00,0x0c]
+; CHECK: st1.2s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x00,0x0c]
+; CHECK: st1.2s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x00,0x0c]
+; CHECK: st1.2s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x00,0x0c]
+
+; CHECK: st1.4s { v0 }, [x1] ; encoding: [0x20,0x78,0x00,0x4c]
+; CHECK: st1.4s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x00,0x4c]
+; CHECK: st1.4s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x00,0x4c]
+; CHECK: st1.4s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x00,0x4c]
+
+; CHECK: st1.1d { v0 }, [x1] ; encoding: [0x20,0x7c,0x00,0x0c]
+; CHECK: st1.1d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x00,0x0c]
+; CHECK: st1.1d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x00,0x0c]
+; CHECK: st1.1d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x00,0x0c]
+
+; CHECK: st1.2d { v0 }, [x1] ; encoding: [0x20,0x7c,0x00,0x4c]
+; CHECK: st1.2d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x00,0x4c]
+; CHECK: st1.2d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x00,0x4c]
+; CHECK: st1.2d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x00,0x4c]
+
+; CHECK: st1.2d { v5 }, [x1] ; encoding: [0x25,0x7c,0x00,0x4c]
+; CHECK: st1.2d { v7, v8 }, [x10] ; encoding: [0x47,0xad,0x00,0x4c]
+; CHECK: st1.2d { v11, v12, v13 }, [x1] ; encoding: [0x2b,0x6c,0x00,0x4c]
+; CHECK: st1.2d { v28, v29, v30, v31 }, [x13] ; encoding: [0xbc,0x2d,0x00,0x4c]
+
+_ld2st2_multiple:
+ ld2.8b {v4, v5}, [x19]
+ ld2.16b {v4, v5}, [x19]
+ ld2.4h {v4, v5}, [x19]
+ ld2.8h {v4, v5}, [x19]
+ ld2.2s {v4, v5}, [x19]
+ ld2.4s {v4, v5}, [x19]
+ ld2.2d {v4, v5}, [x19]
+
+ st2.8b {v4, v5}, [x19]
+ st2.16b {v4, v5}, [x19]
+ st2.4h {v4, v5}, [x19]
+ st2.8h {v4, v5}, [x19]
+ st2.2s {v4, v5}, [x19]
+ st2.4s {v4, v5}, [x19]
+ st2.2d {v4, v5}, [x19]
+
+
+; CHECK: _ld2st2_multiple
+; CHECK: ld2.8b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x40,0x0c]
+; CHECK: ld2.16b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x40,0x4c]
+; CHECK: ld2.4h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x40,0x0c]
+; CHECK: ld2.8h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x40,0x4c]
+; CHECK: ld2.2s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x40,0x0c]
+; CHECK: ld2.4s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x40,0x4c]
+; CHECK: ld2.2d { v4, v5 }, [x19] ; encoding: [0x64,0x8e,0x40,0x4c]
+
+; CHECK: st2.8b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x00,0x0c]
+; CHECK: st2.16b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x00,0x4c]
+; CHECK: st2.4h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x00,0x0c]
+; CHECK: st2.8h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x00,0x4c]
+; CHECK: st2.2s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x00,0x0c]
+; CHECK: st2.4s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x00,0x4c]
+; CHECK: st2.2d { v4, v5 }, [x19] ; encoding: [0x64,0x8e,0x00,0x4c]
+
+
+ld3st3_multiple:
+ ld3.8b {v4, v5, v6}, [x19]
+ ld3.16b {v4, v5, v6}, [x19]
+ ld3.4h {v4, v5, v6}, [x19]
+ ld3.8h {v4, v5, v6}, [x19]
+ ld3.2s {v4, v5, v6}, [x19]
+ ld3.4s {v4, v5, v6}, [x19]
+ ld3.2d {v4, v5, v6}, [x19]
+
+ ld3.8b {v9, v10, v11}, [x9]
+ ld3.16b {v14, v15, v16}, [x19]
+ ld3.4h {v24, v25, v26}, [x29]
+ ld3.8h {v30, v31, v0}, [x9]
+ ld3.2s {v2, v3, v4}, [x19]
+ ld3.4s {v4, v5, v6}, [x29]
+ ld3.2d {v7, v8, v9}, [x9]
+
+ st3.8b {v4, v5, v6}, [x19]
+ st3.16b {v4, v5, v6}, [x19]
+ st3.4h {v4, v5, v6}, [x19]
+ st3.8h {v4, v5, v6}, [x19]
+ st3.2s {v4, v5, v6}, [x19]
+ st3.4s {v4, v5, v6}, [x19]
+ st3.2d {v4, v5, v6}, [x19]
+
+ st3.8b {v10, v11, v12}, [x9]
+ st3.16b {v14, v15, v16}, [x19]
+ st3.4h {v24, v25, v26}, [x29]
+ st3.8h {v30, v31, v0}, [x9]
+ st3.2s {v2, v3, v4}, [x19]
+ st3.4s {v7, v8, v9}, [x29]
+ st3.2d {v4, v5, v6}, [x9]
+
+; CHECK: ld3st3_multiple:
+; CHECK: ld3.8b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x40,0x0c]
+; CHECK: ld3.16b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x40,0x4c]
+; CHECK: ld3.4h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x40,0x0c]
+; CHECK: ld3.8h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x40,0x4c]
+; CHECK: ld3.2s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x40,0x0c]
+; CHECK: ld3.4s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x40,0x4c]
+; CHECK: ld3.2d { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4e,0x40,0x4c]
+
+; CHECK: ld3.8b { v9, v10, v11 }, [x9] ; encoding: [0x29,0x41,0x40,0x0c]
+; CHECK: ld3.16b { v14, v15, v16 }, [x19] ; encoding: [0x6e,0x42,0x40,0x4c]
+; CHECK: ld3.4h { v24, v25, v26 }, [x29] ; encoding: [0xb8,0x47,0x40,0x0c]
+; CHECK: ld3.8h { v30, v31, v0 }, [x9] ; encoding: [0x3e,0x45,0x40,0x4c]
+; CHECK: ld3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x40,0x0c]
+; CHECK: ld3.4s { v4, v5, v6 }, [x29] ; encoding: [0xa4,0x4b,0x40,0x4c]
+; CHECK: ld3.2d { v7, v8, v9 }, [x9] ; encoding: [0x27,0x4d,0x40,0x4c]
+
+; CHECK: st3.8b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x00,0x0c]
+; CHECK: st3.16b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x00,0x4c]
+; CHECK: st3.4h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x00,0x0c]
+; CHECK: st3.8h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x00,0x4c]
+; CHECK: st3.2s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x00,0x0c]
+; CHECK: st3.4s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x00,0x4c]
+; CHECK: st3.2d { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4e,0x00,0x4c]
+
+; CHECK: st3.8b { v10, v11, v12 }, [x9] ; encoding: [0x2a,0x41,0x00,0x0c]
+; CHECK: st3.16b { v14, v15, v16 }, [x19] ; encoding: [0x6e,0x42,0x00,0x4c]
+; CHECK: st3.4h { v24, v25, v26 }, [x29] ; encoding: [0xb8,0x47,0x00,0x0c]
+; CHECK: st3.8h { v30, v31, v0 }, [x9] ; encoding: [0x3e,0x45,0x00,0x4c]
+; CHECK: st3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x00,0x0c]
+; CHECK: st3.4s { v7, v8, v9 }, [x29] ; encoding: [0xa7,0x4b,0x00,0x4c]
+; CHECK: st3.2d { v4, v5, v6 }, [x9] ; encoding: [0x24,0x4d,0x00,0x4c]
+
+ld4st4_multiple:
+ ld4.8b {v4, v5, v6, v7}, [x19]
+ ld4.16b {v4, v5, v6, v7}, [x19]
+ ld4.4h {v4, v5, v6, v7}, [x19]
+ ld4.8h {v4, v5, v6, v7}, [x19]
+ ld4.2s {v4, v5, v6, v7}, [x19]
+ ld4.4s {v4, v5, v6, v7}, [x19]
+ ld4.2d {v4, v5, v6, v7}, [x19]
+
+ st4.8b {v4, v5, v6, v7}, [x19]
+ st4.16b {v4, v5, v6, v7}, [x19]
+ st4.4h {v4, v5, v6, v7}, [x19]
+ st4.8h {v4, v5, v6, v7}, [x19]
+ st4.2s {v4, v5, v6, v7}, [x19]
+ st4.4s {v4, v5, v6, v7}, [x19]
+ st4.2d {v4, v5, v6, v7}, [x19]
+
+; CHECK: ld4st4_multiple:
+; CHECK: ld4.8b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x40,0x0c]
+; CHECK: ld4.16b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x40,0x4c]
+; CHECK: ld4.4h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x40,0x0c]
+; CHECK: ld4.8h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x40,0x4c]
+; CHECK: ld4.2s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x40,0x0c]
+; CHECK: ld4.4s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x40,0x4c]
+; CHECK: ld4.2d { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0e,0x40,0x4c]
+
+; CHECK: st4.8b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x00,0x0c]
+; CHECK: st4.16b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x00,0x4c]
+; CHECK: st4.4h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x00,0x0c]
+; CHECK: st4.8h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x00,0x4c]
+; CHECK: st4.2s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x00,0x0c]
+; CHECK: st4.4s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x00,0x4c]
+; CHECK: st4.2d { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0e,0x00,0x4c]
+
+;-----------------------------------------------------------------------------
+; Post-increment versions.
+;-----------------------------------------------------------------------------
+
+_ld1st1_multiple_post:
+ ld1.8b {v0}, [x1], x15
+ ld1.8b {v0, v1}, [x1], x15
+ ld1.8b {v0, v1, v2}, [x1], x15
+ ld1.8b {v0, v1, v2, v3}, [x1], x15
+
+ ld1.16b {v0}, [x1], x15
+ ld1.16b {v0, v1}, [x1], x15
+ ld1.16b {v0, v1, v2}, [x1], x15
+ ld1.16b {v0, v1, v2, v3}, [x1], x15
+
+ ld1.4h {v0}, [x1], x15
+ ld1.4h {v0, v1}, [x1], x15
+ ld1.4h {v0, v1, v2}, [x1], x15
+ ld1.4h {v0, v1, v2, v3}, [x1], x15
+
+ ld1.8h {v0}, [x1], x15
+ ld1.8h {v0, v1}, [x1], x15
+ ld1.8h {v0, v1, v2}, [x1], x15
+ ld1.8h {v0, v1, v2, v3}, [x1], x15
+
+ ld1.2s {v0}, [x1], x15
+ ld1.2s {v0, v1}, [x1], x15
+ ld1.2s {v0, v1, v2}, [x1], x15
+ ld1.2s {v0, v1, v2, v3}, [x1], x15
+
+ ld1.4s {v0}, [x1], x15
+ ld1.4s {v0, v1}, [x1], x15
+ ld1.4s {v0, v1, v2}, [x1], x15
+ ld1.4s {v0, v1, v2, v3}, [x1], x15
+
+ ld1.1d {v0}, [x1], x15
+ ld1.1d {v0, v1}, [x1], x15
+ ld1.1d {v0, v1, v2}, [x1], x15
+ ld1.1d {v0, v1, v2, v3}, [x1], x15
+
+ ld1.2d {v0}, [x1], x15
+ ld1.2d {v0, v1}, [x1], x15
+ ld1.2d {v0, v1, v2}, [x1], x15
+ ld1.2d {v0, v1, v2, v3}, [x1], x15
+
+ st1.8b {v0}, [x1], x15
+ st1.8b {v0, v1}, [x1], x15
+ st1.8b {v0, v1, v2}, [x1], x15
+ st1.8b {v0, v1, v2, v3}, [x1], x15
+
+ st1.16b {v0}, [x1], x15
+ st1.16b {v0, v1}, [x1], x15
+ st1.16b {v0, v1, v2}, [x1], x15
+ st1.16b {v0, v1, v2, v3}, [x1], x15
+
+ st1.4h {v0}, [x1], x15
+ st1.4h {v0, v1}, [x1], x15
+ st1.4h {v0, v1, v2}, [x1], x15
+ st1.4h {v0, v1, v2, v3}, [x1], x15
+
+ st1.8h {v0}, [x1], x15
+ st1.8h {v0, v1}, [x1], x15
+ st1.8h {v0, v1, v2}, [x1], x15
+ st1.8h {v0, v1, v2, v3}, [x1], x15
+
+ st1.2s {v0}, [x1], x15
+ st1.2s {v0, v1}, [x1], x15
+ st1.2s {v0, v1, v2}, [x1], x15
+ st1.2s {v0, v1, v2, v3}, [x1], x15
+
+ st1.4s {v0}, [x1], x15
+ st1.4s {v0, v1}, [x1], x15
+ st1.4s {v0, v1, v2}, [x1], x15
+ st1.4s {v0, v1, v2, v3}, [x1], x15
+
+ st1.1d {v0}, [x1], x15
+ st1.1d {v0, v1}, [x1], x15
+ st1.1d {v0, v1, v2}, [x1], x15
+ st1.1d {v0, v1, v2, v3}, [x1], x15
+
+ st1.2d {v0}, [x1], x15
+ st1.2d {v0, v1}, [x1], x15
+ st1.2d {v0, v1, v2}, [x1], x15
+ st1.2d {v0, v1, v2, v3}, [x1], x15
+
+ ld1.8b {v0}, [x1], #8
+ ld1.8b {v0, v1}, [x1], #16
+ ld1.8b {v0, v1, v2}, [x1], #24
+ ld1.8b {v0, v1, v2, v3}, [x1], #32
+
+ ld1.16b {v0}, [x1], #16
+ ld1.16b {v0, v1}, [x1], #32
+ ld1.16b {v0, v1, v2}, [x1], #48
+ ld1.16b {v0, v1, v2, v3}, [x1], #64
+
+ ld1.4h {v0}, [x1], #8
+ ld1.4h {v0, v1}, [x1], #16
+ ld1.4h {v0, v1, v2}, [x1], #24
+ ld1.4h {v0, v1, v2, v3}, [x1], #32
+
+ ld1.8h {v0}, [x1], #16
+ ld1.8h {v0, v1}, [x1], #32
+ ld1.8h {v0, v1, v2}, [x1], #48
+ ld1.8h {v0, v1, v2, v3}, [x1], #64
+
+ ld1.2s {v0}, [x1], #8
+ ld1.2s {v0, v1}, [x1], #16
+ ld1.2s {v0, v1, v2}, [x1], #24
+ ld1.2s {v0, v1, v2, v3}, [x1], #32
+
+ ld1.4s {v0}, [x1], #16
+ ld1.4s {v0, v1}, [x1], #32
+ ld1.4s {v0, v1, v2}, [x1], #48
+ ld1.4s {v0, v1, v2, v3}, [x1], #64
+
+ ld1.1d {v0}, [x1], #8
+ ld1.1d {v0, v1}, [x1], #16
+ ld1.1d {v0, v1, v2}, [x1], #24
+ ld1.1d {v0, v1, v2, v3}, [x1], #32
+
+ ld1.2d {v0}, [x1], #16
+ ld1.2d {v0, v1}, [x1], #32
+ ld1.2d {v0, v1, v2}, [x1], #48
+ ld1.2d {v0, v1, v2, v3}, [x1], #64
+
+ st1.8b {v0}, [x1], #8
+ st1.8b {v0, v1}, [x1], #16
+ st1.8b {v0, v1, v2}, [x1], #24
+ st1.8b {v0, v1, v2, v3}, [x1], #32
+
+ st1.16b {v0}, [x1], #16
+ st1.16b {v0, v1}, [x1], #32
+ st1.16b {v0, v1, v2}, [x1], #48
+ st1.16b {v0, v1, v2, v3}, [x1], #64
+
+ st1.4h {v0}, [x1], #8
+ st1.4h {v0, v1}, [x1], #16
+ st1.4h {v0, v1, v2}, [x1], #24
+ st1.4h {v0, v1, v2, v3}, [x1], #32
+
+ st1.8h {v0}, [x1], #16
+ st1.8h {v0, v1}, [x1], #32
+ st1.8h {v0, v1, v2}, [x1], #48
+ st1.8h {v0, v1, v2, v3}, [x1], #64
+
+ st1.2s {v0}, [x1], #8
+ st1.2s {v0, v1}, [x1], #16
+ st1.2s {v0, v1, v2}, [x1], #24
+ st1.2s {v0, v1, v2, v3}, [x1], #32
+
+ st1.4s {v0}, [x1], #16
+ st1.4s {v0, v1}, [x1], #32
+ st1.4s {v0, v1, v2}, [x1], #48
+ st1.4s {v0, v1, v2, v3}, [x1], #64
+
+ st1.1d {v0}, [x1], #8
+ st1.1d {v0, v1}, [x1], #16
+ st1.1d {v0, v1, v2}, [x1], #24
+ st1.1d {v0, v1, v2, v3}, [x1], #32
+
+ st1.2d {v0}, [x1], #16
+ st1.2d {v0, v1}, [x1], #32
+ st1.2d {v0, v1, v2}, [x1], #48
+ st1.2d {v0, v1, v2, v3}, [x1], #64
+
+; CHECK: ld1st1_multiple_post:
+; CHECK: ld1.8b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0xcf,0x0c]
+; CHECK: ld1.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0xcf,0x0c]
+; CHECK: ld1.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0xcf,0x0c]
+; CHECK: ld1.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0xcf,0x0c]
+
+; CHECK: ld1.16b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0xcf,0x4c]
+; CHECK: ld1.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0xcf,0x4c]
+; CHECK: ld1.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0xcf,0x4c]
+; CHECK: ld1.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0xcf,0x4c]
+
+; CHECK: ld1.4h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0xcf,0x0c]
+; CHECK: ld1.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0xcf,0x0c]
+; CHECK: ld1.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0xcf,0x0c]
+; CHECK: ld1.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0xcf,0x0c]
+
+; CHECK: ld1.8h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0xcf,0x4c]
+; CHECK: ld1.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0xcf,0x4c]
+; CHECK: ld1.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0xcf,0x4c]
+; CHECK: ld1.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0xcf,0x4c]
+
+; CHECK: ld1.2s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0xcf,0x0c]
+; CHECK: ld1.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0xcf,0x0c]
+; CHECK: ld1.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0xcf,0x0c]
+; CHECK: ld1.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0xcf,0x0c]
+
+; CHECK: ld1.4s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0xcf,0x4c]
+; CHECK: ld1.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0xcf,0x4c]
+; CHECK: ld1.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0xcf,0x4c]
+; CHECK: ld1.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0xcf,0x4c]
+
+; CHECK: ld1.1d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0xcf,0x0c]
+; CHECK: ld1.1d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0xcf,0x0c]
+; CHECK: ld1.1d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0xcf,0x0c]
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0xcf,0x0c]
+
+; CHECK: ld1.2d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0xcf,0x4c]
+; CHECK: ld1.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0xcf,0x4c]
+; CHECK: ld1.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0xcf,0x4c]
+; CHECK: ld1.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0xcf,0x4c]
+
+; CHECK: st1.8b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0x8f,0x0c]
+; CHECK: st1.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0x8f,0x0c]
+; CHECK: st1.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0x8f,0x0c]
+; CHECK: st1.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0x8f,0x0c]
+
+; CHECK: st1.16b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0x8f,0x4c]
+; CHECK: st1.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0x8f,0x4c]
+; CHECK: st1.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0x8f,0x4c]
+; CHECK: st1.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0x8f,0x4c]
+
+; CHECK: st1.4h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0x8f,0x0c]
+; CHECK: st1.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0x8f,0x0c]
+; CHECK: st1.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0x8f,0x0c]
+; CHECK: st1.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0x8f,0x0c]
+
+; CHECK: st1.8h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0x8f,0x4c]
+; CHECK: st1.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0x8f,0x4c]
+; CHECK: st1.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0x8f,0x4c]
+; CHECK: st1.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0x8f,0x4c]
+
+; CHECK: st1.2s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0x8f,0x0c]
+; CHECK: st1.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0x8f,0x0c]
+; CHECK: st1.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0x8f,0x0c]
+; CHECK: st1.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0x8f,0x0c]
+
+; CHECK: st1.4s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0x8f,0x4c]
+; CHECK: st1.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0x8f,0x4c]
+; CHECK: st1.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0x8f,0x4c]
+; CHECK: st1.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0x8f,0x4c]
+
+; CHECK: st1.1d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0x8f,0x0c]
+; CHECK: st1.1d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0x8f,0x0c]
+; CHECK: st1.1d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0x8f,0x0c]
+; CHECK: st1.1d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0x8f,0x0c]
+
+; CHECK: st1.2d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0x8f,0x4c]
+; CHECK: st1.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0x8f,0x4c]
+; CHECK: st1.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0x8f,0x4c]
+; CHECK: st1.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0x8f,0x4c]
+
+; CHECK: ld1.8b { v0 }, [x1], #8 ; encoding: [0x20,0x70,0xdf,0x0c]
+; CHECK: ld1.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa0,0xdf,0x0c]
+; CHECK: ld1.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x60,0xdf,0x0c]
+; CHECK: ld1.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x20,0xdf,0x0c]
+
+; CHECK: ld1.16b { v0 }, [x1], #16 ; encoding: [0x20,0x70,0xdf,0x4c]
+; CHECK: ld1.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa0,0xdf,0x4c]
+; CHECK: ld1.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x60,0xdf,0x4c]
+; CHECK: ld1.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x20,0xdf,0x4c]
+
+; CHECK: ld1.4h { v0 }, [x1], #8 ; encoding: [0x20,0x74,0xdf,0x0c]
+; CHECK: ld1.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa4,0xdf,0x0c]
+; CHECK: ld1.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x64,0xdf,0x0c]
+; CHECK: ld1.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x24,0xdf,0x0c]
+
+; CHECK: ld1.8h { v0 }, [x1], #16 ; encoding: [0x20,0x74,0xdf,0x4c]
+; CHECK: ld1.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa4,0xdf,0x4c]
+; CHECK: ld1.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x64,0xdf,0x4c]
+; CHECK: ld1.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x24,0xdf,0x4c]
+
+; CHECK: ld1.2s { v0 }, [x1], #8 ; encoding: [0x20,0x78,0xdf,0x0c]
+; CHECK: ld1.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa8,0xdf,0x0c]
+; CHECK: ld1.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x68,0xdf,0x0c]
+; CHECK: ld1.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x28,0xdf,0x0c]
+
+; CHECK: ld1.4s { v0 }, [x1], #16 ; encoding: [0x20,0x78,0xdf,0x4c]
+; CHECK: ld1.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa8,0xdf,0x4c]
+; CHECK: ld1.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x68,0xdf,0x4c]
+; CHECK: ld1.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x28,0xdf,0x4c]
+
+; CHECK: ld1.1d { v0 }, [x1], #8 ; encoding: [0x20,0x7c,0xdf,0x0c]
+; CHECK: ld1.1d { v0, v1 }, [x1], #16 ; encoding: [0x20,0xac,0xdf,0x0c]
+; CHECK: ld1.1d { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x6c,0xdf,0x0c]
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x2c,0xdf,0x0c]
+
+; CHECK: ld1.2d { v0 }, [x1], #16 ; encoding: [0x20,0x7c,0xdf,0x4c]
+; CHECK: ld1.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0xac,0xdf,0x4c]
+; CHECK: ld1.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x6c,0xdf,0x4c]
+; CHECK: ld1.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x2c,0xdf,0x4c]
+
+; CHECK: st1.8b { v0 }, [x1], #8 ; encoding: [0x20,0x70,0x9f,0x0c]
+; CHECK: st1.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa0,0x9f,0x0c]
+; CHECK: st1.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x60,0x9f,0x0c]
+; CHECK: st1.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x20,0x9f,0x0c]
+
+; CHECK: st1.16b { v0 }, [x1], #16 ; encoding: [0x20,0x70,0x9f,0x4c]
+; CHECK: st1.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa0,0x9f,0x4c]
+; CHECK: st1.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x60,0x9f,0x4c]
+; CHECK: st1.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x20,0x9f,0x4c]
+
+; CHECK: st1.4h { v0 }, [x1], #8 ; encoding: [0x20,0x74,0x9f,0x0c]
+; CHECK: st1.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa4,0x9f,0x0c]
+; CHECK: st1.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x64,0x9f,0x0c]
+; CHECK: st1.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x24,0x9f,0x0c]
+
+; CHECK: st1.8h { v0 }, [x1], #16 ; encoding: [0x20,0x74,0x9f,0x4c]
+; CHECK: st1.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa4,0x9f,0x4c]
+; CHECK: st1.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x64,0x9f,0x4c]
+; CHECK: st1.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x24,0x9f,0x4c]
+
+; CHECK: st1.2s { v0 }, [x1], #8 ; encoding: [0x20,0x78,0x9f,0x0c]
+; CHECK: st1.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa8,0x9f,0x0c]
+; CHECK: st1.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x68,0x9f,0x0c]
+; CHECK: st1.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x28,0x9f,0x0c]
+
+; CHECK: st1.4s { v0 }, [x1], #16 ; encoding: [0x20,0x78,0x9f,0x4c]
+; CHECK: st1.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa8,0x9f,0x4c]
+; CHECK: st1.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x68,0x9f,0x4c]
+; CHECK: st1.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x28,0x9f,0x4c]
+
+; CHECK: st1.1d { v0 }, [x1], #8 ; encoding: [0x20,0x7c,0x9f,0x0c]
+; CHECK: st1.1d { v0, v1 }, [x1], #16 ; encoding: [0x20,0xac,0x9f,0x0c]
+; CHECK: st1.1d { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x6c,0x9f,0x0c]
+; CHECK: st1.1d { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x2c,0x9f,0x0c]
+
+; CHECK: st1.2d { v0 }, [x1], #16 ; encoding: [0x20,0x7c,0x9f,0x4c]
+; CHECK: st1.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0xac,0x9f,0x4c]
+; CHECK: st1.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x6c,0x9f,0x4c]
+; CHECK: st1.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x2c,0x9f,0x4c]
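+
+; Note that the post-index immediate in the forms above is fixed at the
+; total transfer size: the number of registers in the list times the
+; register width (8 bytes for the 64-bit arrangements, 16 bytes for the
+; 128-bit ones), e.g. #24 for "ld1.8b { v0, v1, v2 }" and #64 for
+; "st1.2d { v0, v1, v2, v3 }".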
+
+
+_ld2st2_multiple_post:
+ ld2.8b {v0, v1}, [x1], x15
+ ld2.16b {v0, v1}, [x1], x15
+ ld2.4h {v0, v1}, [x1], x15
+ ld2.8h {v0, v1}, [x1], x15
+ ld2.2s {v0, v1}, [x1], x15
+ ld2.4s {v0, v1}, [x1], x15
+ ld2.2d {v0, v1}, [x1], x15
+
+ st2.8b {v0, v1}, [x1], x15
+ st2.16b {v0, v1}, [x1], x15
+ st2.4h {v0, v1}, [x1], x15
+ st2.8h {v0, v1}, [x1], x15
+ st2.2s {v0, v1}, [x1], x15
+ st2.4s {v0, v1}, [x1], x15
+ st2.2d {v0, v1}, [x1], x15
+
+ ld2.8b {v0, v1}, [x1], #16
+ ld2.16b {v0, v1}, [x1], #32
+ ld2.4h {v0, v1}, [x1], #16
+ ld2.8h {v0, v1}, [x1], #32
+ ld2.2s {v0, v1}, [x1], #16
+ ld2.4s {v0, v1}, [x1], #32
+ ld2.2d {v0, v1}, [x1], #32
+
+ st2.8b {v0, v1}, [x1], #16
+ st2.16b {v0, v1}, [x1], #32
+ st2.4h {v0, v1}, [x1], #16
+ st2.8h {v0, v1}, [x1], #32
+ st2.2s {v0, v1}, [x1], #16
+ st2.4s {v0, v1}, [x1], #32
+ st2.2d {v0, v1}, [x1], #32
+
+
+; CHECK: ld2st2_multiple_post:
+; CHECK: ld2.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0xcf,0x0c]
+; CHECK: ld2.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0xcf,0x4c]
+; CHECK: ld2.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0xcf,0x0c]
+; CHECK: ld2.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0xcf,0x4c]
+; CHECK: ld2.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0xcf,0x0c]
+; CHECK: ld2.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0xcf,0x4c]
+; CHECK: ld2.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0x8c,0xcf,0x4c]
+
+; CHECK: st2.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0x8f,0x0c]
+; CHECK: st2.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0x8f,0x4c]
+; CHECK: st2.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0x8f,0x0c]
+; CHECK: st2.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0x8f,0x4c]
+; CHECK: st2.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0x8f,0x0c]
+; CHECK: st2.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0x8f,0x4c]
+; CHECK: st2.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0x8c,0x8f,0x4c]
+
+; CHECK: ld2.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0x80,0xdf,0x0c]
+; CHECK: ld2.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0x80,0xdf,0x4c]
+; CHECK: ld2.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0x84,0xdf,0x0c]
+; CHECK: ld2.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0x84,0xdf,0x4c]
+; CHECK: ld2.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0x88,0xdf,0x0c]
+; CHECK: ld2.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0x88,0xdf,0x4c]
+; CHECK: ld2.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0x8c,0xdf,0x4c]
+
+; CHECK: st2.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0x80,0x9f,0x0c]
+; CHECK: st2.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0x80,0x9f,0x4c]
+; CHECK: st2.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0x84,0x9f,0x0c]
+; CHECK: st2.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0x84,0x9f,0x4c]
+; CHECK: st2.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0x88,0x9f,0x0c]
+; CHECK: st2.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0x88,0x9f,0x4c]
+; CHECK: st2.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0x8c,0x9f,0x4c]
+
+
+_ld3st3_multiple_post:
+ ld3.8b {v0, v1, v2}, [x1], x15
+ ld3.16b {v0, v1, v2}, [x1], x15
+ ld3.4h {v0, v1, v2}, [x1], x15
+ ld3.8h {v0, v1, v2}, [x1], x15
+ ld3.2s {v0, v1, v2}, [x1], x15
+ ld3.4s {v0, v1, v2}, [x1], x15
+ ld3.2d {v0, v1, v2}, [x1], x15
+
+ st3.8b {v0, v1, v2}, [x1], x15
+ st3.16b {v0, v1, v2}, [x1], x15
+ st3.4h {v0, v1, v2}, [x1], x15
+ st3.8h {v0, v1, v2}, [x1], x15
+ st3.2s {v0, v1, v2}, [x1], x15
+ st3.4s {v0, v1, v2}, [x1], x15
+ st3.2d {v0, v1, v2}, [x1], x15
+
+ ld3.8b {v0, v1, v2}, [x1], #24
+ ld3.16b {v0, v1, v2}, [x1], #48
+ ld3.4h {v0, v1, v2}, [x1], #24
+ ld3.8h {v0, v1, v2}, [x1], #48
+ ld3.2s {v0, v1, v2}, [x1], #24
+ ld3.4s {v0, v1, v2}, [x1], #48
+ ld3.2d {v0, v1, v2}, [x1], #48
+
+ st3.8b {v0, v1, v2}, [x1], #24
+ st3.16b {v0, v1, v2}, [x1], #48
+ st3.4h {v0, v1, v2}, [x1], #24
+ st3.8h {v0, v1, v2}, [x1], #48
+ st3.2s {v0, v1, v2}, [x1], #24
+ st3.4s {v0, v1, v2}, [x1], #48
+ st3.2d {v0, v1, v2}, [x1], #48
+
+; CHECK: ld3st3_multiple_post:
+; CHECK: ld3.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0xcf,0x0c]
+; CHECK: ld3.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0xcf,0x4c]
+; CHECK: ld3.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0xcf,0x0c]
+; CHECK: ld3.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0xcf,0x4c]
+; CHECK: ld3.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0xcf,0x0c]
+; CHECK: ld3.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0xcf,0x4c]
+; CHECK: ld3.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x4c,0xcf,0x4c]
+
+; CHECK: st3.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0x8f,0x0c]
+; CHECK: st3.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0x8f,0x4c]
+; CHECK: st3.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0x8f,0x0c]
+; CHECK: st3.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0x8f,0x4c]
+; CHECK: st3.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0x8f,0x0c]
+; CHECK: st3.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0x8f,0x4c]
+; CHECK: st3.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x4c,0x8f,0x4c]
+
+; CHECK: ld3.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x40,0xdf,0x0c]
+; CHECK: ld3.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x40,0xdf,0x4c]
+; CHECK: ld3.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x44,0xdf,0x0c]
+; CHECK: ld3.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x44,0xdf,0x4c]
+; CHECK: ld3.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x48,0xdf,0x0c]
+; CHECK: ld3.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x48,0xdf,0x4c]
+; CHECK: ld3.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x4c,0xdf,0x4c]
+
+; CHECK: st3.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x40,0x9f,0x0c]
+; CHECK: st3.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x40,0x9f,0x4c]
+; CHECK: st3.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x44,0x9f,0x0c]
+; CHECK: st3.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x44,0x9f,0x4c]
+; CHECK: st3.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x48,0x9f,0x0c]
+; CHECK: st3.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x48,0x9f,0x4c]
+; CHECK: st3.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x4c,0x9f,0x4c]
+
+_ld4st4_multiple_post:
+ ld4.8b {v0, v1, v2, v3}, [x1], x15
+ ld4.16b {v0, v1, v2, v3}, [x1], x15
+ ld4.4h {v0, v1, v2, v3}, [x1], x15
+ ld4.8h {v0, v1, v2, v3}, [x1], x15
+ ld4.2s {v0, v1, v2, v3}, [x1], x15
+ ld4.4s {v0, v1, v2, v3}, [x1], x15
+ ld4.2d {v0, v1, v2, v3}, [x1], x15
+
+ st4.8b {v0, v1, v2, v3}, [x1], x15
+ st4.16b {v0, v1, v2, v3}, [x1], x15
+ st4.4h {v0, v1, v2, v3}, [x1], x15
+ st4.8h {v0, v1, v2, v3}, [x1], x15
+ st4.2s {v0, v1, v2, v3}, [x1], x15
+ st4.4s {v0, v1, v2, v3}, [x1], x15
+ st4.2d {v0, v1, v2, v3}, [x1], x15
+
+ ld4.8b {v0, v1, v2, v3}, [x1], #32
+ ld4.16b {v0, v1, v2, v3}, [x1], #64
+ ld4.4h {v0, v1, v2, v3}, [x1], #32
+ ld4.8h {v0, v1, v2, v3}, [x1], #64
+ ld4.2s {v0, v1, v2, v3}, [x1], #32
+ ld4.4s {v0, v1, v2, v3}, [x1], #64
+ ld4.2d {v0, v1, v2, v3}, [x1], #64
+
+ st4.8b {v0, v1, v2, v3}, [x1], #32
+ st4.16b {v0, v1, v2, v3}, [x1], #64
+ st4.4h {v0, v1, v2, v3}, [x1], #32
+ st4.8h {v0, v1, v2, v3}, [x1], #64
+ st4.2s {v0, v1, v2, v3}, [x1], #32
+ st4.4s {v0, v1, v2, v3}, [x1], #64
+ st4.2d {v0, v1, v2, v3}, [x1], #64
+
+
+; CHECK: ld4st4_multiple_post:
+; CHECK: ld4.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0xcf,0x0c]
+; CHECK: ld4.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0xcf,0x4c]
+; CHECK: ld4.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0xcf,0x0c]
+; CHECK: ld4.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0xcf,0x4c]
+; CHECK: ld4.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0xcf,0x0c]
+; CHECK: ld4.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0xcf,0x4c]
+; CHECK: ld4.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x0c,0xcf,0x4c]
+
+; CHECK: st4.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0x8f,0x0c]
+; CHECK: st4.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0x8f,0x4c]
+; CHECK: st4.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0x8f,0x0c]
+; CHECK: st4.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0x8f,0x4c]
+; CHECK: st4.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0x8f,0x0c]
+; CHECK: st4.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0x8f,0x4c]
+; CHECK: st4.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x0c,0x8f,0x4c]
+
+; CHECK: ld4.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x00,0xdf,0x0c]
+; CHECK: ld4.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x00,0xdf,0x4c]
+; CHECK: ld4.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x04,0xdf,0x0c]
+; CHECK: ld4.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x04,0xdf,0x4c]
+; CHECK: ld4.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x08,0xdf,0x0c]
+; CHECK: ld4.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x08,0xdf,0x4c]
+; CHECK: ld4.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x0c,0xdf,0x4c]
+
+; CHECK: st4.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x00,0x9f,0x0c]
+; CHECK: st4.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x00,0x9f,0x4c]
+; CHECK: st4.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x04,0x9f,0x0c]
+; CHECK: st4.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x04,0x9f,0x4c]
+; CHECK: st4.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x08,0x9f,0x0c]
+; CHECK: st4.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x08,0x9f,0x4c]
+; CHECK: st4.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x0c,0x9f,0x4c]
+
+ld1r:
+ ld1r.8b {v4}, [x2]
+ ld1r.8b {v4}, [x2], x3
+ ld1r.16b {v4}, [x2]
+ ld1r.16b {v4}, [x2], x3
+ ld1r.4h {v4}, [x2]
+ ld1r.4h {v4}, [x2], x3
+ ld1r.8h {v4}, [x2]
+ ld1r.8h {v4}, [x2], x3
+ ld1r.2s {v4}, [x2]
+ ld1r.2s {v4}, [x2], x3
+ ld1r.4s {v4}, [x2]
+ ld1r.4s {v4}, [x2], x3
+ ld1r.1d {v4}, [x2]
+ ld1r.1d {v4}, [x2], x3
+ ld1r.2d {v4}, [x2]
+ ld1r.2d {v4}, [x2], x3
+
+ ld1r.8b {v4}, [x2], #1
+ ld1r.16b {v4}, [x2], #1
+ ld1r.4h {v4}, [x2], #2
+ ld1r.8h {v4}, [x2], #2
+ ld1r.2s {v4}, [x2], #4
+ ld1r.4s {v4}, [x2], #4
+ ld1r.1d {v4}, [x2], #8
+ ld1r.2d {v4}, [x2], #8
+
+; CHECK: ld1r:
+; CHECK: ld1r.8b { v4 }, [x2] ; encoding: [0x44,0xc0,0x40,0x0d]
+; CHECK: ld1r.8b { v4 }, [x2], x3 ; encoding: [0x44,0xc0,0xc3,0x0d]
+; CHECK: ld1r.16b { v4 }, [x2] ; encoding: [0x44,0xc0,0x40,0x4d]
+; CHECK: ld1r.16b { v4 }, [x2], x3 ; encoding: [0x44,0xc0,0xc3,0x4d]
+; CHECK: ld1r.4h { v4 }, [x2] ; encoding: [0x44,0xc4,0x40,0x0d]
+; CHECK: ld1r.4h { v4 }, [x2], x3 ; encoding: [0x44,0xc4,0xc3,0x0d]
+; CHECK: ld1r.8h { v4 }, [x2] ; encoding: [0x44,0xc4,0x40,0x4d]
+; CHECK: ld1r.8h { v4 }, [x2], x3 ; encoding: [0x44,0xc4,0xc3,0x4d]
+; CHECK: ld1r.2s { v4 }, [x2] ; encoding: [0x44,0xc8,0x40,0x0d]
+; CHECK: ld1r.2s { v4 }, [x2], x3 ; encoding: [0x44,0xc8,0xc3,0x0d]
+; CHECK: ld1r.4s { v4 }, [x2] ; encoding: [0x44,0xc8,0x40,0x4d]
+; CHECK: ld1r.4s { v4 }, [x2], x3 ; encoding: [0x44,0xc8,0xc3,0x4d]
+; CHECK: ld1r.1d { v4 }, [x2] ; encoding: [0x44,0xcc,0x40,0x0d]
+; CHECK: ld1r.1d { v4 }, [x2], x3 ; encoding: [0x44,0xcc,0xc3,0x0d]
+; CHECK: ld1r.2d { v4 }, [x2] ; encoding: [0x44,0xcc,0x40,0x4d]
+; CHECK: ld1r.2d { v4 }, [x2], x3 ; encoding: [0x44,0xcc,0xc3,0x4d]
+
+; CHECK: ld1r.8b { v4 }, [x2], #1 ; encoding: [0x44,0xc0,0xdf,0x0d]
+; CHECK: ld1r.16b { v4 }, [x2], #1 ; encoding: [0x44,0xc0,0xdf,0x4d]
+; CHECK: ld1r.4h { v4 }, [x2], #2 ; encoding: [0x44,0xc4,0xdf,0x0d]
+; CHECK: ld1r.8h { v4 }, [x2], #2 ; encoding: [0x44,0xc4,0xdf,0x4d]
+; CHECK: ld1r.2s { v4 }, [x2], #4 ; encoding: [0x44,0xc8,0xdf,0x0d]
+; CHECK: ld1r.4s { v4 }, [x2], #4 ; encoding: [0x44,0xc8,0xdf,0x4d]
+; CHECK: ld1r.1d { v4 }, [x2], #8 ; encoding: [0x44,0xcc,0xdf,0x0d]
+; CHECK: ld1r.2d { v4 }, [x2], #8 ; encoding: [0x44,0xcc,0xdf,0x4d]
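+
+; For the replicating loads the immediate offset is the element size times
+; the number of registers, independent of the arrangement: ld1r advances by
+; a single element (#1 for .8b and .16b alike, #8 for .1d and .2d), and the
+; ld2r/ld3r/ld4r groups below advance by two, three and four elements.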
+
+ld2r:
+ ld2r.8b {v4, v5}, [x2]
+ ld2r.8b {v4, v5}, [x2], x3
+ ld2r.16b {v4, v5}, [x2]
+ ld2r.16b {v4, v5}, [x2], x3
+ ld2r.4h {v4, v5}, [x2]
+ ld2r.4h {v4, v5}, [x2], x3
+ ld2r.8h {v4, v5}, [x2]
+ ld2r.8h {v4, v5}, [x2], x3
+ ld2r.2s {v4, v5}, [x2]
+ ld2r.2s {v4, v5}, [x2], x3
+ ld2r.4s {v4, v5}, [x2]
+ ld2r.4s {v4, v5}, [x2], x3
+ ld2r.1d {v4, v5}, [x2]
+ ld2r.1d {v4, v5}, [x2], x3
+ ld2r.2d {v4, v5}, [x2]
+ ld2r.2d {v4, v5}, [x2], x3
+
+ ld2r.8b {v4, v5}, [x2], #2
+ ld2r.16b {v4, v5}, [x2], #2
+ ld2r.4h {v4, v5}, [x2], #4
+ ld2r.8h {v4, v5}, [x2], #4
+ ld2r.2s {v4, v5}, [x2], #8
+ ld2r.4s {v4, v5}, [x2], #8
+ ld2r.1d {v4, v5}, [x2], #16
+ ld2r.2d {v4, v5}, [x2], #16
+
+; CHECK: ld2r:
+; CHECK: ld2r.8b { v4, v5 }, [x2] ; encoding: [0x44,0xc0,0x60,0x0d]
+; CHECK: ld2r.8b { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc0,0xe3,0x0d]
+; CHECK: ld2r.16b { v4, v5 }, [x2] ; encoding: [0x44,0xc0,0x60,0x4d]
+; CHECK: ld2r.16b { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc0,0xe3,0x4d]
+; CHECK: ld2r.4h { v4, v5 }, [x2] ; encoding: [0x44,0xc4,0x60,0x0d]
+; CHECK: ld2r.4h { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc4,0xe3,0x0d]
+; CHECK: ld2r.8h { v4, v5 }, [x2] ; encoding: [0x44,0xc4,0x60,0x4d]
+; CHECK: ld2r.8h { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc4,0xe3,0x4d]
+; CHECK: ld2r.2s { v4, v5 }, [x2] ; encoding: [0x44,0xc8,0x60,0x0d]
+; CHECK: ld2r.2s { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc8,0xe3,0x0d]
+; CHECK: ld2r.4s { v4, v5 }, [x2] ; encoding: [0x44,0xc8,0x60,0x4d]
+; CHECK: ld2r.4s { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc8,0xe3,0x4d]
+; CHECK: ld2r.1d { v4, v5 }, [x2] ; encoding: [0x44,0xcc,0x60,0x0d]
+; CHECK: ld2r.1d { v4, v5 }, [x2], x3 ; encoding: [0x44,0xcc,0xe3,0x0d]
+; CHECK: ld2r.2d { v4, v5 }, [x2] ; encoding: [0x44,0xcc,0x60,0x4d]
+; CHECK: ld2r.2d { v4, v5 }, [x2], x3 ; encoding: [0x44,0xcc,0xe3,0x4d]
+
+; CHECK: ld2r.8b { v4, v5 }, [x2], #2 ; encoding: [0x44,0xc0,0xff,0x0d]
+; CHECK: ld2r.16b { v4, v5 }, [x2], #2 ; encoding: [0x44,0xc0,0xff,0x4d]
+; CHECK: ld2r.4h { v4, v5 }, [x2], #4 ; encoding: [0x44,0xc4,0xff,0x0d]
+; CHECK: ld2r.8h { v4, v5 }, [x2], #4 ; encoding: [0x44,0xc4,0xff,0x4d]
+; CHECK: ld2r.2s { v4, v5 }, [x2], #8 ; encoding: [0x44,0xc8,0xff,0x0d]
+; CHECK: ld2r.4s { v4, v5 }, [x2], #8 ; encoding: [0x44,0xc8,0xff,0x4d]
+; CHECK: ld2r.1d { v4, v5 }, [x2], #16 ; encoding: [0x44,0xcc,0xff,0x0d]
+; CHECK: ld2r.2d { v4, v5 }, [x2], #16 ; encoding: [0x44,0xcc,0xff,0x4d]
+
+ld3r:
+ ld3r.8b {v4, v5, v6}, [x2]
+ ld3r.8b {v4, v5, v6}, [x2], x3
+ ld3r.16b {v4, v5, v6}, [x2]
+ ld3r.16b {v4, v5, v6}, [x2], x3
+ ld3r.4h {v4, v5, v6}, [x2]
+ ld3r.4h {v4, v5, v6}, [x2], x3
+ ld3r.8h {v4, v5, v6}, [x2]
+ ld3r.8h {v4, v5, v6}, [x2], x3
+ ld3r.2s {v4, v5, v6}, [x2]
+ ld3r.2s {v4, v5, v6}, [x2], x3
+ ld3r.4s {v4, v5, v6}, [x2]
+ ld3r.4s {v4, v5, v6}, [x2], x3
+ ld3r.1d {v4, v5, v6}, [x2]
+ ld3r.1d {v4, v5, v6}, [x2], x3
+ ld3r.2d {v4, v5, v6}, [x2]
+ ld3r.2d {v4, v5, v6}, [x2], x3
+
+ ld3r.8b {v4, v5, v6}, [x2], #3
+ ld3r.16b {v4, v5, v6}, [x2], #3
+ ld3r.4h {v4, v5, v6}, [x2], #6
+ ld3r.8h {v4, v5, v6}, [x2], #6
+ ld3r.2s {v4, v5, v6}, [x2], #12
+ ld3r.4s {v4, v5, v6}, [x2], #12
+ ld3r.1d {v4, v5, v6}, [x2], #24
+ ld3r.2d {v4, v5, v6}, [x2], #24
+
+; CHECK: ld3r:
+; CHECK: ld3r.8b { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe0,0x40,0x0d]
+; CHECK: ld3r.8b { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe0,0xc3,0x0d]
+; CHECK: ld3r.16b { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe0,0x40,0x4d]
+; CHECK: ld3r.16b { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe0,0xc3,0x4d]
+; CHECK: ld3r.4h { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe4,0x40,0x0d]
+; CHECK: ld3r.4h { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe4,0xc3,0x0d]
+; CHECK: ld3r.8h { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe4,0x40,0x4d]
+; CHECK: ld3r.8h { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe4,0xc3,0x4d]
+; CHECK: ld3r.2s { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe8,0x40,0x0d]
+; CHECK: ld3r.2s { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe8,0xc3,0x0d]
+; CHECK: ld3r.4s { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe8,0x40,0x4d]
+; CHECK: ld3r.4s { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe8,0xc3,0x4d]
+; CHECK: ld3r.1d { v4, v5, v6 }, [x2] ; encoding: [0x44,0xec,0x40,0x0d]
+; CHECK: ld3r.1d { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xec,0xc3,0x0d]
+; CHECK: ld3r.2d { v4, v5, v6 }, [x2] ; encoding: [0x44,0xec,0x40,0x4d]
+; CHECK: ld3r.2d { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xec,0xc3,0x4d]
+
+; CHECK: ld3r.8b { v4, v5, v6 }, [x2], #3 ; encoding: [0x44,0xe0,0xdf,0x0d]
+; CHECK: ld3r.16b { v4, v5, v6 }, [x2], #3 ; encoding: [0x44,0xe0,0xdf,0x4d]
+; CHECK: ld3r.4h { v4, v5, v6 }, [x2], #6 ; encoding: [0x44,0xe4,0xdf,0x0d]
+; CHECK: ld3r.8h { v4, v5, v6 }, [x2], #6 ; encoding: [0x44,0xe4,0xdf,0x4d]
+; CHECK: ld3r.2s { v4, v5, v6 }, [x2], #12 ; encoding: [0x44,0xe8,0xdf,0x0d]
+; CHECK: ld3r.4s { v4, v5, v6 }, [x2], #12 ; encoding: [0x44,0xe8,0xdf,0x4d]
+; CHECK: ld3r.1d { v4, v5, v6 }, [x2], #24 ; encoding: [0x44,0xec,0xdf,0x0d]
+; CHECK: ld3r.2d { v4, v5, v6 }, [x2], #24 ; encoding: [0x44,0xec,0xdf,0x4d]
+
+ld4r:
+ ld4r.8b {v4, v5, v6, v7}, [x2]
+ ld4r.8b {v4, v5, v6, v7}, [x2], x3
+ ld4r.16b {v4, v5, v6, v7}, [x2]
+ ld4r.16b {v4, v5, v6, v7}, [x2], x3
+ ld4r.4h {v4, v5, v6, v7}, [x2]
+ ld4r.4h {v4, v5, v6, v7}, [x2], x3
+ ld4r.8h {v4, v5, v6, v7}, [x2]
+ ld4r.8h {v4, v5, v6, v7}, [x2], x3
+ ld4r.2s {v4, v5, v6, v7}, [x2]
+ ld4r.2s {v4, v5, v6, v7}, [x2], x3
+ ld4r.4s {v4, v5, v6, v7}, [x2]
+ ld4r.4s {v4, v5, v6, v7}, [x2], x3
+ ld4r.1d {v4, v5, v6, v7}, [x2]
+ ld4r.1d {v4, v5, v6, v7}, [x2], x3
+ ld4r.2d {v4, v5, v6, v7}, [x2]
+ ld4r.2d {v4, v5, v6, v7}, [x2], x3
+
+ ld4r.8b {v4, v5, v6, v7}, [x2], #4
+ ld4r.16b {v5, v6, v7, v8}, [x2], #4
+ ld4r.4h {v6, v7, v8, v9}, [x2], #8
+ ld4r.8h {v1, v2, v3, v4}, [x2], #8
+ ld4r.2s {v2, v3, v4, v5}, [x2], #16
+ ld4r.4s {v3, v4, v5, v6}, [x2], #16
+ ld4r.1d {v0, v1, v2, v3}, [x2], #32
+ ld4r.2d {v4, v5, v6, v7}, [x2], #32
+
+; CHECK: ld4r:
+; CHECK: ld4r.8b { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe0,0x60,0x0d]
+; CHECK: ld4r.8b { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe0,0xe3,0x0d]
+; CHECK: ld4r.16b { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe0,0x60,0x4d]
+; CHECK: ld4r.16b { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe0,0xe3,0x4d]
+; CHECK: ld4r.4h { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe4,0x60,0x0d]
+; CHECK: ld4r.4h { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe4,0xe3,0x0d]
+; CHECK: ld4r.8h { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe4,0x60,0x4d]
+; CHECK: ld4r.8h { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe4,0xe3,0x4d]
+; CHECK: ld4r.2s { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe8,0x60,0x0d]
+; CHECK: ld4r.2s { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe8,0xe3,0x0d]
+; CHECK: ld4r.4s { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe8,0x60,0x4d]
+; CHECK: ld4r.4s { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe8,0xe3,0x4d]
+; CHECK: ld4r.1d { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xec,0x60,0x0d]
+; CHECK: ld4r.1d { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xec,0xe3,0x0d]
+; CHECK: ld4r.2d { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xec,0x60,0x4d]
+; CHECK: ld4r.2d { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xec,0xe3,0x4d]
+
+; CHECK: ld4r.8b { v4, v5, v6, v7 }, [x2], #4 ; encoding: [0x44,0xe0,0xff,0x0d]
+; CHECK: ld4r.16b { v5, v6, v7, v8 }, [x2], #4 ; encoding: [0x45,0xe0,0xff,0x4d]
+; CHECK: ld4r.4h { v6, v7, v8, v9 }, [x2], #8 ; encoding: [0x46,0xe4,0xff,0x0d]
+; CHECK: ld4r.8h { v1, v2, v3, v4 }, [x2], #8 ; encoding: [0x41,0xe4,0xff,0x4d]
+; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2], #16 ; encoding: [0x42,0xe8,0xff,0x0d]
+; CHECK: ld4r.4s { v3, v4, v5, v6 }, [x2], #16 ; encoding: [0x43,0xe8,0xff,0x4d]
+; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x2], #32 ; encoding: [0x40,0xec,0xff,0x0d]
+; CHECK: ld4r.2d { v4, v5, v6, v7 }, [x2], #32 ; encoding: [0x44,0xec,0xff,0x4d]
+
+
+_ld1:
+ ld1.b {v4}[13], [x3]
+ ld1.h {v4}[2], [x3]
+ ld1.s {v4}[2], [x3]
+ ld1.d {v4}[1], [x3]
+ ld1.b {v4}[13], [x3], x5
+ ld1.h {v4}[2], [x3], x5
+ ld1.s {v4}[2], [x3], x5
+ ld1.d {v4}[1], [x3], x5
+ ld1.b {v4}[13], [x3], #1
+ ld1.h {v4}[2], [x3], #2
+ ld1.s {v4}[2], [x3], #4
+ ld1.d {v4}[1], [x3], #8
+
+; CHECK: _ld1:
+; CHECK: ld1.b { v4 }[13], [x3] ; encoding: [0x64,0x14,0x40,0x4d]
+; CHECK: ld1.h { v4 }[2], [x3] ; encoding: [0x64,0x50,0x40,0x0d]
+; CHECK: ld1.s { v4 }[2], [x3] ; encoding: [0x64,0x80,0x40,0x4d]
+; CHECK: ld1.d { v4 }[1], [x3] ; encoding: [0x64,0x84,0x40,0x4d]
+; CHECK: ld1.b { v4 }[13], [x3], x5 ; encoding: [0x64,0x14,0xc5,0x4d]
+; CHECK: ld1.h { v4 }[2], [x3], x5 ; encoding: [0x64,0x50,0xc5,0x0d]
+; CHECK: ld1.s { v4 }[2], [x3], x5 ; encoding: [0x64,0x80,0xc5,0x4d]
+; CHECK: ld1.d { v4 }[1], [x3], x5 ; encoding: [0x64,0x84,0xc5,0x4d]
+; CHECK: ld1.b { v4 }[13], [x3], #1 ; encoding: [0x64,0x14,0xdf,0x4d]
+; CHECK: ld1.h { v4 }[2], [x3], #2 ; encoding: [0x64,0x50,0xdf,0x0d]
+; CHECK: ld1.s { v4 }[2], [x3], #4 ; encoding: [0x64,0x80,0xdf,0x4d]
+; CHECK: ld1.d { v4 }[1], [x3], #8 ; encoding: [0x64,0x84,0xdf,0x4d]
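+
+; The single-lane post-index immediates follow the same rule: element size
+; times register count, i.e. #1/#2/#4/#8 for the b/h/s/d forms of ld1,
+; scaling up to #4/#8/#16/#32 for the ld4/st4 groups below.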
+
+_ld2:
+ ld2.b {v4, v5}[13], [x3]
+ ld2.h {v4, v5}[2], [x3]
+ ld2.s {v4, v5}[2], [x3]
+ ld2.d {v4, v5}[1], [x3]
+ ld2.b {v4, v5}[13], [x3], x5
+ ld2.h {v4, v5}[2], [x3], x5
+ ld2.s {v4, v5}[2], [x3], x5
+ ld2.d {v4, v5}[1], [x3], x5
+ ld2.b {v4, v5}[13], [x3], #2
+ ld2.h {v4, v5}[2], [x3], #4
+ ld2.s {v4, v5}[2], [x3], #8
+ ld2.d {v4, v5}[1], [x3], #16
+
+
+; CHECK: _ld2:
+; CHECK: ld2.b { v4, v5 }[13], [x3] ; encoding: [0x64,0x14,0x60,0x4d]
+; CHECK: ld2.h { v4, v5 }[2], [x3] ; encoding: [0x64,0x50,0x60,0x0d]
+; CHECK: ld2.s { v4, v5 }[2], [x3] ; encoding: [0x64,0x80,0x60,0x4d]
+; CHECK: ld2.d { v4, v5 }[1], [x3] ; encoding: [0x64,0x84,0x60,0x4d]
+; CHECK: ld2.b { v4, v5 }[13], [x3], x5 ; encoding: [0x64,0x14,0xe5,0x4d]
+; CHECK: ld2.h { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x50,0xe5,0x0d]
+; CHECK: ld2.s { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x80,0xe5,0x4d]
+; CHECK: ld2.d { v4, v5 }[1], [x3], x5 ; encoding: [0x64,0x84,0xe5,0x4d]
+; CHECK: ld2.b { v4, v5 }[13], [x3], #2 ; encoding: [0x64,0x14,0xff,0x4d]
+; CHECK: ld2.h { v4, v5 }[2], [x3], #4 ; encoding: [0x64,0x50,0xff,0x0d]
+; CHECK: ld2.s { v4, v5 }[2], [x3], #8 ; encoding: [0x64,0x80,0xff,0x4d]
+; CHECK: ld2.d { v4, v5 }[1], [x3], #16 ; encoding: [0x64,0x84,0xff,0x4d]
+
+
+_ld3:
+ ld3.b {v4, v5, v6}[13], [x3]
+ ld3.h {v4, v5, v6}[2], [x3]
+ ld3.s {v4, v5, v6}[2], [x3]
+ ld3.d {v4, v5, v6}[1], [x3]
+ ld3.b {v4, v5, v6}[13], [x3], x5
+ ld3.h {v4, v5, v6}[2], [x3], x5
+ ld3.s {v4, v5, v6}[2], [x3], x5
+ ld3.d {v4, v5, v6}[1], [x3], x5
+ ld3.b {v4, v5, v6}[13], [x3], #3
+ ld3.h {v4, v5, v6}[2], [x3], #6
+ ld3.s {v4, v5, v6}[2], [x3], #12
+ ld3.d {v4, v5, v6}[1], [x3], #24
+
+
+; CHECK: _ld3:
+; CHECK: ld3.b { v4, v5, v6 }[13], [x3] ; encoding: [0x64,0x34,0x40,0x4d]
+; CHECK: ld3.h { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0x70,0x40,0x0d]
+; CHECK: ld3.s { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0xa0,0x40,0x4d]
+; CHECK: ld3.d { v4, v5, v6 }[1], [x3] ; encoding: [0x64,0xa4,0x40,0x4d]
+; CHECK: ld3.b { v4, v5, v6 }[13], [x3], x5 ; encoding: [0x64,0x34,0xc5,0x4d]
+; CHECK: ld3.h { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0x70,0xc5,0x0d]
+; CHECK: ld3.s { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0xa0,0xc5,0x4d]
+; CHECK: ld3.d { v4, v5, v6 }[1], [x3], x5 ; encoding: [0x64,0xa4,0xc5,0x4d]
+; CHECK: ld3.b { v4, v5, v6 }[13], [x3], #3 ; encoding: [0x64,0x34,0xdf,0x4d]
+; CHECK: ld3.h { v4, v5, v6 }[2], [x3], #6 ; encoding: [0x64,0x70,0xdf,0x0d]
+; CHECK: ld3.s { v4, v5, v6 }[2], [x3], #12 ; encoding: [0x64,0xa0,0xdf,0x4d]
+; CHECK: ld3.d { v4, v5, v6 }[1], [x3], #24 ; encoding: [0x64,0xa4,0xdf,0x4d]
+
+
+_ld4:
+ ld4.b {v4, v5, v6, v7}[13], [x3]
+ ld4.h {v4, v5, v6, v7}[2], [x3]
+ ld4.s {v4, v5, v6, v7}[2], [x3]
+ ld4.d {v4, v5, v6, v7}[1], [x3]
+ ld4.b {v4, v5, v6, v7}[13], [x3], x5
+ ld4.h {v4, v5, v6, v7}[2], [x3], x5
+ ld4.s {v4, v5, v6, v7}[2], [x3], x5
+ ld4.d {v4, v5, v6, v7}[1], [x3], x5
+ ld4.b {v4, v5, v6, v7}[13], [x3], #4
+ ld4.h {v4, v5, v6, v7}[2], [x3], #8
+ ld4.s {v4, v5, v6, v7}[2], [x3], #16
+ ld4.d {v4, v5, v6, v7}[1], [x3], #32
+
+; CHECK: _ld4:
+; CHECK: ld4.b { v4, v5, v6, v7 }[13], [x3] ; encoding: [0x64,0x34,0x60,0x4d]
+; CHECK: ld4.h { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0x70,0x60,0x0d]
+; CHECK: ld4.s { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0xa0,0x60,0x4d]
+; CHECK: ld4.d { v4, v5, v6, v7 }[1], [x3] ; encoding: [0x64,0xa4,0x60,0x4d]
+; CHECK: ld4.b { v4, v5, v6, v7 }[13], [x3], x5 ; encoding: [0x64,0x34,0xe5,0x4d]
+; CHECK: ld4.h { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0x70,0xe5,0x0d]
+; CHECK: ld4.s { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0xa0,0xe5,0x4d]
+; CHECK: ld4.d { v4, v5, v6, v7 }[1], [x3], x5 ; encoding: [0x64,0xa4,0xe5,0x4d]
+; CHECK: ld4.b { v4, v5, v6, v7 }[13], [x3], #4 ; encoding: [0x64,0x34,0xff,0x4d]
+; CHECK: ld4.h { v4, v5, v6, v7 }[2], [x3], #8 ; encoding: [0x64,0x70,0xff,0x0d]
+; CHECK: ld4.s { v4, v5, v6, v7 }[2], [x3], #16 ; encoding: [0x64,0xa0,0xff,0x4d]
+; CHECK: ld4.d { v4, v5, v6, v7 }[1], [x3], #32 ; encoding: [0x64,0xa4,0xff,0x4d]
+
+_st1:
+ st1.b {v4}[13], [x3]
+ st1.h {v4}[2], [x3]
+ st1.s {v4}[2], [x3]
+ st1.d {v4}[1], [x3]
+ st1.b {v4}[13], [x3], x5
+ st1.h {v4}[2], [x3], x5
+ st1.s {v4}[2], [x3], x5
+ st1.d {v4}[1], [x3], x5
+ st1.b {v4}[13], [x3], #1
+ st1.h {v4}[2], [x3], #2
+ st1.s {v4}[2], [x3], #4
+ st1.d {v4}[1], [x3], #8
+
+; CHECK: _st1:
+; CHECK: st1.b { v4 }[13], [x3] ; encoding: [0x64,0x14,0x00,0x4d]
+; CHECK: st1.h { v4 }[2], [x3] ; encoding: [0x64,0x50,0x00,0x0d]
+; CHECK: st1.s { v4 }[2], [x3] ; encoding: [0x64,0x80,0x00,0x4d]
+; CHECK: st1.d { v4 }[1], [x3] ; encoding: [0x64,0x84,0x00,0x4d]
+; CHECK: st1.b { v4 }[13], [x3], x5 ; encoding: [0x64,0x14,0x85,0x4d]
+; CHECK: st1.h { v4 }[2], [x3], x5 ; encoding: [0x64,0x50,0x85,0x0d]
+; CHECK: st1.s { v4 }[2], [x3], x5 ; encoding: [0x64,0x80,0x85,0x4d]
+; CHECK: st1.d { v4 }[1], [x3], x5 ; encoding: [0x64,0x84,0x85,0x4d]
+; CHECK: st1.b { v4 }[13], [x3], #1 ; encoding: [0x64,0x14,0x9f,0x4d]
+; CHECK: st1.h { v4 }[2], [x3], #2 ; encoding: [0x64,0x50,0x9f,0x0d]
+; CHECK: st1.s { v4 }[2], [x3], #4 ; encoding: [0x64,0x80,0x9f,0x4d]
+; CHECK: st1.d { v4 }[1], [x3], #8 ; encoding: [0x64,0x84,0x9f,0x4d]
+
+_st2:
+ st2.b {v4, v5}[13], [x3]
+ st2.h {v4, v5}[2], [x3]
+ st2.s {v4, v5}[2], [x3]
+ st2.d {v4, v5}[1], [x3]
+ st2.b {v4, v5}[13], [x3], x5
+ st2.h {v4, v5}[2], [x3], x5
+ st2.s {v4, v5}[2], [x3], x5
+ st2.d {v4, v5}[1], [x3], x5
+ st2.b {v4, v5}[13], [x3], #2
+ st2.h {v4, v5}[2], [x3], #4
+ st2.s {v4, v5}[2], [x3], #8
+ st2.d {v4, v5}[1], [x3], #16
+
+; CHECK: _st2:
+; CHECK: st2.b { v4, v5 }[13], [x3] ; encoding: [0x64,0x14,0x20,0x4d]
+; CHECK: st2.h { v4, v5 }[2], [x3] ; encoding: [0x64,0x50,0x20,0x0d]
+; CHECK: st2.s { v4, v5 }[2], [x3] ; encoding: [0x64,0x80,0x20,0x4d]
+; CHECK: st2.d { v4, v5 }[1], [x3] ; encoding: [0x64,0x84,0x20,0x4d]
+; CHECK: st2.b { v4, v5 }[13], [x3], x5 ; encoding: [0x64,0x14,0xa5,0x4d]
+; CHECK: st2.h { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x50,0xa5,0x0d]
+; CHECK: st2.s { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x80,0xa5,0x4d]
+; CHECK: st2.d { v4, v5 }[1], [x3], x5 ; encoding: [0x64,0x84,0xa5,0x4d]
+; CHECK: st2.b { v4, v5 }[13], [x3], #2 ; encoding: [0x64,0x14,0xbf,0x4d]
+; CHECK: st2.h { v4, v5 }[2], [x3], #4 ; encoding: [0x64,0x50,0xbf,0x0d]
+; CHECK: st2.s { v4, v5 }[2], [x3], #8 ; encoding: [0x64,0x80,0xbf,0x4d]
+; CHECK: st2.d { v4, v5 }[1], [x3], #16 ; encoding: [0x64,0x84,0xbf,0x4d]
+
+
+_st3:
+ st3.b {v4, v5, v6}[13], [x3]
+ st3.h {v4, v5, v6}[2], [x3]
+ st3.s {v4, v5, v6}[2], [x3]
+ st3.d {v4, v5, v6}[1], [x3]
+ st3.b {v4, v5, v6}[13], [x3], x5
+ st3.h {v4, v5, v6}[2], [x3], x5
+ st3.s {v4, v5, v6}[2], [x3], x5
+ st3.d {v4, v5, v6}[1], [x3], x5
+ st3.b {v4, v5, v6}[13], [x3], #3
+ st3.h {v4, v5, v6}[2], [x3], #6
+ st3.s {v4, v5, v6}[2], [x3], #12
+ st3.d {v4, v5, v6}[1], [x3], #24
+
+; CHECK: _st3:
+; CHECK: st3.b { v4, v5, v6 }[13], [x3] ; encoding: [0x64,0x34,0x00,0x4d]
+; CHECK: st3.h { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0x70,0x00,0x0d]
+; CHECK: st3.s { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0xa0,0x00,0x4d]
+; CHECK: st3.d { v4, v5, v6 }[1], [x3] ; encoding: [0x64,0xa4,0x00,0x4d]
+; CHECK: st3.b { v4, v5, v6 }[13], [x3], x5 ; encoding: [0x64,0x34,0x85,0x4d]
+; CHECK: st3.h { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0x70,0x85,0x0d]
+; CHECK: st3.s { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0xa0,0x85,0x4d]
+; CHECK: st3.d { v4, v5, v6 }[1], [x3], x5 ; encoding: [0x64,0xa4,0x85,0x4d]
+; CHECK: st3.b { v4, v5, v6 }[13], [x3], #3 ; encoding: [0x64,0x34,0x9f,0x4d]
+; CHECK: st3.h { v4, v5, v6 }[2], [x3], #6 ; encoding: [0x64,0x70,0x9f,0x0d]
+; CHECK: st3.s { v4, v5, v6 }[2], [x3], #12 ; encoding: [0x64,0xa0,0x9f,0x4d]
+; CHECK: st3.d { v4, v5, v6 }[1], [x3], #24 ; encoding: [0x64,0xa4,0x9f,0x4d]
+
+_st4:
+ st4.b {v4, v5, v6, v7}[13], [x3]
+ st4.h {v4, v5, v6, v7}[2], [x3]
+ st4.s {v4, v5, v6, v7}[2], [x3]
+ st4.d {v4, v5, v6, v7}[1], [x3]
+ st4.b {v4, v5, v6, v7}[13], [x3], x5
+ st4.h {v4, v5, v6, v7}[2], [x3], x5
+ st4.s {v4, v5, v6, v7}[2], [x3], x5
+ st4.d {v4, v5, v6, v7}[1], [x3], x5
+ st4.b {v4, v5, v6, v7}[13], [x3], #4
+ st4.h {v4, v5, v6, v7}[2], [x3], #8
+ st4.s {v4, v5, v6, v7}[2], [x3], #16
+ st4.d {v4, v5, v6, v7}[1], [x3], #32
+
+; CHECK: _st4:
+; CHECK: st4.b { v4, v5, v6, v7 }[13], [x3] ; encoding: [0x64,0x34,0x20,0x4d]
+; CHECK: st4.h { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0x70,0x20,0x0d]
+; CHECK: st4.s { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0xa0,0x20,0x4d]
+; CHECK: st4.d { v4, v5, v6, v7 }[1], [x3] ; encoding: [0x64,0xa4,0x20,0x4d]
+; CHECK: st4.b { v4, v5, v6, v7 }[13], [x3], x5 ; encoding: [0x64,0x34,0xa5,0x4d]
+; CHECK: st4.h { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0x70,0xa5,0x0d]
+; CHECK: st4.s { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0xa0,0xa5,0x4d]
+; CHECK: st4.d { v4, v5, v6, v7 }[1], [x3], x5 ; encoding: [0x64,0xa4,0xa5,0x4d]
+; CHECK: st4.b { v4, v5, v6, v7 }[13], [x3], #4 ; encoding: [0x64,0x34,0xbf,0x4d]
+; CHECK: st4.h { v4, v5, v6, v7 }[2], [x3], #8 ; encoding: [0x64,0x70,0xbf,0x0d]
+; CHECK: st4.s { v4, v5, v6, v7 }[2], [x3], #16 ; encoding: [0x64,0xa0,0xbf,0x4d]
+; CHECK: st4.d { v4, v5, v6, v7 }[1], [x3], #32 ; encoding: [0x64,0xa4,0xbf,0x4d]
+
+
+;---------
+; ARM verbose syntax equivalents to the above.
+;---------
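+;
+; For example, "ld1 { v1.8b }, [x1]" in this section is the same instruction
+; as the short dot-suffix form used in the sections above; the printer
+; canonicalizes to the dot-suffix spelling, so the checks at the end of this
+; section still expect "ld1.8b { v1 }, [x1]" with encoding
+; [0x21,0x70,0x40,0x0c].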
+verbose_syntax:
+
+ ld1 { v1.8b }, [x1]
+ ld1 { v2.8b, v3.8b }, [x1]
+ ld1 { v3.8b, v4.8b, v5.8b }, [x1]
+ ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1]
+
+ ld1 { v1.16b }, [x1]
+ ld1 { v2.16b, v3.16b }, [x1]
+ ld1 { v3.16b, v4.16b, v5.16b }, [x1]
+ ld1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1]
+
+ ld1 { v1.4h }, [x1]
+ ld1 { v2.4h, v3.4h }, [x1]
+ ld1 { v3.4h, v4.4h, v5.4h }, [x1]
+ ld1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1]
+
+ ld1 { v1.8h }, [x1]
+ ld1 { v2.8h, v3.8h }, [x1]
+ ld1 { v3.8h, v4.8h, v5.8h }, [x1]
+ ld1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1]
+
+ ld1 { v1.2s }, [x1]
+ ld1 { v2.2s, v3.2s }, [x1]
+ ld1 { v3.2s, v4.2s, v5.2s }, [x1]
+ ld1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1]
+
+ ld1 { v1.4s }, [x1]
+ ld1 { v2.4s, v3.4s }, [x1]
+ ld1 { v3.4s, v4.4s, v5.4s }, [x1]
+ ld1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1]
+
+ ld1 { v1.1d }, [x1]
+ ld1 { v2.1d, v3.1d }, [x1]
+ ld1 { v3.1d, v4.1d, v5.1d }, [x1]
+ ld1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1]
+
+ ld1 { v1.2d }, [x1]
+ ld1 { v2.2d, v3.2d }, [x1]
+ ld1 { v3.2d, v4.2d, v5.2d }, [x1]
+ ld1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1]
+
+ st1 { v1.8b }, [x1]
+ st1 { v2.8b, v3.8b }, [x1]
+ st1 { v3.8b, v4.8b, v5.8b }, [x1]
+ st1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1]
+
+ st1 { v1.16b }, [x1]
+ st1 { v2.16b, v3.16b }, [x1]
+ st1 { v3.16b, v4.16b, v5.16b }, [x1]
+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1]
+
+ st1 { v1.4h }, [x1]
+ st1 { v2.4h, v3.4h }, [x1]
+ st1 { v3.4h, v4.4h, v5.4h }, [x1]
+ st1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1]
+
+ st1 { v1.8h }, [x1]
+ st1 { v2.8h, v3.8h }, [x1]
+ st1 { v3.8h, v4.8h, v5.8h }, [x1]
+ st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1]
+
+ st1 { v1.2s }, [x1]
+ st1 { v2.2s, v3.2s }, [x1]
+ st1 { v3.2s, v4.2s, v5.2s }, [x1]
+ st1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1]
+
+ st1 { v1.4s }, [x1]
+ st1 { v2.4s, v3.4s }, [x1]
+ st1 { v3.4s, v4.4s, v5.4s }, [x1]
+ st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1]
+
+ st1 { v1.1d }, [x1]
+ st1 { v2.1d, v3.1d }, [x1]
+ st1 { v3.1d, v4.1d, v5.1d }, [x1]
+ st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1]
+
+ st1 { v1.2d }, [x1]
+ st1 { v2.2d, v3.2d }, [x1]
+ st1 { v3.2d, v4.2d, v5.2d }, [x1]
+ st1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1]
+
+ ld2 { v3.8b, v4.8b }, [x19]
+ ld2 { v3.16b, v4.16b }, [x19]
+ ld2 { v3.4h, v4.4h }, [x19]
+ ld2 { v3.8h, v4.8h }, [x19]
+ ld2 { v3.2s, v4.2s }, [x19]
+ ld2 { v3.4s, v4.4s }, [x19]
+ ld2 { v3.2d, v4.2d }, [x19]
+
+ st2 { v3.8b, v4.8b }, [x19]
+ st2 { v3.16b, v4.16b }, [x19]
+ st2 { v3.4h, v4.4h }, [x19]
+ st2 { v3.8h, v4.8h }, [x19]
+ st2 { v3.2s, v4.2s }, [x19]
+ st2 { v3.4s, v4.4s }, [x19]
+ st2 { v3.2d, v4.2d }, [x19]
+
+ ld3 { v2.8b, v3.8b, v4.8b }, [x19]
+ ld3 { v2.16b, v3.16b, v4.16b }, [x19]
+ ld3 { v2.4h, v3.4h, v4.4h }, [x19]
+ ld3 { v2.8h, v3.8h, v4.8h }, [x19]
+ ld3 { v2.2s, v3.2s, v4.2s }, [x19]
+ ld3 { v2.4s, v3.4s, v4.4s }, [x19]
+ ld3 { v2.2d, v3.2d, v4.2d }, [x19]
+
+ st3 { v2.8b, v3.8b, v4.8b }, [x19]
+ st3 { v2.16b, v3.16b, v4.16b }, [x19]
+ st3 { v2.4h, v3.4h, v4.4h }, [x19]
+ st3 { v2.8h, v3.8h, v4.8h }, [x19]
+ st3 { v2.2s, v3.2s, v4.2s }, [x19]
+ st3 { v2.4s, v3.4s, v4.4s }, [x19]
+ st3 { v2.2d, v3.2d, v4.2d }, [x19]
+
+ ld4 { v2.8b, v3.8b, v4.8b, v5.8b }, [x19]
+ ld4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x19]
+ ld4 { v2.4h, v3.4h, v4.4h, v5.4h }, [x19]
+ ld4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x19]
+ ld4 { v2.2s, v3.2s, v4.2s, v5.2s }, [x19]
+ ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x19]
+ ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x19]
+
+ st4 { v2.8b, v3.8b, v4.8b, v5.8b }, [x19]
+ st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x19]
+ st4 { v2.4h, v3.4h, v4.4h, v5.4h }, [x19]
+ st4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x19]
+ st4 { v2.2s, v3.2s, v4.2s, v5.2s }, [x19]
+ st4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x19]
+ st4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x19]
+
+ ld1 { v1.8b }, [x1], x15
+ ld1 { v2.8b, v3.8b }, [x1], x15
+ ld1 { v3.8b, v4.8b, v5.8b }, [x1], x15
+ ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
+
+ ld1 { v1.16b }, [x1], x15
+ ld1 { v2.16b, v3.16b }, [x1], x15
+ ld1 { v3.16b, v4.16b, v5.16b }, [x1], x15
+ ld1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
+
+ ld1 { v1.4h }, [x1], x15
+ ld1 { v2.4h, v3.4h }, [x1], x15
+ ld1 { v3.4h, v4.4h, v5.4h }, [x1], x15
+ ld1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
+
+ ld1 { v1.8h }, [x1], x15
+ ld1 { v2.8h, v3.8h }, [x1], x15
+ ld1 { v3.8h, v4.8h, v5.8h }, [x1], x15
+ ld1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
+
+ ld1 { v1.2s }, [x1], x15
+ ld1 { v2.2s, v3.2s }, [x1], x15
+ ld1 { v3.2s, v4.2s, v5.2s }, [x1], x15
+ ld1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
+
+ ld1 { v1.4s }, [x1], x15
+ ld1 { v2.4s, v3.4s }, [x1], x15
+ ld1 { v3.4s, v4.4s, v5.4s }, [x1], x15
+ ld1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
+
+ ld1 { v1.1d }, [x1], x15
+ ld1 { v2.1d, v3.1d }, [x1], x15
+ ld1 { v3.1d, v4.1d, v5.1d }, [x1], x15
+ ld1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], x15
+
+ ld1 { v1.2d }, [x1], x15
+ ld1 { v2.2d, v3.2d }, [x1], x15
+ ld1 { v3.2d, v4.2d, v5.2d }, [x1], x15
+ ld1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
+
+ st1 { v1.8b }, [x1], x15
+ st1 { v2.8b, v3.8b }, [x1], x15
+ st1 { v3.8b, v4.8b, v5.8b }, [x1], x15
+ st1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
+
+ st1 { v1.16b }, [x1], x15
+ st1 { v2.16b, v3.16b }, [x1], x15
+ st1 { v3.16b, v4.16b, v5.16b }, [x1], x15
+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
+
+ st1 { v1.4h }, [x1], x15
+ st1 { v2.4h, v3.4h }, [x1], x15
+ st1 { v3.4h, v4.4h, v5.4h }, [x1], x15
+ st1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
+
+ st1 { v1.8h }, [x1], x15
+ st1 { v2.8h, v3.8h }, [x1], x15
+ st1 { v3.8h, v4.8h, v5.8h }, [x1], x15
+ st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
+
+ st1 { v1.2s }, [x1], x15
+ st1 { v2.2s, v3.2s }, [x1], x15
+ st1 { v3.2s, v4.2s, v5.2s }, [x1], x15
+ st1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
+
+ st1 { v1.4s }, [x1], x15
+ st1 { v2.4s, v3.4s }, [x1], x15
+ st1 { v3.4s, v4.4s, v5.4s }, [x1], x15
+ st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
+
+ st1 { v1.1d }, [x1], x15
+ st1 { v2.1d, v3.1d }, [x1], x15
+ st1 { v3.1d, v4.1d, v5.1d }, [x1], x15
+ st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], x15
+
+ st1 { v1.2d }, [x1], x15
+ st1 { v2.2d, v3.2d }, [x1], x15
+ st1 { v3.2d, v4.2d, v5.2d }, [x1], x15
+ st1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
+
+ ld1 { v1.8b }, [x1], #8
+ ld1 { v2.8b, v3.8b }, [x1], #16
+ ld1 { v3.8b, v4.8b, v5.8b }, [x1], #24
+ ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
+
+ ld1 { v1.16b }, [x1], #16
+ ld1 { v2.16b, v3.16b }, [x1], #32
+ ld1 { v3.16b, v4.16b, v5.16b }, [x1], #48
+ ld1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
+
+ ld1 { v1.4h }, [x1], #8
+ ld1 { v2.4h, v3.4h }, [x1], #16
+ ld1 { v3.4h, v4.4h, v5.4h }, [x1], #24
+ ld1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
+
+ ld1 { v1.8h }, [x1], #16
+ ld1 { v2.8h, v3.8h }, [x1], #32
+ ld1 { v3.8h, v4.8h, v5.8h }, [x1], #48
+ ld1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
+
+ ld1 { v1.2s }, [x1], #8
+ ld1 { v2.2s, v3.2s }, [x1], #16
+ ld1 { v3.2s, v4.2s, v5.2s }, [x1], #24
+ ld1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
+
+ ld1 { v1.4s }, [x1], #16
+ ld1 { v2.4s, v3.4s }, [x1], #32
+ ld1 { v3.4s, v4.4s, v5.4s }, [x1], #48
+ ld1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
+
+ ld1 { v1.1d }, [x1], #8
+ ld1 { v2.1d, v3.1d }, [x1], #16
+ ld1 { v3.1d, v4.1d, v5.1d }, [x1], #24
+ ld1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], #32
+
+ ld1 { v1.2d }, [x1], #16
+ ld1 { v2.2d, v3.2d }, [x1], #32
+ ld1 { v3.2d, v4.2d, v5.2d }, [x1], #48
+ ld1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
+
+ st1 { v1.8b }, [x1], #8
+ st1 { v2.8b, v3.8b }, [x1], #16
+ st1 { v3.8b, v4.8b, v5.8b }, [x1], #24
+ st1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
+
+ st1 { v1.16b }, [x1], #16
+ st1 { v2.16b, v3.16b }, [x1], #32
+ st1 { v3.16b, v4.16b, v5.16b }, [x1], #48
+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
+
+ st1 { v1.4h }, [x1], #8
+ st1 { v2.4h, v3.4h }, [x1], #16
+ st1 { v3.4h, v4.4h, v5.4h }, [x1], #24
+ st1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
+
+ st1 { v1.8h }, [x1], #16
+ st1 { v2.8h, v3.8h }, [x1], #32
+ st1 { v3.8h, v4.8h, v5.8h }, [x1], #48
+ st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
+
+ st1 { v1.2s }, [x1], #8
+ st1 { v2.2s, v3.2s }, [x1], #16
+ st1 { v3.2s, v4.2s, v5.2s }, [x1], #24
+ st1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
+
+ st1 { v1.4s }, [x1], #16
+ st1 { v2.4s, v3.4s }, [x1], #32
+ st1 { v3.4s, v4.4s, v5.4s }, [x1], #48
+ st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
+
+ st1 { v1.1d }, [x1], #8
+ st1 { v2.1d, v3.1d }, [x1], #16
+ st1 { v3.1d, v4.1d, v5.1d }, [x1], #24
+ st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], #32
+
+ st1 { v1.2d }, [x1], #16
+ st1 { v2.2d, v3.2d }, [x1], #32
+ st1 { v3.2d, v4.2d, v5.2d }, [x1], #48
+ st1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
+
+ ld2 { v2.8b, v3.8b }, [x1], x15
+ ld2 { v2.16b, v3.16b }, [x1], x15
+ ld2 { v2.4h, v3.4h }, [x1], x15
+ ld2 { v2.8h, v3.8h }, [x1], x15
+ ld2 { v2.2s, v3.2s }, [x1], x15
+ ld2 { v2.4s, v3.4s }, [x1], x15
+ ld2 { v2.2d, v3.2d }, [x1], x15
+
+ st2 { v2.8b, v3.8b }, [x1], x15
+ st2 { v2.16b, v3.16b }, [x1], x15
+ st2 { v2.4h, v3.4h }, [x1], x15
+ st2 { v2.8h, v3.8h }, [x1], x15
+ st2 { v2.2s, v3.2s }, [x1], x15
+ st2 { v2.4s, v3.4s }, [x1], x15
+ st2 { v2.2d, v3.2d }, [x1], x15
+
+ ld2 { v2.8b, v3.8b }, [x1], #16
+ ld2 { v2.16b, v3.16b }, [x1], #32
+ ld2 { v2.4h, v3.4h }, [x1], #16
+ ld2 { v2.8h, v3.8h }, [x1], #32
+ ld2 { v2.2s, v3.2s }, [x1], #16
+ ld2 { v2.4s, v3.4s }, [x1], #32
+ ld2 { v2.2d, v3.2d }, [x1], #32
+
+ st2 { v2.8b, v3.8b }, [x1], #16
+ st2 { v2.16b, v3.16b }, [x1], #32
+ st2 { v2.4h, v3.4h }, [x1], #16
+ st2 { v2.8h, v3.8h }, [x1], #32
+ st2 { v2.2s, v3.2s }, [x1], #16
+ st2 { v2.4s, v3.4s }, [x1], #32
+ st2 { v2.2d, v3.2d }, [x1], #32
+
+ ld3 { v3.8b, v4.8b, v5.8b }, [x1], x15
+ ld3 { v3.16b, v4.16b, v5.16b }, [x1], x15
+ ld3 { v3.4h, v4.4h, v5.4h }, [x1], x15
+ ld3 { v3.8h, v4.8h, v5.8h }, [x1], x15
+ ld3 { v3.2s, v4.2s, v5.2s }, [x1], x15
+ ld3 { v3.4s, v4.4s, v5.4s }, [x1], x15
+ ld3 { v3.2d, v4.2d, v5.2d }, [x1], x15
+
+ st3 { v3.8b, v4.8b, v5.8b }, [x1], x15
+ st3 { v3.16b, v4.16b, v5.16b }, [x1], x15
+ st3 { v3.4h, v4.4h, v5.4h }, [x1], x15
+ st3 { v3.8h, v4.8h, v5.8h }, [x1], x15
+ st3 { v3.2s, v4.2s, v5.2s }, [x1], x15
+ st3 { v3.4s, v4.4s, v5.4s }, [x1], x15
+ st3 { v3.2d, v4.2d, v5.2d }, [x1], x15
+
+	ld3 { v3.8b, v4.8b, v5.8b }, [x1], #24
+	ld3 { v3.16b, v4.16b, v5.16b }, [x1], #48
+ ld3 { v3.4h, v4.4h, v5.4h }, [x1], #24
+ ld3 { v3.8h, v4.8h, v5.8h }, [x1], #48
+ ld3 { v3.2s, v4.2s, v5.2s }, [x1], #24
+ ld3 { v3.4s, v4.4s, v5.4s }, [x1], #48
+ ld3 { v3.2d, v4.2d, v5.2d }, [x1], #48
+
+ st3 { v3.8b, v4.8b, v5.8b }, [x1], #24
+ st3 { v3.16b, v4.16b, v5.16b }, [x1], #48
+ st3 { v3.4h, v4.4h, v5.4h }, [x1], #24
+ st3 { v3.8h, v4.8h, v5.8h }, [x1], #48
+ st3 { v3.2s, v4.2s, v5.2s }, [x1], #24
+ st3 { v3.4s, v4.4s, v5.4s }, [x1], #48
+ st3 { v3.2d, v4.2d, v5.2d }, [x1], #48
+
+ ld4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
+ ld4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
+ ld4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
+ ld4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
+ ld4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
+ ld4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
+ ld4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
+
+ st4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
+ st4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
+ st4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
+ st4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
+ st4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
+ st4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
+ st4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
+
+ ld4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
+ ld4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
+ ld4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
+ ld4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
+ ld4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
+ ld4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
+ ld4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
+
+ st4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
+ st4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
+ st4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
+ st4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
+ st4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
+ st4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
+ st4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
+
+
+ ld1r { v12.8b }, [x2]
+ ld1r { v12.8b }, [x2], x3
+ ld1r { v12.16b }, [x2]
+ ld1r { v12.16b }, [x2], x3
+ ld1r { v12.4h }, [x2]
+ ld1r { v12.4h }, [x2], x3
+ ld1r { v12.8h }, [x2]
+ ld1r { v12.8h }, [x2], x3
+ ld1r { v12.2s }, [x2]
+ ld1r { v12.2s }, [x2], x3
+ ld1r { v12.4s }, [x2]
+ ld1r { v12.4s }, [x2], x3
+ ld1r { v12.1d }, [x2]
+ ld1r { v12.1d }, [x2], x3
+ ld1r { v12.2d }, [x2]
+ ld1r { v12.2d }, [x2], x3
+
+ ld1r { v12.8b }, [x2], #1
+ ld1r { v12.16b }, [x2], #1
+ ld1r { v12.4h }, [x2], #2
+ ld1r { v12.8h }, [x2], #2
+ ld1r { v12.2s }, [x2], #4
+ ld1r { v12.4s }, [x2], #4
+ ld1r { v12.1d }, [x2], #8
+	ld1r { v12.2d }, [x2], #8
+
+	ld2r { v3.8b, v4.8b }, [x2]
+ ld2r { v3.8b, v4.8b }, [x2], x3
+ ld2r { v3.16b, v4.16b }, [x2]
+ ld2r { v3.16b, v4.16b }, [x2], x3
+ ld2r { v3.4h, v4.4h }, [x2]
+ ld2r { v3.4h, v4.4h }, [x2], x3
+ ld2r { v3.8h, v4.8h }, [x2]
+ ld2r { v3.8h, v4.8h }, [x2], x3
+ ld2r { v3.2s, v4.2s }, [x2]
+ ld2r { v3.2s, v4.2s }, [x2], x3
+ ld2r { v3.4s, v4.4s }, [x2]
+ ld2r { v3.4s, v4.4s }, [x2], x3
+ ld2r { v3.1d, v4.1d }, [x2]
+ ld2r { v3.1d, v4.1d }, [x2], x3
+ ld2r { v3.2d, v4.2d }, [x2]
+ ld2r { v3.2d, v4.2d }, [x2], x3
+
+ ld2r { v3.8b, v4.8b }, [x2], #2
+ ld2r { v3.16b, v4.16b }, [x2], #2
+ ld2r { v3.4h, v4.4h }, [x2], #4
+ ld2r { v3.8h, v4.8h }, [x2], #4
+ ld2r { v3.2s, v4.2s }, [x2], #8
+ ld2r { v3.4s, v4.4s }, [x2], #8
+ ld2r { v3.1d, v4.1d }, [x2], #16
+ ld2r { v3.2d, v4.2d }, [x2], #16
+
+ ld3r { v2.8b, v3.8b, v4.8b }, [x2]
+ ld3r { v2.8b, v3.8b, v4.8b }, [x2], x3
+ ld3r { v2.16b, v3.16b, v4.16b }, [x2]
+ ld3r { v2.16b, v3.16b, v4.16b }, [x2], x3
+ ld3r { v2.4h, v3.4h, v4.4h }, [x2]
+ ld3r { v2.4h, v3.4h, v4.4h }, [x2], x3
+ ld3r { v2.8h, v3.8h, v4.8h }, [x2]
+ ld3r { v2.8h, v3.8h, v4.8h }, [x2], x3
+ ld3r { v2.2s, v3.2s, v4.2s }, [x2]
+ ld3r { v2.2s, v3.2s, v4.2s }, [x2], x3
+ ld3r { v2.4s, v3.4s, v4.4s }, [x2]
+ ld3r { v2.4s, v3.4s, v4.4s }, [x2], x3
+ ld3r { v2.1d, v3.1d, v4.1d }, [x2]
+ ld3r { v2.1d, v3.1d, v4.1d }, [x2], x3
+ ld3r { v2.2d, v3.2d, v4.2d }, [x2]
+ ld3r { v2.2d, v3.2d, v4.2d }, [x2], x3
+
+ ld3r { v2.8b, v3.8b, v4.8b }, [x2], #3
+ ld3r { v2.16b, v3.16b, v4.16b }, [x2], #3
+ ld3r { v2.4h, v3.4h, v4.4h }, [x2], #6
+ ld3r { v2.8h, v3.8h, v4.8h }, [x2], #6
+ ld3r { v2.2s, v3.2s, v4.2s }, [x2], #12
+ ld3r { v2.4s, v3.4s, v4.4s }, [x2], #12
+ ld3r { v2.1d, v3.1d, v4.1d }, [x2], #24
+ ld3r { v2.2d, v3.2d, v4.2d }, [x2], #24
+
+ ld4r { v2.8b, v3.8b, v4.8b, v5.8b }, [x2]
+ ld4r { v2.8b, v3.8b, v4.8b, v5.8b }, [x2], x3
+ ld4r { v2.16b, v3.16b, v4.16b, v5.16b }, [x2]
+ ld4r { v2.16b, v3.16b, v4.16b, v5.16b }, [x2], x3
+ ld4r { v2.4h, v3.4h, v4.4h, v5.4h }, [x2]
+ ld4r { v2.4h, v3.4h, v4.4h, v5.4h }, [x2], x3
+ ld4r { v2.8h, v3.8h, v4.8h, v5.8h }, [x2]
+ ld4r { v2.8h, v3.8h, v4.8h, v5.8h }, [x2], x3
+ ld4r { v2.2s, v3.2s, v4.2s, v5.2s }, [x2]
+ ld4r { v2.2s, v3.2s, v4.2s, v5.2s }, [x2], x3
+ ld4r { v2.4s, v3.4s, v4.4s, v5.4s }, [x2]
+ ld4r { v2.4s, v3.4s, v4.4s, v5.4s }, [x2], x3
+ ld4r { v2.1d, v3.1d, v4.1d, v5.1d }, [x2]
+ ld4r { v2.1d, v3.1d, v4.1d, v5.1d }, [x2], x3
+ ld4r { v2.2d, v3.2d, v4.2d, v5.2d }, [x2]
+ ld4r { v2.2d, v3.2d, v4.2d, v5.2d }, [x2], x3
+
+ ld4r { v2.8b, v3.8b, v4.8b, v5.8b }, [x2], #4
+ ld4r { v2.16b, v3.16b, v4.16b, v5.16b }, [x2], #4
+ ld4r { v2.4h, v3.4h, v4.4h, v5.4h }, [x2], #8
+ ld4r { v2.8h, v3.8h, v4.8h, v5.8h }, [x2], #8
+ ld4r { v2.2s, v3.2s, v4.2s, v5.2s }, [x2], #16
+ ld4r { v2.4s, v3.4s, v4.4s, v5.4s }, [x2], #16
+ ld4r { v2.1d, v3.1d, v4.1d, v5.1d }, [x2], #32
+ ld4r { v2.2d, v3.2d, v4.2d, v5.2d }, [x2], #32
+
+ ld1 { v6.b }[13], [x3]
+ ld1 { v6.h }[2], [x3]
+ ld1 { v6.s }[2], [x3]
+ ld1 { v6.d }[1], [x3]
+ ld1 { v6.b }[13], [x3], x5
+ ld1 { v6.h }[2], [x3], x5
+ ld1 { v6.s }[2], [x3], x5
+ ld1 { v6.d }[1], [x3], x5
+ ld1 { v6.b }[13], [x3], #1
+ ld1 { v6.h }[2], [x3], #2
+ ld1 { v6.s }[2], [x3], #4
+ ld1 { v6.d }[1], [x3], #8
+
+ ld2 { v5.b, v6.b }[13], [x3]
+ ld2 { v5.h, v6.h }[2], [x3]
+ ld2 { v5.s, v6.s }[2], [x3]
+ ld2 { v5.d, v6.d }[1], [x3]
+ ld2 { v5.b, v6.b }[13], [x3], x5
+ ld2 { v5.h, v6.h }[2], [x3], x5
+ ld2 { v5.s, v6.s }[2], [x3], x5
+ ld2 { v5.d, v6.d }[1], [x3], x5
+ ld2 { v5.b, v6.b }[13], [x3], #2
+ ld2 { v5.h, v6.h }[2], [x3], #4
+ ld2 { v5.s, v6.s }[2], [x3], #8
+ ld2 { v5.d, v6.d }[1], [x3], #16
+
+ ld3 { v7.b, v8.b, v9.b }[13], [x3]
+ ld3 { v7.h, v8.h, v9.h }[2], [x3]
+ ld3 { v7.s, v8.s, v9.s }[2], [x3]
+ ld3 { v7.d, v8.d, v9.d }[1], [x3]
+ ld3 { v7.b, v8.b, v9.b }[13], [x3], x5
+ ld3 { v7.h, v8.h, v9.h }[2], [x3], x5
+ ld3 { v7.s, v8.s, v9.s }[2], [x3], x5
+ ld3 { v7.d, v8.d, v9.d }[1], [x3], x5
+ ld3 { v7.b, v8.b, v9.b }[13], [x3], #3
+ ld3 { v7.h, v8.h, v9.h }[2], [x3], #6
+ ld3 { v7.s, v8.s, v9.s }[2], [x3], #12
+ ld3 { v7.d, v8.d, v9.d }[1], [x3], #24
+
+ ld4 { v7.b, v8.b, v9.b, v10.b }[13], [x3]
+ ld4 { v7.h, v8.h, v9.h, v10.h }[2], [x3]
+ ld4 { v7.s, v8.s, v9.s, v10.s }[2], [x3]
+ ld4 { v7.d, v8.d, v9.d, v10.d }[1], [x3]
+ ld4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], x5
+ ld4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], x5
+ ld4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], x5
+ ld4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], x5
+ ld4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], #4
+ ld4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], #8
+ ld4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], #16
+ ld4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], #32
+
+ st1 { v6.b }[13], [x3]
+ st1 { v6.h }[2], [x3]
+ st1 { v6.s }[2], [x3]
+ st1 { v6.d }[1], [x3]
+ st1 { v6.b }[13], [x3], x5
+ st1 { v6.h }[2], [x3], x5
+ st1 { v6.s }[2], [x3], x5
+ st1 { v6.d }[1], [x3], x5
+ st1 { v6.b }[13], [x3], #1
+ st1 { v6.h }[2], [x3], #2
+ st1 { v6.s }[2], [x3], #4
+ st1 { v6.d }[1], [x3], #8
+
+
+ st2 { v5.b, v6.b }[13], [x3]
+ st2 { v5.h, v6.h }[2], [x3]
+ st2 { v5.s, v6.s }[2], [x3]
+ st2 { v5.d, v6.d }[1], [x3]
+ st2 { v5.b, v6.b }[13], [x3], x5
+ st2 { v5.h, v6.h }[2], [x3], x5
+ st2 { v5.s, v6.s }[2], [x3], x5
+ st2 { v5.d, v6.d }[1], [x3], x5
+ st2 { v5.b, v6.b }[13], [x3], #2
+ st2 { v5.h, v6.h }[2], [x3], #4
+ st2 { v5.s, v6.s }[2], [x3], #8
+ st2 { v5.d, v6.d }[1], [x3], #16
+
+ st3 { v7.b, v8.b, v9.b }[13], [x3]
+ st3 { v7.h, v8.h, v9.h }[2], [x3]
+ st3 { v7.s, v8.s, v9.s }[2], [x3]
+ st3 { v7.d, v8.d, v9.d }[1], [x3]
+ st3 { v7.b, v8.b, v9.b }[13], [x3], x5
+ st3 { v7.h, v8.h, v9.h }[2], [x3], x5
+ st3 { v7.s, v8.s, v9.s }[2], [x3], x5
+ st3 { v7.d, v8.d, v9.d }[1], [x3], x5
+ st3 { v7.b, v8.b, v9.b }[13], [x3], #3
+ st3 { v7.h, v8.h, v9.h }[2], [x3], #6
+ st3 { v7.s, v8.s, v9.s }[2], [x3], #12
+ st3 { v7.d, v8.d, v9.d }[1], [x3], #24
+
+ st4 { v7.b, v8.b, v9.b, v10.b }[13], [x3]
+ st4 { v7.h, v8.h, v9.h, v10.h }[2], [x3]
+ st4 { v7.s, v8.s, v9.s, v10.s }[2], [x3]
+ st4 { v7.d, v8.d, v9.d, v10.d }[1], [x3]
+ st4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], x5
+ st4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], x5
+ st4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], x5
+ st4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], x5
+ st4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], #4
+ st4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], #8
+ st4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], #16
+ st4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], #32
+
+; CHECK: ld1.8b { v1 }, [x1] ; encoding: [0x21,0x70,0x40,0x0c]
+; CHECK: ld1.8b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x40,0x0c]
+; CHECK: ld1.8b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x40,0x0c]
+; CHECK: ld1.8b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x40,0x0c]
+; CHECK: ld1.16b { v1 }, [x1] ; encoding: [0x21,0x70,0x40,0x4c]
+; CHECK: ld1.16b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x40,0x4c]
+; CHECK: ld1.16b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x40,0x4c]
+; CHECK: ld1.16b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x40,0x4c]
+; CHECK: ld1.4h { v1 }, [x1] ; encoding: [0x21,0x74,0x40,0x0c]
+; CHECK: ld1.4h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x40,0x0c]
+; CHECK: ld1.4h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x40,0x0c]
+; CHECK: ld1.4h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x40,0x0c]
+; CHECK: ld1.8h { v1 }, [x1] ; encoding: [0x21,0x74,0x40,0x4c]
+; CHECK: ld1.8h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x40,0x4c]
+; CHECK: ld1.8h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x40,0x4c]
+; CHECK: ld1.8h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x40,0x4c]
+; CHECK: ld1.2s { v1 }, [x1] ; encoding: [0x21,0x78,0x40,0x0c]
+; CHECK: ld1.2s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x40,0x0c]
+; CHECK: ld1.2s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x40,0x0c]
+; CHECK: ld1.2s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x40,0x0c]
+; CHECK: ld1.4s { v1 }, [x1] ; encoding: [0x21,0x78,0x40,0x4c]
+; CHECK: ld1.4s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x40,0x4c]
+; CHECK: ld1.4s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x40,0x4c]
+; CHECK: ld1.4s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x40,0x4c]
+; CHECK: ld1.1d { v1 }, [x1] ; encoding: [0x21,0x7c,0x40,0x0c]
+; CHECK: ld1.1d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x40,0x0c]
+; CHECK: ld1.1d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x40,0x0c]
+; CHECK: ld1.1d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x40,0x0c]
+; CHECK: ld1.2d { v1 }, [x1] ; encoding: [0x21,0x7c,0x40,0x4c]
+; CHECK: ld1.2d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x40,0x4c]
+; CHECK: ld1.2d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x40,0x4c]
+; CHECK: ld1.2d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x40,0x4c]
+; CHECK: st1.8b { v1 }, [x1] ; encoding: [0x21,0x70,0x00,0x0c]
+; CHECK: st1.8b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x00,0x0c]
+; CHECK: st1.8b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x00,0x0c]
+; CHECK: st1.8b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x00,0x0c]
+; CHECK: st1.16b { v1 }, [x1] ; encoding: [0x21,0x70,0x00,0x4c]
+; CHECK: st1.16b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x00,0x4c]
+; CHECK: st1.16b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x00,0x4c]
+; CHECK: st1.16b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x00,0x4c]
+; CHECK: st1.4h { v1 }, [x1] ; encoding: [0x21,0x74,0x00,0x0c]
+; CHECK: st1.4h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x00,0x0c]
+; CHECK: st1.4h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x00,0x0c]
+; CHECK: st1.4h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x00,0x0c]
+; CHECK: st1.8h { v1 }, [x1] ; encoding: [0x21,0x74,0x00,0x4c]
+; CHECK: st1.8h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x00,0x4c]
+; CHECK: st1.8h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x00,0x4c]
+; CHECK: st1.8h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x00,0x4c]
+; CHECK: st1.2s { v1 }, [x1] ; encoding: [0x21,0x78,0x00,0x0c]
+; CHECK: st1.2s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x00,0x0c]
+; CHECK: st1.2s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x00,0x0c]
+; CHECK: st1.2s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x00,0x0c]
+; CHECK: st1.4s { v1 }, [x1] ; encoding: [0x21,0x78,0x00,0x4c]
+; CHECK: st1.4s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x00,0x4c]
+; CHECK: st1.4s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x00,0x4c]
+; CHECK: st1.4s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x00,0x4c]
+; CHECK: st1.1d { v1 }, [x1] ; encoding: [0x21,0x7c,0x00,0x0c]
+; CHECK: st1.1d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x00,0x0c]
+; CHECK: st1.1d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x00,0x0c]
+; CHECK: st1.1d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x00,0x0c]
+; CHECK: st1.2d { v1 }, [x1] ; encoding: [0x21,0x7c,0x00,0x4c]
+; CHECK: st1.2d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x00,0x4c]
+; CHECK: st1.2d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x00,0x4c]
+; CHECK: st1.2d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x00,0x4c]
+; CHECK: ld2.8b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x40,0x0c]
+; CHECK: ld2.16b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x40,0x4c]
+; CHECK: ld2.4h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x40,0x0c]
+; CHECK: ld2.8h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x40,0x4c]
+; CHECK: ld2.2s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x40,0x0c]
+; CHECK: ld2.4s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x40,0x4c]
+; CHECK: ld2.2d { v3, v4 }, [x19] ; encoding: [0x63,0x8e,0x40,0x4c]
+; CHECK: st2.8b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x00,0x0c]
+; CHECK: st2.16b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x00,0x4c]
+; CHECK: st2.4h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x00,0x0c]
+; CHECK: st2.8h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x00,0x4c]
+; CHECK: st2.2s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x00,0x0c]
+; CHECK: st2.4s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x00,0x4c]
+; CHECK: st2.2d { v3, v4 }, [x19] ; encoding: [0x63,0x8e,0x00,0x4c]
+; CHECK: ld3.8b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x40,0x0c]
+; CHECK: ld3.16b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x40,0x4c]
+; CHECK: ld3.4h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x40,0x0c]
+; CHECK: ld3.8h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x40,0x4c]
+; CHECK: ld3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x40,0x0c]
+; CHECK: ld3.4s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x40,0x4c]
+; CHECK: ld3.2d { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4e,0x40,0x4c]
+; CHECK: st3.8b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x00,0x0c]
+; CHECK: st3.16b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x00,0x4c]
+; CHECK: st3.4h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x00,0x0c]
+; CHECK: st3.8h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x00,0x4c]
+; CHECK: st3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x00,0x0c]
+; CHECK: st3.4s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x00,0x4c]
+; CHECK: st3.2d { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4e,0x00,0x4c]
+; CHECK: ld4.8b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x40,0x0c]
+; CHECK: ld4.16b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x40,0x4c]
+; CHECK: ld4.4h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x40,0x0c]
+; CHECK: ld4.8h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x40,0x4c]
+; CHECK: ld4.2s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x40,0x0c]
+; CHECK: ld4.4s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x40,0x4c]
+; CHECK: ld4.2d { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0e,0x40,0x4c]
+; CHECK: st4.8b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x00,0x0c]
+; CHECK: st4.16b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x00,0x4c]
+; CHECK: st4.4h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x00,0x0c]
+; CHECK: st4.8h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x00,0x4c]
+; CHECK: st4.2s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x00,0x0c]
+; CHECK: st4.4s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x00,0x4c]
+; CHECK: st4.2d { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0e,0x00,0x4c]
+; CHECK: ld1.8b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0xcf,0x0c]
+; CHECK: ld1.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0xcf,0x0c]
+; CHECK: ld1.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0xcf,0x0c]
+; CHECK: ld1.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0xcf,0x0c]
+; CHECK: ld1.16b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0xcf,0x4c]
+; CHECK: ld1.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0xcf,0x4c]
+; CHECK: ld1.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0xcf,0x4c]
+; CHECK: ld1.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0xcf,0x4c]
+; CHECK: ld1.4h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0xcf,0x0c]
+; CHECK: ld1.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0xcf,0x0c]
+; CHECK: ld1.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0xcf,0x0c]
+; CHECK: ld1.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0xcf,0x0c]
+; CHECK: ld1.8h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0xcf,0x4c]
+; CHECK: ld1.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0xcf,0x4c]
+; CHECK: ld1.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0xcf,0x4c]
+; CHECK: ld1.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0xcf,0x4c]
+; CHECK: ld1.2s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0xcf,0x0c]
+; CHECK: ld1.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0xcf,0x0c]
+; CHECK: ld1.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0xcf,0x0c]
+; CHECK: ld1.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0xcf,0x0c]
+; CHECK: ld1.4s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0xcf,0x4c]
+; CHECK: ld1.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0xcf,0x4c]
+; CHECK: ld1.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0xcf,0x4c]
+; CHECK: ld1.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0xcf,0x4c]
+; CHECK: ld1.1d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0xcf,0x0c]
+; CHECK: ld1.1d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0xcf,0x0c]
+; CHECK: ld1.1d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0xcf,0x0c]
+; CHECK: ld1.1d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0xcf,0x0c]
+; CHECK: ld1.2d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0xcf,0x4c]
+; CHECK: ld1.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0xcf,0x4c]
+; CHECK: ld1.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0xcf,0x4c]
+; CHECK: ld1.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0xcf,0x4c]
+; CHECK: st1.8b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0x8f,0x0c]
+; CHECK: st1.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0x8f,0x0c]
+; CHECK: st1.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0x8f,0x0c]
+; CHECK: st1.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0x8f,0x0c]
+; CHECK: st1.16b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0x8f,0x4c]
+; CHECK: st1.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0x8f,0x4c]
+; CHECK: st1.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0x8f,0x4c]
+; CHECK: st1.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0x8f,0x4c]
+; CHECK: st1.4h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0x8f,0x0c]
+; CHECK: st1.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0x8f,0x0c]
+; CHECK: st1.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0x8f,0x0c]
+; CHECK: st1.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0x8f,0x0c]
+; CHECK: st1.8h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0x8f,0x4c]
+; CHECK: st1.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0x8f,0x4c]
+; CHECK: st1.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0x8f,0x4c]
+; CHECK: st1.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0x8f,0x4c]
+; CHECK: st1.2s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0x8f,0x0c]
+; CHECK: st1.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0x8f,0x0c]
+; CHECK: st1.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0x8f,0x0c]
+; CHECK: st1.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0x8f,0x0c]
+; CHECK: st1.4s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0x8f,0x4c]
+; CHECK: st1.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0x8f,0x4c]
+; CHECK: st1.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0x8f,0x4c]
+; CHECK: st1.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0x8f,0x4c]
+; CHECK: st1.1d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0x8f,0x0c]
+; CHECK: st1.1d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0x8f,0x0c]
+; CHECK: st1.1d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0x8f,0x0c]
+; CHECK: st1.1d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0x8f,0x0c]
+; CHECK: st1.2d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0x8f,0x4c]
+; CHECK: st1.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0x8f,0x4c]
+; CHECK: st1.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0x8f,0x4c]
+; CHECK: st1.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0x8f,0x4c]
+; CHECK: ld1.8b { v1 }, [x1], #8 ; encoding: [0x21,0x70,0xdf,0x0c]
+; CHECK: ld1.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa0,0xdf,0x0c]
+; CHECK: ld1.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x60,0xdf,0x0c]
+; CHECK: ld1.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x20,0xdf,0x0c]
+; CHECK: ld1.16b { v1 }, [x1], #16 ; encoding: [0x21,0x70,0xdf,0x4c]
+; CHECK: ld1.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa0,0xdf,0x4c]
+; CHECK: ld1.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x60,0xdf,0x4c]
+; CHECK: ld1.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x20,0xdf,0x4c]
+; CHECK: ld1.4h { v1 }, [x1], #8 ; encoding: [0x21,0x74,0xdf,0x0c]
+; CHECK: ld1.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa4,0xdf,0x0c]
+; CHECK: ld1.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x64,0xdf,0x0c]
+; CHECK: ld1.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x24,0xdf,0x0c]
+; CHECK: ld1.8h { v1 }, [x1], #16 ; encoding: [0x21,0x74,0xdf,0x4c]
+; CHECK: ld1.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa4,0xdf,0x4c]
+; CHECK: ld1.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x64,0xdf,0x4c]
+; CHECK: ld1.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x24,0xdf,0x4c]
+; CHECK: ld1.2s { v1 }, [x1], #8 ; encoding: [0x21,0x78,0xdf,0x0c]
+; CHECK: ld1.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa8,0xdf,0x0c]
+; CHECK: ld1.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x68,0xdf,0x0c]
+; CHECK: ld1.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x28,0xdf,0x0c]
+; CHECK: ld1.4s { v1 }, [x1], #16 ; encoding: [0x21,0x78,0xdf,0x4c]
+; CHECK: ld1.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa8,0xdf,0x4c]
+; CHECK: ld1.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x68,0xdf,0x4c]
+; CHECK: ld1.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x28,0xdf,0x4c]
+; CHECK: ld1.1d { v1 }, [x1], #8 ; encoding: [0x21,0x7c,0xdf,0x0c]
+; CHECK: ld1.1d { v2, v3 }, [x1], #16 ; encoding: [0x22,0xac,0xdf,0x0c]
+; CHECK: ld1.1d { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x6c,0xdf,0x0c]
+; CHECK: ld1.1d { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x2c,0xdf,0x0c]
+; CHECK: ld1.2d { v1 }, [x1], #16 ; encoding: [0x21,0x7c,0xdf,0x4c]
+; CHECK: ld1.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0xac,0xdf,0x4c]
+; CHECK: ld1.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x6c,0xdf,0x4c]
+; CHECK: ld1.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x2c,0xdf,0x4c]
+; CHECK: st1.8b { v1 }, [x1], #8 ; encoding: [0x21,0x70,0x9f,0x0c]
+; CHECK: st1.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa0,0x9f,0x0c]
+; CHECK: st1.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x60,0x9f,0x0c]
+; CHECK: st1.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x20,0x9f,0x0c]
+; CHECK: st1.16b { v1 }, [x1], #16 ; encoding: [0x21,0x70,0x9f,0x4c]
+; CHECK: st1.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa0,0x9f,0x4c]
+; CHECK: st1.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x60,0x9f,0x4c]
+; CHECK: st1.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x20,0x9f,0x4c]
+; CHECK: st1.4h { v1 }, [x1], #8 ; encoding: [0x21,0x74,0x9f,0x0c]
+; CHECK: st1.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa4,0x9f,0x0c]
+; CHECK: st1.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x64,0x9f,0x0c]
+; CHECK: st1.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x24,0x9f,0x0c]
+; CHECK: st1.8h { v1 }, [x1], #16 ; encoding: [0x21,0x74,0x9f,0x4c]
+; CHECK: st1.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa4,0x9f,0x4c]
+; CHECK: st1.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x64,0x9f,0x4c]
+; CHECK: st1.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x24,0x9f,0x4c]
+; CHECK: st1.2s { v1 }, [x1], #8 ; encoding: [0x21,0x78,0x9f,0x0c]
+; CHECK: st1.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa8,0x9f,0x0c]
+; CHECK: st1.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x68,0x9f,0x0c]
+; CHECK: st1.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x28,0x9f,0x0c]
+; CHECK: st1.4s { v1 }, [x1], #16 ; encoding: [0x21,0x78,0x9f,0x4c]
+; CHECK: st1.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa8,0x9f,0x4c]
+; CHECK: st1.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x68,0x9f,0x4c]
+; CHECK: st1.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x28,0x9f,0x4c]
+; CHECK: st1.1d { v1 }, [x1], #8 ; encoding: [0x21,0x7c,0x9f,0x0c]
+; CHECK: st1.1d { v2, v3 }, [x1], #16 ; encoding: [0x22,0xac,0x9f,0x0c]
+; CHECK: st1.1d { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x6c,0x9f,0x0c]
+; CHECK: st1.1d { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x2c,0x9f,0x0c]
+; CHECK: st1.2d { v1 }, [x1], #16 ; encoding: [0x21,0x7c,0x9f,0x4c]
+; CHECK: st1.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0xac,0x9f,0x4c]
+; CHECK: st1.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x6c,0x9f,0x4c]
+; CHECK: st1.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x2c,0x9f,0x4c]
+; CHECK: ld2.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0xcf,0x0c]
+; CHECK: ld2.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0xcf,0x4c]
+; CHECK: ld2.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0xcf,0x0c]
+; CHECK: ld2.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0xcf,0x4c]
+; CHECK: ld2.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0xcf,0x0c]
+; CHECK: ld2.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0xcf,0x4c]
+; CHECK: ld2.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0x8c,0xcf,0x4c]
+; CHECK: st2.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0x8f,0x0c]
+; CHECK: st2.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0x8f,0x4c]
+; CHECK: st2.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0x8f,0x0c]
+; CHECK: st2.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0x8f,0x4c]
+; CHECK: st2.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0x8f,0x0c]
+; CHECK: st2.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0x8f,0x4c]
+; CHECK: st2.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0x8c,0x8f,0x4c]
+; CHECK: ld2.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0x80,0xdf,0x0c]
+; CHECK: ld2.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0x80,0xdf,0x4c]
+; CHECK: ld2.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0x84,0xdf,0x0c]
+; CHECK: ld2.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0x84,0xdf,0x4c]
+; CHECK: ld2.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0x88,0xdf,0x0c]
+; CHECK: ld2.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0x88,0xdf,0x4c]
+; CHECK: ld2.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0x8c,0xdf,0x4c]
+; CHECK: st2.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0x80,0x9f,0x0c]
+; CHECK: st2.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0x80,0x9f,0x4c]
+; CHECK: st2.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0x84,0x9f,0x0c]
+; CHECK: st2.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0x84,0x9f,0x4c]
+; CHECK: st2.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0x88,0x9f,0x0c]
+; CHECK: st2.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0x88,0x9f,0x4c]
+; CHECK: st2.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0x8c,0x9f,0x4c]
+; CHECK: ld3.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0xcf,0x0c]
+; CHECK: ld3.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0xcf,0x4c]
+; CHECK: ld3.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0xcf,0x0c]
+; CHECK: ld3.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0xcf,0x4c]
+; CHECK: ld3.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0xcf,0x0c]
+; CHECK: ld3.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0xcf,0x4c]
+; CHECK: ld3.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x4c,0xcf,0x4c]
+; CHECK: st3.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0x8f,0x0c]
+; CHECK: st3.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0x8f,0x4c]
+; CHECK: st3.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0x8f,0x0c]
+; CHECK: st3.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0x8f,0x4c]
+; CHECK: st3.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0x8f,0x0c]
+; CHECK: st3.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0x8f,0x4c]
+; CHECK: st3.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x4c,0x8f,0x4c]
+; CHECK: ld3.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x40,0xdf,0x0c]
+; CHECK: ld3.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x40,0xdf,0x4c]
+; CHECK: ld3.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x44,0xdf,0x0c]
+; CHECK: ld3.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x44,0xdf,0x4c]
+; CHECK: ld3.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x48,0xdf,0x0c]
+; CHECK: ld3.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x48,0xdf,0x4c]
+; CHECK: ld3.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x4c,0xdf,0x4c]
+; CHECK: st3.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x40,0x9f,0x0c]
+; CHECK: st3.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x40,0x9f,0x4c]
+; CHECK: st3.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x44,0x9f,0x0c]
+; CHECK: st3.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x44,0x9f,0x4c]
+; CHECK: st3.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x48,0x9f,0x0c]
+; CHECK: st3.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x48,0x9f,0x4c]
+; CHECK: st3.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x4c,0x9f,0x4c]
+; CHECK: ld4.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0xcf,0x0c]
+; CHECK: ld4.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0xcf,0x4c]
+; CHECK: ld4.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0xcf,0x0c]
+; CHECK: ld4.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0xcf,0x4c]
+; CHECK: ld4.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0xcf,0x0c]
+; CHECK: ld4.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0xcf,0x4c]
+; CHECK: ld4.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x0c,0xcf,0x4c]
+; CHECK: st4.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0x8f,0x0c]
+; CHECK: st4.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0x8f,0x4c]
+; CHECK: st4.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0x8f,0x0c]
+; CHECK: st4.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0x8f,0x4c]
+; CHECK: st4.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0x8f,0x0c]
+; CHECK: st4.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0x8f,0x4c]
+; CHECK: st4.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x0c,0x8f,0x4c]
+; CHECK: ld4.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x00,0xdf,0x0c]
+; CHECK: ld4.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x00,0xdf,0x4c]
+; CHECK: ld4.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x04,0xdf,0x0c]
+; CHECK: ld4.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x04,0xdf,0x4c]
+; CHECK: ld4.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x08,0xdf,0x0c]
+; CHECK: ld4.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x08,0xdf,0x4c]
+; CHECK: ld4.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x0c,0xdf,0x4c]
+; CHECK: st4.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x00,0x9f,0x0c]
+; CHECK: st4.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x00,0x9f,0x4c]
+; CHECK: st4.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x04,0x9f,0x0c]
+; CHECK: st4.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x04,0x9f,0x4c]
+; CHECK: st4.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x08,0x9f,0x0c]
+; CHECK: st4.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x08,0x9f,0x4c]
+; CHECK: st4.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x0c,0x9f,0x4c]
+; CHECK: ld1r.8b { v12 }, [x2] ; encoding: [0x4c,0xc0,0x40,0x0d]
+; CHECK: ld1r.8b { v12 }, [x2], x3 ; encoding: [0x4c,0xc0,0xc3,0x0d]
+; CHECK: ld1r.16b { v12 }, [x2] ; encoding: [0x4c,0xc0,0x40,0x4d]
+; CHECK: ld1r.16b { v12 }, [x2], x3 ; encoding: [0x4c,0xc0,0xc3,0x4d]
+; CHECK: ld1r.4h { v12 }, [x2] ; encoding: [0x4c,0xc4,0x40,0x0d]
+; CHECK: ld1r.4h { v12 }, [x2], x3 ; encoding: [0x4c,0xc4,0xc3,0x0d]
+; CHECK: ld1r.8h { v12 }, [x2] ; encoding: [0x4c,0xc4,0x40,0x4d]
+; CHECK: ld1r.8h { v12 }, [x2], x3 ; encoding: [0x4c,0xc4,0xc3,0x4d]
+; CHECK: ld1r.2s { v12 }, [x2] ; encoding: [0x4c,0xc8,0x40,0x0d]
+; CHECK: ld1r.2s { v12 }, [x2], x3 ; encoding: [0x4c,0xc8,0xc3,0x0d]
+; CHECK: ld1r.4s { v12 }, [x2] ; encoding: [0x4c,0xc8,0x40,0x4d]
+; CHECK: ld1r.4s { v12 }, [x2], x3 ; encoding: [0x4c,0xc8,0xc3,0x4d]
+; CHECK: ld1r.1d { v12 }, [x2] ; encoding: [0x4c,0xcc,0x40,0x0d]
+; CHECK: ld1r.1d { v12 }, [x2], x3 ; encoding: [0x4c,0xcc,0xc3,0x0d]
+; CHECK: ld1r.2d { v12 }, [x2] ; encoding: [0x4c,0xcc,0x40,0x4d]
+; CHECK: ld1r.2d { v12 }, [x2], x3 ; encoding: [0x4c,0xcc,0xc3,0x4d]
+; CHECK: ld1r.8b { v12 }, [x2], #1 ; encoding: [0x4c,0xc0,0xdf,0x0d]
+; CHECK: ld1r.16b { v12 }, [x2], #1 ; encoding: [0x4c,0xc0,0xdf,0x4d]
+; CHECK: ld1r.4h { v12 }, [x2], #2 ; encoding: [0x4c,0xc4,0xdf,0x0d]
+; CHECK: ld1r.8h { v12 }, [x2], #2 ; encoding: [0x4c,0xc4,0xdf,0x4d]
+; CHECK: ld1r.2s { v12 }, [x2], #4 ; encoding: [0x4c,0xc8,0xdf,0x0d]
+; CHECK: ld1r.4s { v12 }, [x2], #4 ; encoding: [0x4c,0xc8,0xdf,0x4d]
+; CHECK: ld1r.1d { v12 }, [x2], #8 ; encoding: [0x4c,0xcc,0xdf,0x0d]
+; CHECK: ld1r.2d { v12 }, [x2], #8 ; encoding: [0x4c,0xcc,0xdf,0x4d]
+; CHECK: ld2r.8b { v3, v4 }, [x2] ; encoding: [0x43,0xc0,0x60,0x0d]
+; CHECK: ld2r.8b { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc0,0xe3,0x0d]
+; CHECK: ld2r.16b { v3, v4 }, [x2] ; encoding: [0x43,0xc0,0x60,0x4d]
+; CHECK: ld2r.16b { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc0,0xe3,0x4d]
+; CHECK: ld2r.4h { v3, v4 }, [x2] ; encoding: [0x43,0xc4,0x60,0x0d]
+; CHECK: ld2r.4h { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc4,0xe3,0x0d]
+; CHECK: ld2r.8h { v3, v4 }, [x2] ; encoding: [0x43,0xc4,0x60,0x4d]
+; CHECK: ld2r.8h { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc4,0xe3,0x4d]
+; CHECK: ld2r.2s { v3, v4 }, [x2] ; encoding: [0x43,0xc8,0x60,0x0d]
+; CHECK: ld2r.2s { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc8,0xe3,0x0d]
+; CHECK: ld2r.4s { v3, v4 }, [x2] ; encoding: [0x43,0xc8,0x60,0x4d]
+; CHECK: ld2r.4s { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc8,0xe3,0x4d]
+; CHECK: ld2r.1d { v3, v4 }, [x2] ; encoding: [0x43,0xcc,0x60,0x0d]
+; CHECK: ld2r.1d { v3, v4 }, [x2], x3 ; encoding: [0x43,0xcc,0xe3,0x0d]
+; CHECK: ld2r.2d { v3, v4 }, [x2] ; encoding: [0x43,0xcc,0x60,0x4d]
+; CHECK: ld2r.2d { v3, v4 }, [x2], x3 ; encoding: [0x43,0xcc,0xe3,0x4d]
+; CHECK: ld2r.8b { v3, v4 }, [x2], #2 ; encoding: [0x43,0xc0,0xff,0x0d]
+; CHECK: ld2r.16b { v3, v4 }, [x2], #2 ; encoding: [0x43,0xc0,0xff,0x4d]
+; CHECK: ld2r.4h { v3, v4 }, [x2], #4 ; encoding: [0x43,0xc4,0xff,0x0d]
+; CHECK: ld2r.8h { v3, v4 }, [x2], #4 ; encoding: [0x43,0xc4,0xff,0x4d]
+; CHECK: ld2r.2s { v3, v4 }, [x2], #8 ; encoding: [0x43,0xc8,0xff,0x0d]
+; CHECK: ld2r.4s { v3, v4 }, [x2], #8 ; encoding: [0x43,0xc8,0xff,0x4d]
+; CHECK: ld2r.1d { v3, v4 }, [x2], #16 ; encoding: [0x43,0xcc,0xff,0x0d]
+; CHECK: ld2r.2d { v3, v4 }, [x2], #16 ; encoding: [0x43,0xcc,0xff,0x4d]
+; CHECK: ld3r.8b { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe0,0x40,0x0d]
+; CHECK: ld3r.8b { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe0,0xc3,0x0d]
+; CHECK: ld3r.16b { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe0,0x40,0x4d]
+; CHECK: ld3r.16b { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe0,0xc3,0x4d]
+; CHECK: ld3r.4h { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe4,0x40,0x0d]
+; CHECK: ld3r.4h { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe4,0xc3,0x0d]
+; CHECK: ld3r.8h { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe4,0x40,0x4d]
+; CHECK: ld3r.8h { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe4,0xc3,0x4d]
+; CHECK: ld3r.2s { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe8,0x40,0x0d]
+; CHECK: ld3r.2s { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe8,0xc3,0x0d]
+; CHECK: ld3r.4s { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe8,0x40,0x4d]
+; CHECK: ld3r.4s { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe8,0xc3,0x4d]
+; CHECK: ld3r.1d { v2, v3, v4 }, [x2] ; encoding: [0x42,0xec,0x40,0x0d]
+; CHECK: ld3r.1d { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xec,0xc3,0x0d]
+; CHECK: ld3r.2d { v2, v3, v4 }, [x2] ; encoding: [0x42,0xec,0x40,0x4d]
+; CHECK: ld3r.2d { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xec,0xc3,0x4d]
+; CHECK: ld3r.8b { v2, v3, v4 }, [x2], #3 ; encoding: [0x42,0xe0,0xdf,0x0d]
+; CHECK: ld3r.16b { v2, v3, v4 }, [x2], #3 ; encoding: [0x42,0xe0,0xdf,0x4d]
+; CHECK: ld3r.4h { v2, v3, v4 }, [x2], #6 ; encoding: [0x42,0xe4,0xdf,0x0d]
+; CHECK: ld3r.8h { v2, v3, v4 }, [x2], #6 ; encoding: [0x42,0xe4,0xdf,0x4d]
+; CHECK: ld3r.2s { v2, v3, v4 }, [x2], #12 ; encoding: [0x42,0xe8,0xdf,0x0d]
+; CHECK: ld3r.4s { v2, v3, v4 }, [x2], #12 ; encoding: [0x42,0xe8,0xdf,0x4d]
+; CHECK: ld3r.1d { v2, v3, v4 }, [x2], #24 ; encoding: [0x42,0xec,0xdf,0x0d]
+; CHECK: ld3r.2d { v2, v3, v4 }, [x2], #24 ; encoding: [0x42,0xec,0xdf,0x4d]
+; CHECK: ld4r.8b { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe0,0x60,0x0d]
+; CHECK: ld4r.8b { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe0,0xe3,0x0d]
+; CHECK: ld4r.16b { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe0,0x60,0x4d]
+; CHECK: ld4r.16b { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe0,0xe3,0x4d]
+; CHECK: ld4r.4h { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe4,0x60,0x0d]
+; CHECK: ld4r.4h { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe4,0xe3,0x0d]
+; CHECK: ld4r.8h { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe4,0x60,0x4d]
+; CHECK: ld4r.8h { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe4,0xe3,0x4d]
+; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe8,0x60,0x0d]
+; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe8,0xe3,0x0d]
+; CHECK: ld4r.4s { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe8,0x60,0x4d]
+; CHECK: ld4r.4s { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe8,0xe3,0x4d]
+; CHECK: ld4r.1d { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xec,0x60,0x0d]
+; CHECK: ld4r.1d { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xec,0xe3,0x0d]
+; CHECK: ld4r.2d { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xec,0x60,0x4d]
+; CHECK: ld4r.2d { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xec,0xe3,0x4d]
+; CHECK: ld4r.8b { v2, v3, v4, v5 }, [x2], #4 ; encoding: [0x42,0xe0,0xff,0x0d]
+; CHECK: ld4r.16b { v2, v3, v4, v5 }, [x2], #4 ; encoding: [0x42,0xe0,0xff,0x4d]
+; CHECK: ld4r.4h { v2, v3, v4, v5 }, [x2], #8 ; encoding: [0x42,0xe4,0xff,0x0d]
+; CHECK: ld4r.8h { v2, v3, v4, v5 }, [x2], #8 ; encoding: [0x42,0xe4,0xff,0x4d]
+; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2], #16 ; encoding: [0x42,0xe8,0xff,0x0d]
+; CHECK: ld4r.4s { v2, v3, v4, v5 }, [x2], #16 ; encoding: [0x42,0xe8,0xff,0x4d]
+; CHECK: ld4r.1d { v2, v3, v4, v5 }, [x2], #32 ; encoding: [0x42,0xec,0xff,0x0d]
+; CHECK: ld4r.2d { v2, v3, v4, v5 }, [x2], #32 ; encoding: [0x42,0xec,0xff,0x4d]
+; CHECK: ld1.b { v6 }[13], [x3] ; encoding: [0x66,0x14,0x40,0x4d]
+; CHECK: ld1.h { v6 }[2], [x3] ; encoding: [0x66,0x50,0x40,0x0d]
+; CHECK: ld1.s { v6 }[2], [x3] ; encoding: [0x66,0x80,0x40,0x4d]
+; CHECK: ld1.d { v6 }[1], [x3] ; encoding: [0x66,0x84,0x40,0x4d]
+; CHECK: ld1.b { v6 }[13], [x3], x5 ; encoding: [0x66,0x14,0xc5,0x4d]
+; CHECK: ld1.h { v6 }[2], [x3], x5 ; encoding: [0x66,0x50,0xc5,0x0d]
+; CHECK: ld1.s { v6 }[2], [x3], x5 ; encoding: [0x66,0x80,0xc5,0x4d]
+; CHECK: ld1.d { v6 }[1], [x3], x5 ; encoding: [0x66,0x84,0xc5,0x4d]
+; CHECK: ld1.b { v6 }[13], [x3], #1 ; encoding: [0x66,0x14,0xdf,0x4d]
+; CHECK: ld1.h { v6 }[2], [x3], #2 ; encoding: [0x66,0x50,0xdf,0x0d]
+; CHECK: ld1.s { v6 }[2], [x3], #4 ; encoding: [0x66,0x80,0xdf,0x4d]
+; CHECK: ld1.d { v6 }[1], [x3], #8 ; encoding: [0x66,0x84,0xdf,0x4d]
+; CHECK: ld2.b { v5, v6 }[13], [x3] ; encoding: [0x65,0x14,0x60,0x4d]
+; CHECK: ld2.h { v5, v6 }[2], [x3] ; encoding: [0x65,0x50,0x60,0x0d]
+; CHECK: ld2.s { v5, v6 }[2], [x3] ; encoding: [0x65,0x80,0x60,0x4d]
+; CHECK: ld2.d { v5, v6 }[1], [x3] ; encoding: [0x65,0x84,0x60,0x4d]
+; CHECK: ld2.b { v5, v6 }[13], [x3], x5 ; encoding: [0x65,0x14,0xe5,0x4d]
+; CHECK: ld2.h { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x50,0xe5,0x0d]
+; CHECK: ld2.s { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x80,0xe5,0x4d]
+; CHECK: ld2.d { v5, v6 }[1], [x3], x5 ; encoding: [0x65,0x84,0xe5,0x4d]
+; CHECK: ld2.b { v5, v6 }[13], [x3], #2 ; encoding: [0x65,0x14,0xff,0x4d]
+; CHECK: ld2.h { v5, v6 }[2], [x3], #4 ; encoding: [0x65,0x50,0xff,0x0d]
+; CHECK: ld2.s { v5, v6 }[2], [x3], #8 ; encoding: [0x65,0x80,0xff,0x4d]
+; CHECK: ld2.d { v5, v6 }[1], [x3], #16 ; encoding: [0x65,0x84,0xff,0x4d]
+; CHECK: ld3.b { v7, v8, v9 }[13], [x3] ; encoding: [0x67,0x34,0x40,0x4d]
+; CHECK: ld3.h { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0x70,0x40,0x0d]
+; CHECK: ld3.s { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0xa0,0x40,0x4d]
+; CHECK: ld3.d { v7, v8, v9 }[1], [x3] ; encoding: [0x67,0xa4,0x40,0x4d]
+; CHECK: ld3.b { v7, v8, v9 }[13], [x3], x5 ; encoding: [0x67,0x34,0xc5,0x4d]
+; CHECK: ld3.h { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0x70,0xc5,0x0d]
+; CHECK: ld3.s { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0xa0,0xc5,0x4d]
+; CHECK: ld3.d { v7, v8, v9 }[1], [x3], x5 ; encoding: [0x67,0xa4,0xc5,0x4d]
+; CHECK: ld3.b { v7, v8, v9 }[13], [x3], #3 ; encoding: [0x67,0x34,0xdf,0x4d]
+; CHECK: ld3.h { v7, v8, v9 }[2], [x3], #6 ; encoding: [0x67,0x70,0xdf,0x0d]
+; CHECK: ld3.s { v7, v8, v9 }[2], [x3], #12 ; encoding: [0x67,0xa0,0xdf,0x4d]
+; CHECK: ld3.d { v7, v8, v9 }[1], [x3], #24 ; encoding: [0x67,0xa4,0xdf,0x4d]
+; CHECK: ld4.b { v7, v8, v9, v10 }[13], [x3] ; encoding: [0x67,0x34,0x60,0x4d]
+; CHECK: ld4.h { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0x70,0x60,0x0d]
+; CHECK: ld4.s { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0xa0,0x60,0x4d]
+; CHECK: ld4.d { v7, v8, v9, v10 }[1], [x3] ; encoding: [0x67,0xa4,0x60,0x4d]
+; CHECK: ld4.b { v7, v8, v9, v10 }[13], [x3], x5 ; encoding: [0x67,0x34,0xe5,0x4d]
+; CHECK: ld4.h { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0x70,0xe5,0x0d]
+; CHECK: ld4.s { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0xa0,0xe5,0x4d]
+; CHECK: ld4.d { v7, v8, v9, v10 }[1], [x3], x5 ; encoding: [0x67,0xa4,0xe5,0x4d]
+; CHECK: ld4.b { v7, v8, v9, v10 }[13], [x3], #4 ; encoding: [0x67,0x34,0xff,0x4d]
+; CHECK: ld4.h { v7, v8, v9, v10 }[2], [x3], #8 ; encoding: [0x67,0x70,0xff,0x0d]
+; CHECK: ld4.s { v7, v8, v9, v10 }[2], [x3], #16 ; encoding: [0x67,0xa0,0xff,0x4d]
+; CHECK: ld4.d { v7, v8, v9, v10 }[1], [x3], #32 ; encoding: [0x67,0xa4,0xff,0x4d]
+; CHECK: st1.b { v6 }[13], [x3] ; encoding: [0x66,0x14,0x00,0x4d]
+; CHECK: st1.h { v6 }[2], [x3] ; encoding: [0x66,0x50,0x00,0x0d]
+; CHECK: st1.s { v6 }[2], [x3] ; encoding: [0x66,0x80,0x00,0x4d]
+; CHECK: st1.d { v6 }[1], [x3] ; encoding: [0x66,0x84,0x00,0x4d]
+; CHECK: st1.b { v6 }[13], [x3], x5 ; encoding: [0x66,0x14,0x85,0x4d]
+; CHECK: st1.h { v6 }[2], [x3], x5 ; encoding: [0x66,0x50,0x85,0x0d]
+; CHECK: st1.s { v6 }[2], [x3], x5 ; encoding: [0x66,0x80,0x85,0x4d]
+; CHECK: st1.d { v6 }[1], [x3], x5 ; encoding: [0x66,0x84,0x85,0x4d]
+; CHECK: st1.b { v6 }[13], [x3], #1 ; encoding: [0x66,0x14,0x9f,0x4d]
+; CHECK: st1.h { v6 }[2], [x3], #2 ; encoding: [0x66,0x50,0x9f,0x0d]
+; CHECK: st1.s { v6 }[2], [x3], #4 ; encoding: [0x66,0x80,0x9f,0x4d]
+; CHECK: st1.d { v6 }[1], [x3], #8 ; encoding: [0x66,0x84,0x9f,0x4d]
+; CHECK: st2.b { v5, v6 }[13], [x3] ; encoding: [0x65,0x14,0x20,0x4d]
+; CHECK: st2.h { v5, v6 }[2], [x3] ; encoding: [0x65,0x50,0x20,0x0d]
+; CHECK: st2.s { v5, v6 }[2], [x3] ; encoding: [0x65,0x80,0x20,0x4d]
+; CHECK: st2.d { v5, v6 }[1], [x3] ; encoding: [0x65,0x84,0x20,0x4d]
+; CHECK: st2.b { v5, v6 }[13], [x3], x5 ; encoding: [0x65,0x14,0xa5,0x4d]
+; CHECK: st2.h { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x50,0xa5,0x0d]
+; CHECK: st2.s { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x80,0xa5,0x4d]
+; CHECK: st2.d { v5, v6 }[1], [x3], x5 ; encoding: [0x65,0x84,0xa5,0x4d]
+; CHECK: st2.b { v5, v6 }[13], [x3], #2 ; encoding: [0x65,0x14,0xbf,0x4d]
+; CHECK: st2.h { v5, v6 }[2], [x3], #4 ; encoding: [0x65,0x50,0xbf,0x0d]
+; CHECK: st2.s { v5, v6 }[2], [x3], #8 ; encoding: [0x65,0x80,0xbf,0x4d]
+; CHECK: st2.d { v5, v6 }[1], [x3], #16 ; encoding: [0x65,0x84,0xbf,0x4d]
+; CHECK: st3.b { v7, v8, v9 }[13], [x3] ; encoding: [0x67,0x34,0x00,0x4d]
+; CHECK: st3.h { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0x70,0x00,0x0d]
+; CHECK: st3.s { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0xa0,0x00,0x4d]
+; CHECK: st3.d { v7, v8, v9 }[1], [x3] ; encoding: [0x67,0xa4,0x00,0x4d]
+; CHECK: st3.b { v7, v8, v9 }[13], [x3], x5 ; encoding: [0x67,0x34,0x85,0x4d]
+; CHECK: st3.h { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0x70,0x85,0x0d]
+; CHECK: st3.s { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0xa0,0x85,0x4d]
+; CHECK: st3.d { v7, v8, v9 }[1], [x3], x5 ; encoding: [0x67,0xa4,0x85,0x4d]
+; CHECK: st3.b { v7, v8, v9 }[13], [x3], #3 ; encoding: [0x67,0x34,0x9f,0x4d]
+; CHECK: st3.h { v7, v8, v9 }[2], [x3], #6 ; encoding: [0x67,0x70,0x9f,0x0d]
+; CHECK: st3.s { v7, v8, v9 }[2], [x3], #12 ; encoding: [0x67,0xa0,0x9f,0x4d]
+; CHECK: st3.d { v7, v8, v9 }[1], [x3], #24 ; encoding: [0x67,0xa4,0x9f,0x4d]
+; CHECK: st4.b { v7, v8, v9, v10 }[13], [x3] ; encoding: [0x67,0x34,0x20,0x4d]
+; CHECK: st4.h { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0x70,0x20,0x0d]
+; CHECK: st4.s { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0xa0,0x20,0x4d]
+; CHECK: st4.d { v7, v8, v9, v10 }[1], [x3] ; encoding: [0x67,0xa4,0x20,0x4d]
+; CHECK: st4.b { v7, v8, v9, v10 }[13], [x3], x5 ; encoding: [0x67,0x34,0xa5,0x4d]
+; CHECK: st4.h { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0x70,0xa5,0x0d]
+; CHECK: st4.s { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0xa0,0xa5,0x4d]
+; CHECK: st4.d { v7, v8, v9, v10 }[1], [x3], x5 ; encoding: [0x67,0xa4,0xa5,0x4d]
+; CHECK: st4.b { v7, v8, v9, v10 }[13], [x3], #4 ; encoding: [0x67,0x34,0xbf,0x4d]
+; CHECK: st4.h { v7, v8, v9, v10 }[2], [x3], #8 ; encoding: [0x67,0x70,0xbf,0x0d]
+; CHECK: st4.s { v7, v8, v9, v10 }[2], [x3], #16 ; encoding: [0x67,0xa0,0xbf,0x4d]
+; CHECK: st4.d { v7, v8, v9, v10 }[1], [x3], #32 ; encoding: [0x67,0xa4,0xbf,0x4d]
diff --git a/test/MC/ARM64/small-data-fixups.s b/test/MC/AArch64/arm64-small-data-fixups.s
index 3fe7c75..3fe7c75 100644
--- a/test/MC/ARM64/small-data-fixups.s
+++ b/test/MC/AArch64/arm64-small-data-fixups.s
diff --git a/test/MC/AArch64/arm64-spsel-sysreg.s b/test/MC/AArch64/arm64-spsel-sysreg.s
new file mode 100644
index 0000000..f1d94d8
--- /dev/null
+++ b/test/MC/AArch64/arm64-spsel-sysreg.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple arm64 -show-encoding < %s 2>%t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+msr SPSel, #0
+msr SPSel, x0
+msr DAIFSet, #0
+msr ESR_EL1, x0
+mrs x0, SPSel
+mrs x0, ESR_EL1
+
+// CHECK: msr SPSEL, #0 // encoding: [0xbf,0x40,0x00,0xd5]
+// CHECK: msr SPSEL, x0 // encoding: [0x00,0x42,0x18,0xd5]
+// CHECK: msr DAIFSET, #0 // encoding: [0xdf,0x40,0x03,0xd5]
+// CHECK: msr ESR_EL1, x0 // encoding: [0x00,0x52,0x18,0xd5]
+// CHECK: mrs x0, SPSEL // encoding: [0x00,0x42,0x38,0xd5]
+// CHECK: mrs x0, ESR_EL1 // encoding: [0x00,0x52,0x38,0xd5]
+
+
+msr DAIFSet, x0
+msr ESR_EL1, #0
+mrs x0, DAIFSet
+// CHECK-ERRORS: error: immediate must be an integer in range [0, 15]
+// CHECK-ERRORS: error: invalid operand for instruction
+// CHECK-ERRORS: error: expected readable system register
diff --git a/test/MC/AArch64/arm64-system-encoding.s b/test/MC/AArch64/arm64-system-encoding.s
new file mode 100644
index 0000000..9246608
--- /dev/null
+++ b/test/MC/AArch64/arm64-system-encoding.s
@@ -0,0 +1,623 @@
+; RUN: not llvm-mc -triple arm64-apple-darwin -show-encoding < %s 2> %t | FileCheck %s
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+foo:
+
+;-----------------------------------------------------------------------------
+; Simple encodings (instructions w/ no operands)
+;-----------------------------------------------------------------------------
+
+ nop
+ sev
+ sevl
+ wfe
+ wfi
+ yield
+
+; CHECK: nop ; encoding: [0x1f,0x20,0x03,0xd5]
+; CHECK: sev ; encoding: [0x9f,0x20,0x03,0xd5]
+; CHECK: sevl ; encoding: [0xbf,0x20,0x03,0xd5]
+; CHECK: wfe ; encoding: [0x5f,0x20,0x03,0xd5]
+; CHECK: wfi ; encoding: [0x7f,0x20,0x03,0xd5]
+; CHECK: yield ; encoding: [0x3f,0x20,0x03,0xd5]
+
+;-----------------------------------------------------------------------------
+; Single-immediate operand instructions
+;-----------------------------------------------------------------------------
+
+ clrex #10
+; CHECK: clrex #10 ; encoding: [0x5f,0x3a,0x03,0xd5]
+ isb #15
+ isb sy
+; CHECK: isb ; encoding: [0xdf,0x3f,0x03,0xd5]
+; CHECK: isb ; encoding: [0xdf,0x3f,0x03,0xd5]
+ dmb #3
+ dmb osh
+; CHECK: dmb osh ; encoding: [0xbf,0x33,0x03,0xd5]
+; CHECK: dmb osh ; encoding: [0xbf,0x33,0x03,0xd5]
+ dsb #7
+ dsb nsh
+; CHECK: dsb nsh ; encoding: [0x9f,0x37,0x03,0xd5]
+; CHECK: dsb nsh ; encoding: [0x9f,0x37,0x03,0xd5]
+
+;-----------------------------------------------------------------------------
+; Generic system instructions
+;-----------------------------------------------------------------------------
+ sys #2, c0, c5, #7
+; CHECK: encoding: [0xff,0x05,0x0a,0xd5]
+ sys #7, C6, c10, #7, x7
+; CHECK: encoding: [0xe7,0x6a,0x0f,0xd5]
+ sysl x20, #6, c3, C15, #7
+; CHECK: encoding: [0xf4,0x3f,0x2e,0xd5]
+
+; Check for error on invalid 'C' operand value.
+ sys #2, c16, c5, #7
+; CHECK-ERRORS: error: Expected cN operand where 0 <= N <= 15
+
+;-----------------------------------------------------------------------------
+; MSR/MRS instructions
+;-----------------------------------------------------------------------------
+ msr ACTLR_EL1, x3
+ msr ACTLR_EL2, x3
+ msr ACTLR_EL3, x3
+ msr AFSR0_EL1, x3
+ msr AFSR0_EL2, x3
+ msr AFSR0_EL3, x3
+ msr AFSR1_EL1, x3
+ msr AFSR1_EL2, x3
+ msr AFSR1_EL3, x3
+ msr AMAIR_EL1, x3
+ msr AMAIR_EL2, x3
+ msr AMAIR_EL3, x3
+ msr CNTFRQ_EL0, x3
+ msr CNTHCTL_EL2, x3
+ msr CNTHP_CTL_EL2, x3
+ msr CNTHP_CVAL_EL2, x3
+ msr CNTHP_TVAL_EL2, x3
+ msr CNTKCTL_EL1, x3
+ msr CNTP_CTL_EL0, x3
+ msr CNTP_CVAL_EL0, x3
+ msr CNTP_TVAL_EL0, x3
+ msr CNTVOFF_EL2, x3
+ msr CNTV_CTL_EL0, x3
+ msr CNTV_CVAL_EL0, x3
+ msr CNTV_TVAL_EL0, x3
+ msr CONTEXTIDR_EL1, x3
+ msr CPACR_EL1, x3
+ msr CPTR_EL2, x3
+ msr CPTR_EL3, x3
+ msr CSSELR_EL1, x3
+ msr CURRENTEL, x3
+ msr DACR32_EL2, x3
+ msr ESR_EL1, x3
+ msr ESR_EL2, x3
+ msr ESR_EL3, x3
+ msr FAR_EL1, x3
+ msr FAR_EL2, x3
+ msr FAR_EL3, x3
+ msr FPEXC32_EL2, x3
+ msr HACR_EL2, x3
+ msr HCR_EL2, x3
+ msr HPFAR_EL2, x3
+ msr HSTR_EL2, x3
+ msr IFSR32_EL2, x3
+ msr MAIR_EL1, x3
+ msr MAIR_EL2, x3
+ msr MAIR_EL3, x3
+ msr MDCR_EL2, x3
+ msr MDCR_EL3, x3
+ msr PAR_EL1, x3
+ msr SCR_EL3, x3
+ msr SCTLR_EL1, x3
+ msr SCTLR_EL2, x3
+ msr SCTLR_EL3, x3
+ msr SDER32_EL3, x3
+ msr TCR_EL1, x3
+ msr TCR_EL2, x3
+ msr TCR_EL3, x3
+ msr TEECR32_EL1, x3
+ msr TEEHBR32_EL1, x3
+ msr TPIDRRO_EL0, x3
+ msr TPIDR_EL0, x3
+ msr TPIDR_EL1, x3
+ msr TPIDR_EL2, x3
+ msr TPIDR_EL3, x3
+ msr TTBR0_EL1, x3
+ msr TTBR0_EL2, x3
+ msr TTBR0_EL3, x3
+ msr TTBR1_EL1, x3
+ msr VBAR_EL1, x3
+ msr VBAR_EL2, x3
+ msr VBAR_EL3, x3
+ msr VMPIDR_EL2, x3
+ msr VPIDR_EL2, x3
+ msr VTCR_EL2, x3
+ msr VTTBR_EL2, x3
+ msr SPSel, x3
+ msr S3_2_C11_C6_4, x1
+; CHECK: msr ACTLR_EL1, x3 ; encoding: [0x23,0x10,0x18,0xd5]
+; CHECK: msr ACTLR_EL2, x3 ; encoding: [0x23,0x10,0x1c,0xd5]
+; CHECK: msr ACTLR_EL3, x3 ; encoding: [0x23,0x10,0x1e,0xd5]
+; CHECK: msr AFSR0_EL1, x3 ; encoding: [0x03,0x51,0x18,0xd5]
+; CHECK: msr AFSR0_EL2, x3 ; encoding: [0x03,0x51,0x1c,0xd5]
+; CHECK: msr AFSR0_EL3, x3 ; encoding: [0x03,0x51,0x1e,0xd5]
+; CHECK: msr AFSR1_EL1, x3 ; encoding: [0x23,0x51,0x18,0xd5]
+; CHECK: msr AFSR1_EL2, x3 ; encoding: [0x23,0x51,0x1c,0xd5]
+; CHECK: msr AFSR1_EL3, x3 ; encoding: [0x23,0x51,0x1e,0xd5]
+; CHECK: msr AMAIR_EL1, x3 ; encoding: [0x03,0xa3,0x18,0xd5]
+; CHECK: msr AMAIR_EL2, x3 ; encoding: [0x03,0xa3,0x1c,0xd5]
+; CHECK: msr AMAIR_EL3, x3 ; encoding: [0x03,0xa3,0x1e,0xd5]
+; CHECK: msr CNTFRQ_EL0, x3 ; encoding: [0x03,0xe0,0x1b,0xd5]
+; CHECK: msr CNTHCTL_EL2, x3 ; encoding: [0x03,0xe1,0x1c,0xd5]
+; CHECK: msr CNTHP_CTL_EL2, x3 ; encoding: [0x23,0xe2,0x1c,0xd5]
+; CHECK: msr CNTHP_CVAL_EL2, x3 ; encoding: [0x43,0xe2,0x1c,0xd5]
+; CHECK: msr CNTHP_TVAL_EL2, x3 ; encoding: [0x03,0xe2,0x1c,0xd5]
+; CHECK: msr CNTKCTL_EL1, x3 ; encoding: [0x03,0xe1,0x18,0xd5]
+; CHECK: msr CNTP_CTL_EL0, x3 ; encoding: [0x23,0xe2,0x1b,0xd5]
+; CHECK: msr CNTP_CVAL_EL0, x3 ; encoding: [0x43,0xe2,0x1b,0xd5]
+; CHECK: msr CNTP_TVAL_EL0, x3 ; encoding: [0x03,0xe2,0x1b,0xd5]
+; CHECK: msr CNTVOFF_EL2, x3 ; encoding: [0x63,0xe0,0x1c,0xd5]
+; CHECK: msr CNTV_CTL_EL0, x3 ; encoding: [0x23,0xe3,0x1b,0xd5]
+; CHECK: msr CNTV_CVAL_EL0, x3 ; encoding: [0x43,0xe3,0x1b,0xd5]
+; CHECK: msr CNTV_TVAL_EL0, x3 ; encoding: [0x03,0xe3,0x1b,0xd5]
+; CHECK: msr CONTEXTIDR_EL1, x3 ; encoding: [0x23,0xd0,0x18,0xd5]
+; CHECK: msr CPACR_EL1, x3 ; encoding: [0x43,0x10,0x18,0xd5]
+; CHECK: msr CPTR_EL2, x3 ; encoding: [0x43,0x11,0x1c,0xd5]
+; CHECK: msr CPTR_EL3, x3 ; encoding: [0x43,0x11,0x1e,0xd5]
+; CHECK: msr CSSELR_EL1, x3 ; encoding: [0x03,0x00,0x1a,0xd5]
+; CHECK: msr CURRENTEL, x3 ; encoding: [0x43,0x42,0x18,0xd5]
+; CHECK: msr DACR32_EL2, x3 ; encoding: [0x03,0x30,0x1c,0xd5]
+; CHECK: msr ESR_EL1, x3 ; encoding: [0x03,0x52,0x18,0xd5]
+; CHECK: msr ESR_EL2, x3 ; encoding: [0x03,0x52,0x1c,0xd5]
+; CHECK: msr ESR_EL3, x3 ; encoding: [0x03,0x52,0x1e,0xd5]
+; CHECK: msr FAR_EL1, x3 ; encoding: [0x03,0x60,0x18,0xd5]
+; CHECK: msr FAR_EL2, x3 ; encoding: [0x03,0x60,0x1c,0xd5]
+; CHECK: msr FAR_EL3, x3 ; encoding: [0x03,0x60,0x1e,0xd5]
+; CHECK: msr FPEXC32_EL2, x3 ; encoding: [0x03,0x53,0x1c,0xd5]
+; CHECK: msr HACR_EL2, x3 ; encoding: [0xe3,0x11,0x1c,0xd5]
+; CHECK: msr HCR_EL2, x3 ; encoding: [0x03,0x11,0x1c,0xd5]
+; CHECK: msr HPFAR_EL2, x3 ; encoding: [0x83,0x60,0x1c,0xd5]
+; CHECK: msr HSTR_EL2, x3 ; encoding: [0x63,0x11,0x1c,0xd5]
+; CHECK: msr IFSR32_EL2, x3 ; encoding: [0x23,0x50,0x1c,0xd5]
+; CHECK: msr MAIR_EL1, x3 ; encoding: [0x03,0xa2,0x18,0xd5]
+; CHECK: msr MAIR_EL2, x3 ; encoding: [0x03,0xa2,0x1c,0xd5]
+; CHECK: msr MAIR_EL3, x3 ; encoding: [0x03,0xa2,0x1e,0xd5]
+; CHECK: msr MDCR_EL2, x3 ; encoding: [0x23,0x11,0x1c,0xd5]
+; CHECK: msr MDCR_EL3, x3 ; encoding: [0x23,0x13,0x1e,0xd5]
+; CHECK: msr PAR_EL1, x3 ; encoding: [0x03,0x74,0x18,0xd5]
+; CHECK: msr SCR_EL3, x3 ; encoding: [0x03,0x11,0x1e,0xd5]
+; CHECK: msr SCTLR_EL1, x3 ; encoding: [0x03,0x10,0x18,0xd5]
+; CHECK: msr SCTLR_EL2, x3 ; encoding: [0x03,0x10,0x1c,0xd5]
+; CHECK: msr SCTLR_EL3, x3 ; encoding: [0x03,0x10,0x1e,0xd5]
+; CHECK: msr SDER32_EL3, x3 ; encoding: [0x23,0x11,0x1e,0xd5]
+; CHECK: msr TCR_EL1, x3 ; encoding: [0x43,0x20,0x18,0xd5]
+; CHECK: msr TCR_EL2, x3 ; encoding: [0x43,0x20,0x1c,0xd5]
+; CHECK: msr TCR_EL3, x3 ; encoding: [0x43,0x20,0x1e,0xd5]
+; CHECK: msr TEECR32_EL1, x3 ; encoding: [0x03,0x00,0x12,0xd5]
+; CHECK: msr TEEHBR32_EL1, x3 ; encoding: [0x03,0x10,0x12,0xd5]
+; CHECK: msr TPIDRRO_EL0, x3 ; encoding: [0x63,0xd0,0x1b,0xd5]
+; CHECK: msr TPIDR_EL0, x3 ; encoding: [0x43,0xd0,0x1b,0xd5]
+; CHECK: msr TPIDR_EL1, x3 ; encoding: [0x83,0xd0,0x18,0xd5]
+; CHECK: msr TPIDR_EL2, x3 ; encoding: [0x43,0xd0,0x1c,0xd5]
+; CHECK: msr TPIDR_EL3, x3 ; encoding: [0x43,0xd0,0x1e,0xd5]
+; CHECK: msr TTBR0_EL1, x3 ; encoding: [0x03,0x20,0x18,0xd5]
+; CHECK: msr TTBR0_EL2, x3 ; encoding: [0x03,0x20,0x1c,0xd5]
+; CHECK: msr TTBR0_EL3, x3 ; encoding: [0x03,0x20,0x1e,0xd5]
+; CHECK: msr TTBR1_EL1, x3 ; encoding: [0x23,0x20,0x18,0xd5]
+; CHECK: msr VBAR_EL1, x3 ; encoding: [0x03,0xc0,0x18,0xd5]
+; CHECK: msr VBAR_EL2, x3 ; encoding: [0x03,0xc0,0x1c,0xd5]
+; CHECK: msr VBAR_EL3, x3 ; encoding: [0x03,0xc0,0x1e,0xd5]
+; CHECK: msr VMPIDR_EL2, x3 ; encoding: [0xa3,0x00,0x1c,0xd5]
+; CHECK: msr VPIDR_EL2, x3 ; encoding: [0x03,0x00,0x1c,0xd5]
+; CHECK: msr VTCR_EL2, x3 ; encoding: [0x43,0x21,0x1c,0xd5]
+; CHECK: msr VTTBR_EL2, x3 ; encoding: [0x03,0x21,0x1c,0xd5]
+; CHECK: msr SPSEL, x3 ; encoding: [0x03,0x42,0x18,0xd5]
+; CHECK: msr S3_2_C11_C6_4, x1 ; encoding: [0x81,0xb6,0x1a,0xd5]
+
+ mrs x3, ACTLR_EL1
+ mrs x3, ACTLR_EL2
+ mrs x3, ACTLR_EL3
+ mrs x3, AFSR0_EL1
+ mrs x3, AFSR0_EL2
+ mrs x3, AFSR0_EL3
+ mrs x3, AIDR_EL1
+ mrs x3, AFSR1_EL1
+ mrs x3, AFSR1_EL2
+ mrs x3, AFSR1_EL3
+ mrs x3, AMAIR_EL1
+ mrs x3, AMAIR_EL2
+ mrs x3, AMAIR_EL3
+ mrs x3, CCSIDR_EL1
+ mrs x3, CLIDR_EL1
+ mrs x3, CNTFRQ_EL0
+ mrs x3, CNTHCTL_EL2
+ mrs x3, CNTHP_CTL_EL2
+ mrs x3, CNTHP_CVAL_EL2
+ mrs x3, CNTHP_TVAL_EL2
+ mrs x3, CNTKCTL_EL1
+ mrs x3, CNTPCT_EL0
+ mrs x3, CNTP_CTL_EL0
+ mrs x3, CNTP_CVAL_EL0
+ mrs x3, CNTP_TVAL_EL0
+ mrs x3, CNTVCT_EL0
+ mrs x3, CNTVOFF_EL2
+ mrs x3, CNTV_CTL_EL0
+ mrs x3, CNTV_CVAL_EL0
+ mrs x3, CNTV_TVAL_EL0
+ mrs x3, CONTEXTIDR_EL1
+ mrs x3, CPACR_EL1
+ mrs x3, CPTR_EL2
+ mrs x3, CPTR_EL3
+ mrs x3, CSSELR_EL1
+ mrs x3, CTR_EL0
+ mrs x3, CURRENTEL
+ mrs x3, DACR32_EL2
+ mrs x3, DCZID_EL0
+ mrs x3, REVIDR_EL1
+ mrs x3, ESR_EL1
+ mrs x3, ESR_EL2
+ mrs x3, ESR_EL3
+ mrs x3, FAR_EL1
+ mrs x3, FAR_EL2
+ mrs x3, FAR_EL3
+ mrs x3, FPEXC32_EL2
+ mrs x3, HACR_EL2
+ mrs x3, HCR_EL2
+ mrs x3, HPFAR_EL2
+ mrs x3, HSTR_EL2
+ mrs x3, ID_AA64DFR0_EL1
+ mrs x3, ID_AA64DFR1_EL1
+ mrs x3, ID_AA64ISAR0_EL1
+ mrs x3, ID_AA64ISAR1_EL1
+ mrs x3, ID_AA64MMFR0_EL1
+ mrs x3, ID_AA64MMFR1_EL1
+ mrs x3, ID_AA64PFR0_EL1
+ mrs x3, ID_AA64PFR1_EL1
+ mrs x3, IFSR32_EL2
+ mrs x3, ISR_EL1
+ mrs x3, MAIR_EL1
+ mrs x3, MAIR_EL2
+ mrs x3, MAIR_EL3
+ mrs x3, MDCR_EL2
+ mrs x3, MDCR_EL3
+ mrs x3, MIDR_EL1
+ mrs x3, MPIDR_EL1
+ mrs x3, MVFR0_EL1
+ mrs x3, MVFR1_EL1
+ mrs x3, PAR_EL1
+ mrs x3, RVBAR_EL1
+ mrs x3, RVBAR_EL2
+ mrs x3, RVBAR_EL3
+ mrs x3, SCR_EL3
+ mrs x3, SCTLR_EL1
+ mrs x3, SCTLR_EL2
+ mrs x3, SCTLR_EL3
+ mrs x3, SDER32_EL3
+ mrs x3, TCR_EL1
+ mrs x3, TCR_EL2
+ mrs x3, TCR_EL3
+ mrs x3, TEECR32_EL1
+ mrs x3, TEEHBR32_EL1
+ mrs x3, TPIDRRO_EL0
+ mrs x3, TPIDR_EL0
+ mrs x3, TPIDR_EL1
+ mrs x3, TPIDR_EL2
+ mrs x3, TPIDR_EL3
+ mrs x3, TTBR0_EL1
+ mrs x3, TTBR0_EL2
+ mrs x3, TTBR0_EL3
+ mrs x3, TTBR1_EL1
+ mrs x3, VBAR_EL1
+ mrs x3, VBAR_EL2
+ mrs x3, VBAR_EL3
+ mrs x3, VMPIDR_EL2
+ mrs x3, VPIDR_EL2
+ mrs x3, VTCR_EL2
+ mrs x3, VTTBR_EL2
+
+ mrs x3, MDCCSR_EL0
+ mrs x3, MDCCINT_EL1
+ mrs x3, DBGDTR_EL0
+ mrs x3, DBGDTRRX_EL0
+ mrs x3, DBGVCR32_EL2
+ mrs x3, OSDTRRX_EL1
+ mrs x3, MDSCR_EL1
+ mrs x3, OSDTRTX_EL1
+ mrs x3, OSECCR_EL1
+ mrs x3, DBGBVR0_EL1
+ mrs x3, DBGBVR1_EL1
+ mrs x3, DBGBVR2_EL1
+ mrs x3, DBGBVR3_EL1
+ mrs x3, DBGBVR4_EL1
+ mrs x3, DBGBVR5_EL1
+ mrs x3, DBGBVR6_EL1
+ mrs x3, DBGBVR7_EL1
+ mrs x3, DBGBVR8_EL1
+ mrs x3, DBGBVR9_EL1
+ mrs x3, DBGBVR10_EL1
+ mrs x3, DBGBVR11_EL1
+ mrs x3, DBGBVR12_EL1
+ mrs x3, DBGBVR13_EL1
+ mrs x3, DBGBVR14_EL1
+ mrs x3, DBGBVR15_EL1
+ mrs x3, DBGBCR0_EL1
+ mrs x3, DBGBCR1_EL1
+ mrs x3, DBGBCR2_EL1
+ mrs x3, DBGBCR3_EL1
+ mrs x3, DBGBCR4_EL1
+ mrs x3, DBGBCR5_EL1
+ mrs x3, DBGBCR6_EL1
+ mrs x3, DBGBCR7_EL1
+ mrs x3, DBGBCR8_EL1
+ mrs x3, DBGBCR9_EL1
+ mrs x3, DBGBCR10_EL1
+ mrs x3, DBGBCR11_EL1
+ mrs x3, DBGBCR12_EL1
+ mrs x3, DBGBCR13_EL1
+ mrs x3, DBGBCR14_EL1
+ mrs x3, DBGBCR15_EL1
+ mrs x3, DBGWVR0_EL1
+ mrs x3, DBGWVR1_EL1
+ mrs x3, DBGWVR2_EL1
+ mrs x3, DBGWVR3_EL1
+ mrs x3, DBGWVR4_EL1
+ mrs x3, DBGWVR5_EL1
+ mrs x3, DBGWVR6_EL1
+ mrs x3, DBGWVR7_EL1
+ mrs x3, DBGWVR8_EL1
+ mrs x3, DBGWVR9_EL1
+ mrs x3, DBGWVR10_EL1
+ mrs x3, DBGWVR11_EL1
+ mrs x3, DBGWVR12_EL1
+ mrs x3, DBGWVR13_EL1
+ mrs x3, DBGWVR14_EL1
+ mrs x3, DBGWVR15_EL1
+ mrs x3, DBGWCR0_EL1
+ mrs x3, DBGWCR1_EL1
+ mrs x3, DBGWCR2_EL1
+ mrs x3, DBGWCR3_EL1
+ mrs x3, DBGWCR4_EL1
+ mrs x3, DBGWCR5_EL1
+ mrs x3, DBGWCR6_EL1
+ mrs x3, DBGWCR7_EL1
+ mrs x3, DBGWCR8_EL1
+ mrs x3, DBGWCR9_EL1
+ mrs x3, DBGWCR10_EL1
+ mrs x3, DBGWCR11_EL1
+ mrs x3, DBGWCR12_EL1
+ mrs x3, DBGWCR13_EL1
+ mrs x3, DBGWCR14_EL1
+ mrs x3, DBGWCR15_EL1
+ mrs x3, MDRAR_EL1
+ mrs x3, OSLSR_EL1
+ mrs x3, OSDLR_EL1
+ mrs x3, DBGPRCR_EL1
+ mrs x3, DBGCLAIMSET_EL1
+ mrs x3, DBGCLAIMCLR_EL1
+ mrs x3, DBGAUTHSTATUS_EL1
+ mrs x1, S3_2_C15_C6_4
+ mrs x3, s3_3_c11_c1_4
+ mrs x3, S3_3_c11_c1_4
+
+; CHECK: mrs x3, ACTLR_EL1 ; encoding: [0x23,0x10,0x38,0xd5]
+; CHECK: mrs x3, ACTLR_EL2 ; encoding: [0x23,0x10,0x3c,0xd5]
+; CHECK: mrs x3, ACTLR_EL3 ; encoding: [0x23,0x10,0x3e,0xd5]
+; CHECK: mrs x3, AFSR0_EL1 ; encoding: [0x03,0x51,0x38,0xd5]
+; CHECK: mrs x3, AFSR0_EL2 ; encoding: [0x03,0x51,0x3c,0xd5]
+; CHECK: mrs x3, AFSR0_EL3 ; encoding: [0x03,0x51,0x3e,0xd5]
+; CHECK: mrs x3, AIDR_EL1 ; encoding: [0xe3,0x00,0x39,0xd5]
+; CHECK: mrs x3, AFSR1_EL1 ; encoding: [0x23,0x51,0x38,0xd5]
+; CHECK: mrs x3, AFSR1_EL2 ; encoding: [0x23,0x51,0x3c,0xd5]
+; CHECK: mrs x3, AFSR1_EL3 ; encoding: [0x23,0x51,0x3e,0xd5]
+; CHECK: mrs x3, AMAIR_EL1 ; encoding: [0x03,0xa3,0x38,0xd5]
+; CHECK: mrs x3, AMAIR_EL2 ; encoding: [0x03,0xa3,0x3c,0xd5]
+; CHECK: mrs x3, AMAIR_EL3 ; encoding: [0x03,0xa3,0x3e,0xd5]
+; CHECK: mrs x3, CCSIDR_EL1 ; encoding: [0x03,0x00,0x39,0xd5]
+; CHECK: mrs x3, CLIDR_EL1 ; encoding: [0x23,0x00,0x39,0xd5]
+; CHECK: mrs x3, CNTFRQ_EL0 ; encoding: [0x03,0xe0,0x3b,0xd5]
+; CHECK: mrs x3, CNTHCTL_EL2 ; encoding: [0x03,0xe1,0x3c,0xd5]
+; CHECK: mrs x3, CNTHP_CTL_EL2 ; encoding: [0x23,0xe2,0x3c,0xd5]
+; CHECK: mrs x3, CNTHP_CVAL_EL2 ; encoding: [0x43,0xe2,0x3c,0xd5]
+; CHECK: mrs x3, CNTHP_TVAL_EL2 ; encoding: [0x03,0xe2,0x3c,0xd5]
+; CHECK: mrs x3, CNTKCTL_EL1 ; encoding: [0x03,0xe1,0x38,0xd5]
+; CHECK: mrs x3, CNTPCT_EL0 ; encoding: [0x23,0xe0,0x3b,0xd5]
+; CHECK: mrs x3, CNTP_CTL_EL0 ; encoding: [0x23,0xe2,0x3b,0xd5]
+; CHECK: mrs x3, CNTP_CVAL_EL0 ; encoding: [0x43,0xe2,0x3b,0xd5]
+; CHECK: mrs x3, CNTP_TVAL_EL0 ; encoding: [0x03,0xe2,0x3b,0xd5]
+; CHECK: mrs x3, CNTVCT_EL0 ; encoding: [0x43,0xe0,0x3b,0xd5]
+; CHECK: mrs x3, CNTVOFF_EL2 ; encoding: [0x63,0xe0,0x3c,0xd5]
+; CHECK: mrs x3, CNTV_CTL_EL0 ; encoding: [0x23,0xe3,0x3b,0xd5]
+; CHECK: mrs x3, CNTV_CVAL_EL0 ; encoding: [0x43,0xe3,0x3b,0xd5]
+; CHECK: mrs x3, CNTV_TVAL_EL0 ; encoding: [0x03,0xe3,0x3b,0xd5]
+; CHECK: mrs x3, CONTEXTIDR_EL1 ; encoding: [0x23,0xd0,0x38,0xd5]
+; CHECK: mrs x3, CPACR_EL1 ; encoding: [0x43,0x10,0x38,0xd5]
+; CHECK: mrs x3, CPTR_EL2 ; encoding: [0x43,0x11,0x3c,0xd5]
+; CHECK: mrs x3, CPTR_EL3 ; encoding: [0x43,0x11,0x3e,0xd5]
+; CHECK: mrs x3, CSSELR_EL1 ; encoding: [0x03,0x00,0x3a,0xd5]
+; CHECK: mrs x3, CTR_EL0 ; encoding: [0x23,0x00,0x3b,0xd5]
+; CHECK: mrs x3, CURRENTEL ; encoding: [0x43,0x42,0x38,0xd5]
+; CHECK: mrs x3, DACR32_EL2 ; encoding: [0x03,0x30,0x3c,0xd5]
+; CHECK: mrs x3, DCZID_EL0 ; encoding: [0xe3,0x00,0x3b,0xd5]
+; CHECK: mrs x3, REVIDR_EL1 ; encoding: [0xc3,0x00,0x38,0xd5]
+; CHECK: mrs x3, ESR_EL1 ; encoding: [0x03,0x52,0x38,0xd5]
+; CHECK: mrs x3, ESR_EL2 ; encoding: [0x03,0x52,0x3c,0xd5]
+; CHECK: mrs x3, ESR_EL3 ; encoding: [0x03,0x52,0x3e,0xd5]
+; CHECK: mrs x3, FAR_EL1 ; encoding: [0x03,0x60,0x38,0xd5]
+; CHECK: mrs x3, FAR_EL2 ; encoding: [0x03,0x60,0x3c,0xd5]
+; CHECK: mrs x3, FAR_EL3 ; encoding: [0x03,0x60,0x3e,0xd5]
+; CHECK: mrs x3, FPEXC32_EL2 ; encoding: [0x03,0x53,0x3c,0xd5]
+; CHECK: mrs x3, HACR_EL2 ; encoding: [0xe3,0x11,0x3c,0xd5]
+; CHECK: mrs x3, HCR_EL2 ; encoding: [0x03,0x11,0x3c,0xd5]
+; CHECK: mrs x3, HPFAR_EL2 ; encoding: [0x83,0x60,0x3c,0xd5]
+; CHECK: mrs x3, HSTR_EL2 ; encoding: [0x63,0x11,0x3c,0xd5]
+; CHECK: mrs x3, ID_AA64DFR0_EL1 ; encoding: [0x03,0x05,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64DFR1_EL1 ; encoding: [0x23,0x05,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64ISAR0_EL1 ; encoding: [0x03,0x06,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64ISAR1_EL1 ; encoding: [0x23,0x06,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64MMFR0_EL1 ; encoding: [0x03,0x07,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64MMFR1_EL1 ; encoding: [0x23,0x07,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64PFR0_EL1 ; encoding: [0x03,0x04,0x38,0xd5]
+; CHECK: mrs x3, ID_AA64PFR1_EL1 ; encoding: [0x23,0x04,0x38,0xd5]
+; CHECK: mrs x3, IFSR32_EL2 ; encoding: [0x23,0x50,0x3c,0xd5]
+; CHECK: mrs x3, ISR_EL1 ; encoding: [0x03,0xc1,0x38,0xd5]
+; CHECK: mrs x3, MAIR_EL1 ; encoding: [0x03,0xa2,0x38,0xd5]
+; CHECK: mrs x3, MAIR_EL2 ; encoding: [0x03,0xa2,0x3c,0xd5]
+; CHECK: mrs x3, MAIR_EL3 ; encoding: [0x03,0xa2,0x3e,0xd5]
+; CHECK: mrs x3, MDCR_EL2 ; encoding: [0x23,0x11,0x3c,0xd5]
+; CHECK: mrs x3, MDCR_EL3 ; encoding: [0x23,0x13,0x3e,0xd5]
+; CHECK: mrs x3, MIDR_EL1 ; encoding: [0x03,0x00,0x38,0xd5]
+; CHECK: mrs x3, MPIDR_EL1 ; encoding: [0xa3,0x00,0x38,0xd5]
+; CHECK: mrs x3, MVFR0_EL1 ; encoding: [0x03,0x03,0x38,0xd5]
+; CHECK: mrs x3, MVFR1_EL1 ; encoding: [0x23,0x03,0x38,0xd5]
+; CHECK: mrs x3, PAR_EL1 ; encoding: [0x03,0x74,0x38,0xd5]
+; CHECK: mrs x3, RVBAR_EL1 ; encoding: [0x23,0xc0,0x38,0xd5]
+; CHECK: mrs x3, RVBAR_EL2 ; encoding: [0x23,0xc0,0x3c,0xd5]
+; CHECK: mrs x3, RVBAR_EL3 ; encoding: [0x23,0xc0,0x3e,0xd5]
+; CHECK: mrs x3, SCR_EL3 ; encoding: [0x03,0x11,0x3e,0xd5]
+; CHECK: mrs x3, SCTLR_EL1 ; encoding: [0x03,0x10,0x38,0xd5]
+; CHECK: mrs x3, SCTLR_EL2 ; encoding: [0x03,0x10,0x3c,0xd5]
+; CHECK: mrs x3, SCTLR_EL3 ; encoding: [0x03,0x10,0x3e,0xd5]
+; CHECK: mrs x3, SDER32_EL3 ; encoding: [0x23,0x11,0x3e,0xd5]
+; CHECK: mrs x3, TCR_EL1 ; encoding: [0x43,0x20,0x38,0xd5]
+; CHECK: mrs x3, TCR_EL2 ; encoding: [0x43,0x20,0x3c,0xd5]
+; CHECK: mrs x3, TCR_EL3 ; encoding: [0x43,0x20,0x3e,0xd5]
+; CHECK: mrs x3, TEECR32_EL1 ; encoding: [0x03,0x00,0x32,0xd5]
+; CHECK: mrs x3, TEEHBR32_EL1 ; encoding: [0x03,0x10,0x32,0xd5]
+; CHECK: mrs x3, TPIDRRO_EL0 ; encoding: [0x63,0xd0,0x3b,0xd5]
+; CHECK: mrs x3, TPIDR_EL0 ; encoding: [0x43,0xd0,0x3b,0xd5]
+; CHECK: mrs x3, TPIDR_EL1 ; encoding: [0x83,0xd0,0x38,0xd5]
+; CHECK: mrs x3, TPIDR_EL2 ; encoding: [0x43,0xd0,0x3c,0xd5]
+; CHECK: mrs x3, TPIDR_EL3 ; encoding: [0x43,0xd0,0x3e,0xd5]
+; CHECK: mrs x3, TTBR0_EL1 ; encoding: [0x03,0x20,0x38,0xd5]
+; CHECK: mrs x3, TTBR0_EL2 ; encoding: [0x03,0x20,0x3c,0xd5]
+; CHECK: mrs x3, TTBR0_EL3 ; encoding: [0x03,0x20,0x3e,0xd5]
+; CHECK: mrs x3, TTBR1_EL1 ; encoding: [0x23,0x20,0x38,0xd5]
+; CHECK: mrs x3, VBAR_EL1 ; encoding: [0x03,0xc0,0x38,0xd5]
+; CHECK: mrs x3, VBAR_EL2 ; encoding: [0x03,0xc0,0x3c,0xd5]
+; CHECK: mrs x3, VBAR_EL3 ; encoding: [0x03,0xc0,0x3e,0xd5]
+; CHECK: mrs x3, VMPIDR_EL2 ; encoding: [0xa3,0x00,0x3c,0xd5]
+; CHECK: mrs x3, VPIDR_EL2 ; encoding: [0x03,0x00,0x3c,0xd5]
+; CHECK: mrs x3, VTCR_EL2 ; encoding: [0x43,0x21,0x3c,0xd5]
+; CHECK: mrs x3, VTTBR_EL2 ; encoding: [0x03,0x21,0x3c,0xd5]
+; CHECK: mrs x3, MDCCSR_EL0 ; encoding: [0x03,0x01,0x33,0xd5]
+; CHECK: mrs x3, MDCCINT_EL1 ; encoding: [0x03,0x02,0x30,0xd5]
+; CHECK: mrs x3, DBGDTR_EL0 ; encoding: [0x03,0x04,0x33,0xd5]
+; CHECK: mrs x3, DBGDTRRX_EL0 ; encoding: [0x03,0x05,0x33,0xd5]
+; CHECK: mrs x3, DBGVCR32_EL2 ; encoding: [0x03,0x07,0x34,0xd5]
+; CHECK: mrs x3, OSDTRRX_EL1 ; encoding: [0x43,0x00,0x30,0xd5]
+; CHECK: mrs x3, MDSCR_EL1 ; encoding: [0x43,0x02,0x30,0xd5]
+; CHECK: mrs x3, OSDTRTX_EL1 ; encoding: [0x43,0x03,0x30,0xd5]
+; CHECK: mrs x3, OSECCR_EL1 ; encoding: [0x43,0x06,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR0_EL1 ; encoding: [0x83,0x00,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR1_EL1 ; encoding: [0x83,0x01,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR2_EL1 ; encoding: [0x83,0x02,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR3_EL1 ; encoding: [0x83,0x03,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR4_EL1 ; encoding: [0x83,0x04,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR5_EL1 ; encoding: [0x83,0x05,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR6_EL1 ; encoding: [0x83,0x06,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR7_EL1 ; encoding: [0x83,0x07,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR8_EL1 ; encoding: [0x83,0x08,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR9_EL1 ; encoding: [0x83,0x09,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR10_EL1 ; encoding: [0x83,0x0a,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR11_EL1 ; encoding: [0x83,0x0b,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR12_EL1 ; encoding: [0x83,0x0c,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR13_EL1 ; encoding: [0x83,0x0d,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR14_EL1 ; encoding: [0x83,0x0e,0x30,0xd5]
+; CHECK: mrs x3, DBGBVR15_EL1 ; encoding: [0x83,0x0f,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR0_EL1 ; encoding: [0xa3,0x00,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR1_EL1 ; encoding: [0xa3,0x01,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR2_EL1 ; encoding: [0xa3,0x02,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR3_EL1 ; encoding: [0xa3,0x03,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR4_EL1 ; encoding: [0xa3,0x04,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR5_EL1 ; encoding: [0xa3,0x05,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR6_EL1 ; encoding: [0xa3,0x06,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR7_EL1 ; encoding: [0xa3,0x07,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR8_EL1 ; encoding: [0xa3,0x08,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR9_EL1 ; encoding: [0xa3,0x09,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR10_EL1 ; encoding: [0xa3,0x0a,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR11_EL1 ; encoding: [0xa3,0x0b,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR12_EL1 ; encoding: [0xa3,0x0c,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR13_EL1 ; encoding: [0xa3,0x0d,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR14_EL1 ; encoding: [0xa3,0x0e,0x30,0xd5]
+; CHECK: mrs x3, DBGBCR15_EL1 ; encoding: [0xa3,0x0f,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR0_EL1 ; encoding: [0xc3,0x00,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR1_EL1 ; encoding: [0xc3,0x01,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR2_EL1 ; encoding: [0xc3,0x02,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR3_EL1 ; encoding: [0xc3,0x03,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR4_EL1 ; encoding: [0xc3,0x04,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR5_EL1 ; encoding: [0xc3,0x05,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR6_EL1 ; encoding: [0xc3,0x06,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR7_EL1 ; encoding: [0xc3,0x07,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR8_EL1 ; encoding: [0xc3,0x08,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR9_EL1 ; encoding: [0xc3,0x09,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR10_EL1 ; encoding: [0xc3,0x0a,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR11_EL1 ; encoding: [0xc3,0x0b,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR12_EL1 ; encoding: [0xc3,0x0c,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR13_EL1 ; encoding: [0xc3,0x0d,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR14_EL1 ; encoding: [0xc3,0x0e,0x30,0xd5]
+; CHECK: mrs x3, DBGWVR15_EL1 ; encoding: [0xc3,0x0f,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR0_EL1 ; encoding: [0xe3,0x00,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR1_EL1 ; encoding: [0xe3,0x01,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR2_EL1 ; encoding: [0xe3,0x02,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR3_EL1 ; encoding: [0xe3,0x03,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR4_EL1 ; encoding: [0xe3,0x04,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR5_EL1 ; encoding: [0xe3,0x05,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR6_EL1 ; encoding: [0xe3,0x06,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR7_EL1 ; encoding: [0xe3,0x07,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR8_EL1 ; encoding: [0xe3,0x08,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR9_EL1 ; encoding: [0xe3,0x09,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR10_EL1 ; encoding: [0xe3,0x0a,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR11_EL1 ; encoding: [0xe3,0x0b,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR12_EL1 ; encoding: [0xe3,0x0c,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR13_EL1 ; encoding: [0xe3,0x0d,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR14_EL1 ; encoding: [0xe3,0x0e,0x30,0xd5]
+; CHECK: mrs x3, DBGWCR15_EL1 ; encoding: [0xe3,0x0f,0x30,0xd5]
+; CHECK: mrs x3, MDRAR_EL1 ; encoding: [0x03,0x10,0x30,0xd5]
+; CHECK: mrs x3, OSLSR_EL1 ; encoding: [0x83,0x11,0x30,0xd5]
+; CHECK: mrs x3, OSDLR_EL1 ; encoding: [0x83,0x13,0x30,0xd5]
+; CHECK: mrs x3, DBGPRCR_EL1 ; encoding: [0x83,0x14,0x30,0xd5]
+; CHECK: mrs x3, DBGCLAIMSET_EL1 ; encoding: [0xc3,0x78,0x30,0xd5]
+; CHECK: mrs x3, DBGCLAIMCLR_EL1 ; encoding: [0xc3,0x79,0x30,0xd5]
+; CHECK: mrs x3, DBGAUTHSTATUS_EL1 ; encoding: [0xc3,0x7e,0x30,0xd5]
+; CHECK: mrs x1, S3_2_C15_C6_4 ; encoding: [0x81,0xf6,0x3a,0xd5]
+; CHECK: mrs x3, S3_3_C11_C1_4 ; encoding: [0x83,0xb1,0x3b,0xd5]
+; CHECK: mrs x3, S3_3_C11_C1_4 ; encoding: [0x83,0xb1,0x3b,0xd5]
+
+ msr RMR_EL3, x0
+ msr RMR_EL2, x0
+ msr RMR_EL1, x0
+ msr OSLAR_EL1, x3
+ msr DBGDTRTX_EL0, x3
+
+; CHECK: msr RMR_EL3, x0 ; encoding: [0x40,0xc0,0x1e,0xd5]
+; CHECK: msr RMR_EL2, x0 ; encoding: [0x40,0xc0,0x1c,0xd5]
+; CHECK: msr RMR_EL1, x0 ; encoding: [0x40,0xc0,0x18,0xd5]
+; CHECK: msr OSLAR_EL1, x3 ; encoding: [0x83,0x10,0x10,0xd5]
+; CHECK: msr DBGDTRTX_EL0, x3 ; encoding: [0x03,0x05,0x13,0xd5]
+
+ mrs x0, ID_PFR0_EL1
+ mrs x0, ID_PFR1_EL1
+ mrs x0, ID_DFR0_EL1
+ mrs x0, ID_AFR0_EL1
+ mrs x0, ID_ISAR0_EL1
+ mrs x0, ID_ISAR1_EL1
+ mrs x0, ID_ISAR2_EL1
+ mrs x0, ID_ISAR3_EL1
+ mrs x0, ID_ISAR4_EL1
+ mrs x0, ID_ISAR5_EL1
+ mrs x0, AFSR1_EL1
+ mrs x0, AFSR0_EL1
+ mrs x0, REVIDR_EL1
+; CHECK: mrs x0, ID_PFR0_EL1 ; encoding: [0x00,0x01,0x38,0xd5]
+; CHECK: mrs x0, ID_PFR1_EL1 ; encoding: [0x20,0x01,0x38,0xd5]
+; CHECK: mrs x0, ID_DFR0_EL1 ; encoding: [0x40,0x01,0x38,0xd5]
+; CHECK: mrs x0, ID_AFR0_EL1 ; encoding: [0x60,0x01,0x38,0xd5]
+; CHECK: mrs x0, ID_ISAR0_EL1 ; encoding: [0x00,0x02,0x38,0xd5]
+; CHECK: mrs x0, ID_ISAR1_EL1 ; encoding: [0x20,0x02,0x38,0xd5]
+; CHECK: mrs x0, ID_ISAR2_EL1 ; encoding: [0x40,0x02,0x38,0xd5]
+; CHECK: mrs x0, ID_ISAR3_EL1 ; encoding: [0x60,0x02,0x38,0xd5]
+; CHECK: mrs x0, ID_ISAR4_EL1 ; encoding: [0x80,0x02,0x38,0xd5]
+; CHECK: mrs x0, ID_ISAR5_EL1 ; encoding: [0xa0,0x02,0x38,0xd5]
+; CHECK: mrs x0, AFSR1_EL1 ; encoding: [0x20,0x51,0x38,0xd5]
+; CHECK: mrs x0, AFSR0_EL1 ; encoding: [0x00,0x51,0x38,0xd5]
+; CHECK: mrs x0, REVIDR_EL1 ; encoding: [0xc0,0x00,0x38,0xd5]
diff --git a/test/MC/AArch64/arm64-target-specific-sysreg.s b/test/MC/AArch64/arm64-target-specific-sysreg.s
new file mode 100644
index 0000000..05cea3a
--- /dev/null
+++ b/test/MC/AArch64/arm64-target-specific-sysreg.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple arm64 -mcpu=generic -show-encoding < %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-GENERIC
+//
+// RUN: llvm-mc -triple arm64 -mcpu=cyclone -show-encoding < %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CHECK-CYCLONE
+
+msr CPM_IOACC_CTL_EL3, x0
+
+// CHECK-GENERIC: error: expected writable system register or pstate
+// CHECK-CYCLONE: msr CPM_IOACC_CTL_EL3, x0 // encoding: [0x00,0xf2,0x1f,0xd5]
diff --git a/test/MC/AArch64/arm64-tls-modifiers-darwin.s b/test/MC/AArch64/arm64-tls-modifiers-darwin.s
new file mode 100644
index 0000000..8ff07cd
--- /dev/null
+++ b/test/MC/AArch64/arm64-tls-modifiers-darwin.s
@@ -0,0 +1,13 @@
+; RUN: llvm-mc -triple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llvm-mc -triple=arm64-apple-ios7.0 -filetype=obj %s -o - | llvm-objdump -r - | FileCheck %s --check-prefix=CHECK-OBJ
+
+ adrp x2, _var@TLVPPAGE
+ ldr x0, [x15, _var@TLVPPAGEOFF]
+ add x30, x0, _var@TLVPPAGEOFF
+; CHECK: adrp x2, _var@TLVPPAGE
+; CHECK: ldr x0, [x15, _var@TLVPPAGEOFF]
+; CHECK: add x30, x0, _var@TLVPPAGEOFF
+
+; CHECK-OBJ: 8 ARM64_RELOC_TLVP_LOAD_PAGEOFF12 _var
+; CHECK-OBJ: 4 ARM64_RELOC_TLVP_LOAD_PAGEOFF12 _var
+; CHECK-OBJ: 0 ARM64_RELOC_TLVP_LOAD_PAGE21 _var
diff --git a/test/MC/AArch64/arm64-tls-relocs.s b/test/MC/AArch64/arm64-tls-relocs.s
new file mode 100644
index 0000000..96c2b55
--- /dev/null
+++ b/test/MC/AArch64/arm64-tls-relocs.s
@@ -0,0 +1,320 @@
+// RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \
+// RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s
+
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS initial-exec forms
+////////////////////////////////////////////////////////////////////////////////
+
+ movz x15, #:gottprel_g1:var
+// CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM:[^ ]+]]
+
+
+ movk x13, #:gottprel_g0_nc:var
+// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw
+
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
+
+ adrp x11, :gottprel:var
+ ldr x10, [x0, #:gottprel_lo12:var]
+ ldr x9, :gottprel:var
+// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58]
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]]
+
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS local-exec forms
+////////////////////////////////////////////////////////////////////////////////
+
+ movz x3, #:tprel_g2:var
+ movn x4, #:tprel_g2:var
+// CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw
+// CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
+
+
+ movz x5, #:tprel_g1:var
+ movn x6, #:tprel_g1:var
+ movz w7, #:tprel_g1:var
+// CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+
+
+ movk x9, #:tprel_g1_nc:var
+ movk w10, #:tprel_g1_nc:var
+// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
+
+
+ movz x11, #:tprel_g0:var
+ movn x12, #:tprel_g0:var
+ movz w13, #:tprel_g0:var
+// CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+
+
+ movk x15, #:tprel_g0_nc:var
+ movk w16, #:tprel_g0_nc:var
+// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
+
+
+ add x21, x22, #:tprel_lo12:var
+// CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
+
+
+ add x25, x26, #:tprel_lo12_nc:var
+// CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
+
+
+ ldrb w29, [x30, #:tprel_lo12:var]
+ ldrsb x29, [x28, #:tprel_lo12_nc:var]
+// CHECK: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
+
+
+ strh w27, [x26, #:tprel_lo12:var]
+ ldrsh x25, [x24, #:tprel_lo12_nc:var]
+// CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
+
+
+ ldr w23, [x22, #:tprel_lo12:var]
+ ldrsw x21, [x20, #:tprel_lo12_nc:var]
+// CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
+
+ ldr x19, [x18, #:tprel_lo12:var]
+ str x17, [x16, #:tprel_lo12_nc:var]
+// CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
+
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS local-dynamic forms
+////////////////////////////////////////////////////////////////////////////////
+
+ movz x3, #:dtprel_g2:var
+ movn x4, #:dtprel_g2:var
+// CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
+// CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+
+
+ movz x5, #:dtprel_g1:var
+ movn x6, #:dtprel_g1:var
+ movz w7, #:dtprel_g1:var
+// CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+
+
+ movk x9, #:dtprel_g1_nc:var
+ movk w10, #:dtprel_g1_nc:var
+// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
+
+
+ movz x11, #:dtprel_g0:var
+ movn x12, #:dtprel_g0:var
+ movz w13, #:dtprel_g0:var
+// CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+
+
+ movk x15, #:dtprel_g0_nc:var
+ movk w16, #:dtprel_g0_nc:var
+// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
+
+
+ add x21, x22, #:dtprel_lo12:var
+// CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
+
+
+ add x25, x26, #:dtprel_lo12_nc:var
+// CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
+
+
+ ldrb w29, [x30, #:dtprel_lo12:var]
+ ldrsb x29, [x28, #:dtprel_lo12_nc:var]
+// CHECK: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
+
+
+ strh w27, [x26, #:dtprel_lo12:var]
+ ldrsh x25, [x24, #:dtprel_lo12_nc:var]
+// CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
+
+
+ ldr w23, [x22, #:dtprel_lo12:var]
+ ldrsw x21, [x20, #:dtprel_lo12_nc:var]
+// CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
+
+ ldr x19, [x18, #:dtprel_lo12:var]
+ str x17, [x16, #:dtprel_lo12_nc:var]
+// CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
+
+////////////////////////////////////////////////////////////////////////////////
+// TLS descriptor forms
+////////////////////////////////////////////////////////////////////////////////
+
+ adrp x8, :tlsdesc:var
+ ldr x7, [x6, #:tlsdesc_lo12:var]
+ add x5, x4, #:tlsdesc_lo12:var
+ .tlsdesccall var
+ blr x3
+
+// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91]
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12
+// CHECK: .tlsdesccall var // encoding: []
+// CHECK-NEXT: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call
+// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6]
+
+
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_CALL [[VARSYM]]
+
+ // Make sure the symbol for 'var' has type STT_TLS:
+
+// CHECK-ELF: Symbols [
+// CHECK-ELF: Symbol {
+// CHECK-ELF: Name: var
+// CHECK-ELF-NEXT: Value:
+// CHECK-ELF-NEXT: Size:
+// CHECK-ELF-NEXT: Binding: Global
+// CHECK-ELF-NEXT: Type: TLS
diff --git a/test/MC/AArch64/arm64-v128_lo-diagnostics.s b/test/MC/AArch64/arm64-v128_lo-diagnostics.s
new file mode 100644
index 0000000..ffe29cf
--- /dev/null
+++ b/test/MC/AArch64/arm64-v128_lo-diagnostics.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple arm64 -mattr=neon %s 2> %t > /dev/null
+// RUN: FileCheck %s < %t
+
+ sqrdmulh v0.8h, v1.8h, v16.h[0]
+// CHECK: error: invalid operand for instruction
+
+ sqrdmulh h0, h1, v16.h[0]
+// CHECK: error: invalid operand for instruction
+
+ sqdmull2 v0.4h, v1.8h, v16.h[0]
+// CHECK: error: invalid operand for instruction
diff --git a/test/MC/ARM64/variable-exprs.s b/test/MC/AArch64/arm64-variable-exprs.s
index 0120442..0120442 100644
--- a/test/MC/ARM64/variable-exprs.s
+++ b/test/MC/AArch64/arm64-variable-exprs.s
diff --git a/test/MC/AArch64/arm64-vector-lists.s b/test/MC/AArch64/arm64-vector-lists.s
new file mode 100644
index 0000000..a9b2d19
--- /dev/null
+++ b/test/MC/AArch64/arm64-vector-lists.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple arm64 -mattr=neon -show-encoding < %s 2>%t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+ ST4 {v0.8B-v3.8B}, [x0]
+ ST4 {v0.4H-v3.4H}, [x0]
+
+// CHECK: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x00,0x00,0x0c]
+// CHECK: st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0] // encoding: [0x00,0x04,0x00,0x0c]
+
+ ST4 {v0.8B-v4.8B}, [x0]
+ ST4 {v0.8B-v3.8B,v4.8B}, [x0]
+ ST4 {v0.8B-v3.8H}, [x0]
+ ST4 {v0.8B-v3.16B}, [x0]
+ ST4 {v0.8B-},[x0]
+
+// CHECK-ERRORS: error: invalid number of vectors
+// CHECK-ERRORS: error: '}' expected
+// CHECK-ERRORS: error: mismatched register size suffix
+// CHECK-ERRORS: error: mismatched register size suffix
+// CHECK-ERRORS: error: vector register expected
diff --git a/test/MC/AArch64/arm64-verbose-vector-case.s b/test/MC/AArch64/arm64-verbose-vector-case.s
new file mode 100644
index 0000000..6f0a381
--- /dev/null
+++ b/test/MC/AArch64/arm64-verbose-vector-case.s
@@ -0,0 +1,19 @@
+// RUN: llvm-mc -triple arm64 -mattr=crypto -show-encoding < %s | FileCheck %s
+
+pmull v8.8h, v8.8b, v8.8b
+pmull2 v8.8h, v8.16b, v8.16b
+pmull v8.1q, v8.1d, v8.1d
+pmull2 v8.1q, v8.2d, v8.2d
+// CHECK: pmull v8.8h, v8.8b, v8.8b // encoding: [0x08,0xe1,0x28,0x0e]
+// CHECK: pmull2 v8.8h, v8.16b, v8.16b // encoding: [0x08,0xe1,0x28,0x4e]
+// CHECK: pmull v8.1q, v8.1d, v8.1d // encoding: [0x08,0xe1,0xe8,0x0e]
+// CHECK: pmull2 v8.1q, v8.2d, v8.2d // encoding: [0x08,0xe1,0xe8,0x4e]
+
+pmull v8.8H, v8.8B, v8.8B
+pmull2 v8.8H, v8.16B, v8.16B
+pmull v8.1Q, v8.1D, v8.1D
+pmull2 v8.1Q, v8.2D, v8.2D
+// CHECK: pmull v8.8h, v8.8b, v8.8b // encoding: [0x08,0xe1,0x28,0x0e]
+// CHECK: pmull2 v8.8h, v8.16b, v8.16b // encoding: [0x08,0xe1,0x28,0x4e]
+// CHECK: pmull v8.1q, v8.1d, v8.1d // encoding: [0x08,0xe1,0xe8,0x0e]
+// CHECK: pmull2 v8.1q, v8.2d, v8.2d // encoding: [0x08,0xe1,0xe8,0x4e]
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index 792538c..a4a3b13 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -1,5 +1,5 @@
// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
-// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-ARM64 < %t %s
//------------------------------------------------------------------------------
// Add/sub (extended register)
@@ -83,9 +83,9 @@
// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
// CHECK-ERROR-NEXT: add w4, w5, #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
-// CHECK-ERROR-NEXT: add w5, w6, #0x1000
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-AARCH64-NEXT: add w5, w6, #0x1000
+// CHECK-ERROR-AARCH64-NEXT: ^
// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
// CHECK-ERROR-NEXT: add w4, w5, #-1, lsl #12
// CHECK-ERROR-NEXT: ^
@@ -141,9 +141,9 @@
// Out of range immediate
adds w0, w5, #0x10000
-// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
-// CHECK-ERROR-NEXT: adds w0, w5, #0x10000
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-AARCH64-NEXT: adds w0, w5, #0x10000
+// CHECK-ERROR-AARCH64-NEXT: ^
// Wn|WSP should be in second place
adds w4, wzr, #0x123
@@ -750,10 +750,10 @@
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: sbfm w3, wsp, #1, #9
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: sbfm x9, x5, #-1, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: sbfm x9, x5, #0, #-1
// CHECK-ERROR-NEXT: ^
@@ -761,16 +761,16 @@
sbfm w7, w11, #19, #32
sbfm x29, x30, #64, #0
sbfm x10, x20, #63, #64
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: sbfm w3, w5, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: sbfm w7, w11, #19, #32
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: sbfm x29, x30, #64, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: sbfm x10, x20, #63, #64
// CHECK-ERROR-NEXT: ^
@@ -778,16 +778,16 @@
ubfm w7, w11, #19, #32
ubfm x29, x30, #64, #0
ubfm x10, x20, #63, #64
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ubfm w3, w5, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ubfm w7, w11, #19, #32
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: ubfm x29, x30, #64, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: ubfm x10, x20, #63, #64
// CHECK-ERROR-NEXT: ^
@@ -795,31 +795,31 @@
bfm w7, w11, #19, #32
bfm x29, x30, #64, #0
bfm x10, x20, #63, #64
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: bfm w3, w5, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: bfm w7, w11, #19, #32
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: bfm x29, x30, #64, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: bfm x10, x20, #63, #64
// CHECK-ERROR-NEXT: ^
sxtb x3, x2
sxth xzr, xzr
sxtw x3, x5
-// CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sxtb x3, x2
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sxth xzr, xzr
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sxtw x3, x5
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64: error: invalid operand for instruction
+// CHECK-ERROR-AARCH64-NEXT: sxtb x3, x2
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-AARCH64-NEXT: sxth xzr, xzr
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-AARCH64-NEXT: sxtw x3, x5
+// CHECK-ERROR-AARCH64-NEXT: ^
uxtb x3, x12
uxth x5, x9
@@ -832,9 +832,9 @@
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: uxth x5, x9
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid instruction
-// CHECK-ERROR-NEXT: uxtw x3, x5
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: invalid instruction
+// CHECK-ERROR-AARCH64-NEXT: uxtw x3, x5
+// CHECK-ERROR-AARCH64-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: uxtb x2, sp
// CHECK-ERROR-NEXT: ^
@@ -853,13 +853,13 @@
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: asr sp, x2, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: asr x25, x26, #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: asr x25, x26, #64
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: asr w9, w8, #32
// CHECK-ERROR-NEXT: ^
@@ -869,18 +869,19 @@
sbfiz w11, w12, #32, #0
sbfiz w9, w10, #10, #23
sbfiz x3, x5, #12, #53
- sbfiz sp, x3, #5, #6
- sbfiz w3, wsp, #7, #8
-// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+ sbfiz sp, x3, #7, #6
+ sbfiz w3, wsp, #10, #8
+// CHECK-ERROR-AARCH64: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-ARM64: error: expected integer in range [1, 32]
// CHECK-ERROR-NEXT: sbfiz w1, w2, #0, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: sbfiz wsp, w9, #0, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: sbfiz w9, w10, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: sbfiz w11, w12, #32, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: requested insert overflows register
@@ -890,10 +891,10 @@
// CHECK-ERROR-NEXT: sbfiz x3, x5, #12, #53
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sbfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT: sbfiz sp, x3, #7, #6
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sbfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: sbfiz w3, wsp, #10, #8
// CHECK-ERROR-NEXT: ^
sbfx w1, w2, #0, #0
@@ -902,18 +903,19 @@
sbfx w11, w12, #32, #0
sbfx w9, w10, #10, #23
sbfx x3, x5, #12, #53
- sbfx sp, x3, #5, #6
- sbfx w3, wsp, #7, #8
-// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+ sbfx sp, x3, #7, #6
+ sbfx w3, wsp, #10, #8
+// CHECK-ERROR-AARCH64: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-ARM64: error: expected integer in range [1, 32]
// CHECK-ERROR-NEXT: sbfx w1, w2, #0, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: sbfx wsp, w9, #0, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: sbfx w9, w10, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: sbfx w11, w12, #32, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: requested extract overflows register
@@ -923,10 +925,10 @@
// CHECK-ERROR-NEXT: sbfx x3, x5, #12, #53
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sbfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT: sbfx sp, x3, #7, #6
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: sbfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: sbfx w3, wsp, #10, #8
// CHECK-ERROR-NEXT: ^
bfi w1, w2, #0, #0
@@ -935,18 +937,19 @@
bfi w11, w12, #32, #0
bfi w9, w10, #10, #23
bfi x3, x5, #12, #53
- bfi sp, x3, #5, #6
- bfi w3, wsp, #7, #8
-// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+ bfi sp, x3, #7, #6
+ bfi w3, wsp, #10, #8
+// CHECK-ERROR-AARCH64: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-ARM64: error: expected integer in range [1, 32]
// CHECK-ERROR-NEXT: bfi w1, w2, #0, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: bfi wsp, w9, #0, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: bfi w9, w10, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: bfi w11, w12, #32, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: requested insert overflows register
@@ -956,10 +959,10 @@
// CHECK-ERROR-NEXT: bfi x3, x5, #12, #53
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: bfi sp, x3, #5, #6
+// CHECK-ERROR-NEXT: bfi sp, x3, #7, #6
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: bfi w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: bfi w3, wsp, #10, #8
// CHECK-ERROR-NEXT: ^
bfxil w1, w2, #0, #0
@@ -968,18 +971,19 @@
bfxil w11, w12, #32, #0
bfxil w9, w10, #10, #23
bfxil x3, x5, #12, #53
- bfxil sp, x3, #5, #6
- bfxil w3, wsp, #7, #8
-// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+ bfxil sp, x3, #7, #6
+ bfxil w3, wsp, #10, #8
+// CHECK-ERROR-AARCH64: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-ARM64: error: expected integer in range [1, 32]
// CHECK-ERROR-NEXT: bfxil w1, w2, #0, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: bfxil wsp, w9, #0, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: bfxil w9, w10, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: bfxil w11, w12, #32, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: requested extract overflows register
@@ -989,10 +993,10 @@
// CHECK-ERROR-NEXT: bfxil x3, x5, #12, #53
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: bfxil sp, x3, #5, #6
+// CHECK-ERROR-NEXT: bfxil sp, x3, #7, #6
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: bfxil w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: bfxil w3, wsp, #10, #8
// CHECK-ERROR-NEXT: ^
ubfiz w1, w2, #0, #0
@@ -1001,18 +1005,19 @@
ubfiz w11, w12, #32, #0
ubfiz w9, w10, #10, #23
ubfiz x3, x5, #12, #53
- ubfiz sp, x3, #5, #6
- ubfiz w3, wsp, #7, #8
-// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+ ubfiz sp, x3, #7, #6
+ ubfiz w3, wsp, #10, #8
+// CHECK-ERROR-AARCH64: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-ARM64: error: expected integer in range [1, 32]
// CHECK-ERROR-NEXT: ubfiz w1, w2, #0, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ubfiz wsp, w9, #0, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ubfiz w9, w10, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ubfiz w11, w12, #32, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: requested insert overflows register
@@ -1022,10 +1027,10 @@
// CHECK-ERROR-NEXT: ubfiz x3, x5, #12, #53
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: ubfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ubfiz sp, x3, #7, #6
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: ubfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ubfiz w3, wsp, #10, #8
// CHECK-ERROR-NEXT: ^
ubfx w1, w2, #0, #0
@@ -1034,18 +1039,19 @@
ubfx w11, w12, #32, #0
ubfx w9, w10, #10, #23
ubfx x3, x5, #12, #53
- ubfx sp, x3, #5, #6
- ubfx w3, wsp, #7, #8
-// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+ ubfx sp, x3, #7, #6
+ ubfx w3, wsp, #10, #8
+// CHECK-ERROR-AARCH64: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-ARM64: error: expected integer in range [1, 32]
// CHECK-ERROR-NEXT: ubfx w1, w2, #0, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ubfx wsp, w9, #0, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ubfx w9, w10, #32, #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ubfx w11, w12, #32, #0
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: requested extract overflows register
@@ -1055,10 +1061,10 @@
// CHECK-ERROR-NEXT: ubfx x3, x5, #12, #53
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: ubfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ubfx sp, x3, #7, #6
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: ubfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ubfx w3, wsp, #10, #8
// CHECK-ERROR-NEXT: ^
//------------------------------------------------------------------------------
@@ -1125,16 +1131,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmp wsp, #4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmp w25, #-1, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmp w3, #32, #0, ge
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp w19, #5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp w20, #7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1146,16 +1152,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmp sp, #4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmp x25, #-1, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmp x3, #32, #0, ge
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp x19, #5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp x20, #7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1167,16 +1173,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmn wsp, #4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmn w25, #-1, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmn w3, #32, #0, ge
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn w19, #5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn w20, #7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1188,16 +1194,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmn sp, #4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmn x25, #-1, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmn x3, #32, #0, ge
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn x19, #5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn x20, #7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1212,13 +1218,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmp wsp, w4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmp w3, wsp, #0, ge
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp w19, w5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp w20, w7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1229,13 +1235,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmp sp, x4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmp x25, sp, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp x19, x5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmp x20, x7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1246,13 +1252,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmn wsp, w4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmn w25, wsp, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn w19, w5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn w20, w7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1263,13 +1269,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ccmn sp, x4, #2, ne
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ccmn x25, sp, #15, hs
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn x19, x5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: ccmn x20, x7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1418,16 +1424,16 @@
hlt #65536
dcps4 #43
dcps4
-// CHECK-ERROR: error: expected integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: svc #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: hlt #65536
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: error: {{invalid instruction|unrecognized instruction mnemonic}}
// CHECK-ERROR-NEXT: dcps4 #43
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: error: {{invalid instruction|unrecognized instruction mnemonic}}
// CHECK-ERROR-NEXT: dcps4
// CHECK-ERROR-NEXT: ^
@@ -1437,28 +1443,28 @@
extr w2, w20, w30, #-1
extr w9, w19, w20, #32
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: extr w2, w20, w30, #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: extr w9, w19, w20, #32
// CHECK-ERROR-NEXT: ^
extr x10, x15, x20, #-1
extr x20, x25, x30, #64
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: extr x10, x15, x20, #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: extr x20, x25, x30, #64
// CHECK-ERROR-NEXT: ^
ror w9, w10, #32
ror x10, x11, #64
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: ror w9, w10, #32
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: ror x10, x11, #64
// CHECK-ERROR-NEXT: ^
@@ -1467,7 +1473,8 @@
//------------------------------------------------------------------------------
fcmp s3, d2
-// CHECK-ERROR: error: expected floating-point constant #0.0
+// CHECK-ERROR-AARCH64: error: expected floating-point constant #0.0
+// CHECK-ERROR-ARM64: error: invalid operand for instruction
// CHECK-ERROR-NEXT: fcmp s3, d2
// CHECK-ERROR-NEXT: ^
@@ -1494,37 +1501,37 @@
fccmp s19, s5, #-1, lt
fccmp s20, s7, #16, hs
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmp s19, s5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmp s20, s7, #16, hs
// CHECK-ERROR-NEXT: ^
fccmp d19, d5, #-1, lt
fccmp d20, d7, #16, hs
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmp d19, d5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmp d20, d7, #16, hs
// CHECK-ERROR-NEXT: ^
fccmpe s19, s5, #-1, lt
fccmpe s20, s7, #16, hs
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmpe s19, s5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmpe s20, s7, #16, hs
// CHECK-ERROR-NEXT: ^
fccmpe d19, d5, #-1, lt
fccmpe d20, d7, #16, hs
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmpe d19, d5, #-1, lt
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: fccmpe d20, d7, #16, hs
// CHECK-ERROR-NEXT: ^
@@ -1604,10 +1611,10 @@
fcvtzs w13, s31, #0
fcvtzs w19, s20, #33
fcvtzs wsp, s19, #14
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR-NEXT: fcvtzs w13, s31, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR-NEXT: fcvtzs w19, s20, #33
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -1617,10 +1624,10 @@
fcvtzs x13, s31, #0
fcvtzs x19, s20, #65
fcvtzs sp, s19, #14
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR-NEXT: fcvtzs x13, s31, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR-NEXT: fcvtzs x19, s20, #65
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -1630,10 +1637,10 @@
fcvtzu w13, s31, #0
fcvtzu w19, s20, #33
fcvtzu wsp, s19, #14
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR-NEXT: fcvtzu w13, s31, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR-NEXT: fcvtzu w19, s20, #33
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -1643,10 +1650,10 @@
fcvtzu x13, s31, #0
fcvtzu x19, s20, #65
fcvtzu sp, s19, #14
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR-NEXT: fcvtzu x13, s31, #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR-NEXT: fcvtzu x19, s20, #65
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -1730,9 +1737,9 @@
;; No particular reason, but a striking omission
fmov d0, #0.0
-// CHECK-ERROR: error: expected compatible register or floating-point constant
-// CHECK-ERROR-NEXT: fmov d0, #0.0
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64: error: expected compatible register or floating-point constant
+// CHECK-ERROR-AARCH64-NEXT: fmov d0, #0.0
+// CHECK-ERROR-AARCH64-NEXT: ^
//------------------------------------------------------------------------------
// Floating-point <-> integer conversion
@@ -1746,10 +1753,12 @@
// CHECK-ERROR: error: expected lane specifier '[1]'
// CHECK-ERROR-NEXT: fmov x3, v0.d[0]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-AARCH64-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-ARM64-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: fmov v29.1d[1], x2
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-AARCH64-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-ARM64-NEXT: error: expected lane specifier '[1]'
// CHECK-ERROR-NEXT: fmov x7, v0.d[2]
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -1789,10 +1798,11 @@
// Load/store exclusive
//------------------------------------------------------------------------------
- stxrb w2, x3, [x4, #20]
+ stxrb w2, w3, [x4, #20]
stlxrh w10, w11, [w2]
-// CHECK-ERROR: error: expected '#0'
-// CHECK-ERROR-NEXT: stxrb w2, x3, [x4, #20]
+// CHECK-ERROR-AARCH64: error: expected '#0'
+// CHECK-ERROR-ARM64: error: index must be absent or #0
+// CHECK-ERROR-NEXT: stxrb w2, w3, [x4, #20]
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: stlxrh w10, w11, [w2]
@@ -1831,16 +1841,16 @@
sturh w17, [x1, #256]
ldursw x20, [x1, #256]
ldur x12, [sp, #256]
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldurb w2, [sp, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: sturh w17, [x1, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldursw x20, [x1, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldur x12, [sp, #256]
// CHECK-ERROR-NEXT: ^
@@ -1849,19 +1859,19 @@
ldursb x9, [sp, #-257]
ldur w2, [x30, #-257]
stur q9, [x20, #-257]
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: stur h2, [x2, #-257]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: stur b2, [x2, #-257]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldursb x9, [sp, #-257]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldur w2, [x30, #-257]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: stur q9, [x20, #-257]
// CHECK-ERROR-NEXT: ^
@@ -1875,12 +1885,13 @@
//------------------------------------------------------------------------------
ldr x3, [x4, #25], #0
ldr x4, [x9, #0], #4
-// CHECK-ERROR: error: expected symbolic reference or integer in range [0, 32760]
+// CHECK-ERROR-AARCH64: error: {{expected symbolic reference or integer|index must be a multiple of 8}} in range [0, 32760]
+// CHECK-ERROR-ARM64: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldr x3, [x4, #25], #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: ldr x4, [x9, #0], #4
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-AARCH64-NEXT: ldr x4, [x9, #0], #4
+// CHECK-ERROR-AARCH64-NEXT: ^
strb w1, [x19], #256
strb w9, [sp], #-257
@@ -1888,22 +1899,22 @@
strh w9, [sp], #-257
str w1, [x19], #256
str w9, [sp], #-257
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: strb w1, [x19], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: strb w9, [sp], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: strh w1, [x19], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: strh w9, [sp], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str w1, [x19], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str w9, [sp], #-257
// CHECK-ERROR-NEXT: ^
@@ -1913,22 +1924,22 @@
ldrh w9, [sp], #-257
ldr w1, [x19], #256
ldr w9, [sp], #-257
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrb w1, [x19], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrb w9, [sp], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrh w1, [x19], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrh w9, [sp], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr w1, [x19], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr w9, [sp], #-257
// CHECK-ERROR-NEXT: ^
@@ -1938,22 +1949,22 @@
ldrsh x22, [x13], #-257
ldrsw x2, [x3], #256
ldrsw x22, [x13], #-257
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsb x2, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsb x22, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsh x2, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsh x22, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsw x2, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsw x22, [x13], #-257
// CHECK-ERROR-NEXT: ^
@@ -1961,16 +1972,16 @@
ldrsb w22, [x13], #-257
ldrsh w2, [x3], #256
ldrsh w22, [x13], #-257
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsb w2, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsb w22, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsh w2, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsh w22, [x13], #-257
// CHECK-ERROR-NEXT: ^
@@ -1984,34 +1995,34 @@
str d3, [x13], #-257
str q3, [x3], #256
str q3, [x13], #-257
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str b3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str b3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str h3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str h3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str s3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str s3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str d3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str d3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str q3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str q3, [x13], #-257
// CHECK-ERROR-NEXT: ^
@@ -2025,34 +2036,34 @@
ldr d3, [x13], #-257
ldr q3, [x3], #256
ldr q3, [x13], #-257
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr b3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr b3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr h3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr h3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr s3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr s3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr d3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr d3, [x13], #-257
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr q3, [x3], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr q3, [x13], #-257
// CHECK-ERROR-NEXT: ^
@@ -2074,19 +2085,19 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: strb w1, [x19, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: strb w9, [sp, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: strh w1, [x19, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: strh w9, [sp, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: str w1, [x19, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str w9, [sp, #-257]!
// CHECK-ERROR-NEXT: ^
@@ -2099,19 +2110,19 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrb w1, [x19, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrb w9, [sp, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrh w1, [x19, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrh w9, [sp, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldr w1, [x19, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr w9, [sp, #-257]!
// CHECK-ERROR-NEXT: ^
@@ -2124,19 +2135,19 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrsb x2, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsb x22, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrsh x2, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsh x22, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrsw x2, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsw x22, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
@@ -2147,13 +2158,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrsb w2, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsb w22, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldrsh w2, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrsh w22, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
@@ -2168,25 +2179,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: str b3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str b3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: str h3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str h3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: str s3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str s3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: str d3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str d3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
@@ -2201,25 +2212,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldr b3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr b3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldr h3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr h3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldr s3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr s3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldr d3, [x3, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr d3, [x13, #-257]!
// CHECK-ERROR-NEXT: ^
@@ -2231,16 +2242,16 @@
sttrh w17, [x1, #256]
ldtrsw x20, [x1, #256]
ldtr x12, [sp, #256]
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldtrb w2, [sp, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: sttrh w17, [x1, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldtrsw x20, [x1, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldtr x12, [sp, #256]
// CHECK-ERROR-NEXT: ^
@@ -2255,10 +2266,10 @@
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: sttr b2, [x2, #-257]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldtrsb x9, [sp, #-257]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldtr w2, [x30, #-257]
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -2276,19 +2287,19 @@
ldr w0, [x4, #16384]
ldrh w2, [x21, #8192]
ldrb w3, [x12, #4096]
-// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr q0, [x11, #65536]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr x0, [sp, #32768]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldr w0, [x4, #16384]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrh w2, [x21, #8192]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: ldrb w3, [x12, #4096]
// CHECK-ERROR-NEXT: ^
@@ -2296,15 +2307,15 @@
ldr w0, [x0, #2]
ldrsh w2, [x0, #123]
str q0, [x0, #8]
-// CHECK-ERROR: error: too few operands for instruction
-// CHECK-ERROR-NEXT: ldr w0, [x0, #2]
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: too few operands for instruction
-// CHECK-ERROR-NEXT: ldrsh w2, [x0, #123]
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: too few operands for instruction
-// CHECK-ERROR-NEXT: str q0, [x0, #8]
-// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-AARCH64: error: too few operands for instruction
+// CHECK-ERROR-AARCH64-NEXT: ldr w0, [x0, #2]
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: too few operands for instruction
+// CHECK-ERROR-AARCH64-NEXT: ldrsh w2, [x0, #123]
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: too few operands for instruction
+// CHECK-ERROR-AARCH64-NEXT: str q0, [x0, #8]
+// CHECK-ERROR-AARCH64-NEXT: ^
//// 32-bit addresses
ldr w0, [w20]
@@ -2324,13 +2335,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: strb w0, [wsp]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR-NEXT: strh w31, [x23, #1]
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: too few operands for instruction
-// CHECK-ERROR-NEXT: str x5, [x22, #12]
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-AARCH64: error: invalid operand for instruction
+// CHECK-ERROR-AARCH64-NEXT: strh w31, [x23, #1]
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-AARCH64-NEXT: error: too few operands for instruction
+// CHECK-ERROR-AARCH64-NEXT: str x5, [x22, #12]
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-NEXT: error: {{expected|index must be an}} integer in range [-256, 255]
// CHECK-ERROR-NEXT: str w7, [x12, #16384]
// CHECK-ERROR-NEXT: ^
@@ -2339,16 +2350,19 @@
prfm #32, [sp, #8]
prfm pldl1strm, [w3, #8]
prfm wibble, [sp]
-// CHECK-ERROR: error: Invalid immediate for instruction
+// CHECK-ERROR-AARCH64: error: Invalid immediate for instruction
+// CHECK-ERROR-ARM64: error: prefetch operand out of range, [0,31] expected
// CHECK-ERROR-NEXT: prfm #-1, [sp]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-AARCH64-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-ARM64-NEXT: error: prefetch operand out of range, [0,31] expected
// CHECK-ERROR-NEXT: prfm #32, [sp, #8]
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
// CHECK-ERROR-NEXT: prfm pldl1strm, [w3, #8]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-AARCH64-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-ARM64-NEXT: error: pre-fetch hint expected
// CHECK-ERROR-NEXT: prfm wibble, [sp]
// CHECK-ERROR-NEXT: ^
@@ -2431,10 +2445,12 @@
// CHECK-ERROR-NEXT: error: expected integer shift amount
// CHECK-ERROR-NEXT: ldr q5, [sp, x2, lsl #-1]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-ARM64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #4
// CHECK-ERROR-NEXT: ldr q10, [x20, w4, uxtw #2]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-AARCH64-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-ARM64-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #4
// CHECK-ERROR-NEXT: str q21, [x20, w4, uxtw #5]
// CHECK-ERROR-NEXT: ^
@@ -2446,16 +2462,16 @@
stp w9, w10, [x5, #256]
ldp w11, w12, [x9, #-260]
stp wsp, w9, [sp]
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp w3, w2, [x4, #1]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp w1, w2, [x3, #253]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp w9, w10, [x5, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp w11, w12, [x9, #-260]
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -2465,26 +2481,26 @@
ldpsw x9, x2, [sp, #2]
ldpsw x1, x2, [x10, #256]
ldpsw x3, x4, [x11, #-260]
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp, #2]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11, #-260]
// CHECK-ERROR-NEXT: ^
ldp x2, x5, [sp, #4]
ldp x5, x6, [x9, #512]
stp x7, x8, [x10, #-520]
-// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp x2, x5, [sp, #4]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp x5, x6, [x9, #512]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stp x7, x8, [x10, #-520]
// CHECK-ERROR-NEXT: ^
@@ -2500,13 +2516,13 @@
stp s3, s5, [sp, #-2]
ldp s6, s26, [x4, #-260]
stp s13, s19, [x5, #256]
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp s3, s5, [sp, #-2]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp s6, s26, [x4, #-260]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp s13, s19, [x5, #256]
// CHECK-ERROR-NEXT: ^
@@ -2516,10 +2532,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldp d3, d4, [xzr]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp d5, d6, [x0, #512]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stp d7, d8, [x0, #-520]
// CHECK-ERROR-NEXT: ^
@@ -2530,13 +2546,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldp d3, q2, [sp]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldp q3, q5, [sp, #8]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: stp q20, q25, [x5, #1024]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldp q30, q15, [x23, #-1040]
// CHECK-ERROR-NEXT: ^
@@ -2549,16 +2565,16 @@
stp w9, w10, [x5], #256
ldp w11, w12, [x9], #-260
stp wsp, w9, [sp], #0
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp w3, w2, [x4], #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp w1, w2, [x3], #253
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp w9, w10, [x5], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp w11, w12, [x9], #-260
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -2568,26 +2584,26 @@
ldpsw x9, x2, [sp], #2
ldpsw x1, x2, [x10], #256
ldpsw x3, x4, [x11], #-260
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp], #2
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10], #256
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11], #-260
// CHECK-ERROR-NEXT: ^
ldp x2, x5, [sp], #4
ldp x5, x6, [x9], #512
stp x7, x8, [x10], #-520
-// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp x2, x5, [sp], #4
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp x5, x6, [x9], #512
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stp x7, x8, [x10], #-520
// CHECK-ERROR-NEXT: ^
@@ -2603,13 +2619,13 @@
stp s3, s5, [sp], #-2
ldp s6, s26, [x4], #-260
stp s13, s19, [x5], #256
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp s3, s5, [sp], #-2
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp s6, s26, [x4], #-260
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp s13, s19, [x5], #256
// CHECK-ERROR-NEXT: ^
@@ -2619,10 +2635,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldp d3, d4, [xzr], #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp d5, d6, [x0], #512
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stp d7, d8, [x0], #-520
// CHECK-ERROR-NEXT: ^
@@ -2633,13 +2649,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldp d3, q2, [sp], #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldp q3, q5, [sp], #8
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: stp q20, q25, [x5], #1024
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldp q30, q15, [x23], #-1040
// CHECK-ERROR-NEXT: ^
@@ -2652,16 +2668,16 @@
stp w9, w10, [x5, #256]!
ldp w11, w12, [x9, #-260]!
stp wsp, w9, [sp, #0]!
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp w3, w2, [x4, #1]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp w1, w2, [x3, #253]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp w9, w10, [x5, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp w11, w12, [x9, #-260]!
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -2671,26 +2687,26 @@
ldpsw x9, x2, [sp, #2]!
ldpsw x1, x2, [x10, #256]!
ldpsw x3, x4, [x11, #-260]!
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp, #2]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10, #256]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11, #-260]!
// CHECK-ERROR-NEXT: ^
ldp x2, x5, [sp, #4]!
ldp x5, x6, [x9, #512]!
stp x7, x8, [x10, #-520]!
-// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp x2, x5, [sp, #4]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp x5, x6, [x9, #512]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stp x7, x8, [x10, #-520]!
// CHECK-ERROR-NEXT: ^
@@ -2706,13 +2722,13 @@
stp s3, s5, [sp, #-2]!
ldp s6, s26, [x4, #-260]!
stp s13, s19, [x5, #256]!
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp s3, s5, [sp, #-2]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldp s6, s26, [x4, #-260]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stp s13, s19, [x5, #256]!
// CHECK-ERROR-NEXT: ^
@@ -2722,10 +2738,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldp d3, d4, [xzr, #0]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldp d5, d6, [x0, #512]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stp d7, d8, [x0, #-520]!
// CHECK-ERROR-NEXT: ^
@@ -2736,13 +2752,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldp d3, q2, [sp, #0]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldp q3, q5, [sp, #8]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: stp q20, q25, [x5, #1024]!
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldp q30, q15, [x23, #-1040]!
// CHECK-ERROR-NEXT: ^
@@ -2754,16 +2770,16 @@
stnp w9, w10, [x5, #256]
ldnp w11, w12, [x9, #-260]
stnp wsp, w9, [sp]
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldnp w3, w2, [x4, #1]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stnp w1, w2, [x3, #253]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stnp w9, w10, [x5, #256]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldnp w11, w12, [x9, #-260]
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -2773,13 +2789,13 @@
ldnp x2, x5, [sp, #4]
ldnp x5, x6, [x9, #512]
stnp x7, x8, [x10, #-520]
-// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldnp x2, x5, [sp, #4]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldnp x5, x6, [x9, #512]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stnp x7, x8, [x10, #-520]
// CHECK-ERROR-NEXT: ^
@@ -2795,13 +2811,13 @@
stnp s3, s5, [sp, #-2]
ldnp s6, s26, [x4, #-260]
stnp s13, s19, [x5, #256]
-// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stnp s3, s5, [sp, #-2]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: ldnp s6, s26, [x4, #-260]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 4 in range [-256, 252]
// CHECK-ERROR-NEXT: stnp s13, s19, [x5, #256]
// CHECK-ERROR-NEXT: ^
@@ -2811,10 +2827,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldnp d3, d4, [xzr]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: ldnp d5, d6, [x0, #512]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 504]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 8 in range [-512, 504]
// CHECK-ERROR-NEXT: stnp d7, d8, [x0, #-520]
// CHECK-ERROR-NEXT: ^
@@ -2825,13 +2841,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: ldnp d3, q2, [sp]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldnp q3, q5, [sp, #8]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: stnp q20, q25, [x5, #1024]
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1008]
+// CHECK-ERROR-NEXT: error: {{expected integer|index must be a}} multiple of 16 in range [-1024, 1008]
// CHECK-ERROR-NEXT: ldnp q30, q15, [x23, #-1040]
// CHECK-ERROR-NEXT: ^
@@ -2974,28 +2990,32 @@
movz x3, #-1
movk w3, #1, lsl #32
movn x2, #12, lsl #64
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz w3, #65536, lsl #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz w4, #65536
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-ARM64-NEXT: error: expected 'lsl' with optional integer 0 or 16
// CHECK-ERROR-NEXT: movn w1, #2, lsl #1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-AARCH64-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-ARM64-NEXT: error: expected integer shift amount
// CHECK-ERROR-NEXT: movk w3, #0, lsl #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movn w2, #-1, lsl #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz x3, #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-ARM64-NEXT: error: expected 'lsl' with optional integer 0 or 16
// CHECK-ERROR-NEXT: movk w3, #1, lsl #32
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-ARM64-NEXT: error: expected 'lsl' with optional integer 0, 16, 32 or 48
// CHECK-ERROR-NEXT: movn x2, #12, lsl #64
// CHECK-ERROR-NEXT: ^
@@ -3005,22 +3025,22 @@
movk w3, #:abs_g0:sym
movz x3, #:abs_g0_nc:sym
movn x4, #:abs_g0_nc:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz x12, #:abs_g0:sym, lsl #16
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz x12, #:abs_g0:sym, lsl #0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
-// CHECK-ERROR-NEXT: movn x2, #:abs_g0:sym
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: movn x2, #:abs_g0:sym
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w3, #:abs_g0:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz x3, #:abs_g0_nc:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movn x4, #:abs_g0_nc:sym
// CHECK-ERROR-NEXT: ^
@@ -3028,16 +3048,16 @@
movk w3, #:abs_g1:sym
movz x3, #:abs_g1_nc:sym
movn x4, #:abs_g1_nc:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
-// CHECK-ERROR-NEXT: movn x2, #:abs_g1:sym
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: movn x2, #:abs_g1:sym
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w3, #:abs_g1:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz x3, #:abs_g1_nc:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movn x4, #:abs_g1_nc:sym
// CHECK-ERROR-NEXT: ^
@@ -3047,53 +3067,53 @@
movk w3, #:abs_g2_nc:sym
movz x13, #:abs_g2_nc:sym
movn x24, #:abs_g2_nc:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz w12, #:abs_g2:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
-// CHECK-ERROR-NEXT: movn x12, #:abs_g2:sym
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: movn x12, #:abs_g2:sym
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk x13, #:abs_g2:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w3, #:abs_g2_nc:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz x13, #:abs_g2_nc:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movn x24, #:abs_g2_nc:sym
// CHECK-ERROR-NEXT: ^
movn x19, #:abs_g3:sym
movz w20, #:abs_g3:sym
movk w21, #:abs_g3:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
-// CHECK-ERROR-NEXT: movn x19, #:abs_g3:sym
-// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-AARCH64: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
+// CHECK-ERROR-AARCH64-NEXT: movn x19, #:abs_g3:sym
+// CHECK-ERROR-AARCH64-NEXT: ^
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz w20, #:abs_g3:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w21, #:abs_g3:sym
// CHECK-ERROR-NEXT: ^
movk x19, #:abs_g0_s:sym
movk w23, #:abs_g0_s:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk x19, #:abs_g0_s:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w23, #:abs_g0_s:sym
// CHECK-ERROR-NEXT: ^
movk x19, #:abs_g1_s:sym
movk w23, #:abs_g1_s:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk x19, #:abs_g1_s:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w23, #:abs_g1_s:sym
// CHECK-ERROR-NEXT: ^
@@ -3101,16 +3121,16 @@
movn w29, #:abs_g2_s:sym
movk x19, #:abs_g2_s:sym
movk w23, #:abs_g2_s:sym
-// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movz w2, #:abs_g2_s:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movn w29, #:abs_g2_s:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk x19, #:abs_g2_s:sym
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: error: {{expected relocated symbol or|immediate must be an}} integer in range [0, 65535]
// CHECK-ERROR-NEXT: movk w23, #:abs_g2_s:sym
// CHECK-ERROR-NEXT: ^
@@ -3154,19 +3174,19 @@
hint #-1
hint #128
-// CHECK-ERROR: error: expected integer in range [0, 127]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 127]
// CHECK-ERROR-NEXT: hint #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 127]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 127]
// CHECK-ERROR-NEXT: hint #128
// CHECK-ERROR-NEXT: ^
clrex #-1
clrex #16
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: clrex #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: clrex #16
// CHECK-ERROR-NEXT: ^
@@ -3174,25 +3194,25 @@
dsb #16
dmb #-1
dmb #16
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: error: {{Invalid immediate for instruction|barrier operand out of range}}
// CHECK-ERROR-NEXT: dsb #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: error: {{Invalid immediate for instruction|barrier operand out of range}}
// CHECK-ERROR-NEXT: dsb #16
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: error: {{Invalid immediate for instruction|barrier operand out of range}}
// CHECK-ERROR-NEXT: dmb #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: error: {{Invalid immediate for instruction|barrier operand out of range}}
// CHECK-ERROR-NEXT: dmb #16
// CHECK-ERROR-NEXT: ^
isb #-1
isb #16
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: error: {{Invalid immediate for instruction|barrier operand out of range}}
// CHECK-ERROR-NEXT: isb #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: error: {{Invalid immediate for instruction|barrier operand out of range}}
// CHECK-ERROR-NEXT: isb #16
// CHECK-ERROR-NEXT: ^
@@ -3200,16 +3220,16 @@
msr spsel, #-1
msr spsel #-1
msr daifclr, #16
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: msr daifset, x4
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: msr spsel, #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected comma before next operand
+// CHECK-ERROR-NEXT: error: {{expected comma before next operand|unexpected token in argument list}}
// CHECK-ERROR-NEXT: msr spsel #-1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR-NEXT: msr daifclr, #16
// CHECK-ERROR-NEXT: ^
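// The {{...}} spans introduced above are FileCheck regular expressions: an
// alternation such as {{expected|immediate must be an}} lets a single CHECK
// line accept either assembler's wording for the same diagnostic. A minimal
// sketch of the idiom (hypothetical messages, not drawn from this test):
//
//   // CHECK: error: {{too large|out of range}} for operand
//
// matches both "error: too large for operand" and "error: out of range for
// operand".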
@@ -3221,7 +3241,7 @@
sysl x13, #3, c16, c2, #3
sysl x9, #2, c11, c16, #5
sysl x4, #4, c9, c8, #8
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR-NEXT: sys #8, c1, c2, #7, x9
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
@@ -3230,10 +3250,10 @@
// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
// CHECK-ERROR-NEXT: sys #2, c11, c16, #5
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR-NEXT: sys #4, c9, c8, #8, xzr
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR-NEXT: sysl x11, #8, c1, c2, #7
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
@@ -3242,20 +3262,21 @@
// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
// CHECK-ERROR-NEXT: sysl x9, #2, c11, c16, #5
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR-NEXT: sysl x4, #4, c9, c8, #8
// CHECK-ERROR-NEXT: ^
ic ialluis, x2
ic allu, x7
ic ivau
-// CHECK-ERROR-NEXT: error: specified IC op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{IC|ic}} op does not use a register
// CHECK-ERROR-NEXT: ic ialluis, x2
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-AARCH64-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-ARM64-NEXT: error: invalid operand for IC instruction
// CHECK-ERROR-NEXT: ic allu, x7
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified IC op requires a register
+// CHECK-ERROR-NEXT: error: specified {{IC|ic}} op requires a register
// CHECK-ERROR-NEXT: ic ivau
// CHECK-ERROR-NEXT: ^
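// Where the two assemblers disagree on more than wording, as with the ic
// diagnostic just above, an alternation no longer helps and the test splits
// into distinct prefixes (CHECK-ERROR-AARCH64-NEXT vs CHECK-ERROR-ARM64-NEXT).
// A minimal sketch, assuming each prefix is enabled by its own RUN line via
// --check-prefix (the RUN header lies outside this excerpt):
//
//   // CHECK-ERROR-AARCH64-NEXT: error: operand specifier not recognised
//   // CHECK-ERROR-ARM64-NEXT: error: invalid operand for IC instruction
//
// so each FileCheck invocation enforces only the diagnostic its backend emits.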
@@ -3291,100 +3312,100 @@
tlbi VALE3
tlbi VMALLS12E1, x15
tlbi VAALE1
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi IPAS2E1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi IPAS2LE1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi VMALLE1IS, x12
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi ALLE2IS, x11
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi ALLE3IS, x20
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAE1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAE2IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAE3IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi ASIDE1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAAE1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi ALLE1IS, x0
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VALE1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VALE2IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VALE3IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi VMALLS12E1IS, xzr
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAALE1IS
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi IPAS2E1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi IPAS2LE1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi VMALLE1, x9
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi ALLE2, x10
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi ALLE3, x11
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAE1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAE2
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAE3
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi ASIDE1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAAE1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi ALLE1, x25
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VALE1
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VALE2
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VALE3
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register
// CHECK-ERROR-NEXT: tlbi VMALLS12E1, x15
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register
// CHECK-ERROR-NEXT: tlbi VAALE1
// CHECK-ERROR-NEXT: ^
@@ -3642,16 +3663,16 @@
tbz w3, #32, nowhere
tbz x9, #-1, there
tbz x20, #64, dont
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: tbz w3, #-1, addr
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: tbz w3, #32, nowhere
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: tbz x9, #-1, there
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: tbz x20, #64, dont
// CHECK-ERROR-NEXT: ^
@@ -3659,16 +3680,16 @@
tbnz w3, #32, nowhere
tbnz x9, #-1, there
tbnz x20, #64, dont
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: tbnz w3, #-1, addr
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR-NEXT: tbnz w3, #32, nowhere
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: tbnz x9, #-1, there
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR-NEXT: tbnz x20, #64, dont
//------------------------------------------------------------------------------
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
index be00e14..a12968b 100644
--- a/test/MC/AArch64/basic-a64-instructions.s
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -108,9 +108,9 @@ _func:
// CHECK: adds x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xab]
// CHECK: adds x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xab]
// CHECK: adds x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xab]
-// CHECK: adds xzr, x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: {{adds xzr,|cmn}} x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xab]
// CHECK: adds x18, sp, w19, sxth // encoding: [0xf2,0xa3,0x33,0xab]
-// CHECK: adds xzr, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: {{adds xzr,|cmn}} x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xab]
// CHECK: adds x3, x5, x9, sxtx #2 // encoding: [0xa3,0xe8,0x29,0xab]
adds w2, w5, w7, uxtb
@@ -127,7 +127,7 @@ _func:
// CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b]
// CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b]
// CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b]
-// CHECK: adds wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
// CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b]
// subs
@@ -143,9 +143,9 @@ _func:
// CHECK: subs x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xeb]
// CHECK: subs x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xeb]
// CHECK: subs x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xeb]
-// CHECK: subs xzr, x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: {{subs xzr,|cmp}} x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xeb]
// CHECK: subs x18, sp, w19, sxth // encoding: [0xf2,0xa3,0x33,0xeb]
-// CHECK: subs xzr, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: {{subs xzr,|cmp}} x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xeb]
// CHECK: subs x3, x5, x9, sxtx #2 // encoding: [0xa3,0xe8,0x29,0xeb]
subs w2, w5, w7, uxtb
@@ -162,7 +162,7 @@ _func:
// CHECK: subs w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x6b]
// CHECK: subs w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x6b]
// CHECK: subs w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x6b]
-// CHECK: subs wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: {{subs wzr,|cmp}} w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x6b]
// CHECK: subs w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x6b]
// cmp
@@ -227,14 +227,14 @@ _func:
cmn wsp, w19, sxth
cmn w2, w3, sxtw
cmn w3, w5, sxtx
-// CHECK: cmn w5, w7, uxtb // encoding: [0xbf,0x00,0x27,0x2b]
-// CHECK: cmn w15, w17, uxth // encoding: [0xff,0x21,0x31,0x2b]
-// CHECK: cmn w29, wzr, uxtw // encoding: [0xbf,0x43,0x3f,0x2b]
-// CHECK: cmn w17, w1, uxtx // encoding: [0x3f,0x62,0x21,0x2b]
-// CHECK: cmn w5, w1, sxtb #1 // encoding: [0xbf,0x84,0x21,0x2b]
-// CHECK: cmn wsp, w19, sxth // encoding: [0xff,0xa3,0x33,0x2b]
-// CHECK: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
-// CHECK: cmn w3, w5, sxtx // encoding: [0x7f,0xe0,0x25,0x2b]
+// CHECK: {{cmn|adds wzr,}} w5, w7, uxtb // encoding: [0xbf,0x00,0x27,0x2b]
+// CHECK: {{cmn|adds wzr,}} w15, w17, uxth // encoding: [0xff,0x21,0x31,0x2b]
+// CHECK: {{cmn|adds wzr,}} w29, wzr, uxtw // encoding: [0xbf,0x43,0x3f,0x2b]
+// CHECK: {{cmn|adds wzr,}} w17, w1, uxtx // encoding: [0x3f,0x62,0x21,0x2b]
+// CHECK: {{cmn|adds wzr,}} w5, w1, sxtb #1 // encoding: [0xbf,0x84,0x21,0x2b]
+// CHECK: {{cmn|adds wzr,}} wsp, w19, sxth // encoding: [0xff,0xa3,0x33,0x2b]
+// CHECK: {{cmn|adds wzr,}} w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: {{cmn|adds wzr,}} w3, w5, sxtx // encoding: [0x7f,0xe0,0x25,0x2b]
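// The rewritten expectations above rest on one printing rule: a flag-setting
// add whose destination is the zero register is canonically shown as cmn.
// Both spellings denote the same instruction word, e.g. (bytes taken from
// the CHECK line above):
//
//   adds wzr, w5, w7, uxtb   // encoding: [0xbf,0x00,0x27,0x2b]
//   cmn  w5, w7, uxtb        // identical word, preferred alias
//
// hence the {{cmn|adds wzr,}} alternation, which tolerates printers on
// either side of that canonicalisation.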
// operands for cmp
cmp x20, w29, uxtb #3
@@ -244,7 +244,7 @@ _func:
// CHECK: cmp x20, w29, uxtb #3 // encoding: [0x9f,0x0e,0x3d,0xeb]
// CHECK: cmp x12, x13, uxtx #4 // encoding: [0x9f,0x71,0x2d,0xeb]
// CHECK: cmp wsp, w1, uxtb // encoding: [0xff,0x03,0x21,0x6b]
-// CHECK: cmn wsp, wzr, sxtw // encoding: [0xff,0xc3,0x3f,0x2b]
+// CHECK: {{cmn|adds wzr,}} wsp, wzr, sxtw // encoding: [0xff,0xc3,0x3f,0x2b]
// LSL variant if sp involved
sub sp, x3, x7, lsl #4
@@ -255,7 +255,7 @@ _func:
// CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb]
// CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b]
// CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b]
-// CHECK: adds wzr, wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b]
+// CHECK: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b]
// CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb]
//------------------------------------------------------------------------------
@@ -309,16 +309,16 @@ _func:
adds w20, wsp, #0x0
adds xzr, x3, #0x1, lsl #12 // FIXME: canonically should be cmn
// CHECK: adds w13, w23, #291, lsl #12 // encoding: [0xed,0x8e,0x44,0x31]
-// CHECK: adds wzr, w2, #4095 // encoding: [0x5f,0xfc,0x3f,0x31]
+// CHECK: {{adds wzr,|cmn}} w2, #4095 // encoding: [0x5f,0xfc,0x3f,0x31]
// CHECK: adds w20, wsp, #0 // encoding: [0xf4,0x03,0x00,0x31]
-// CHECK: adds xzr, x3, #1, lsl #12 // encoding: [0x7f,0x04,0x40,0xb1]
+// CHECK: {{adds xzr,|cmn}} x3, #1, lsl #12 // encoding: [0x7f,0x04,0x40,0xb1]
// Checks for subs
subs xzr, sp, #20, lsl #12 // FIXME: canonically should be cmp
subs xzr, x30, #4095, lsl #0 // FIXME: canonically should be cmp
subs x4, sp, #3822
-// CHECK: subs xzr, sp, #20, lsl #12 // encoding: [0xff,0x53,0x40,0xf1]
-// CHECK: subs xzr, x30, #4095 // encoding: [0xdf,0xff,0x3f,0xf1]
+// CHECK: {{subs xzr,|cmp}} sp, #20, lsl #12 // encoding: [0xff,0x53,0x40,0xf1]
+// CHECK: {{subs xzr,|cmp}} x30, #4095 // encoding: [0xdf,0xff,0x3f,0xf1]
// CHECK: subs x4, sp, #3822 // encoding: [0xe4,0xbb,0x3b,0xf1]
// cmn is an alias for adds zr, ...
@@ -349,8 +349,8 @@ _func:
// A relocation check (defaults to lo12, which is really the only sane relocation anyway)
add x0, x4, #:lo12:var
-// CHECK: add x0, x4, #:lo12:var // encoding: [0x80'A',A,A,0x91'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:var, kind: fixup_a64_add_lo12
+// CHECK: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :lo12:var, kind: fixup_aarch64_add_imm12
//------------------------------------------------------------------------------
// Add-sub (shifted register)
@@ -423,7 +423,7 @@ _func:
adds w20, wzr, w4
adds w4, w6, wzr
// CHECK: adds w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x2b]
-// CHECK: adds wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x2b]
+// CHECK: {{adds wzr,|cmn}} w3, w5 // encoding: [0x7f,0x00,0x05,0x2b]
// CHECK: adds w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x2b]
// CHECK: adds w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x2b]
@@ -453,7 +453,7 @@ _func:
adds x20, xzr, x4
adds x4, x6, xzr
// CHECK: adds x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xab]
-// CHECK: adds xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xab]
+// CHECK: {{adds xzr,|cmn}} x3, x5 // encoding: [0x7f,0x00,0x05,0xab]
// CHECK: adds x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xab]
// CHECK: adds x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xab]
@@ -484,7 +484,7 @@ _func:
sub w4, w6, wzr
// CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b]
// CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b]
-// CHECK: sub w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x4b]
+// CHECK: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b]
// CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b]
sub w11, w13, w15, lsl #0
@@ -514,7 +514,7 @@ _func:
sub x4, x6, xzr
// CHECK: sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb]
// CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb]
-// CHECK: sub x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xcb]
+// CHECK: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb]
// CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb]
sub x11, x13, x15, lsl #0
@@ -543,8 +543,8 @@ _func:
subs w20, wzr, w4
subs w4, w6, wzr
// CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b]
-// CHECK: subs wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x6b]
-// CHECK: subs w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x6b]
+// CHECK: {{subs wzr,|cmp}} w3, w5 // encoding: [0x7f,0x00,0x05,0x6b]
+// CHECK: negs w20, w4 // encoding: [0xf4,0x03,0x04,0x6b]
// CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b]
subs w11, w13, w15, lsl #0
@@ -573,8 +573,8 @@ _func:
subs x20, xzr, x4
subs x4, x6, xzr
// CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb]
-// CHECK: subs xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xeb]
-// CHECK: subs x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xeb]
+// CHECK: {{subs xzr,|cmp}} x3, x5 // encoding: [0x7f,0x00,0x05,0xeb]
+// CHECK: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb]
// CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb]
subs x11, x13, x15, lsl #0
@@ -713,114 +713,118 @@ _func:
neg w29, w30
neg w30, wzr
neg wzr, w0
-// CHECK: sub w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x4b]
-// CHECK: sub w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x4b]
-// CHECK: sub wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x4b]
+// CHECK: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b]
+// CHECK: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b]
+// CHECK: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b]
neg w28, w27, lsl #0
neg w26, w25, lsl #29
neg w24, w23, lsl #31
-// CHECK: sub w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x4b]
-// CHECK: sub w26, wzr, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b]
-// CHECK: sub w24, wzr, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b]
+
+// CHECK: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b]
+// CHECK: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b]
+// CHECK: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b]
neg w22, w21, lsr #0
neg w20, w19, lsr #1
neg w18, w17, lsr #31
-// CHECK: sub w22, wzr, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x4b]
-// CHECK: sub w20, wzr, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x4b]
-// CHECK: sub w18, wzr, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x4b]
+// CHECK: neg w22, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x4b]
+// CHECK: neg w20, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x4b]
+// CHECK: neg w18, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x4b]
neg w16, w15, asr #0
neg w14, w13, asr #12
neg w12, w11, asr #31
-// CHECK: sub w16, wzr, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x4b]
-// CHECK: sub w14, wzr, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x4b]
-// CHECK: sub w12, wzr, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x4b]
+// CHECK: neg w16, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x4b]
+// CHECK: neg w14, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x4b]
+// CHECK: neg w12, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x4b]
neg x29, x30
neg x30, xzr
neg xzr, x0
-// CHECK: sub x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xcb]
-// CHECK: sub x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xcb]
-// CHECK: sub xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xcb]
+// CHECK: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb]
+// CHECK: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb]
+// CHECK: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb]
neg x28, x27, lsl #0
neg x26, x25, lsl #29
neg x24, x23, lsl #31
-// CHECK: sub x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xcb]
-// CHECK: sub x26, xzr, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb]
-// CHECK: sub x24, xzr, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb]
+
+// CHECK: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb]
+// CHECK: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb]
+// CHECK: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb]
neg x22, x21, lsr #0
neg x20, x19, lsr #1
neg x18, x17, lsr #31
-// CHECK: sub x22, xzr, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xcb]
-// CHECK: sub x20, xzr, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xcb]
-// CHECK: sub x18, xzr, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xcb]
+// CHECK: neg x22, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xcb]
+// CHECK: neg x20, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xcb]
+// CHECK: neg x18, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xcb]
neg x16, x15, asr #0
neg x14, x13, asr #12
neg x12, x11, asr #31
-// CHECK: sub x16, xzr, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xcb]
-// CHECK: sub x14, xzr, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xcb]
-// CHECK: sub x12, xzr, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xcb]
+// CHECK: neg x16, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xcb]
+// CHECK: neg x14, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xcb]
+// CHECK: neg x12, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xcb]
negs w29, w30
negs w30, wzr
negs wzr, w0
-// CHECK: subs w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x6b]
-// CHECK: subs w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x6b]
-// CHECK: subs wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x6b]
+// CHECK: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b]
+// CHECK: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b]
+// CHECK: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b]
negs w28, w27, lsl #0
negs w26, w25, lsl #29
negs w24, w23, lsl #31
-// CHECK: subs w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x6b]
-// CHECK: subs w26, wzr, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b]
-// CHECK: subs w24, wzr, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b]
+
+// CHECK: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b]
+// CHECK: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b]
+// CHECK: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b]
negs w22, w21, lsr #0
negs w20, w19, lsr #1
negs w18, w17, lsr #31
-// CHECK: subs w22, wzr, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x6b]
-// CHECK: subs w20, wzr, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x6b]
-// CHECK: subs w18, wzr, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x6b]
+// CHECK: negs w22, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x6b]
+// CHECK: negs w20, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x6b]
+// CHECK: negs w18, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x6b]
negs w16, w15, asr #0
negs w14, w13, asr #12
negs w12, w11, asr #31
-// CHECK: subs w16, wzr, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x6b]
-// CHECK: subs w14, wzr, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x6b]
-// CHECK: subs w12, wzr, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x6b]
+// CHECK: negs w16, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x6b]
+// CHECK: negs w14, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x6b]
+// CHECK: negs w12, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x6b]
negs x29, x30
negs x30, xzr
negs xzr, x0
-// CHECK: subs x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xeb]
-// CHECK: subs x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xeb]
-// CHECK: subs xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xeb]
+// CHECK: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb]
+// CHECK: negs x30, xzr // encoding: [0xfe,0x03,0x1f,0xeb]
+// CHECK: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb]
negs x28, x27, lsl #0
negs x26, x25, lsl #29
negs x24, x23, lsl #31
-// CHECK: subs x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xeb]
-// CHECK: subs x26, xzr, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb]
-// CHECK: subs x24, xzr, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb]
+
+// CHECK: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb]
+// CHECK: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb]
+// CHECK: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb]
negs x22, x21, lsr #0
negs x20, x19, lsr #1
negs x18, x17, lsr #31
-// CHECK: subs x22, xzr, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xeb]
-// CHECK: subs x20, xzr, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xeb]
-// CHECK: subs x18, xzr, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xeb]
+// CHECK: negs x22, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xeb]
+// CHECK: negs x20, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xeb]
+// CHECK: negs x18, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xeb]
negs x16, x15, asr #0
negs x14, x13, asr #12
negs x12, x11, asr #31
-// CHECK: subs x16, xzr, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xeb]
-// CHECK: subs x14, xzr, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xeb]
-// CHECK: subs x12, xzr, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xeb]
+// CHECK: negs x16, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xeb]
+// CHECK: negs x14, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xeb]
+// CHECK: negs x12, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xeb]
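// The same zero-register rule drives this block: neg Rd, Rm is sub Rd, zr, Rm
// and negs is the subs form, so the updated expectations use the alias where
// the old ones spelled out the raw subtraction. One worked pair from the
// CHECK lines above:
//
//   negs w29, w30            // i.e. subs w29, wzr, w30
//                            // encoding: [0xfd,0x03,0x1e,0x6b]
//   negs wzr, w0             // destination also zr: prints as cmp wzr, w0
//
// the second case shows why two expectations in this block became cmp rather
// than negs.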
//------------------------------------------------------------------------------
// Add-sub (shifted register)
@@ -933,28 +937,29 @@ _func:
sbfm x3, x4, #63, #63
sbfm wzr, wzr, #31, #31
sbfm w12, w9, #0, #0
-// CHECK: sbfm x1, x2, #3, #4 // encoding: [0x41,0x10,0x43,0x93]
-// CHECK: sbfm x3, x4, #63, #63 // encoding: [0x83,0xfc,0x7f,0x93]
-// CHECK: sbfm wzr, wzr, #31, #31 // encoding: [0xff,0x7f,0x1f,0x13]
-// CHECK: sbfm w12, w9, #0, #0 // encoding: [0x2c,0x01,0x00,0x13]
+
+// CHECK: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93]
+// CHECK: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93]
+// CHECK: asr wzr, wzr, #31 // encoding: [0xff,0x7f,0x1f,0x13]
+// CHECK: sbfx w12, w9, #0, #1 // encoding: [0x2c,0x01,0x00,0x13]
ubfm x4, x5, #12, #10
ubfm xzr, x4, #0, #0
ubfm x4, xzr, #63, #5
ubfm x5, x6, #12, #63
-// CHECK: ubfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xd3]
-// CHECK: ubfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xd3]
-// CHECK: ubfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xd3]
-// CHECK: ubfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xd3]
+// CHECK: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3]
+// CHECK: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3]
+// CHECK: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3]
+// CHECK: lsr x5, x6, #12 // encoding: [0xc5,0xfc,0x4c,0xd3]
bfm x4, x5, #12, #10
bfm xzr, x4, #0, #0
bfm x4, xzr, #63, #5
bfm x5, x6, #12, #63
-// CHECK: bfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xb3]
-// CHECK: bfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xb3]
-// CHECK: bfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xb3]
-// CHECK: bfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xb3]
+// CHECK: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3]
+// CHECK: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3]
+// CHECK: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3]
+// CHECK: bfxil x5, x6, #12, #52 // encoding: [0xc5,0xfc,0x4c,0xb3]
sxtb w1, w2
sxtb xzr, w3
@@ -972,9 +977,9 @@ _func:
uxth w9, w10
uxth x0, w1
// CHECK: uxtb w1, w2 // encoding: [0x41,0x1c,0x00,0x53]
-// CHECK: uxtb xzr, w3 // encoding: [0x7f,0x1c,0x00,0x53]
+// CHECK: uxtb {{[wx]}}zr, w3 // encoding: [0x7f,0x1c,0x00,0x53]
// CHECK: uxth w9, w10 // encoding: [0x49,0x3d,0x00,0x53]
-// CHECK: uxth x0, w1 // encoding: [0x20,0x3c,0x00,0x53]
+// CHECK: uxth {{[wx]}}0, w1 // encoding: [0x20,0x3c,0x00,0x53]
asr w3, w2, #0
asr w9, w10, #31
@@ -998,7 +1003,7 @@ _func:
lsl w9, w10, #31
lsl x20, x21, #63
lsl w1, wzr, #3
-// CHECK: lsl w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: {{lsl|lsr}} w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x53]
// CHECK: lsl w9, w10, #31 // encoding: [0x49,0x01,0x01,0x53]
// CHECK: lsl x20, x21, #63 // encoding: [0xb4,0x02,0x41,0xd3]
// CHECK: lsl w1, wzr, #3 // encoding: [0xe1,0x73,0x1d,0x53]
@@ -1011,11 +1016,11 @@ _func:
sbfiz w11, w12, #31, #1
sbfiz w13, w14, #29, #3
sbfiz xzr, xzr, #10, #11
-// CHECK: sbfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: {{sbfiz|sbfx}} w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
// CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93]
-// CHECK: sbfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93]
// CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93]
-// CHECK: sbfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13]
// CHECK: sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13]
// CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13]
// CHECK: sbfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0x93]
@@ -1029,12 +1034,12 @@ _func:
sbfx w13, w14, #29, #3
sbfx xzr, xzr, #10, #11
// CHECK: sbfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
-// CHECK: sbfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0x93]
-// CHECK: sbfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93]
-// CHECK: sbfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0x93]
-// CHECK: sbfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13]
-// CHECK: sbfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x13]
-// CHECK: sbfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x13]
+// CHECK: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93]
+// CHECK: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93]
+// CHECK: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13]
+// CHECK: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13]
// CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93]
bfi w9, w10, #0, #1
@@ -1045,11 +1050,12 @@ _func:
bfi w11, w12, #31, #1
bfi w13, w14, #29, #3
bfi xzr, xzr, #10, #11
-// CHECK: bfi w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33]
+
+// CHECK: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33]
// CHECK: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3]
-// CHECK: bfi x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3]
// CHECK: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3]
-// CHECK: bfi w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33]
// CHECK: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33]
// CHECK: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33]
// CHECK: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3]
@@ -1079,14 +1085,15 @@ _func:
ubfiz w11, w12, #31, #1
ubfiz w13, w14, #29, #3
ubfiz xzr, xzr, #10, #11
-// CHECK: ubfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
-// CHECK: ubfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xd3]
-// CHECK: ubfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3]
-// CHECK: ubfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xd3]
-// CHECK: ubfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53]
-// CHECK: ubfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x53]
-// CHECK: ubfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x53]
-// CHECK: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3]
+
+// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3]
+// CHECK: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: lsl x9, x10, #5 // encoding: [0x49,0xe9,0x7b,0xd3]
+// CHECK: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: lsl w11, w12, #31 // encoding: [0x8b,0x01,0x01,0x53]
+// CHECK: lsl w13, w14, #29 // encoding: [0xcd,0x09,0x03,0x53]
+// CHECK: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3]
ubfx w9, w10, #0, #1
ubfx x2, x3, #63, #1
@@ -1096,15 +1103,15 @@ _func:
ubfx w11, w12, #31, #1
ubfx w13, w14, #29, #3
ubfx xzr, xzr, #10, #11
-// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
-// CHECK: ubfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xd3]
-// CHECK: ubfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3]
-// CHECK: ubfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xd3]
-// CHECK: ubfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53]
-// CHECK: ubfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x53]
-// CHECK: ubfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x53]
-// CHECK: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3]
+// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3]
+// CHECK: lsr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: lsr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0xd3]
+// CHECK: lsr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: lsr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x53]
+// CHECK: lsr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x53]
+// CHECK: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3]
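// UBFM itself is rarely printed: the disassembler derives an alias from the
// (immr, imms) pair. For the 64-bit form, the selection consistent with the
// expectations above is (first match wins):
//
//   imms == 63          ->  lsr   Rd, Rn, #immr
//   imms + 1 == immr    ->  lsl   Rd, Rn, #(63 - imms)
//   imms <  immr        ->  ubfiz Rd, Rn, #(64 - immr), #(imms + 1)
//   imms >= immr        ->  ubfx  Rd, Rn, #immr, #(imms - immr + 1)
//
// e.g. ubfm x4, x5, #12, #10 has imms < immr and so prints as
// ubfiz x4, x5, #52, #11. SBFM and BFM follow analogous tables (asr, sbfiz,
// sbfx and bfi, bfxil), which is what the sbfm and bfm blocks further up
// exercise.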
//------------------------------------------------------------------------------
// Compare & branch (immediate)
//------------------------------------------------------------------------------
@@ -1113,21 +1120,22 @@ _func:
cbz x5, lbl
cbnz x2, lbl
cbnz x26, lbl
-// CHECK: cbz w5, lbl // encoding: [0x05'A',A,A,0x34'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: cbz x5, lbl // encoding: [0x05'A',A,A,0xb4'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: cbnz x2, lbl // encoding: [0x02'A',A,A,0xb5'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: cbnz x26, lbl // encoding: [0x1a'A',A,A,0xb5'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: cbnz x2, lbl // encoding: [0bAAA00010,A,A,0xb5]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: cbnz x26, lbl // encoding: [0bAAA11010,A,A,0xb5]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
cbz wzr, lbl
cbnz xzr, lbl
-// CHECK: cbz wzr, lbl // encoding: [0x1f'A',A,A,0x34'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: cbnz xzr, lbl // encoding: [0x1f'A',A,A,0xb5'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+// CHECK: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: cbnz xzr, lbl // encoding: [0bAAA11111,A,A,0xb5]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
cbz w5, #0
cbnz x3, #-4
@@ -1159,41 +1167,43 @@ _func:
b.gt lbl
b.le lbl
b.al lbl
-// CHECK: b.eq lbl // encoding: [A,A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.ne lbl // encoding: [0x01'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.mi lbl // encoding: [0x04'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.pl lbl // encoding: [0x05'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.vs lbl // encoding: [0x06'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.vc lbl // encoding: [0x07'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.hi lbl // encoding: [0x08'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.ls lbl // encoding: [0x09'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.le lbl // encoding: [0x0d'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.al lbl // encoding: [0x0e'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.eq lbl // encoding: [0bAAA00000,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.ne lbl // encoding: [0bAAA00001,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.hs lbl // encoding: [0bAAA00010,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.hs lbl // encoding: [0bAAA00010,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.lo lbl // encoding: [0bAAA00011,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.lo lbl // encoding: [0bAAA00011,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.mi lbl // encoding: [0bAAA00100,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.pl lbl // encoding: [0bAAA00101,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.vs lbl // encoding: [0bAAA00110,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.vc lbl // encoding: [0bAAA00111,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.hi lbl // encoding: [0bAAA01000,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.ls lbl // encoding: [0bAAA01001,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.ge lbl // encoding: [0bAAA01010,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.lt lbl // encoding: [0bAAA01011,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.gt lbl // encoding: [0bAAA01100,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.le lbl // encoding: [0bAAA01101,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+// CHECK: b.al lbl // encoding: [0bAAA01110,A,A,0x54]
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_aarch64_pcrel_branch19
+
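// In the updated encodings the letter A marks bits that a fixup fills in
// later: fixup_aarch64_pcrel_branch19 supplies a 19-bit PC-relative word
// offset occupying bits 5..23, while the condition sits in the low nibble.
// Reading one line above as a worked example:
//
//   b.ne lbl   // encoding: [0bAAA00001,A,A,0x54]
//
// the low byte holds cond = 0b0001 (ne) in bits 0..3, bit 4 is zero, and the
// three A bits plus the two all-A bytes are the imm19 field left for the
// fixup (or, for an external label, a relocation) to resolve.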
+ // ARM64 has these in a separate file
beq lbl
bne lbl
bcs lbl
@@ -1211,40 +1221,6 @@ _func:
bgt lbl
ble lbl
bal lbl
-// CHECK: b.eq lbl // encoding: [A,A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.ne lbl // encoding: [0x01'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.mi lbl // encoding: [0x04'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.pl lbl // encoding: [0x05'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.vs lbl // encoding: [0x06'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.vc lbl // encoding: [0x07'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.hi lbl // encoding: [0x08'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.ls lbl // encoding: [0x09'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.le lbl // encoding: [0x0d'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
-// CHECK: b.al lbl // encoding: [0x0e'A',A,A,0x54'A']
-// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
b.eq #0
b.lt #-4
@@ -1394,55 +1370,55 @@ _func:
cset w3, eq
cset x9, pl
-// CHECK: csinc w3, wzr, wzr, ne // encoding: [0xe3,0x17,0x9f,0x1a]
-// CHECK: csinc x9, xzr, xzr, mi // encoding: [0xe9,0x47,0x9f,0x9a]
+// CHECK: cset w3, eq // encoding: [0xe3,0x17,0x9f,0x1a]
+// CHECK: cset x9, pl // encoding: [0xe9,0x47,0x9f,0x9a]
csetm w20, ne
csetm x30, ge
-// CHECK: csinv w20, wzr, wzr, eq // encoding: [0xf4,0x03,0x9f,0x5a]
-// CHECK: csinv x30, xzr, xzr, lt // encoding: [0xfe,0xb3,0x9f,0xda]
+// CHECK: csetm w20, ne // encoding: [0xf4,0x03,0x9f,0x5a]
+// CHECK: csetm x30, ge // encoding: [0xfe,0xb3,0x9f,0xda]
cinc w3, w5, gt
cinc wzr, w4, le
cinc w9, wzr, lt
-// CHECK: csinc w3, w5, w5, le // encoding: [0xa3,0xd4,0x85,0x1a]
-// CHECK: csinc wzr, w4, w4, gt // encoding: [0x9f,0xc4,0x84,0x1a]
-// CHECK: csinc w9, wzr, wzr, ge // encoding: [0xe9,0xa7,0x9f,0x1a]
+// CHECK: cinc w3, w5, gt // encoding: [0xa3,0xd4,0x85,0x1a]
+// CHECK: cinc wzr, w4, le // encoding: [0x9f,0xc4,0x84,0x1a]
+// CHECK: cset w9, lt // encoding: [0xe9,0xa7,0x9f,0x1a]
cinc x3, x5, gt
cinc xzr, x4, le
cinc x9, xzr, lt
-// CHECK: csinc x3, x5, x5, le // encoding: [0xa3,0xd4,0x85,0x9a]
-// CHECK: csinc xzr, x4, x4, gt // encoding: [0x9f,0xc4,0x84,0x9a]
-// CHECK: csinc x9, xzr, xzr, ge // encoding: [0xe9,0xa7,0x9f,0x9a]
+// CHECK: cinc x3, x5, gt // encoding: [0xa3,0xd4,0x85,0x9a]
+// CHECK: cinc xzr, x4, le // encoding: [0x9f,0xc4,0x84,0x9a]
+// CHECK: cset x9, lt // encoding: [0xe9,0xa7,0x9f,0x9a]
cinv w3, w5, gt
cinv wzr, w4, le
cinv w9, wzr, lt
-// CHECK: csinv w3, w5, w5, le // encoding: [0xa3,0xd0,0x85,0x5a]
-// CHECK: csinv wzr, w4, w4, gt // encoding: [0x9f,0xc0,0x84,0x5a]
-// CHECK: csinv w9, wzr, wzr, ge // encoding: [0xe9,0xa3,0x9f,0x5a]
+// CHECK: cinv w3, w5, gt // encoding: [0xa3,0xd0,0x85,0x5a]
+// CHECK: cinv wzr, w4, le // encoding: [0x9f,0xc0,0x84,0x5a]
+// CHECK: csetm w9, lt // encoding: [0xe9,0xa3,0x9f,0x5a]
cinv x3, x5, gt
cinv xzr, x4, le
cinv x9, xzr, lt
-// CHECK: csinv x3, x5, x5, le // encoding: [0xa3,0xd0,0x85,0xda]
-// CHECK: csinv xzr, x4, x4, gt // encoding: [0x9f,0xc0,0x84,0xda]
-// CHECK: csinv x9, xzr, xzr, ge // encoding: [0xe9,0xa3,0x9f,0xda]
+// CHECK: cinv x3, x5, gt // encoding: [0xa3,0xd0,0x85,0xda]
+// CHECK: cinv xzr, x4, le // encoding: [0x9f,0xc0,0x84,0xda]
+// CHECK: csetm x9, lt // encoding: [0xe9,0xa3,0x9f,0xda]
cneg w3, w5, gt
cneg wzr, w4, le
cneg w9, wzr, lt
-// CHECK: csneg w3, w5, w5, le // encoding: [0xa3,0xd4,0x85,0x5a]
-// CHECK: csneg wzr, w4, w4, gt // encoding: [0x9f,0xc4,0x84,0x5a]
-// CHECK: csneg w9, wzr, wzr, ge // encoding: [0xe9,0xa7,0x9f,0x5a]
+// CHECK: cneg w3, w5, gt // encoding: [0xa3,0xd4,0x85,0x5a]
+// CHECK: cneg wzr, w4, le // encoding: [0x9f,0xc4,0x84,0x5a]
+// CHECK: cneg w9, wzr, lt // encoding: [0xe9,0xa7,0x9f,0x5a]
cneg x3, x5, gt
cneg xzr, x4, le
cneg x9, xzr, lt
-// CHECK: csneg x3, x5, x5, le // encoding: [0xa3,0xd4,0x85,0xda]
-// CHECK: csneg xzr, x4, x4, gt // encoding: [0x9f,0xc4,0x84,0xda]
-// CHECK: csneg x9, xzr, xzr, ge // encoding: [0xe9,0xa7,0x9f,0xda]
+// CHECK: cneg x3, x5, gt // encoding: [0xa3,0xd4,0x85,0xda]
+// CHECK: cneg xzr, x4, le // encoding: [0x9f,0xc4,0x84,0xda]
+// CHECK: cneg x9, xzr, lt // encoding: [0xe9,0xa7,0x9f,0xda]
//------------------------------------------------------------------------------
// Data-processing (1 source)
@@ -1699,23 +1675,23 @@ _func:
svc #0
svc #65535
// CHECK: svc #0 // encoding: [0x01,0x00,0x00,0xd4]
-// CHECK: svc #65535 // encoding: [0xe1,0xff,0x1f,0xd4]
+// CHECK: svc #{{65535|0xffff}} // encoding: [0xe1,0xff,0x1f,0xd4]
hvc #1
smc #12000
brk #12
hlt #123
-// CHECK: hvc #1 // encoding: [0x22,0x00,0x00,0xd4]
-// CHECK: smc #12000 // encoding: [0x03,0xdc,0x05,0xd4]
-// CHECK: brk #12 // encoding: [0x80,0x01,0x20,0xd4]
-// CHECK: hlt #123 // encoding: [0x60,0x0f,0x40,0xd4]
+// CHECK: hvc #{{1|0x1}} // encoding: [0x22,0x00,0x00,0xd4]
+// CHECK: smc #{{12000|0x2ee0}} // encoding: [0x03,0xdc,0x05,0xd4]
+// CHECK: brk #{{12|0xc}} // encoding: [0x80,0x01,0x20,0xd4]
+// CHECK: hlt #{{123|0x7b}} // encoding: [0x60,0x0f,0x40,0xd4]
dcps1 #42
dcps2 #9
dcps3 #1000
-// CHECK: dcps1 #42 // encoding: [0x41,0x05,0xa0,0xd4]
-// CHECK: dcps2 #9 // encoding: [0x22,0x01,0xa0,0xd4]
-// CHECK: dcps3 #1000 // encoding: [0x03,0x7d,0xa0,0xd4]
+// CHECK: dcps1 #{{42|0x2a}} // encoding: [0x41,0x05,0xa0,0xd4]
+// CHECK: dcps2 #{{9|0x9}} // encoding: [0x22,0x01,0xa0,0xd4]
+// CHECK: dcps3 #{{1000|0x3e8}} // encoding: [0x03,0x7d,0xa0,0xd4]
dcps1
dcps2
@@ -1740,11 +1716,11 @@ _func:
ror x19, x23, #24
ror x29, xzr, #63
-// CHECK: extr x19, x23, x23, #24 // encoding: [0xf3,0x62,0xd7,0x93]
-// CHECK: extr x29, xzr, xzr, #63 // encoding: [0xfd,0xff,0xdf,0x93]
+// CHECK: ror x19, x23, #24 // encoding: [0xf3,0x62,0xd7,0x93]
+// CHECK: ror x29, xzr, #63 // encoding: [0xfd,0xff,0xdf,0x93]
ror w9, w13, #31
-// CHECK: extr w9, w13, w13, #31 // encoding: [0xa9,0x7d,0x8d,0x13]
+// CHECK: ror w9, w13, #31 // encoding: [0xa9,0x7d,0x8d,0x13]
//------------------------------------------------------------------------------
// Floating-point compare
@@ -2176,7 +2152,7 @@ _func:
fmov x3, v12.d[1]
fmov v1.d[1], x19
- fmov v3.2d[1], xzr
+ fmov v3.d[1], xzr
// CHECK: fmov x3, v12.d[1] // encoding: [0x83,0x01,0xae,0x9e]
// CHECK: fmov v1.d[1], x19 // encoding: [0x61,0x02,0xaf,0x9e]
// CHECK: fmov v3.d[1], xzr // encoding: [0xe3,0x03,0xaf,0x9e]
@@ -2188,20 +2164,20 @@ _func:
fmov s2, #0.125
fmov s3, #1.0
fmov d30, #16.0
-// CHECK: fmov s2, #0.12500000 // encoding: [0x02,0x10,0x28,0x1e]
-// CHECK: fmov s3, #1.00000000 // encoding: [0x03,0x10,0x2e,0x1e]
-// CHECK: fmov d30, #16.00000000 // encoding: [0x1e,0x10,0x66,0x1e]
+// CHECK: fmov s2, #{{0.12500000|1.250*e-01}} // encoding: [0x02,0x10,0x28,0x1e]
+// CHECK: fmov s3, #{{1.00000000|1.0*e\+00}} // encoding: [0x03,0x10,0x2e,0x1e]
+// CHECK: fmov d30, #{{16.00000000|1.60*e\+01}} // encoding: [0x1e,0x10,0x66,0x1e]
fmov s4, #1.0625
fmov d10, #1.9375
-// CHECK: fmov s4, #1.06250000 // encoding: [0x04,0x30,0x2e,0x1e]
-// CHECK: fmov d10, #1.93750000 // encoding: [0x0a,0xf0,0x6f,0x1e]
+// CHECK: fmov s4, #{{1.06250*(e\+00)?}} // encoding: [0x04,0x30,0x2e,0x1e]
+// CHECK: fmov d10, #{{1.93750*(e\+00)?}} // encoding: [0x0a,0xf0,0x6f,0x1e]
fmov s12, #-1.0
-// CHECK: fmov s12, #-1.00000000 // encoding: [0x0c,0x10,0x3e,0x1e]
+// CHECK: fmov s12, #{{-1.0*(e\+00)?}} // encoding: [0x0c,0x10,0x3e,0x1e]
fmov d16, #8.5
-// CHECK: fmov d16, #8.50000000 // encoding: [0x10,0x30,0x64,0x1e]
+// CHECK: fmov d16, #{{8.50*(e\+00)?}} // encoding: [0x10,0x30,0x64,0x1e]
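// The FP immediates get the same dual-printer treatment as the diagnostics:
// {{8.50*(e\+00)?}} is a FileCheck regex accepting both a fixed-point
// rendering (#8.50000000) and an exponent rendering of the same constant.
// The encoded value is untouched either way, e.g.:
//
//   fmov d16, #8.5   // encoding: [0x10,0x30,0x64,0x1e] under both printers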
//------------------------------------------------------------------------------
// Load-register (literal)
@@ -2209,22 +2185,24 @@ _func:
ldr w3, here
ldr x29, there
ldrsw xzr, everywhere
-// CHECK: ldr w3, here // encoding: [0x03'A',A,A,0x18'A']
-// CHECK: // fixup A - offset: 0, value: here, kind: fixup_a64_ld_prel
-// CHECK: ldr x29, there // encoding: [0x1d'A',A,A,0x58'A']
-// CHECK: // fixup A - offset: 0, value: there, kind: fixup_a64_ld_prel
-// CHECK: ldrsw xzr, everywhere // encoding: [0x1f'A',A,A,0x98'A']
-// CHECK: // fixup A - offset: 0, value: everywhere, kind: fixup_a64_ld_prel
+
+// CHECK: ldr w3, here // encoding: [0bAAA00011,A,A,0x18]
+// CHECK: // fixup A - offset: 0, value: here, kind: fixup_aarch64_ldr_pcrel_imm19
+// CHECK: ldr x29, there // encoding: [0bAAA11101,A,A,0x58]
+// CHECK: // fixup A - offset: 0, value: there, kind: fixup_aarch64_ldr_pcrel_imm19
+// CHECK: ldrsw xzr, everywhere // encoding: [0bAAA11111,A,A,0x98]
+// CHECK: // fixup A - offset: 0, value: everywhere, kind: fixup_aarch64_ldr_pcrel_imm19
ldr s0, who_knows
ldr d0, i_dont
ldr q0, there_must_be_a_better_way
-// CHECK: ldr s0, who_knows // encoding: [A,A,A,0x1c'A']
-// CHECK: // fixup A - offset: 0, value: who_knows, kind: fixup_a64_ld_prel
-// CHECK: ldr d0, i_dont // encoding: [A,A,A,0x5c'A']
-// CHECK: // fixup A - offset: 0, value: i_dont, kind: fixup_a64_ld_prel
-// CHECK: ldr q0, there_must_be_a_better_way // encoding: [A,A,A,0x9c'A']
-// CHECK: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_a64_ld_prel
+
+// CHECK: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c]
+// CHECK: // fixup A - offset: 0, value: who_knows, kind: fixup_aarch64_ldr_pcrel_imm19
+// CHECK: ldr d0, i_dont // encoding: [0bAAA00000,A,A,0x5c]
+// CHECK: // fixup A - offset: 0, value: i_dont, kind: fixup_aarch64_ldr_pcrel_imm19
+// CHECK: ldr q0, there_must_be_a_better_way // encoding: [0bAAA00000,A,A,0x9c]
+// CHECK: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_aarch64_ldr_pcrel_imm19
ldr w0, #1048572
ldr x10, #-1048576
@@ -2233,32 +2211,11 @@ _func:
prfm pldl1strm, nowhere
prfm #22, somewhere
-// CHECK: prfm pldl1strm, nowhere // encoding: [0x01'A',A,A,0xd8'A']
-// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_ld_prel
-// CHECK: prfm #22, somewhere // encoding: [0x16'A',A,A,0xd8'A']
-// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_ld_prel
-
-//------------------------------------------------------------------------------
-// Floating-point immediate
-//------------------------------------------------------------------------------
- fmov s2, #0.125
- fmov s3, #1.0
- fmov d30, #16.0
-// CHECK: fmov s2, #0.12500000 // encoding: [0x02,0x10,0x28,0x1e]
-// CHECK: fmov s3, #1.00000000 // encoding: [0x03,0x10,0x2e,0x1e]
-// CHECK: fmov d30, #16.00000000 // encoding: [0x1e,0x10,0x66,0x1e]
-
- fmov s4, #1.0625
- fmov d10, #1.9375
-// CHECK: fmov s4, #1.06250000 // encoding: [0x04,0x30,0x2e,0x1e]
-// CHECK: fmov d10, #1.93750000 // encoding: [0x0a,0xf0,0x6f,0x1e]
-
- fmov s12, #-1.0
-// CHECK: fmov s12, #-1.00000000 // encoding: [0x0c,0x10,0x3e,0x1e]
-
- fmov d16, #8.5
-// CHECK: fmov d16, #8.50000000 // encoding: [0x10,0x30,0x64,0x1e]
+// CHECK: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8]
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_ldr_pcrel_imm19
+// CHECK: prfm #22, somewhere // encoding: [0bAAA10110,A,A,0xd8]
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_ldr_pcrel_imm19
//------------------------------------------------------------------------------
// Load/store exclusive
@@ -2473,18 +2430,19 @@ _func:
ldrsw x15, [x5, #:lo12:sym]
ldr x15, [x5, #:lo12:sym]
ldr q3, [x2, #:lo12:sym]
-// CHECK: str x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,A,0xf9'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
-// CHECK: ldrb w15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0x39'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst8_lo12
-// CHECK: ldrsh x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0x79'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst16_lo12
-// CHECK: ldrsw x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0xb9'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst32_lo12
-// CHECK: ldr x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0xf9'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
-// CHECK: ldr q3, [x2, #:lo12:sym] // encoding: [0x43'A',A,0xc0'A',0x3d'A']
-// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst128_lo12
+
+// CHECK: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: ldrb w15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0x39]
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK: ldrsh x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0x79]
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK: ldrsw x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b10AAAAAA,0xb9]
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK: ldr x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b01AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: ldr q3, [x2, :lo12:sym] // encoding: [0x43,0bAAAAAA00,0b11AAAAAA,0x3d]
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_aarch64_ldst_imm12_scale16
prfm pldl1keep, [sp, #8]
prfm pldl1strm, [x3]
@@ -2506,24 +2464,24 @@ _func:
prfm pstl3strm, [x6]
prfm #15, [sp]
// CHECK: prfm pldl1keep, [sp, #8] // encoding: [0xe0,0x07,0x80,0xf9]
-// CHECK: prfm pldl1strm, [x3, #0] // encoding: [0x61,0x00,0x80,0xf9]
+// CHECK: prfm pldl1strm, [x3{{(, #0)?}}] // encoding: [0x61,0x00,0x80,0xf9]
// CHECK: prfm pldl2keep, [x5, #16] // encoding: [0xa2,0x08,0x80,0xf9]
-// CHECK: prfm pldl2strm, [x2, #0] // encoding: [0x43,0x00,0x80,0xf9]
-// CHECK: prfm pldl3keep, [x5, #0] // encoding: [0xa4,0x00,0x80,0xf9]
-// CHECK: prfm pldl3strm, [x6, #0] // encoding: [0xc5,0x00,0x80,0xf9]
+// CHECK: prfm pldl2strm, [x2{{(, #0)?}}] // encoding: [0x43,0x00,0x80,0xf9]
+// CHECK: prfm pldl3keep, [x5{{(, #0)?}}] // encoding: [0xa4,0x00,0x80,0xf9]
+// CHECK: prfm pldl3strm, [x6{{(, #0)?}}] // encoding: [0xc5,0x00,0x80,0xf9]
// CHECK: prfm plil1keep, [sp, #8] // encoding: [0xe8,0x07,0x80,0xf9]
-// CHECK: prfm plil1strm, [x3, #0] // encoding: [0x69,0x00,0x80,0xf9]
+// CHECK: prfm plil1strm, [x3{{(, #0)?}}] // encoding: [0x69,0x00,0x80,0xf9]
// CHECK: prfm plil2keep, [x5, #16] // encoding: [0xaa,0x08,0x80,0xf9]
-// CHECK: prfm plil2strm, [x2, #0] // encoding: [0x4b,0x00,0x80,0xf9]
-// CHECK: prfm plil3keep, [x5, #0] // encoding: [0xac,0x00,0x80,0xf9]
-// CHECK: prfm plil3strm, [x6, #0] // encoding: [0xcd,0x00,0x80,0xf9]
+// CHECK: prfm plil2strm, [x2{{(, #0)?}}] // encoding: [0x4b,0x00,0x80,0xf9]
+// CHECK: prfm plil3keep, [x5{{(, #0)?}}] // encoding: [0xac,0x00,0x80,0xf9]
+// CHECK: prfm plil3strm, [x6{{(, #0)?}}] // encoding: [0xcd,0x00,0x80,0xf9]
// CHECK: prfm pstl1keep, [sp, #8] // encoding: [0xf0,0x07,0x80,0xf9]
-// CHECK: prfm pstl1strm, [x3, #0] // encoding: [0x71,0x00,0x80,0xf9]
+// CHECK: prfm pstl1strm, [x3{{(, #0)?}}] // encoding: [0x71,0x00,0x80,0xf9]
// CHECK: prfm pstl2keep, [x5, #16] // encoding: [0xb2,0x08,0x80,0xf9]
-// CHECK: prfm pstl2strm, [x2, #0] // encoding: [0x53,0x00,0x80,0xf9]
-// CHECK: prfm pstl3keep, [x5, #0] // encoding: [0xb4,0x00,0x80,0xf9]
-// CHECK: prfm pstl3strm, [x6, #0] // encoding: [0xd5,0x00,0x80,0xf9]
-// CHECK: prfm #15, [sp, #0] // encoding: [0xef,0x03,0x80,0xf9]
+// CHECK: prfm pstl2strm, [x2{{(, #0)?}}] // encoding: [0x53,0x00,0x80,0xf9]
+// CHECK: prfm pstl3keep, [x5{{(, #0)?}}] // encoding: [0xb4,0x00,0x80,0xf9]
+// CHECK: prfm pstl3strm, [x6{{(, #0)?}}] // encoding: [0xd5,0x00,0x80,0xf9]
+// CHECK: prfm #15, [sp{{(, #0)?}}] // encoding: [0xef,0x03,0x80,0xf9]
//// Floating-point versions
@@ -2636,7 +2594,7 @@ _func:
// CHECK: ldr x17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0xf8]
// CHECK: ldr x18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0xf8]
// CHECK: str d19, [x21, wzr, sxtw #3] // encoding: [0xb3,0xda,0x3f,0xfc]
-// CHECK: prfm #6, [x0, x5, lsl #0] // encoding: [0x06,0x68,0xa5,0xf8]
+// CHECK: prfm #6, [x0, x5{{(, lsl #0)?}}] // encoding: [0x06,0x68,0xa5,0xf8]
ldr q3, [sp, x5]
ldr q9, [x27, x6, lsl #0]
@@ -3218,15 +3176,15 @@ _func:
ands wzr, w18, #0xcccccccc
ands w19, w20, #0x33333333
ands w21, w22, #0x99999999
-// CHECK: ands wzr, w18, #0xcccccccc // encoding: [0x5f,0xe6,0x02,0x72]
+// CHECK: {{ands wzr,|tst}} w18, #0xcccccccc // encoding: [0x5f,0xe6,0x02,0x72]
// CHECK: ands w19, w20, #0x33333333 // encoding: [0x93,0xe6,0x00,0x72]
// CHECK: ands w21, w22, #0x99999999 // encoding: [0xd5,0xe6,0x01,0x72]
// 2 bit replication width
tst w3, #0xaaaaaaaa
tst wzr, #0x55555555
-// CHECK: ands wzr, w3, #0xaaaaaaaa // encoding: [0x7f,0xf0,0x01,0x72]
-// CHECK: ands wzr, wzr, #0x55555555 // encoding: [0xff,0xf3,0x00,0x72]
+// CHECK: {{ands wzr,|tst}} w3, #0xaaaaaaaa // encoding: [0x7f,0xf0,0x01,0x72]
+// CHECK: {{ands wzr,|tst}} wzr, #0x55555555 // encoding: [0xff,0xf3,0x00,0x72]
// 64 bit replication-width
eor x3, x5, #0xffffffffc000000
@@ -3264,20 +3222,20 @@ _func:
ands xzr, x18, #0xcccccccccccccccc
ands x19, x20, #0x3333333333333333
ands x21, x22, #0x9999999999999999
-// CHECK: ands xzr, x18, #0xcccccccccccccccc // encoding: [0x5f,0xe6,0x02,0xf2]
+// CHECK: {{ands xzr,|tst}} x18, #0xcccccccccccccccc // encoding: [0x5f,0xe6,0x02,0xf2]
// CHECK: ands x19, x20, #0x3333333333333333 // encoding: [0x93,0xe6,0x00,0xf2]
// CHECK: ands x21, x22, #0x9999999999999999 // encoding: [0xd5,0xe6,0x01,0xf2]
// 2 bit replication-width
tst x3, #0xaaaaaaaaaaaaaaaa
tst xzr, #0x5555555555555555
-// CHECK: ands xzr, x3, #0xaaaaaaaaaaaaaaaa // encoding: [0x7f,0xf0,0x01,0xf2]
-// CHECK: ands xzr, xzr, #0x5555555555555555 // encoding: [0xff,0xf3,0x00,0xf2]
+// CHECK: {{ands xzr,|tst}} x3, #0xaaaaaaaaaaaaaaaa // encoding: [0x7f,0xf0,0x01,0xf2]
+// CHECK: {{ands xzr,|tst}} xzr, #0x5555555555555555 // encoding: [0xff,0xf3,0x00,0xf2]
mov w3, #0xf000f
mov x10, #0xaaaaaaaaaaaaaaaa
// CHECK: orr w3, wzr, #0xf000f // encoding: [0xe3,0x8f,0x00,0x32]
-// CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa // encoding: [0xea,0xf3,0x01,0xb2]
+// CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa // encoding: [0xea,0xf3,0x01,0xb2]
//------------------------------------------------------------------------------
// Logical (shifted register)
@@ -3353,75 +3311,83 @@ _func:
movz w1, #65535, lsl #0
movz w2, #0, lsl #16
movn w2, #1234, lsl #0
-// CHECK: movz w1, #65535 // encoding: [0xe1,0xff,0x9f,0x52]
+// CHECK: movz w1, #{{65535|0xffff}} // encoding: [0xe1,0xff,0x9f,0x52]
// CHECK: movz w2, #0, lsl #16 // encoding: [0x02,0x00,0xa0,0x52]
-// CHECK: movn w2, #1234 // encoding: [0x42,0x9a,0x80,0x12]
+// CHECK: movn w2, #{{1234|0x4d2}} // encoding: [0x42,0x9a,0x80,0x12]
movz x2, #1234, lsl #32
movk xzr, #4321, lsl #48
-// CHECK: movz x2, #1234, lsl #32 // encoding: [0x42,0x9a,0xc0,0xd2]
-// CHECK: movk xzr, #4321, lsl #48 // encoding: [0x3f,0x1c,0xe2,0xf2]
+// CHECK: movz x2, #{{1234|0x4d2}}, lsl #32 // encoding: [0x42,0x9a,0xc0,0xd2]
+// CHECK: movk xzr, #{{4321|0x10e1}}, lsl #48 // encoding: [0x3f,0x1c,0xe2,0xf2]
movz x2, #:abs_g0:sym
movk w3, #:abs_g0_nc:sym
-// CHECK: movz x2, #:abs_g0:sym // encoding: [0x02'A',A,0x80'A',0xd2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_a64_movw_uabs_g0
-// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0x03'A',A,0x80'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_a64_movw_uabs_g0_nc
+
+// CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw
+// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw
movz x4, #:abs_g1:sym
movk w5, #:abs_g1_nc:sym
-// CHECK: movz x4, #:abs_g1:sym // encoding: [0x04'A',A,0xa0'A',0xd2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_a64_movw_uabs_g1
-// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0x05'A',A,0xa0'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_a64_movw_uabs_g1_nc
+
+// CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw
+// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_aarch64_movw
movz x6, #:abs_g2:sym
movk x7, #:abs_g2_nc:sym
-// CHECK: movz x6, #:abs_g2:sym // encoding: [0x06'A',A,0xc0'A',0xd2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_a64_movw_uabs_g2
-// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0x07'A',A,0xc0'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_a64_movw_uabs_g2_nc
+
+// CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_aarch64_movw
+// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_aarch64_movw
movz x8, #:abs_g3:sym
movk x9, #:abs_g3:sym
-// CHECK: movz x8, #:abs_g3:sym // encoding: [0x08'A',A,0xe0'A',0xd2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
-// CHECK: movk x9, #:abs_g3:sym // encoding: [0x09'A',A,0xe0'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+
+// CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw
+// CHECK: movk x9, #:abs_g3:sym // encoding: [0bAAA01001,A,0b111AAAAA,0xf2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_aarch64_movw
+
movn x30, #:abs_g0_s:sym
movz x19, #:abs_g0_s:sym
movn w10, #:abs_g0_s:sym
movz w25, #:abs_g0_s:sym
-// CHECK: movn x30, #:abs_g0_s:sym // encoding: [0x1e'A',A,0x80'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
-// CHECK: movz x19, #:abs_g0_s:sym // encoding: [0x13'A',A,0x80'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
-// CHECK: movn w10, #:abs_g0_s:sym // encoding: [0x0a'A',A,0x80'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
-// CHECK: movz w25, #:abs_g0_s:sym // encoding: [0x19'A',A,0x80'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+
+// CHECK: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
+// CHECK: movz x19, #:abs_g0_s:sym // encoding: [0bAAA10011,A,0b100AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
+// CHECK: movn w10, #:abs_g0_s:sym // encoding: [0bAAA01010,A,0b100AAAAA,0x12]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
+// CHECK: movz w25, #:abs_g0_s:sym // encoding: [0bAAA11001,A,0b100AAAAA,0x52]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
movn x30, #:abs_g1_s:sym
movz x19, #:abs_g1_s:sym
movn w10, #:abs_g1_s:sym
movz w25, #:abs_g1_s:sym
-// CHECK: movn x30, #:abs_g1_s:sym // encoding: [0x1e'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
-// CHECK: movz x19, #:abs_g1_s:sym // encoding: [0x13'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
-// CHECK: movn w10, #:abs_g1_s:sym // encoding: [0x0a'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
-// CHECK: movz w25, #:abs_g1_s:sym // encoding: [0x19'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+
+// CHECK: movn x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw
+// CHECK: movz x19, #:abs_g1_s:sym // encoding: [0bAAA10011,A,0b101AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw
+// CHECK: movn w10, #:abs_g1_s:sym // encoding: [0bAAA01010,A,0b101AAAAA,0x12]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw
+// CHECK: movz w25, #:abs_g1_s:sym // encoding: [0bAAA11001,A,0b101AAAAA,0x52]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_aarch64_movw
movn x30, #:abs_g2_s:sym
movz x19, #:abs_g2_s:sym
-// CHECK: movn x30, #:abs_g2_s:sym // encoding: [0x1e'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
-// CHECK: movz x19, #:abs_g2_s:sym // encoding: [0x13'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+
+// CHECK: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_aarch64_movw
+// CHECK: movz x19, #:abs_g2_s:sym // encoding: [0bAAA10011,A,0b110AAAAA,0xd2]
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_aarch64_movw
//------------------------------------------------------------------------------
// PC-relative addressing
@@ -3429,15 +3395,16 @@ _func:
adr x2, loc
adr xzr, loc
- // CHECK: adr x2, loc // encoding: [0x02'A',A,A,0x10'A']
- // CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
- // CHECK: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A']
- // CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+
+// CHECK: adr x2, loc // encoding: [0x02'A',A,A,0x10'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adr_imm21
+// CHECK: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adr_imm21
adrp x29, loc
- // CHECK: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A']
- // CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel_page
+// CHECK: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_aarch64_pcrel_adrp_imm21
adrp x30, #4096
adr x20, #0
adr x9, #-1
@@ -3463,7 +3430,7 @@ _func:
hint #0
hint #127
// CHECK: nop // encoding: [0x1f,0x20,0x03,0xd5]
-// CHECK: hint #127 // encoding: [0xff,0x2f,0x03,0xd5]
+// CHECK: hint #{{127|0x7f}} // encoding: [0xff,0x2f,0x03,0xd5]
nop
yield
@@ -3560,14 +3527,14 @@ _func:
msr spsel, #0
msr daifset, #15
msr daifclr, #12
-// CHECK: msr spsel, #0 // encoding: [0xbf,0x40,0x00,0xd5]
-// CHECK: msr daifset, #15 // encoding: [0xdf,0x4f,0x03,0xd5]
-// CHECK: msr daifclr, #12 // encoding: [0xff,0x4c,0x03,0xd5]
+// CHECK: msr {{spsel|SPSEL}}, #0 // encoding: [0xbf,0x40,0x00,0xd5]
+// CHECK: msr {{daifset|DAIFSET}}, #15 // encoding: [0xdf,0x4f,0x03,0xd5]
+// CHECK: msr {{daifclr|DAIFCLR}}, #12 // encoding: [0xff,0x4c,0x03,0xd5]
sys #7, c5, c9, #7, x5
sys #0, c15, c15, #2
// CHECK: sys #7, c5, c9, #7, x5 // encoding: [0xe5,0x59,0x0f,0xd5]
-// CHECK: sys #0, c15, c15, #2, xzr // encoding: [0x5f,0xff,0x08,0xd5]
+// CHECK: sys #0, c15, c15, #2 // encoding: [0x5f,0xff,0x08,0xd5]
sysl x9, #7, c5, c9, #7
sysl x1, #0, c15, c15, #2
@@ -3942,260 +3909,260 @@ _func:
msr PMEVTYPER28_EL0, x12
msr PMEVTYPER29_EL0, x12
msr PMEVTYPER30_EL0, x12
-// CHECK: msr teecr32_el1, x12 // encoding: [0x0c,0x00,0x12,0xd5]
-// CHECK: msr osdtrrx_el1, x12 // encoding: [0x4c,0x00,0x10,0xd5]
-// CHECK: msr mdccint_el1, x12 // encoding: [0x0c,0x02,0x10,0xd5]
-// CHECK: msr mdscr_el1, x12 // encoding: [0x4c,0x02,0x10,0xd5]
-// CHECK: msr osdtrtx_el1, x12 // encoding: [0x4c,0x03,0x10,0xd5]
-// CHECK: msr dbgdtr_el0, x12 // encoding: [0x0c,0x04,0x13,0xd5]
-// CHECK: msr dbgdtrtx_el0, x12 // encoding: [0x0c,0x05,0x13,0xd5]
-// CHECK: msr oseccr_el1, x12 // encoding: [0x4c,0x06,0x10,0xd5]
-// CHECK: msr dbgvcr32_el2, x12 // encoding: [0x0c,0x07,0x14,0xd5]
-// CHECK: msr dbgbvr0_el1, x12 // encoding: [0x8c,0x00,0x10,0xd5]
-// CHECK: msr dbgbvr1_el1, x12 // encoding: [0x8c,0x01,0x10,0xd5]
-// CHECK: msr dbgbvr2_el1, x12 // encoding: [0x8c,0x02,0x10,0xd5]
-// CHECK: msr dbgbvr3_el1, x12 // encoding: [0x8c,0x03,0x10,0xd5]
-// CHECK: msr dbgbvr4_el1, x12 // encoding: [0x8c,0x04,0x10,0xd5]
-// CHECK: msr dbgbvr5_el1, x12 // encoding: [0x8c,0x05,0x10,0xd5]
-// CHECK: msr dbgbvr6_el1, x12 // encoding: [0x8c,0x06,0x10,0xd5]
-// CHECK: msr dbgbvr7_el1, x12 // encoding: [0x8c,0x07,0x10,0xd5]
-// CHECK: msr dbgbvr8_el1, x12 // encoding: [0x8c,0x08,0x10,0xd5]
-// CHECK: msr dbgbvr9_el1, x12 // encoding: [0x8c,0x09,0x10,0xd5]
-// CHECK: msr dbgbvr10_el1, x12 // encoding: [0x8c,0x0a,0x10,0xd5]
-// CHECK: msr dbgbvr11_el1, x12 // encoding: [0x8c,0x0b,0x10,0xd5]
-// CHECK: msr dbgbvr12_el1, x12 // encoding: [0x8c,0x0c,0x10,0xd5]
-// CHECK: msr dbgbvr13_el1, x12 // encoding: [0x8c,0x0d,0x10,0xd5]
-// CHECK: msr dbgbvr14_el1, x12 // encoding: [0x8c,0x0e,0x10,0xd5]
-// CHECK: msr dbgbvr15_el1, x12 // encoding: [0x8c,0x0f,0x10,0xd5]
-// CHECK: msr dbgbcr0_el1, x12 // encoding: [0xac,0x00,0x10,0xd5]
-// CHECK: msr dbgbcr1_el1, x12 // encoding: [0xac,0x01,0x10,0xd5]
-// CHECK: msr dbgbcr2_el1, x12 // encoding: [0xac,0x02,0x10,0xd5]
-// CHECK: msr dbgbcr3_el1, x12 // encoding: [0xac,0x03,0x10,0xd5]
-// CHECK: msr dbgbcr4_el1, x12 // encoding: [0xac,0x04,0x10,0xd5]
-// CHECK: msr dbgbcr5_el1, x12 // encoding: [0xac,0x05,0x10,0xd5]
-// CHECK: msr dbgbcr6_el1, x12 // encoding: [0xac,0x06,0x10,0xd5]
-// CHECK: msr dbgbcr7_el1, x12 // encoding: [0xac,0x07,0x10,0xd5]
-// CHECK: msr dbgbcr8_el1, x12 // encoding: [0xac,0x08,0x10,0xd5]
-// CHECK: msr dbgbcr9_el1, x12 // encoding: [0xac,0x09,0x10,0xd5]
-// CHECK: msr dbgbcr10_el1, x12 // encoding: [0xac,0x0a,0x10,0xd5]
-// CHECK: msr dbgbcr11_el1, x12 // encoding: [0xac,0x0b,0x10,0xd5]
-// CHECK: msr dbgbcr12_el1, x12 // encoding: [0xac,0x0c,0x10,0xd5]
-// CHECK: msr dbgbcr13_el1, x12 // encoding: [0xac,0x0d,0x10,0xd5]
-// CHECK: msr dbgbcr14_el1, x12 // encoding: [0xac,0x0e,0x10,0xd5]
-// CHECK: msr dbgbcr15_el1, x12 // encoding: [0xac,0x0f,0x10,0xd5]
-// CHECK: msr dbgwvr0_el1, x12 // encoding: [0xcc,0x00,0x10,0xd5]
-// CHECK: msr dbgwvr1_el1, x12 // encoding: [0xcc,0x01,0x10,0xd5]
-// CHECK: msr dbgwvr2_el1, x12 // encoding: [0xcc,0x02,0x10,0xd5]
-// CHECK: msr dbgwvr3_el1, x12 // encoding: [0xcc,0x03,0x10,0xd5]
-// CHECK: msr dbgwvr4_el1, x12 // encoding: [0xcc,0x04,0x10,0xd5]
-// CHECK: msr dbgwvr5_el1, x12 // encoding: [0xcc,0x05,0x10,0xd5]
-// CHECK: msr dbgwvr6_el1, x12 // encoding: [0xcc,0x06,0x10,0xd5]
-// CHECK: msr dbgwvr7_el1, x12 // encoding: [0xcc,0x07,0x10,0xd5]
-// CHECK: msr dbgwvr8_el1, x12 // encoding: [0xcc,0x08,0x10,0xd5]
-// CHECK: msr dbgwvr9_el1, x12 // encoding: [0xcc,0x09,0x10,0xd5]
-// CHECK: msr dbgwvr10_el1, x12 // encoding: [0xcc,0x0a,0x10,0xd5]
-// CHECK: msr dbgwvr11_el1, x12 // encoding: [0xcc,0x0b,0x10,0xd5]
-// CHECK: msr dbgwvr12_el1, x12 // encoding: [0xcc,0x0c,0x10,0xd5]
-// CHECK: msr dbgwvr13_el1, x12 // encoding: [0xcc,0x0d,0x10,0xd5]
-// CHECK: msr dbgwvr14_el1, x12 // encoding: [0xcc,0x0e,0x10,0xd5]
-// CHECK: msr dbgwvr15_el1, x12 // encoding: [0xcc,0x0f,0x10,0xd5]
-// CHECK: msr dbgwcr0_el1, x12 // encoding: [0xec,0x00,0x10,0xd5]
-// CHECK: msr dbgwcr1_el1, x12 // encoding: [0xec,0x01,0x10,0xd5]
-// CHECK: msr dbgwcr2_el1, x12 // encoding: [0xec,0x02,0x10,0xd5]
-// CHECK: msr dbgwcr3_el1, x12 // encoding: [0xec,0x03,0x10,0xd5]
-// CHECK: msr dbgwcr4_el1, x12 // encoding: [0xec,0x04,0x10,0xd5]
-// CHECK: msr dbgwcr5_el1, x12 // encoding: [0xec,0x05,0x10,0xd5]
-// CHECK: msr dbgwcr6_el1, x12 // encoding: [0xec,0x06,0x10,0xd5]
-// CHECK: msr dbgwcr7_el1, x12 // encoding: [0xec,0x07,0x10,0xd5]
-// CHECK: msr dbgwcr8_el1, x12 // encoding: [0xec,0x08,0x10,0xd5]
-// CHECK: msr dbgwcr9_el1, x12 // encoding: [0xec,0x09,0x10,0xd5]
-// CHECK: msr dbgwcr10_el1, x12 // encoding: [0xec,0x0a,0x10,0xd5]
-// CHECK: msr dbgwcr11_el1, x12 // encoding: [0xec,0x0b,0x10,0xd5]
-// CHECK: msr dbgwcr12_el1, x12 // encoding: [0xec,0x0c,0x10,0xd5]
-// CHECK: msr dbgwcr13_el1, x12 // encoding: [0xec,0x0d,0x10,0xd5]
-// CHECK: msr dbgwcr14_el1, x12 // encoding: [0xec,0x0e,0x10,0xd5]
-// CHECK: msr dbgwcr15_el1, x12 // encoding: [0xec,0x0f,0x10,0xd5]
-// CHECK: msr teehbr32_el1, x12 // encoding: [0x0c,0x10,0x12,0xd5]
-// CHECK: msr oslar_el1, x12 // encoding: [0x8c,0x10,0x10,0xd5]
-// CHECK: msr osdlr_el1, x12 // encoding: [0x8c,0x13,0x10,0xd5]
-// CHECK: msr dbgprcr_el1, x12 // encoding: [0x8c,0x14,0x10,0xd5]
-// CHECK: msr dbgclaimset_el1, x12 // encoding: [0xcc,0x78,0x10,0xd5]
-// CHECK: msr dbgclaimclr_el1, x12 // encoding: [0xcc,0x79,0x10,0xd5]
-// CHECK: msr csselr_el1, x12 // encoding: [0x0c,0x00,0x1a,0xd5]
-// CHECK: msr vpidr_el2, x12 // encoding: [0x0c,0x00,0x1c,0xd5]
-// CHECK: msr vmpidr_el2, x12 // encoding: [0xac,0x00,0x1c,0xd5]
-// CHECK: msr sctlr_el1, x12 // encoding: [0x0c,0x10,0x18,0xd5]
-// CHECK: msr sctlr_el2, x12 // encoding: [0x0c,0x10,0x1c,0xd5]
-// CHECK: msr sctlr_el3, x12 // encoding: [0x0c,0x10,0x1e,0xd5]
-// CHECK: msr actlr_el1, x12 // encoding: [0x2c,0x10,0x18,0xd5]
-// CHECK: msr actlr_el2, x12 // encoding: [0x2c,0x10,0x1c,0xd5]
-// CHECK: msr actlr_el3, x12 // encoding: [0x2c,0x10,0x1e,0xd5]
-// CHECK: msr cpacr_el1, x12 // encoding: [0x4c,0x10,0x18,0xd5]
-// CHECK: msr hcr_el2, x12 // encoding: [0x0c,0x11,0x1c,0xd5]
-// CHECK: msr scr_el3, x12 // encoding: [0x0c,0x11,0x1e,0xd5]
-// CHECK: msr mdcr_el2, x12 // encoding: [0x2c,0x11,0x1c,0xd5]
-// CHECK: msr sder32_el3, x12 // encoding: [0x2c,0x11,0x1e,0xd5]
-// CHECK: msr cptr_el2, x12 // encoding: [0x4c,0x11,0x1c,0xd5]
-// CHECK: msr cptr_el3, x12 // encoding: [0x4c,0x11,0x1e,0xd5]
-// CHECK: msr hstr_el2, x12 // encoding: [0x6c,0x11,0x1c,0xd5]
-// CHECK: msr hacr_el2, x12 // encoding: [0xec,0x11,0x1c,0xd5]
-// CHECK: msr mdcr_el3, x12 // encoding: [0x2c,0x13,0x1e,0xd5]
-// CHECK: msr ttbr0_el1, x12 // encoding: [0x0c,0x20,0x18,0xd5]
-// CHECK: msr ttbr0_el2, x12 // encoding: [0x0c,0x20,0x1c,0xd5]
-// CHECK: msr ttbr0_el3, x12 // encoding: [0x0c,0x20,0x1e,0xd5]
-// CHECK: msr ttbr1_el1, x12 // encoding: [0x2c,0x20,0x18,0xd5]
-// CHECK: msr tcr_el1, x12 // encoding: [0x4c,0x20,0x18,0xd5]
-// CHECK: msr tcr_el2, x12 // encoding: [0x4c,0x20,0x1c,0xd5]
-// CHECK: msr tcr_el3, x12 // encoding: [0x4c,0x20,0x1e,0xd5]
-// CHECK: msr vttbr_el2, x12 // encoding: [0x0c,0x21,0x1c,0xd5]
-// CHECK: msr vtcr_el2, x12 // encoding: [0x4c,0x21,0x1c,0xd5]
-// CHECK: msr dacr32_el2, x12 // encoding: [0x0c,0x30,0x1c,0xd5]
-// CHECK: msr spsr_el1, x12 // encoding: [0x0c,0x40,0x18,0xd5]
-// CHECK: msr spsr_el2, x12 // encoding: [0x0c,0x40,0x1c,0xd5]
-// CHECK: msr spsr_el3, x12 // encoding: [0x0c,0x40,0x1e,0xd5]
-// CHECK: msr elr_el1, x12 // encoding: [0x2c,0x40,0x18,0xd5]
-// CHECK: msr elr_el2, x12 // encoding: [0x2c,0x40,0x1c,0xd5]
-// CHECK: msr elr_el3, x12 // encoding: [0x2c,0x40,0x1e,0xd5]
-// CHECK: msr sp_el0, x12 // encoding: [0x0c,0x41,0x18,0xd5]
-// CHECK: msr sp_el1, x12 // encoding: [0x0c,0x41,0x1c,0xd5]
-// CHECK: msr sp_el2, x12 // encoding: [0x0c,0x41,0x1e,0xd5]
-// CHECK: msr spsel, x12 // encoding: [0x0c,0x42,0x18,0xd5]
-// CHECK: msr nzcv, x12 // encoding: [0x0c,0x42,0x1b,0xd5]
-// CHECK: msr daif, x12 // encoding: [0x2c,0x42,0x1b,0xd5]
-// CHECK: msr currentel, x12 // encoding: [0x4c,0x42,0x18,0xd5]
-// CHECK: msr spsr_irq, x12 // encoding: [0x0c,0x43,0x1c,0xd5]
-// CHECK: msr spsr_abt, x12 // encoding: [0x2c,0x43,0x1c,0xd5]
-// CHECK: msr spsr_und, x12 // encoding: [0x4c,0x43,0x1c,0xd5]
-// CHECK: msr spsr_fiq, x12 // encoding: [0x6c,0x43,0x1c,0xd5]
-// CHECK: msr fpcr, x12 // encoding: [0x0c,0x44,0x1b,0xd5]
-// CHECK: msr fpsr, x12 // encoding: [0x2c,0x44,0x1b,0xd5]
-// CHECK: msr dspsr_el0, x12 // encoding: [0x0c,0x45,0x1b,0xd5]
-// CHECK: msr dlr_el0, x12 // encoding: [0x2c,0x45,0x1b,0xd5]
-// CHECK: msr ifsr32_el2, x12 // encoding: [0x2c,0x50,0x1c,0xd5]
-// CHECK: msr afsr0_el1, x12 // encoding: [0x0c,0x51,0x18,0xd5]
-// CHECK: msr afsr0_el2, x12 // encoding: [0x0c,0x51,0x1c,0xd5]
-// CHECK: msr afsr0_el3, x12 // encoding: [0x0c,0x51,0x1e,0xd5]
-// CHECK: msr afsr1_el1, x12 // encoding: [0x2c,0x51,0x18,0xd5]
-// CHECK: msr afsr1_el2, x12 // encoding: [0x2c,0x51,0x1c,0xd5]
-// CHECK: msr afsr1_el3, x12 // encoding: [0x2c,0x51,0x1e,0xd5]
-// CHECK: msr esr_el1, x12 // encoding: [0x0c,0x52,0x18,0xd5]
-// CHECK: msr esr_el2, x12 // encoding: [0x0c,0x52,0x1c,0xd5]
-// CHECK: msr esr_el3, x12 // encoding: [0x0c,0x52,0x1e,0xd5]
-// CHECK: msr fpexc32_el2, x12 // encoding: [0x0c,0x53,0x1c,0xd5]
-// CHECK: msr far_el1, x12 // encoding: [0x0c,0x60,0x18,0xd5]
-// CHECK: msr far_el2, x12 // encoding: [0x0c,0x60,0x1c,0xd5]
-// CHECK: msr far_el3, x12 // encoding: [0x0c,0x60,0x1e,0xd5]
-// CHECK: msr hpfar_el2, x12 // encoding: [0x8c,0x60,0x1c,0xd5]
-// CHECK: msr par_el1, x12 // encoding: [0x0c,0x74,0x18,0xd5]
-// CHECK: msr pmcr_el0, x12 // encoding: [0x0c,0x9c,0x1b,0xd5]
-// CHECK: msr pmcntenset_el0, x12 // encoding: [0x2c,0x9c,0x1b,0xd5]
-// CHECK: msr pmcntenclr_el0, x12 // encoding: [0x4c,0x9c,0x1b,0xd5]
-// CHECK: msr pmovsclr_el0, x12 // encoding: [0x6c,0x9c,0x1b,0xd5]
-// CHECK: msr pmselr_el0, x12 // encoding: [0xac,0x9c,0x1b,0xd5]
-// CHECK: msr pmccntr_el0, x12 // encoding: [0x0c,0x9d,0x1b,0xd5]
-// CHECK: msr pmxevtyper_el0, x12 // encoding: [0x2c,0x9d,0x1b,0xd5]
-// CHECK: msr pmxevcntr_el0, x12 // encoding: [0x4c,0x9d,0x1b,0xd5]
-// CHECK: msr pmuserenr_el0, x12 // encoding: [0x0c,0x9e,0x1b,0xd5]
-// CHECK: msr pmintenset_el1, x12 // encoding: [0x2c,0x9e,0x18,0xd5]
-// CHECK: msr pmintenclr_el1, x12 // encoding: [0x4c,0x9e,0x18,0xd5]
-// CHECK: msr pmovsset_el0, x12 // encoding: [0x6c,0x9e,0x1b,0xd5]
-// CHECK: msr mair_el1, x12 // encoding: [0x0c,0xa2,0x18,0xd5]
-// CHECK: msr mair_el2, x12 // encoding: [0x0c,0xa2,0x1c,0xd5]
-// CHECK: msr mair_el3, x12 // encoding: [0x0c,0xa2,0x1e,0xd5]
-// CHECK: msr amair_el1, x12 // encoding: [0x0c,0xa3,0x18,0xd5]
-// CHECK: msr amair_el2, x12 // encoding: [0x0c,0xa3,0x1c,0xd5]
-// CHECK: msr amair_el3, x12 // encoding: [0x0c,0xa3,0x1e,0xd5]
-// CHECK: msr vbar_el1, x12 // encoding: [0x0c,0xc0,0x18,0xd5]
-// CHECK: msr vbar_el2, x12 // encoding: [0x0c,0xc0,0x1c,0xd5]
-// CHECK: msr vbar_el3, x12 // encoding: [0x0c,0xc0,0x1e,0xd5]
-// CHECK: msr rmr_el1, x12 // encoding: [0x4c,0xc0,0x18,0xd5]
-// CHECK: msr rmr_el2, x12 // encoding: [0x4c,0xc0,0x1c,0xd5]
-// CHECK: msr rmr_el3, x12 // encoding: [0x4c,0xc0,0x1e,0xd5]
-// CHECK: msr contextidr_el1, x12 // encoding: [0x2c,0xd0,0x18,0xd5]
-// CHECK: msr tpidr_el0, x12 // encoding: [0x4c,0xd0,0x1b,0xd5]
-// CHECK: msr tpidr_el2, x12 // encoding: [0x4c,0xd0,0x1c,0xd5]
-// CHECK: msr tpidr_el3, x12 // encoding: [0x4c,0xd0,0x1e,0xd5]
-// CHECK: msr tpidrro_el0, x12 // encoding: [0x6c,0xd0,0x1b,0xd5]
-// CHECK: msr tpidr_el1, x12 // encoding: [0x8c,0xd0,0x18,0xd5]
-// CHECK: msr cntfrq_el0, x12 // encoding: [0x0c,0xe0,0x1b,0xd5]
-// CHECK: msr cntvoff_el2, x12 // encoding: [0x6c,0xe0,0x1c,0xd5]
-// CHECK: msr cntkctl_el1, x12 // encoding: [0x0c,0xe1,0x18,0xd5]
-// CHECK: msr cnthctl_el2, x12 // encoding: [0x0c,0xe1,0x1c,0xd5]
-// CHECK: msr cntp_tval_el0, x12 // encoding: [0x0c,0xe2,0x1b,0xd5]
-// CHECK: msr cnthp_tval_el2, x12 // encoding: [0x0c,0xe2,0x1c,0xd5]
-// CHECK: msr cntps_tval_el1, x12 // encoding: [0x0c,0xe2,0x1f,0xd5]
-// CHECK: msr cntp_ctl_el0, x12 // encoding: [0x2c,0xe2,0x1b,0xd5]
-// CHECK: msr cnthp_ctl_el2, x12 // encoding: [0x2c,0xe2,0x1c,0xd5]
-// CHECK: msr cntps_ctl_el1, x12 // encoding: [0x2c,0xe2,0x1f,0xd5]
-// CHECK: msr cntp_cval_el0, x12 // encoding: [0x4c,0xe2,0x1b,0xd5]
-// CHECK: msr cnthp_cval_el2, x12 // encoding: [0x4c,0xe2,0x1c,0xd5]
-// CHECK: msr cntps_cval_el1, x12 // encoding: [0x4c,0xe2,0x1f,0xd5]
-// CHECK: msr cntv_tval_el0, x12 // encoding: [0x0c,0xe3,0x1b,0xd5]
-// CHECK: msr cntv_ctl_el0, x12 // encoding: [0x2c,0xe3,0x1b,0xd5]
-// CHECK: msr cntv_cval_el0, x12 // encoding: [0x4c,0xe3,0x1b,0xd5]
-// CHECK: msr pmevcntr0_el0, x12 // encoding: [0x0c,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr1_el0, x12 // encoding: [0x2c,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr2_el0, x12 // encoding: [0x4c,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr3_el0, x12 // encoding: [0x6c,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr4_el0, x12 // encoding: [0x8c,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr5_el0, x12 // encoding: [0xac,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr6_el0, x12 // encoding: [0xcc,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr7_el0, x12 // encoding: [0xec,0xe8,0x1b,0xd5]
-// CHECK: msr pmevcntr8_el0, x12 // encoding: [0x0c,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr9_el0, x12 // encoding: [0x2c,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr10_el0, x12 // encoding: [0x4c,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr11_el0, x12 // encoding: [0x6c,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr12_el0, x12 // encoding: [0x8c,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr13_el0, x12 // encoding: [0xac,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr14_el0, x12 // encoding: [0xcc,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr15_el0, x12 // encoding: [0xec,0xe9,0x1b,0xd5]
-// CHECK: msr pmevcntr16_el0, x12 // encoding: [0x0c,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr17_el0, x12 // encoding: [0x2c,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr18_el0, x12 // encoding: [0x4c,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr19_el0, x12 // encoding: [0x6c,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr20_el0, x12 // encoding: [0x8c,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr21_el0, x12 // encoding: [0xac,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr22_el0, x12 // encoding: [0xcc,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr23_el0, x12 // encoding: [0xec,0xea,0x1b,0xd5]
-// CHECK: msr pmevcntr24_el0, x12 // encoding: [0x0c,0xeb,0x1b,0xd5]
-// CHECK: msr pmevcntr25_el0, x12 // encoding: [0x2c,0xeb,0x1b,0xd5]
-// CHECK: msr pmevcntr26_el0, x12 // encoding: [0x4c,0xeb,0x1b,0xd5]
-// CHECK: msr pmevcntr27_el0, x12 // encoding: [0x6c,0xeb,0x1b,0xd5]
-// CHECK: msr pmevcntr28_el0, x12 // encoding: [0x8c,0xeb,0x1b,0xd5]
-// CHECK: msr pmevcntr29_el0, x12 // encoding: [0xac,0xeb,0x1b,0xd5]
-// CHECK: msr pmevcntr30_el0, x12 // encoding: [0xcc,0xeb,0x1b,0xd5]
-// CHECK: msr pmccfiltr_el0, x12 // encoding: [0xec,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper0_el0, x12 // encoding: [0x0c,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper1_el0, x12 // encoding: [0x2c,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper2_el0, x12 // encoding: [0x4c,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper3_el0, x12 // encoding: [0x6c,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper4_el0, x12 // encoding: [0x8c,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper5_el0, x12 // encoding: [0xac,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper6_el0, x12 // encoding: [0xcc,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper7_el0, x12 // encoding: [0xec,0xec,0x1b,0xd5]
-// CHECK: msr pmevtyper8_el0, x12 // encoding: [0x0c,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper9_el0, x12 // encoding: [0x2c,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper10_el0, x12 // encoding: [0x4c,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper11_el0, x12 // encoding: [0x6c,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper12_el0, x12 // encoding: [0x8c,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper13_el0, x12 // encoding: [0xac,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper14_el0, x12 // encoding: [0xcc,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper15_el0, x12 // encoding: [0xec,0xed,0x1b,0xd5]
-// CHECK: msr pmevtyper16_el0, x12 // encoding: [0x0c,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper17_el0, x12 // encoding: [0x2c,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper18_el0, x12 // encoding: [0x4c,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper19_el0, x12 // encoding: [0x6c,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper20_el0, x12 // encoding: [0x8c,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper21_el0, x12 // encoding: [0xac,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper22_el0, x12 // encoding: [0xcc,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper23_el0, x12 // encoding: [0xec,0xee,0x1b,0xd5]
-// CHECK: msr pmevtyper24_el0, x12 // encoding: [0x0c,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper25_el0, x12 // encoding: [0x2c,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper26_el0, x12 // encoding: [0x4c,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper27_el0, x12 // encoding: [0x6c,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper28_el0, x12 // encoding: [0x8c,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper29_el0, x12 // encoding: [0xac,0xef,0x1b,0xd5]
-// CHECK: msr pmevtyper30_el0, x12 // encoding: [0xcc,0xef,0x1b,0xd5]
+// CHECK: msr {{teecr32_el1|TEECR32_EL1}}, x12 // encoding: [0x0c,0x00,0x12,0xd5]
+// CHECK: msr {{osdtrrx_el1|OSDTRRX_EL1}}, x12 // encoding: [0x4c,0x00,0x10,0xd5]
+// CHECK: msr {{mdccint_el1|MDCCINT_EL1}}, x12 // encoding: [0x0c,0x02,0x10,0xd5]
+// CHECK: msr {{mdscr_el1|MDSCR_EL1}}, x12 // encoding: [0x4c,0x02,0x10,0xd5]
+// CHECK: msr {{osdtrtx_el1|OSDTRTX_EL1}}, x12 // encoding: [0x4c,0x03,0x10,0xd5]
+// CHECK: msr {{dbgdtr_el0|DBGDTR_EL0}}, x12 // encoding: [0x0c,0x04,0x13,0xd5]
+// CHECK: msr {{dbgdtrtx_el0|DBGDTRTX_EL0}}, x12 // encoding: [0x0c,0x05,0x13,0xd5]
+// CHECK: msr {{oseccr_el1|OSECCR_EL1}}, x12 // encoding: [0x4c,0x06,0x10,0xd5]
+// CHECK: msr {{dbgvcr32_el2|DBGVCR32_EL2}}, x12 // encoding: [0x0c,0x07,0x14,0xd5]
+// CHECK: msr {{dbgbvr0_el1|DBGBVR0_EL1}}, x12 // encoding: [0x8c,0x00,0x10,0xd5]
+// CHECK: msr {{dbgbvr1_el1|DBGBVR1_EL1}}, x12 // encoding: [0x8c,0x01,0x10,0xd5]
+// CHECK: msr {{dbgbvr2_el1|DBGBVR2_EL1}}, x12 // encoding: [0x8c,0x02,0x10,0xd5]
+// CHECK: msr {{dbgbvr3_el1|DBGBVR3_EL1}}, x12 // encoding: [0x8c,0x03,0x10,0xd5]
+// CHECK: msr {{dbgbvr4_el1|DBGBVR4_EL1}}, x12 // encoding: [0x8c,0x04,0x10,0xd5]
+// CHECK: msr {{dbgbvr5_el1|DBGBVR5_EL1}}, x12 // encoding: [0x8c,0x05,0x10,0xd5]
+// CHECK: msr {{dbgbvr6_el1|DBGBVR6_EL1}}, x12 // encoding: [0x8c,0x06,0x10,0xd5]
+// CHECK: msr {{dbgbvr7_el1|DBGBVR7_EL1}}, x12 // encoding: [0x8c,0x07,0x10,0xd5]
+// CHECK: msr {{dbgbvr8_el1|DBGBVR8_EL1}}, x12 // encoding: [0x8c,0x08,0x10,0xd5]
+// CHECK: msr {{dbgbvr9_el1|DBGBVR9_EL1}}, x12 // encoding: [0x8c,0x09,0x10,0xd5]
+// CHECK: msr {{dbgbvr10_el1|DBGBVR10_EL1}}, x12 // encoding: [0x8c,0x0a,0x10,0xd5]
+// CHECK: msr {{dbgbvr11_el1|DBGBVR11_EL1}}, x12 // encoding: [0x8c,0x0b,0x10,0xd5]
+// CHECK: msr {{dbgbvr12_el1|DBGBVR12_EL1}}, x12 // encoding: [0x8c,0x0c,0x10,0xd5]
+// CHECK: msr {{dbgbvr13_el1|DBGBVR13_EL1}}, x12 // encoding: [0x8c,0x0d,0x10,0xd5]
+// CHECK: msr {{dbgbvr14_el1|DBGBVR14_EL1}}, x12 // encoding: [0x8c,0x0e,0x10,0xd5]
+// CHECK: msr {{dbgbvr15_el1|DBGBVR15_EL1}}, x12 // encoding: [0x8c,0x0f,0x10,0xd5]
+// CHECK: msr {{dbgbcr0_el1|DBGBCR0_EL1}}, x12 // encoding: [0xac,0x00,0x10,0xd5]
+// CHECK: msr {{dbgbcr1_el1|DBGBCR1_EL1}}, x12 // encoding: [0xac,0x01,0x10,0xd5]
+// CHECK: msr {{dbgbcr2_el1|DBGBCR2_EL1}}, x12 // encoding: [0xac,0x02,0x10,0xd5]
+// CHECK: msr {{dbgbcr3_el1|DBGBCR3_EL1}}, x12 // encoding: [0xac,0x03,0x10,0xd5]
+// CHECK: msr {{dbgbcr4_el1|DBGBCR4_EL1}}, x12 // encoding: [0xac,0x04,0x10,0xd5]
+// CHECK: msr {{dbgbcr5_el1|DBGBCR5_EL1}}, x12 // encoding: [0xac,0x05,0x10,0xd5]
+// CHECK: msr {{dbgbcr6_el1|DBGBCR6_EL1}}, x12 // encoding: [0xac,0x06,0x10,0xd5]
+// CHECK: msr {{dbgbcr7_el1|DBGBCR7_EL1}}, x12 // encoding: [0xac,0x07,0x10,0xd5]
+// CHECK: msr {{dbgbcr8_el1|DBGBCR8_EL1}}, x12 // encoding: [0xac,0x08,0x10,0xd5]
+// CHECK: msr {{dbgbcr9_el1|DBGBCR9_EL1}}, x12 // encoding: [0xac,0x09,0x10,0xd5]
+// CHECK: msr {{dbgbcr10_el1|DBGBCR10_EL1}}, x12 // encoding: [0xac,0x0a,0x10,0xd5]
+// CHECK: msr {{dbgbcr11_el1|DBGBCR11_EL1}}, x12 // encoding: [0xac,0x0b,0x10,0xd5]
+// CHECK: msr {{dbgbcr12_el1|DBGBCR12_EL1}}, x12 // encoding: [0xac,0x0c,0x10,0xd5]
+// CHECK: msr {{dbgbcr13_el1|DBGBCR13_EL1}}, x12 // encoding: [0xac,0x0d,0x10,0xd5]
+// CHECK: msr {{dbgbcr14_el1|DBGBCR14_EL1}}, x12 // encoding: [0xac,0x0e,0x10,0xd5]
+// CHECK: msr {{dbgbcr15_el1|DBGBCR15_EL1}}, x12 // encoding: [0xac,0x0f,0x10,0xd5]
+// CHECK: msr {{dbgwvr0_el1|DBGWVR0_EL1}}, x12 // encoding: [0xcc,0x00,0x10,0xd5]
+// CHECK: msr {{dbgwvr1_el1|DBGWVR1_EL1}}, x12 // encoding: [0xcc,0x01,0x10,0xd5]
+// CHECK: msr {{dbgwvr2_el1|DBGWVR2_EL1}}, x12 // encoding: [0xcc,0x02,0x10,0xd5]
+// CHECK: msr {{dbgwvr3_el1|DBGWVR3_EL1}}, x12 // encoding: [0xcc,0x03,0x10,0xd5]
+// CHECK: msr {{dbgwvr4_el1|DBGWVR4_EL1}}, x12 // encoding: [0xcc,0x04,0x10,0xd5]
+// CHECK: msr {{dbgwvr5_el1|DBGWVR5_EL1}}, x12 // encoding: [0xcc,0x05,0x10,0xd5]
+// CHECK: msr {{dbgwvr6_el1|DBGWVR6_EL1}}, x12 // encoding: [0xcc,0x06,0x10,0xd5]
+// CHECK: msr {{dbgwvr7_el1|DBGWVR7_EL1}}, x12 // encoding: [0xcc,0x07,0x10,0xd5]
+// CHECK: msr {{dbgwvr8_el1|DBGWVR8_EL1}}, x12 // encoding: [0xcc,0x08,0x10,0xd5]
+// CHECK: msr {{dbgwvr9_el1|DBGWVR9_EL1}}, x12 // encoding: [0xcc,0x09,0x10,0xd5]
+// CHECK: msr {{dbgwvr10_el1|DBGWVR10_EL1}}, x12 // encoding: [0xcc,0x0a,0x10,0xd5]
+// CHECK: msr {{dbgwvr11_el1|DBGWVR11_EL1}}, x12 // encoding: [0xcc,0x0b,0x10,0xd5]
+// CHECK: msr {{dbgwvr12_el1|DBGWVR12_EL1}}, x12 // encoding: [0xcc,0x0c,0x10,0xd5]
+// CHECK: msr {{dbgwvr13_el1|DBGWVR13_EL1}}, x12 // encoding: [0xcc,0x0d,0x10,0xd5]
+// CHECK: msr {{dbgwvr14_el1|DBGWVR14_EL1}}, x12 // encoding: [0xcc,0x0e,0x10,0xd5]
+// CHECK: msr {{dbgwvr15_el1|DBGWVR15_EL1}}, x12 // encoding: [0xcc,0x0f,0x10,0xd5]
+// CHECK: msr {{dbgwcr0_el1|DBGWCR0_EL1}}, x12 // encoding: [0xec,0x00,0x10,0xd5]
+// CHECK: msr {{dbgwcr1_el1|DBGWCR1_EL1}}, x12 // encoding: [0xec,0x01,0x10,0xd5]
+// CHECK: msr {{dbgwcr2_el1|DBGWCR2_EL1}}, x12 // encoding: [0xec,0x02,0x10,0xd5]
+// CHECK: msr {{dbgwcr3_el1|DBGWCR3_EL1}}, x12 // encoding: [0xec,0x03,0x10,0xd5]
+// CHECK: msr {{dbgwcr4_el1|DBGWCR4_EL1}}, x12 // encoding: [0xec,0x04,0x10,0xd5]
+// CHECK: msr {{dbgwcr5_el1|DBGWCR5_EL1}}, x12 // encoding: [0xec,0x05,0x10,0xd5]
+// CHECK: msr {{dbgwcr6_el1|DBGWCR6_EL1}}, x12 // encoding: [0xec,0x06,0x10,0xd5]
+// CHECK: msr {{dbgwcr7_el1|DBGWCR7_EL1}}, x12 // encoding: [0xec,0x07,0x10,0xd5]
+// CHECK: msr {{dbgwcr8_el1|DBGWCR8_EL1}}, x12 // encoding: [0xec,0x08,0x10,0xd5]
+// CHECK: msr {{dbgwcr9_el1|DBGWCR9_EL1}}, x12 // encoding: [0xec,0x09,0x10,0xd5]
+// CHECK: msr {{dbgwcr10_el1|DBGWCR10_EL1}}, x12 // encoding: [0xec,0x0a,0x10,0xd5]
+// CHECK: msr {{dbgwcr11_el1|DBGWCR11_EL1}}, x12 // encoding: [0xec,0x0b,0x10,0xd5]
+// CHECK: msr {{dbgwcr12_el1|DBGWCR12_EL1}}, x12 // encoding: [0xec,0x0c,0x10,0xd5]
+// CHECK: msr {{dbgwcr13_el1|DBGWCR13_EL1}}, x12 // encoding: [0xec,0x0d,0x10,0xd5]
+// CHECK: msr {{dbgwcr14_el1|DBGWCR14_EL1}}, x12 // encoding: [0xec,0x0e,0x10,0xd5]
+// CHECK: msr {{dbgwcr15_el1|DBGWCR15_EL1}}, x12 // encoding: [0xec,0x0f,0x10,0xd5]
+// CHECK: msr {{teehbr32_el1|TEEHBR32_EL1}}, x12 // encoding: [0x0c,0x10,0x12,0xd5]
+// CHECK: msr {{oslar_el1|OSLAR_EL1}}, x12 // encoding: [0x8c,0x10,0x10,0xd5]
+// CHECK: msr {{osdlr_el1|OSDLR_EL1}}, x12 // encoding: [0x8c,0x13,0x10,0xd5]
+// CHECK: msr {{dbgprcr_el1|DBGPRCR_EL1}}, x12 // encoding: [0x8c,0x14,0x10,0xd5]
+// CHECK: msr {{dbgclaimset_el1|DBGCLAIMSET_EL1}}, x12 // encoding: [0xcc,0x78,0x10,0xd5]
+// CHECK: msr {{dbgclaimclr_el1|DBGCLAIMCLR_EL1}}, x12 // encoding: [0xcc,0x79,0x10,0xd5]
+// CHECK: msr {{csselr_el1|CSSELR_EL1}}, x12 // encoding: [0x0c,0x00,0x1a,0xd5]
+// CHECK: msr {{vpidr_el2|VPIDR_EL2}}, x12 // encoding: [0x0c,0x00,0x1c,0xd5]
+// CHECK: msr {{vmpidr_el2|VMPIDR_EL2}}, x12 // encoding: [0xac,0x00,0x1c,0xd5]
+// CHECK: msr {{sctlr_el1|SCTLR_EL1}}, x12 // encoding: [0x0c,0x10,0x18,0xd5]
+// CHECK: msr {{sctlr_el2|SCTLR_EL2}}, x12 // encoding: [0x0c,0x10,0x1c,0xd5]
+// CHECK: msr {{sctlr_el3|SCTLR_EL3}}, x12 // encoding: [0x0c,0x10,0x1e,0xd5]
+// CHECK: msr {{actlr_el1|ACTLR_EL1}}, x12 // encoding: [0x2c,0x10,0x18,0xd5]
+// CHECK: msr {{actlr_el2|ACTLR_EL2}}, x12 // encoding: [0x2c,0x10,0x1c,0xd5]
+// CHECK: msr {{actlr_el3|ACTLR_EL3}}, x12 // encoding: [0x2c,0x10,0x1e,0xd5]
+// CHECK: msr {{cpacr_el1|CPACR_EL1}}, x12 // encoding: [0x4c,0x10,0x18,0xd5]
+// CHECK: msr {{hcr_el2|HCR_EL2}}, x12 // encoding: [0x0c,0x11,0x1c,0xd5]
+// CHECK: msr {{scr_el3|SCR_EL3}}, x12 // encoding: [0x0c,0x11,0x1e,0xd5]
+// CHECK: msr {{mdcr_el2|MDCR_EL2}}, x12 // encoding: [0x2c,0x11,0x1c,0xd5]
+// CHECK: msr {{sder32_el3|SDER32_EL3}}, x12 // encoding: [0x2c,0x11,0x1e,0xd5]
+// CHECK: msr {{cptr_el2|CPTR_EL2}}, x12 // encoding: [0x4c,0x11,0x1c,0xd5]
+// CHECK: msr {{cptr_el3|CPTR_EL3}}, x12 // encoding: [0x4c,0x11,0x1e,0xd5]
+// CHECK: msr {{hstr_el2|HSTR_EL2}}, x12 // encoding: [0x6c,0x11,0x1c,0xd5]
+// CHECK: msr {{hacr_el2|HACR_EL2}}, x12 // encoding: [0xec,0x11,0x1c,0xd5]
+// CHECK: msr {{mdcr_el3|MDCR_EL3}}, x12 // encoding: [0x2c,0x13,0x1e,0xd5]
+// CHECK: msr {{ttbr0_el1|TTBR0_EL1}}, x12 // encoding: [0x0c,0x20,0x18,0xd5]
+// CHECK: msr {{ttbr0_el2|TTBR0_EL2}}, x12 // encoding: [0x0c,0x20,0x1c,0xd5]
+// CHECK: msr {{ttbr0_el3|TTBR0_EL3}}, x12 // encoding: [0x0c,0x20,0x1e,0xd5]
+// CHECK: msr {{ttbr1_el1|TTBR1_EL1}}, x12 // encoding: [0x2c,0x20,0x18,0xd5]
+// CHECK: msr {{tcr_el1|TCR_EL1}}, x12 // encoding: [0x4c,0x20,0x18,0xd5]
+// CHECK: msr {{tcr_el2|TCR_EL2}}, x12 // encoding: [0x4c,0x20,0x1c,0xd5]
+// CHECK: msr {{tcr_el3|TCR_EL3}}, x12 // encoding: [0x4c,0x20,0x1e,0xd5]
+// CHECK: msr {{vttbr_el2|VTTBR_EL2}}, x12 // encoding: [0x0c,0x21,0x1c,0xd5]
+// CHECK: msr {{vtcr_el2|VTCR_EL2}}, x12 // encoding: [0x4c,0x21,0x1c,0xd5]
+// CHECK: msr {{dacr32_el2|DACR32_EL2}}, x12 // encoding: [0x0c,0x30,0x1c,0xd5]
+// CHECK: msr {{spsr_el1|SPSR_EL1}}, x12 // encoding: [0x0c,0x40,0x18,0xd5]
+// CHECK: msr {{spsr_el2|SPSR_EL2}}, x12 // encoding: [0x0c,0x40,0x1c,0xd5]
+// CHECK: msr {{spsr_el3|SPSR_EL3}}, x12 // encoding: [0x0c,0x40,0x1e,0xd5]
+// CHECK: msr {{elr_el1|ELR_EL1}}, x12 // encoding: [0x2c,0x40,0x18,0xd5]
+// CHECK: msr {{elr_el2|ELR_EL2}}, x12 // encoding: [0x2c,0x40,0x1c,0xd5]
+// CHECK: msr {{elr_el3|ELR_EL3}}, x12 // encoding: [0x2c,0x40,0x1e,0xd5]
+// CHECK: msr {{sp_el0|SP_EL0}}, x12 // encoding: [0x0c,0x41,0x18,0xd5]
+// CHECK: msr {{sp_el1|SP_EL1}}, x12 // encoding: [0x0c,0x41,0x1c,0xd5]
+// CHECK: msr {{sp_el2|SP_EL2}}, x12 // encoding: [0x0c,0x41,0x1e,0xd5]
+// CHECK: msr {{spsel|SPSEL}}, x12 // encoding: [0x0c,0x42,0x18,0xd5]
+// CHECK: msr {{nzcv|NZCV}}, x12 // encoding: [0x0c,0x42,0x1b,0xd5]
+// CHECK: msr {{daif|DAIF}}, x12 // encoding: [0x2c,0x42,0x1b,0xd5]
+// CHECK: msr {{currentel|CURRENTEL}}, x12 // encoding: [0x4c,0x42,0x18,0xd5]
+// CHECK: msr {{spsr_irq|SPSR_IRQ}}, x12 // encoding: [0x0c,0x43,0x1c,0xd5]
+// CHECK: msr {{spsr_abt|SPSR_ABT}}, x12 // encoding: [0x2c,0x43,0x1c,0xd5]
+// CHECK: msr {{spsr_und|SPSR_UND}}, x12 // encoding: [0x4c,0x43,0x1c,0xd5]
+// CHECK: msr {{spsr_fiq|SPSR_FIQ}}, x12 // encoding: [0x6c,0x43,0x1c,0xd5]
+// CHECK: msr {{fpcr|FPCR}}, x12 // encoding: [0x0c,0x44,0x1b,0xd5]
+// CHECK: msr {{fpsr|FPSR}}, x12 // encoding: [0x2c,0x44,0x1b,0xd5]
+// CHECK: msr {{dspsr_el0|DSPSR_EL0}}, x12 // encoding: [0x0c,0x45,0x1b,0xd5]
+// CHECK: msr {{dlr_el0|DLR_EL0}}, x12 // encoding: [0x2c,0x45,0x1b,0xd5]
+// CHECK: msr {{ifsr32_el2|IFSR32_EL2}}, x12 // encoding: [0x2c,0x50,0x1c,0xd5]
+// CHECK: msr {{afsr0_el1|AFSR0_EL1}}, x12 // encoding: [0x0c,0x51,0x18,0xd5]
+// CHECK: msr {{afsr0_el2|AFSR0_EL2}}, x12 // encoding: [0x0c,0x51,0x1c,0xd5]
+// CHECK: msr {{afsr0_el3|AFSR0_EL3}}, x12 // encoding: [0x0c,0x51,0x1e,0xd5]
+// CHECK: msr {{afsr1_el1|AFSR1_EL1}}, x12 // encoding: [0x2c,0x51,0x18,0xd5]
+// CHECK: msr {{afsr1_el2|AFSR1_EL2}}, x12 // encoding: [0x2c,0x51,0x1c,0xd5]
+// CHECK: msr {{afsr1_el3|AFSR1_EL3}}, x12 // encoding: [0x2c,0x51,0x1e,0xd5]
+// CHECK: msr {{esr_el1|ESR_EL1}}, x12 // encoding: [0x0c,0x52,0x18,0xd5]
+// CHECK: msr {{esr_el2|ESR_EL2}}, x12 // encoding: [0x0c,0x52,0x1c,0xd5]
+// CHECK: msr {{esr_el3|ESR_EL3}}, x12 // encoding: [0x0c,0x52,0x1e,0xd5]
+// CHECK: msr {{fpexc32_el2|FPEXC32_EL2}}, x12 // encoding: [0x0c,0x53,0x1c,0xd5]
+// CHECK: msr {{far_el1|FAR_EL1}}, x12 // encoding: [0x0c,0x60,0x18,0xd5]
+// CHECK: msr {{far_el2|FAR_EL2}}, x12 // encoding: [0x0c,0x60,0x1c,0xd5]
+// CHECK: msr {{far_el3|FAR_EL3}}, x12 // encoding: [0x0c,0x60,0x1e,0xd5]
+// CHECK: msr {{hpfar_el2|HPFAR_EL2}}, x12 // encoding: [0x8c,0x60,0x1c,0xd5]
+// CHECK: msr {{par_el1|PAR_EL1}}, x12 // encoding: [0x0c,0x74,0x18,0xd5]
+// CHECK: msr {{pmcr_el0|PMCR_EL0}}, x12 // encoding: [0x0c,0x9c,0x1b,0xd5]
+// CHECK: msr {{pmcntenset_el0|PMCNTENSET_EL0}}, x12 // encoding: [0x2c,0x9c,0x1b,0xd5]
+// CHECK: msr {{pmcntenclr_el0|PMCNTENCLR_EL0}}, x12 // encoding: [0x4c,0x9c,0x1b,0xd5]
+// CHECK: msr {{pmovsclr_el0|PMOVSCLR_EL0}}, x12 // encoding: [0x6c,0x9c,0x1b,0xd5]
+// CHECK: msr {{pmselr_el0|PMSELR_EL0}}, x12 // encoding: [0xac,0x9c,0x1b,0xd5]
+// CHECK: msr {{pmccntr_el0|PMCCNTR_EL0}}, x12 // encoding: [0x0c,0x9d,0x1b,0xd5]
+// CHECK: msr {{pmxevtyper_el0|PMXEVTYPER_EL0}}, x12 // encoding: [0x2c,0x9d,0x1b,0xd5]
+// CHECK: msr {{pmxevcntr_el0|PMXEVCNTR_EL0}}, x12 // encoding: [0x4c,0x9d,0x1b,0xd5]
+// CHECK: msr {{pmuserenr_el0|PMUSERENR_EL0}}, x12 // encoding: [0x0c,0x9e,0x1b,0xd5]
+// CHECK: msr {{pmintenset_el1|PMINTENSET_EL1}}, x12 // encoding: [0x2c,0x9e,0x18,0xd5]
+// CHECK: msr {{pmintenclr_el1|PMINTENCLR_EL1}}, x12 // encoding: [0x4c,0x9e,0x18,0xd5]
+// CHECK: msr {{pmovsset_el0|PMOVSSET_EL0}}, x12 // encoding: [0x6c,0x9e,0x1b,0xd5]
+// CHECK: msr {{mair_el1|MAIR_EL1}}, x12 // encoding: [0x0c,0xa2,0x18,0xd5]
+// CHECK: msr {{mair_el2|MAIR_EL2}}, x12 // encoding: [0x0c,0xa2,0x1c,0xd5]
+// CHECK: msr {{mair_el3|MAIR_EL3}}, x12 // encoding: [0x0c,0xa2,0x1e,0xd5]
+// CHECK: msr {{amair_el1|AMAIR_EL1}}, x12 // encoding: [0x0c,0xa3,0x18,0xd5]
+// CHECK: msr {{amair_el2|AMAIR_EL2}}, x12 // encoding: [0x0c,0xa3,0x1c,0xd5]
+// CHECK: msr {{amair_el3|AMAIR_EL3}}, x12 // encoding: [0x0c,0xa3,0x1e,0xd5]
+// CHECK: msr {{vbar_el1|VBAR_EL1}}, x12 // encoding: [0x0c,0xc0,0x18,0xd5]
+// CHECK: msr {{vbar_el2|VBAR_EL2}}, x12 // encoding: [0x0c,0xc0,0x1c,0xd5]
+// CHECK: msr {{vbar_el3|VBAR_EL3}}, x12 // encoding: [0x0c,0xc0,0x1e,0xd5]
+// CHECK: msr {{rmr_el1|RMR_EL1}}, x12 // encoding: [0x4c,0xc0,0x18,0xd5]
+// CHECK: msr {{rmr_el2|RMR_EL2}}, x12 // encoding: [0x4c,0xc0,0x1c,0xd5]
+// CHECK: msr {{rmr_el3|RMR_EL3}}, x12 // encoding: [0x4c,0xc0,0x1e,0xd5]
+// CHECK: msr {{contextidr_el1|CONTEXTIDR_EL1}}, x12 // encoding: [0x2c,0xd0,0x18,0xd5]
+// CHECK: msr {{tpidr_el0|TPIDR_EL0}}, x12 // encoding: [0x4c,0xd0,0x1b,0xd5]
+// CHECK: msr {{tpidr_el2|TPIDR_EL2}}, x12 // encoding: [0x4c,0xd0,0x1c,0xd5]
+// CHECK: msr {{tpidr_el3|TPIDR_EL3}}, x12 // encoding: [0x4c,0xd0,0x1e,0xd5]
+// CHECK: msr {{tpidrro_el0|TPIDRRO_EL0}}, x12 // encoding: [0x6c,0xd0,0x1b,0xd5]
+// CHECK: msr {{tpidr_el1|TPIDR_EL1}}, x12 // encoding: [0x8c,0xd0,0x18,0xd5]
+// CHECK: msr {{cntfrq_el0|CNTFRQ_EL0}}, x12 // encoding: [0x0c,0xe0,0x1b,0xd5]
+// CHECK: msr {{cntvoff_el2|CNTVOFF_EL2}}, x12 // encoding: [0x6c,0xe0,0x1c,0xd5]
+// CHECK: msr {{cntkctl_el1|CNTKCTL_EL1}}, x12 // encoding: [0x0c,0xe1,0x18,0xd5]
+// CHECK: msr {{cnthctl_el2|CNTHCTL_EL2}}, x12 // encoding: [0x0c,0xe1,0x1c,0xd5]
+// CHECK: msr {{cntp_tval_el0|CNTP_TVAL_EL0}}, x12 // encoding: [0x0c,0xe2,0x1b,0xd5]
+// CHECK: msr {{cnthp_tval_el2|CNTHP_TVAL_EL2}}, x12 // encoding: [0x0c,0xe2,0x1c,0xd5]
+// CHECK: msr {{cntps_tval_el1|CNTPS_TVAL_EL1}}, x12 // encoding: [0x0c,0xe2,0x1f,0xd5]
+// CHECK: msr {{cntp_ctl_el0|CNTP_CTL_EL0}}, x12 // encoding: [0x2c,0xe2,0x1b,0xd5]
+// CHECK: msr {{cnthp_ctl_el2|CNTHP_CTL_EL2}}, x12 // encoding: [0x2c,0xe2,0x1c,0xd5]
+// CHECK: msr {{cntps_ctl_el1|CNTPS_CTL_EL1}}, x12 // encoding: [0x2c,0xe2,0x1f,0xd5]
+// CHECK: msr {{cntp_cval_el0|CNTP_CVAL_EL0}}, x12 // encoding: [0x4c,0xe2,0x1b,0xd5]
+// CHECK: msr {{cnthp_cval_el2|CNTHP_CVAL_EL2}}, x12 // encoding: [0x4c,0xe2,0x1c,0xd5]
+// CHECK: msr {{cntps_cval_el1|CNTPS_CVAL_EL1}}, x12 // encoding: [0x4c,0xe2,0x1f,0xd5]
+// CHECK: msr {{cntv_tval_el0|CNTV_TVAL_EL0}}, x12 // encoding: [0x0c,0xe3,0x1b,0xd5]
+// CHECK: msr {{cntv_ctl_el0|CNTV_CTL_EL0}}, x12 // encoding: [0x2c,0xe3,0x1b,0xd5]
+// CHECK: msr {{cntv_cval_el0|CNTV_CVAL_EL0}}, x12 // encoding: [0x4c,0xe3,0x1b,0xd5]
+// CHECK: msr {{pmevcntr0_el0|PMEVCNTR0_EL0}}, x12 // encoding: [0x0c,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr1_el0|PMEVCNTR1_EL0}}, x12 // encoding: [0x2c,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr2_el0|PMEVCNTR2_EL0}}, x12 // encoding: [0x4c,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr3_el0|PMEVCNTR3_EL0}}, x12 // encoding: [0x6c,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr4_el0|PMEVCNTR4_EL0}}, x12 // encoding: [0x8c,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr5_el0|PMEVCNTR5_EL0}}, x12 // encoding: [0xac,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr6_el0|PMEVCNTR6_EL0}}, x12 // encoding: [0xcc,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr7_el0|PMEVCNTR7_EL0}}, x12 // encoding: [0xec,0xe8,0x1b,0xd5]
+// CHECK: msr {{pmevcntr8_el0|PMEVCNTR8_EL0}}, x12 // encoding: [0x0c,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr9_el0|PMEVCNTR9_EL0}}, x12 // encoding: [0x2c,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr10_el0|PMEVCNTR10_EL0}}, x12 // encoding: [0x4c,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr11_el0|PMEVCNTR11_EL0}}, x12 // encoding: [0x6c,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr12_el0|PMEVCNTR12_EL0}}, x12 // encoding: [0x8c,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr13_el0|PMEVCNTR13_EL0}}, x12 // encoding: [0xac,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr14_el0|PMEVCNTR14_EL0}}, x12 // encoding: [0xcc,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr15_el0|PMEVCNTR15_EL0}}, x12 // encoding: [0xec,0xe9,0x1b,0xd5]
+// CHECK: msr {{pmevcntr16_el0|PMEVCNTR16_EL0}}, x12 // encoding: [0x0c,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr17_el0|PMEVCNTR17_EL0}}, x12 // encoding: [0x2c,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr18_el0|PMEVCNTR18_EL0}}, x12 // encoding: [0x4c,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr19_el0|PMEVCNTR19_EL0}}, x12 // encoding: [0x6c,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr20_el0|PMEVCNTR20_EL0}}, x12 // encoding: [0x8c,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr21_el0|PMEVCNTR21_EL0}}, x12 // encoding: [0xac,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr22_el0|PMEVCNTR22_EL0}}, x12 // encoding: [0xcc,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr23_el0|PMEVCNTR23_EL0}}, x12 // encoding: [0xec,0xea,0x1b,0xd5]
+// CHECK: msr {{pmevcntr24_el0|PMEVCNTR24_EL0}}, x12 // encoding: [0x0c,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmevcntr25_el0|PMEVCNTR25_EL0}}, x12 // encoding: [0x2c,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmevcntr26_el0|PMEVCNTR26_EL0}}, x12 // encoding: [0x4c,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmevcntr27_el0|PMEVCNTR27_EL0}}, x12 // encoding: [0x6c,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmevcntr28_el0|PMEVCNTR28_EL0}}, x12 // encoding: [0x8c,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmevcntr29_el0|PMEVCNTR29_EL0}}, x12 // encoding: [0xac,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmevcntr30_el0|PMEVCNTR30_EL0}}, x12 // encoding: [0xcc,0xeb,0x1b,0xd5]
+// CHECK: msr {{pmccfiltr_el0|PMCCFILTR_EL0}}, x12 // encoding: [0xec,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper0_el0|PMEVTYPER0_EL0}}, x12 // encoding: [0x0c,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper1_el0|PMEVTYPER1_EL0}}, x12 // encoding: [0x2c,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper2_el0|PMEVTYPER2_EL0}}, x12 // encoding: [0x4c,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper3_el0|PMEVTYPER3_EL0}}, x12 // encoding: [0x6c,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper4_el0|PMEVTYPER4_EL0}}, x12 // encoding: [0x8c,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper5_el0|PMEVTYPER5_EL0}}, x12 // encoding: [0xac,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper6_el0|PMEVTYPER6_EL0}}, x12 // encoding: [0xcc,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper7_el0|PMEVTYPER7_EL0}}, x12 // encoding: [0xec,0xec,0x1b,0xd5]
+// CHECK: msr {{pmevtyper8_el0|PMEVTYPER8_EL0}}, x12 // encoding: [0x0c,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper9_el0|PMEVTYPER9_EL0}}, x12 // encoding: [0x2c,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper10_el0|PMEVTYPER10_EL0}}, x12 // encoding: [0x4c,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper11_el0|PMEVTYPER11_EL0}}, x12 // encoding: [0x6c,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper12_el0|PMEVTYPER12_EL0}}, x12 // encoding: [0x8c,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper13_el0|PMEVTYPER13_EL0}}, x12 // encoding: [0xac,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper14_el0|PMEVTYPER14_EL0}}, x12 // encoding: [0xcc,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper15_el0|PMEVTYPER15_EL0}}, x12 // encoding: [0xec,0xed,0x1b,0xd5]
+// CHECK: msr {{pmevtyper16_el0|PMEVTYPER16_EL0}}, x12 // encoding: [0x0c,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper17_el0|PMEVTYPER17_EL0}}, x12 // encoding: [0x2c,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper18_el0|PMEVTYPER18_EL0}}, x12 // encoding: [0x4c,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper19_el0|PMEVTYPER19_EL0}}, x12 // encoding: [0x6c,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper20_el0|PMEVTYPER20_EL0}}, x12 // encoding: [0x8c,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper21_el0|PMEVTYPER21_EL0}}, x12 // encoding: [0xac,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper22_el0|PMEVTYPER22_EL0}}, x12 // encoding: [0xcc,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper23_el0|PMEVTYPER23_EL0}}, x12 // encoding: [0xec,0xee,0x1b,0xd5]
+// CHECK: msr {{pmevtyper24_el0|PMEVTYPER24_EL0}}, x12 // encoding: [0x0c,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper25_el0|PMEVTYPER25_EL0}}, x12 // encoding: [0x2c,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper26_el0|PMEVTYPER26_EL0}}, x12 // encoding: [0x4c,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper27_el0|PMEVTYPER27_EL0}}, x12 // encoding: [0x6c,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper28_el0|PMEVTYPER28_EL0}}, x12 // encoding: [0x8c,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper29_el0|PMEVTYPER29_EL0}}, x12 // encoding: [0xac,0xef,0x1b,0xd5]
+// CHECK: msr {{pmevtyper30_el0|PMEVTYPER30_EL0}}, x12 // encoding: [0xcc,0xef,0x1b,0xd5]
mrs x9, TEECR32_EL1
mrs x9, OSDTRRX_EL1
@@ -4497,315 +4464,315 @@ _func:
mrs x9, PMEVTYPER28_EL0
mrs x9, PMEVTYPER29_EL0
mrs x9, PMEVTYPER30_EL0
-// CHECK: mrs x9, teecr32_el1 // encoding: [0x09,0x00,0x32,0xd5]
-// CHECK: mrs x9, osdtrrx_el1 // encoding: [0x49,0x00,0x30,0xd5]
-// CHECK: mrs x9, mdccsr_el0 // encoding: [0x09,0x01,0x33,0xd5]
-// CHECK: mrs x9, mdccint_el1 // encoding: [0x09,0x02,0x30,0xd5]
-// CHECK: mrs x9, mdscr_el1 // encoding: [0x49,0x02,0x30,0xd5]
-// CHECK: mrs x9, osdtrtx_el1 // encoding: [0x49,0x03,0x30,0xd5]
-// CHECK: mrs x9, dbgdtr_el0 // encoding: [0x09,0x04,0x33,0xd5]
-// CHECK: mrs x9, dbgdtrrx_el0 // encoding: [0x09,0x05,0x33,0xd5]
-// CHECK: mrs x9, oseccr_el1 // encoding: [0x49,0x06,0x30,0xd5]
-// CHECK: mrs x9, dbgvcr32_el2 // encoding: [0x09,0x07,0x34,0xd5]
-// CHECK: mrs x9, dbgbvr0_el1 // encoding: [0x89,0x00,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr1_el1 // encoding: [0x89,0x01,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr2_el1 // encoding: [0x89,0x02,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr3_el1 // encoding: [0x89,0x03,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr4_el1 // encoding: [0x89,0x04,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr5_el1 // encoding: [0x89,0x05,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr6_el1 // encoding: [0x89,0x06,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr7_el1 // encoding: [0x89,0x07,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr8_el1 // encoding: [0x89,0x08,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr9_el1 // encoding: [0x89,0x09,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr10_el1 // encoding: [0x89,0x0a,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr11_el1 // encoding: [0x89,0x0b,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr12_el1 // encoding: [0x89,0x0c,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr13_el1 // encoding: [0x89,0x0d,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr14_el1 // encoding: [0x89,0x0e,0x30,0xd5]
-// CHECK: mrs x9, dbgbvr15_el1 // encoding: [0x89,0x0f,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr0_el1 // encoding: [0xa9,0x00,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr1_el1 // encoding: [0xa9,0x01,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr2_el1 // encoding: [0xa9,0x02,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr3_el1 // encoding: [0xa9,0x03,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr4_el1 // encoding: [0xa9,0x04,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr5_el1 // encoding: [0xa9,0x05,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr6_el1 // encoding: [0xa9,0x06,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr7_el1 // encoding: [0xa9,0x07,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr8_el1 // encoding: [0xa9,0x08,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr9_el1 // encoding: [0xa9,0x09,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr10_el1 // encoding: [0xa9,0x0a,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr11_el1 // encoding: [0xa9,0x0b,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr12_el1 // encoding: [0xa9,0x0c,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr13_el1 // encoding: [0xa9,0x0d,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr14_el1 // encoding: [0xa9,0x0e,0x30,0xd5]
-// CHECK: mrs x9, dbgbcr15_el1 // encoding: [0xa9,0x0f,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr0_el1 // encoding: [0xc9,0x00,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr1_el1 // encoding: [0xc9,0x01,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr2_el1 // encoding: [0xc9,0x02,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr3_el1 // encoding: [0xc9,0x03,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr4_el1 // encoding: [0xc9,0x04,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr5_el1 // encoding: [0xc9,0x05,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr6_el1 // encoding: [0xc9,0x06,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr7_el1 // encoding: [0xc9,0x07,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr8_el1 // encoding: [0xc9,0x08,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr9_el1 // encoding: [0xc9,0x09,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr10_el1 // encoding: [0xc9,0x0a,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr11_el1 // encoding: [0xc9,0x0b,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr12_el1 // encoding: [0xc9,0x0c,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr13_el1 // encoding: [0xc9,0x0d,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr14_el1 // encoding: [0xc9,0x0e,0x30,0xd5]
-// CHECK: mrs x9, dbgwvr15_el1 // encoding: [0xc9,0x0f,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr0_el1 // encoding: [0xe9,0x00,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr1_el1 // encoding: [0xe9,0x01,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr2_el1 // encoding: [0xe9,0x02,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr3_el1 // encoding: [0xe9,0x03,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr4_el1 // encoding: [0xe9,0x04,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr5_el1 // encoding: [0xe9,0x05,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr6_el1 // encoding: [0xe9,0x06,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr7_el1 // encoding: [0xe9,0x07,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr8_el1 // encoding: [0xe9,0x08,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr9_el1 // encoding: [0xe9,0x09,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr10_el1 // encoding: [0xe9,0x0a,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr11_el1 // encoding: [0xe9,0x0b,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr12_el1 // encoding: [0xe9,0x0c,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr13_el1 // encoding: [0xe9,0x0d,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr14_el1 // encoding: [0xe9,0x0e,0x30,0xd5]
-// CHECK: mrs x9, dbgwcr15_el1 // encoding: [0xe9,0x0f,0x30,0xd5]
-// CHECK: mrs x9, mdrar_el1 // encoding: [0x09,0x10,0x30,0xd5]
-// CHECK: mrs x9, teehbr32_el1 // encoding: [0x09,0x10,0x32,0xd5]
-// CHECK: mrs x9, oslsr_el1 // encoding: [0x89,0x11,0x30,0xd5]
-// CHECK: mrs x9, osdlr_el1 // encoding: [0x89,0x13,0x30,0xd5]
-// CHECK: mrs x9, dbgprcr_el1 // encoding: [0x89,0x14,0x30,0xd5]
-// CHECK: mrs x9, dbgclaimset_el1 // encoding: [0xc9,0x78,0x30,0xd5]
-// CHECK: mrs x9, dbgclaimclr_el1 // encoding: [0xc9,0x79,0x30,0xd5]
-// CHECK: mrs x9, dbgauthstatus_el1 // encoding: [0xc9,0x7e,0x30,0xd5]
-// CHECK: mrs x9, midr_el1 // encoding: [0x09,0x00,0x38,0xd5]
-// CHECK: mrs x9, ccsidr_el1 // encoding: [0x09,0x00,0x39,0xd5]
-// CHECK: mrs x9, csselr_el1 // encoding: [0x09,0x00,0x3a,0xd5]
-// CHECK: mrs x9, vpidr_el2 // encoding: [0x09,0x00,0x3c,0xd5]
-// CHECK: mrs x9, clidr_el1 // encoding: [0x29,0x00,0x39,0xd5]
-// CHECK: mrs x9, ctr_el0 // encoding: [0x29,0x00,0x3b,0xd5]
-// CHECK: mrs x9, mpidr_el1 // encoding: [0xa9,0x00,0x38,0xd5]
-// CHECK: mrs x9, vmpidr_el2 // encoding: [0xa9,0x00,0x3c,0xd5]
-// CHECK: mrs x9, revidr_el1 // encoding: [0xc9,0x00,0x38,0xd5]
-// CHECK: mrs x9, aidr_el1 // encoding: [0xe9,0x00,0x39,0xd5]
-// CHECK: mrs x9, dczid_el0 // encoding: [0xe9,0x00,0x3b,0xd5]
-// CHECK: mrs x9, id_pfr0_el1 // encoding: [0x09,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_pfr1_el1 // encoding: [0x29,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_dfr0_el1 // encoding: [0x49,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_afr0_el1 // encoding: [0x69,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_mmfr0_el1 // encoding: [0x89,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_mmfr1_el1 // encoding: [0xa9,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_mmfr2_el1 // encoding: [0xc9,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_mmfr3_el1 // encoding: [0xe9,0x01,0x38,0xd5]
-// CHECK: mrs x9, id_isar0_el1 // encoding: [0x09,0x02,0x38,0xd5]
-// CHECK: mrs x9, id_isar1_el1 // encoding: [0x29,0x02,0x38,0xd5]
-// CHECK: mrs x9, id_isar2_el1 // encoding: [0x49,0x02,0x38,0xd5]
-// CHECK: mrs x9, id_isar3_el1 // encoding: [0x69,0x02,0x38,0xd5]
-// CHECK: mrs x9, id_isar4_el1 // encoding: [0x89,0x02,0x38,0xd5]
-// CHECK: mrs x9, id_isar5_el1 // encoding: [0xa9,0x02,0x38,0xd5]
-// CHECK: mrs x9, mvfr0_el1 // encoding: [0x09,0x03,0x38,0xd5]
-// CHECK: mrs x9, mvfr1_el1 // encoding: [0x29,0x03,0x38,0xd5]
-// CHECK: mrs x9, mvfr2_el1 // encoding: [0x49,0x03,0x38,0xd5]
-// CHECK: mrs x9, id_aa64pfr0_el1 // encoding: [0x09,0x04,0x38,0xd5]
-// CHECK: mrs x9, id_aa64pfr1_el1 // encoding: [0x29,0x04,0x38,0xd5]
-// CHECK: mrs x9, id_aa64dfr0_el1 // encoding: [0x09,0x05,0x38,0xd5]
-// CHECK: mrs x9, id_aa64dfr1_el1 // encoding: [0x29,0x05,0x38,0xd5]
-// CHECK: mrs x9, id_aa64afr0_el1 // encoding: [0x89,0x05,0x38,0xd5]
-// CHECK: mrs x9, id_aa64afr1_el1 // encoding: [0xa9,0x05,0x38,0xd5]
-// CHECK: mrs x9, id_aa64isar0_el1 // encoding: [0x09,0x06,0x38,0xd5]
-// CHECK: mrs x9, id_aa64isar1_el1 // encoding: [0x29,0x06,0x38,0xd5]
-// CHECK: mrs x9, id_aa64mmfr0_el1 // encoding: [0x09,0x07,0x38,0xd5]
-// CHECK: mrs x9, id_aa64mmfr1_el1 // encoding: [0x29,0x07,0x38,0xd5]
-// CHECK: mrs x9, sctlr_el1 // encoding: [0x09,0x10,0x38,0xd5]
-// CHECK: mrs x9, sctlr_el2 // encoding: [0x09,0x10,0x3c,0xd5]
-// CHECK: mrs x9, sctlr_el3 // encoding: [0x09,0x10,0x3e,0xd5]
-// CHECK: mrs x9, actlr_el1 // encoding: [0x29,0x10,0x38,0xd5]
-// CHECK: mrs x9, actlr_el2 // encoding: [0x29,0x10,0x3c,0xd5]
-// CHECK: mrs x9, actlr_el3 // encoding: [0x29,0x10,0x3e,0xd5]
-// CHECK: mrs x9, cpacr_el1 // encoding: [0x49,0x10,0x38,0xd5]
-// CHECK: mrs x9, hcr_el2 // encoding: [0x09,0x11,0x3c,0xd5]
-// CHECK: mrs x9, scr_el3 // encoding: [0x09,0x11,0x3e,0xd5]
-// CHECK: mrs x9, mdcr_el2 // encoding: [0x29,0x11,0x3c,0xd5]
-// CHECK: mrs x9, sder32_el3 // encoding: [0x29,0x11,0x3e,0xd5]
-// CHECK: mrs x9, cptr_el2 // encoding: [0x49,0x11,0x3c,0xd5]
-// CHECK: mrs x9, cptr_el3 // encoding: [0x49,0x11,0x3e,0xd5]
-// CHECK: mrs x9, hstr_el2 // encoding: [0x69,0x11,0x3c,0xd5]
-// CHECK: mrs x9, hacr_el2 // encoding: [0xe9,0x11,0x3c,0xd5]
-// CHECK: mrs x9, mdcr_el3 // encoding: [0x29,0x13,0x3e,0xd5]
-// CHECK: mrs x9, ttbr0_el1 // encoding: [0x09,0x20,0x38,0xd5]
-// CHECK: mrs x9, ttbr0_el2 // encoding: [0x09,0x20,0x3c,0xd5]
-// CHECK: mrs x9, ttbr0_el3 // encoding: [0x09,0x20,0x3e,0xd5]
-// CHECK: mrs x9, ttbr1_el1 // encoding: [0x29,0x20,0x38,0xd5]
-// CHECK: mrs x9, tcr_el1 // encoding: [0x49,0x20,0x38,0xd5]
-// CHECK: mrs x9, tcr_el2 // encoding: [0x49,0x20,0x3c,0xd5]
-// CHECK: mrs x9, tcr_el3 // encoding: [0x49,0x20,0x3e,0xd5]
-// CHECK: mrs x9, vttbr_el2 // encoding: [0x09,0x21,0x3c,0xd5]
-// CHECK: mrs x9, vtcr_el2 // encoding: [0x49,0x21,0x3c,0xd5]
-// CHECK: mrs x9, dacr32_el2 // encoding: [0x09,0x30,0x3c,0xd5]
-// CHECK: mrs x9, spsr_el1 // encoding: [0x09,0x40,0x38,0xd5]
-// CHECK: mrs x9, spsr_el2 // encoding: [0x09,0x40,0x3c,0xd5]
-// CHECK: mrs x9, spsr_el3 // encoding: [0x09,0x40,0x3e,0xd5]
-// CHECK: mrs x9, elr_el1 // encoding: [0x29,0x40,0x38,0xd5]
-// CHECK: mrs x9, elr_el2 // encoding: [0x29,0x40,0x3c,0xd5]
-// CHECK: mrs x9, elr_el3 // encoding: [0x29,0x40,0x3e,0xd5]
-// CHECK: mrs x9, sp_el0 // encoding: [0x09,0x41,0x38,0xd5]
-// CHECK: mrs x9, sp_el1 // encoding: [0x09,0x41,0x3c,0xd5]
-// CHECK: mrs x9, sp_el2 // encoding: [0x09,0x41,0x3e,0xd5]
-// CHECK: mrs x9, spsel // encoding: [0x09,0x42,0x38,0xd5]
-// CHECK: mrs x9, nzcv // encoding: [0x09,0x42,0x3b,0xd5]
-// CHECK: mrs x9, daif // encoding: [0x29,0x42,0x3b,0xd5]
-// CHECK: mrs x9, currentel // encoding: [0x49,0x42,0x38,0xd5]
-// CHECK: mrs x9, spsr_irq // encoding: [0x09,0x43,0x3c,0xd5]
-// CHECK: mrs x9, spsr_abt // encoding: [0x29,0x43,0x3c,0xd5]
-// CHECK: mrs x9, spsr_und // encoding: [0x49,0x43,0x3c,0xd5]
-// CHECK: mrs x9, spsr_fiq // encoding: [0x69,0x43,0x3c,0xd5]
-// CHECK: mrs x9, fpcr // encoding: [0x09,0x44,0x3b,0xd5]
-// CHECK: mrs x9, fpsr // encoding: [0x29,0x44,0x3b,0xd5]
-// CHECK: mrs x9, dspsr_el0 // encoding: [0x09,0x45,0x3b,0xd5]
-// CHECK: mrs x9, dlr_el0 // encoding: [0x29,0x45,0x3b,0xd5]
-// CHECK: mrs x9, ifsr32_el2 // encoding: [0x29,0x50,0x3c,0xd5]
-// CHECK: mrs x9, afsr0_el1 // encoding: [0x09,0x51,0x38,0xd5]
-// CHECK: mrs x9, afsr0_el2 // encoding: [0x09,0x51,0x3c,0xd5]
-// CHECK: mrs x9, afsr0_el3 // encoding: [0x09,0x51,0x3e,0xd5]
-// CHECK: mrs x9, afsr1_el1 // encoding: [0x29,0x51,0x38,0xd5]
-// CHECK: mrs x9, afsr1_el2 // encoding: [0x29,0x51,0x3c,0xd5]
-// CHECK: mrs x9, afsr1_el3 // encoding: [0x29,0x51,0x3e,0xd5]
-// CHECK: mrs x9, esr_el1 // encoding: [0x09,0x52,0x38,0xd5]
-// CHECK: mrs x9, esr_el2 // encoding: [0x09,0x52,0x3c,0xd5]
-// CHECK: mrs x9, esr_el3 // encoding: [0x09,0x52,0x3e,0xd5]
-// CHECK: mrs x9, fpexc32_el2 // encoding: [0x09,0x53,0x3c,0xd5]
-// CHECK: mrs x9, far_el1 // encoding: [0x09,0x60,0x38,0xd5]
-// CHECK: mrs x9, far_el2 // encoding: [0x09,0x60,0x3c,0xd5]
-// CHECK: mrs x9, far_el3 // encoding: [0x09,0x60,0x3e,0xd5]
-// CHECK: mrs x9, hpfar_el2 // encoding: [0x89,0x60,0x3c,0xd5]
-// CHECK: mrs x9, par_el1 // encoding: [0x09,0x74,0x38,0xd5]
-// CHECK: mrs x9, pmcr_el0 // encoding: [0x09,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmcntenset_el0 // encoding: [0x29,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmcntenclr_el0 // encoding: [0x49,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmovsclr_el0 // encoding: [0x69,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmselr_el0 // encoding: [0xa9,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmceid0_el0 // encoding: [0xc9,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmceid1_el0 // encoding: [0xe9,0x9c,0x3b,0xd5]
-// CHECK: mrs x9, pmccntr_el0 // encoding: [0x09,0x9d,0x3b,0xd5]
-// CHECK: mrs x9, pmxevtyper_el0 // encoding: [0x29,0x9d,0x3b,0xd5]
-// CHECK: mrs x9, pmxevcntr_el0 // encoding: [0x49,0x9d,0x3b,0xd5]
-// CHECK: mrs x9, pmuserenr_el0 // encoding: [0x09,0x9e,0x3b,0xd5]
-// CHECK: mrs x9, pmintenset_el1 // encoding: [0x29,0x9e,0x38,0xd5]
-// CHECK: mrs x9, pmintenclr_el1 // encoding: [0x49,0x9e,0x38,0xd5]
-// CHECK: mrs x9, pmovsset_el0 // encoding: [0x69,0x9e,0x3b,0xd5]
-// CHECK: mrs x9, mair_el1 // encoding: [0x09,0xa2,0x38,0xd5]
-// CHECK: mrs x9, mair_el2 // encoding: [0x09,0xa2,0x3c,0xd5]
-// CHECK: mrs x9, mair_el3 // encoding: [0x09,0xa2,0x3e,0xd5]
-// CHECK: mrs x9, amair_el1 // encoding: [0x09,0xa3,0x38,0xd5]
-// CHECK: mrs x9, amair_el2 // encoding: [0x09,0xa3,0x3c,0xd5]
-// CHECK: mrs x9, amair_el3 // encoding: [0x09,0xa3,0x3e,0xd5]
-// CHECK: mrs x9, vbar_el1 // encoding: [0x09,0xc0,0x38,0xd5]
-// CHECK: mrs x9, vbar_el2 // encoding: [0x09,0xc0,0x3c,0xd5]
-// CHECK: mrs x9, vbar_el3 // encoding: [0x09,0xc0,0x3e,0xd5]
-// CHECK: mrs x9, rvbar_el1 // encoding: [0x29,0xc0,0x38,0xd5]
-// CHECK: mrs x9, rvbar_el2 // encoding: [0x29,0xc0,0x3c,0xd5]
-// CHECK: mrs x9, rvbar_el3 // encoding: [0x29,0xc0,0x3e,0xd5]
-// CHECK: mrs x9, rmr_el1 // encoding: [0x49,0xc0,0x38,0xd5]
-// CHECK: mrs x9, rmr_el2 // encoding: [0x49,0xc0,0x3c,0xd5]
-// CHECK: mrs x9, rmr_el3 // encoding: [0x49,0xc0,0x3e,0xd5]
-// CHECK: mrs x9, isr_el1 // encoding: [0x09,0xc1,0x38,0xd5]
-// CHECK: mrs x9, contextidr_el1 // encoding: [0x29,0xd0,0x38,0xd5]
-// CHECK: mrs x9, tpidr_el0 // encoding: [0x49,0xd0,0x3b,0xd5]
-// CHECK: mrs x9, tpidr_el2 // encoding: [0x49,0xd0,0x3c,0xd5]
-// CHECK: mrs x9, tpidr_el3 // encoding: [0x49,0xd0,0x3e,0xd5]
-// CHECK: mrs x9, tpidrro_el0 // encoding: [0x69,0xd0,0x3b,0xd5]
-// CHECK: mrs x9, tpidr_el1 // encoding: [0x89,0xd0,0x38,0xd5]
-// CHECK: mrs x9, cntfrq_el0 // encoding: [0x09,0xe0,0x3b,0xd5]
-// CHECK: mrs x9, cntpct_el0 // encoding: [0x29,0xe0,0x3b,0xd5]
-// CHECK: mrs x9, cntvct_el0 // encoding: [0x49,0xe0,0x3b,0xd5]
-// CHECK: mrs x9, cntvoff_el2 // encoding: [0x69,0xe0,0x3c,0xd5]
-// CHECK: mrs x9, cntkctl_el1 // encoding: [0x09,0xe1,0x38,0xd5]
-// CHECK: mrs x9, cnthctl_el2 // encoding: [0x09,0xe1,0x3c,0xd5]
-// CHECK: mrs x9, cntp_tval_el0 // encoding: [0x09,0xe2,0x3b,0xd5]
-// CHECK: mrs x9, cnthp_tval_el2 // encoding: [0x09,0xe2,0x3c,0xd5]
-// CHECK: mrs x9, cntps_tval_el1 // encoding: [0x09,0xe2,0x3f,0xd5]
-// CHECK: mrs x9, cntp_ctl_el0 // encoding: [0x29,0xe2,0x3b,0xd5]
-// CHECK: mrs x9, cnthp_ctl_el2 // encoding: [0x29,0xe2,0x3c,0xd5]
-// CHECK: mrs x9, cntps_ctl_el1 // encoding: [0x29,0xe2,0x3f,0xd5]
-// CHECK: mrs x9, cntp_cval_el0 // encoding: [0x49,0xe2,0x3b,0xd5]
-// CHECK: mrs x9, cnthp_cval_el2 // encoding: [0x49,0xe2,0x3c,0xd5]
-// CHECK: mrs x9, cntps_cval_el1 // encoding: [0x49,0xe2,0x3f,0xd5]
-// CHECK: mrs x9, cntv_tval_el0 // encoding: [0x09,0xe3,0x3b,0xd5]
-// CHECK: mrs x9, cntv_ctl_el0 // encoding: [0x29,0xe3,0x3b,0xd5]
-// CHECK: mrs x9, cntv_cval_el0 // encoding: [0x49,0xe3,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr0_el0 // encoding: [0x09,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr1_el0 // encoding: [0x29,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr2_el0 // encoding: [0x49,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr3_el0 // encoding: [0x69,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr4_el0 // encoding: [0x89,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr5_el0 // encoding: [0xa9,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr6_el0 // encoding: [0xc9,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr7_el0 // encoding: [0xe9,0xe8,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr8_el0 // encoding: [0x09,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr9_el0 // encoding: [0x29,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr10_el0 // encoding: [0x49,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr11_el0 // encoding: [0x69,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr12_el0 // encoding: [0x89,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr13_el0 // encoding: [0xa9,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr14_el0 // encoding: [0xc9,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr15_el0 // encoding: [0xe9,0xe9,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr16_el0 // encoding: [0x09,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr17_el0 // encoding: [0x29,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr18_el0 // encoding: [0x49,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr19_el0 // encoding: [0x69,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr20_el0 // encoding: [0x89,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr21_el0 // encoding: [0xa9,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr22_el0 // encoding: [0xc9,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr23_el0 // encoding: [0xe9,0xea,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr24_el0 // encoding: [0x09,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr25_el0 // encoding: [0x29,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr26_el0 // encoding: [0x49,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr27_el0 // encoding: [0x69,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr28_el0 // encoding: [0x89,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr29_el0 // encoding: [0xa9,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmevcntr30_el0 // encoding: [0xc9,0xeb,0x3b,0xd5]
-// CHECK: mrs x9, pmccfiltr_el0 // encoding: [0xe9,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper0_el0 // encoding: [0x09,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper1_el0 // encoding: [0x29,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper2_el0 // encoding: [0x49,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper3_el0 // encoding: [0x69,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper4_el0 // encoding: [0x89,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper5_el0 // encoding: [0xa9,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper6_el0 // encoding: [0xc9,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper7_el0 // encoding: [0xe9,0xec,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper8_el0 // encoding: [0x09,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper9_el0 // encoding: [0x29,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper10_el0 // encoding: [0x49,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper11_el0 // encoding: [0x69,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper12_el0 // encoding: [0x89,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper13_el0 // encoding: [0xa9,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper14_el0 // encoding: [0xc9,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper15_el0 // encoding: [0xe9,0xed,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper16_el0 // encoding: [0x09,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper17_el0 // encoding: [0x29,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper18_el0 // encoding: [0x49,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper19_el0 // encoding: [0x69,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper20_el0 // encoding: [0x89,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper21_el0 // encoding: [0xa9,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper22_el0 // encoding: [0xc9,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper23_el0 // encoding: [0xe9,0xee,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper24_el0 // encoding: [0x09,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper25_el0 // encoding: [0x29,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper26_el0 // encoding: [0x49,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper27_el0 // encoding: [0x69,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper28_el0 // encoding: [0x89,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper29_el0 // encoding: [0xa9,0xef,0x3b,0xd5]
-// CHECK: mrs x9, pmevtyper30_el0 // encoding: [0xc9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{teecr32_el1|TEECR32_EL1}} // encoding: [0x09,0x00,0x32,0xd5]
+// CHECK: mrs x9, {{osdtrrx_el1|OSDTRRX_EL1}} // encoding: [0x49,0x00,0x30,0xd5]
+// CHECK: mrs x9, {{mdccsr_el0|MDCCSR_EL0}} // encoding: [0x09,0x01,0x33,0xd5]
+// CHECK: mrs x9, {{mdccint_el1|MDCCINT_EL1}} // encoding: [0x09,0x02,0x30,0xd5]
+// CHECK: mrs x9, {{mdscr_el1|MDSCR_EL1}} // encoding: [0x49,0x02,0x30,0xd5]
+// CHECK: mrs x9, {{osdtrtx_el1|OSDTRTX_EL1}} // encoding: [0x49,0x03,0x30,0xd5]
+// CHECK: mrs x9, {{dbgdtr_el0|DBGDTR_EL0}} // encoding: [0x09,0x04,0x33,0xd5]
+// CHECK: mrs x9, {{dbgdtrrx_el0|DBGDTRRX_EL0}} // encoding: [0x09,0x05,0x33,0xd5]
+// CHECK: mrs x9, {{oseccr_el1|OSECCR_EL1}} // encoding: [0x49,0x06,0x30,0xd5]
+// CHECK: mrs x9, {{dbgvcr32_el2|DBGVCR32_EL2}} // encoding: [0x09,0x07,0x34,0xd5]
+// CHECK: mrs x9, {{dbgbvr0_el1|DBGBVR0_EL1}} // encoding: [0x89,0x00,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr1_el1|DBGBVR1_EL1}} // encoding: [0x89,0x01,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr2_el1|DBGBVR2_EL1}} // encoding: [0x89,0x02,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr3_el1|DBGBVR3_EL1}} // encoding: [0x89,0x03,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr4_el1|DBGBVR4_EL1}} // encoding: [0x89,0x04,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr5_el1|DBGBVR5_EL1}} // encoding: [0x89,0x05,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr6_el1|DBGBVR6_EL1}} // encoding: [0x89,0x06,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr7_el1|DBGBVR7_EL1}} // encoding: [0x89,0x07,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr8_el1|DBGBVR8_EL1}} // encoding: [0x89,0x08,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr9_el1|DBGBVR9_EL1}} // encoding: [0x89,0x09,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr10_el1|DBGBVR10_EL1}} // encoding: [0x89,0x0a,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr11_el1|DBGBVR11_EL1}} // encoding: [0x89,0x0b,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr12_el1|DBGBVR12_EL1}} // encoding: [0x89,0x0c,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr13_el1|DBGBVR13_EL1}} // encoding: [0x89,0x0d,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr14_el1|DBGBVR14_EL1}} // encoding: [0x89,0x0e,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbvr15_el1|DBGBVR15_EL1}} // encoding: [0x89,0x0f,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr0_el1|DBGBCR0_EL1}} // encoding: [0xa9,0x00,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr1_el1|DBGBCR1_EL1}} // encoding: [0xa9,0x01,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr2_el1|DBGBCR2_EL1}} // encoding: [0xa9,0x02,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr3_el1|DBGBCR3_EL1}} // encoding: [0xa9,0x03,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr4_el1|DBGBCR4_EL1}} // encoding: [0xa9,0x04,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr5_el1|DBGBCR5_EL1}} // encoding: [0xa9,0x05,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr6_el1|DBGBCR6_EL1}} // encoding: [0xa9,0x06,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr7_el1|DBGBCR7_EL1}} // encoding: [0xa9,0x07,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr8_el1|DBGBCR8_EL1}} // encoding: [0xa9,0x08,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr9_el1|DBGBCR9_EL1}} // encoding: [0xa9,0x09,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr10_el1|DBGBCR10_EL1}} // encoding: [0xa9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr11_el1|DBGBCR11_EL1}} // encoding: [0xa9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr12_el1|DBGBCR12_EL1}} // encoding: [0xa9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr13_el1|DBGBCR13_EL1}} // encoding: [0xa9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr14_el1|DBGBCR14_EL1}} // encoding: [0xa9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, {{dbgbcr15_el1|DBGBCR15_EL1}} // encoding: [0xa9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr0_el1|DBGWVR0_EL1}} // encoding: [0xc9,0x00,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr1_el1|DBGWVR1_EL1}} // encoding: [0xc9,0x01,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr2_el1|DBGWVR2_EL1}} // encoding: [0xc9,0x02,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr3_el1|DBGWVR3_EL1}} // encoding: [0xc9,0x03,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr4_el1|DBGWVR4_EL1}} // encoding: [0xc9,0x04,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr5_el1|DBGWVR5_EL1}} // encoding: [0xc9,0x05,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr6_el1|DBGWVR6_EL1}} // encoding: [0xc9,0x06,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr7_el1|DBGWVR7_EL1}} // encoding: [0xc9,0x07,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr8_el1|DBGWVR8_EL1}} // encoding: [0xc9,0x08,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr9_el1|DBGWVR9_EL1}} // encoding: [0xc9,0x09,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr10_el1|DBGWVR10_EL1}} // encoding: [0xc9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr11_el1|DBGWVR11_EL1}} // encoding: [0xc9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr12_el1|DBGWVR12_EL1}} // encoding: [0xc9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr13_el1|DBGWVR13_EL1}} // encoding: [0xc9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr14_el1|DBGWVR14_EL1}} // encoding: [0xc9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwvr15_el1|DBGWVR15_EL1}} // encoding: [0xc9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr0_el1|DBGWCR0_EL1}} // encoding: [0xe9,0x00,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr1_el1|DBGWCR1_EL1}} // encoding: [0xe9,0x01,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr2_el1|DBGWCR2_EL1}} // encoding: [0xe9,0x02,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr3_el1|DBGWCR3_EL1}} // encoding: [0xe9,0x03,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr4_el1|DBGWCR4_EL1}} // encoding: [0xe9,0x04,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr5_el1|DBGWCR5_EL1}} // encoding: [0xe9,0x05,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr6_el1|DBGWCR6_EL1}} // encoding: [0xe9,0x06,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr7_el1|DBGWCR7_EL1}} // encoding: [0xe9,0x07,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr8_el1|DBGWCR8_EL1}} // encoding: [0xe9,0x08,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr9_el1|DBGWCR9_EL1}} // encoding: [0xe9,0x09,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr10_el1|DBGWCR10_EL1}} // encoding: [0xe9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr11_el1|DBGWCR11_EL1}} // encoding: [0xe9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr12_el1|DBGWCR12_EL1}} // encoding: [0xe9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr13_el1|DBGWCR13_EL1}} // encoding: [0xe9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr14_el1|DBGWCR14_EL1}} // encoding: [0xe9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, {{dbgwcr15_el1|DBGWCR15_EL1}} // encoding: [0xe9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, {{mdrar_el1|MDRAR_EL1}} // encoding: [0x09,0x10,0x30,0xd5]
+// CHECK: mrs x9, {{teehbr32_el1|TEEHBR32_EL1}} // encoding: [0x09,0x10,0x32,0xd5]
+// CHECK: mrs x9, {{oslsr_el1|OSLSR_EL1}} // encoding: [0x89,0x11,0x30,0xd5]
+// CHECK: mrs x9, {{osdlr_el1|OSDLR_EL1}} // encoding: [0x89,0x13,0x30,0xd5]
+// CHECK: mrs x9, {{dbgprcr_el1|DBGPRCR_EL1}} // encoding: [0x89,0x14,0x30,0xd5]
+// CHECK: mrs x9, {{dbgclaimset_el1|DBGCLAIMSET_EL1}} // encoding: [0xc9,0x78,0x30,0xd5]
+// CHECK: mrs x9, {{dbgclaimclr_el1|DBGCLAIMCLR_EL1}} // encoding: [0xc9,0x79,0x30,0xd5]
+// CHECK: mrs x9, {{dbgauthstatus_el1|DBGAUTHSTATUS_EL1}} // encoding: [0xc9,0x7e,0x30,0xd5]
+// CHECK: mrs x9, {{midr_el1|MIDR_EL1}} // encoding: [0x09,0x00,0x38,0xd5]
+// CHECK: mrs x9, {{ccsidr_el1|CCSIDR_EL1}} // encoding: [0x09,0x00,0x39,0xd5]
+// CHECK: mrs x9, {{csselr_el1|CSSELR_EL1}} // encoding: [0x09,0x00,0x3a,0xd5]
+// CHECK: mrs x9, {{vpidr_el2|VPIDR_EL2}} // encoding: [0x09,0x00,0x3c,0xd5]
+// CHECK: mrs x9, {{clidr_el1|CLIDR_EL1}} // encoding: [0x29,0x00,0x39,0xd5]
+// CHECK: mrs x9, {{ctr_el0|CTR_EL0}} // encoding: [0x29,0x00,0x3b,0xd5]
+// CHECK: mrs x9, {{mpidr_el1|MPIDR_EL1}} // encoding: [0xa9,0x00,0x38,0xd5]
+// CHECK: mrs x9, {{vmpidr_el2|VMPIDR_EL2}} // encoding: [0xa9,0x00,0x3c,0xd5]
+// CHECK: mrs x9, {{revidr_el1|REVIDR_EL1}} // encoding: [0xc9,0x00,0x38,0xd5]
+// CHECK: mrs x9, {{aidr_el1|AIDR_EL1}} // encoding: [0xe9,0x00,0x39,0xd5]
+// CHECK: mrs x9, {{dczid_el0|DCZID_EL0}} // encoding: [0xe9,0x00,0x3b,0xd5]
+// CHECK: mrs x9, {{id_pfr0_el1|ID_PFR0_EL1}} // encoding: [0x09,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_pfr1_el1|ID_PFR1_EL1}} // encoding: [0x29,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_dfr0_el1|ID_DFR0_EL1}} // encoding: [0x49,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_afr0_el1|ID_AFR0_EL1}} // encoding: [0x69,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_mmfr0_el1|ID_MMFR0_EL1}} // encoding: [0x89,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_mmfr1_el1|ID_MMFR1_EL1}} // encoding: [0xa9,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_mmfr2_el1|ID_MMFR2_EL1}} // encoding: [0xc9,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_mmfr3_el1|ID_MMFR3_EL1}} // encoding: [0xe9,0x01,0x38,0xd5]
+// CHECK: mrs x9, {{id_isar0_el1|ID_ISAR0_EL1}} // encoding: [0x09,0x02,0x38,0xd5]
+// CHECK: mrs x9, {{id_isar1_el1|ID_ISAR1_EL1}} // encoding: [0x29,0x02,0x38,0xd5]
+// CHECK: mrs x9, {{id_isar2_el1|ID_ISAR2_EL1}} // encoding: [0x49,0x02,0x38,0xd5]
+// CHECK: mrs x9, {{id_isar3_el1|ID_ISAR3_EL1}} // encoding: [0x69,0x02,0x38,0xd5]
+// CHECK: mrs x9, {{id_isar4_el1|ID_ISAR4_EL1}} // encoding: [0x89,0x02,0x38,0xd5]
+// CHECK: mrs x9, {{id_isar5_el1|ID_ISAR5_EL1}} // encoding: [0xa9,0x02,0x38,0xd5]
+// CHECK: mrs x9, {{mvfr0_el1|MVFR0_EL1}} // encoding: [0x09,0x03,0x38,0xd5]
+// CHECK: mrs x9, {{mvfr1_el1|MVFR1_EL1}} // encoding: [0x29,0x03,0x38,0xd5]
+// CHECK: mrs x9, {{mvfr2_el1|MVFR2_EL1}} // encoding: [0x49,0x03,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64pfr0_el1|ID_AA64PFR0_EL1}} // encoding: [0x09,0x04,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64pfr1_el1|ID_AA64PFR1_EL1}} // encoding: [0x29,0x04,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64dfr0_el1|ID_AA64DFR0_EL1}} // encoding: [0x09,0x05,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64dfr1_el1|ID_AA64DFR1_EL1}} // encoding: [0x29,0x05,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64afr0_el1|ID_AA64AFR0_EL1}} // encoding: [0x89,0x05,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64afr1_el1|ID_AA64AFR1_EL1}} // encoding: [0xa9,0x05,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64isar0_el1|ID_AA64ISAR0_EL1}} // encoding: [0x09,0x06,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64isar1_el1|ID_AA64ISAR1_EL1}} // encoding: [0x29,0x06,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64mmfr0_el1|ID_AA64MMFR0_EL1}} // encoding: [0x09,0x07,0x38,0xd5]
+// CHECK: mrs x9, {{id_aa64mmfr1_el1|ID_AA64MMFR1_EL1}} // encoding: [0x29,0x07,0x38,0xd5]
+// CHECK: mrs x9, {{sctlr_el1|SCTLR_EL1}} // encoding: [0x09,0x10,0x38,0xd5]
+// CHECK: mrs x9, {{sctlr_el2|SCTLR_EL2}} // encoding: [0x09,0x10,0x3c,0xd5]
+// CHECK: mrs x9, {{sctlr_el3|SCTLR_EL3}} // encoding: [0x09,0x10,0x3e,0xd5]
+// CHECK: mrs x9, {{actlr_el1|ACTLR_EL1}} // encoding: [0x29,0x10,0x38,0xd5]
+// CHECK: mrs x9, {{actlr_el2|ACTLR_EL2}} // encoding: [0x29,0x10,0x3c,0xd5]
+// CHECK: mrs x9, {{actlr_el3|ACTLR_EL3}} // encoding: [0x29,0x10,0x3e,0xd5]
+// CHECK: mrs x9, {{cpacr_el1|CPACR_EL1}} // encoding: [0x49,0x10,0x38,0xd5]
+// CHECK: mrs x9, {{hcr_el2|HCR_EL2}} // encoding: [0x09,0x11,0x3c,0xd5]
+// CHECK: mrs x9, {{scr_el3|SCR_EL3}} // encoding: [0x09,0x11,0x3e,0xd5]
+// CHECK: mrs x9, {{mdcr_el2|MDCR_EL2}} // encoding: [0x29,0x11,0x3c,0xd5]
+// CHECK: mrs x9, {{sder32_el3|SDER32_EL3}} // encoding: [0x29,0x11,0x3e,0xd5]
+// CHECK: mrs x9, {{cptr_el2|CPTR_EL2}} // encoding: [0x49,0x11,0x3c,0xd5]
+// CHECK: mrs x9, {{cptr_el3|CPTR_EL3}} // encoding: [0x49,0x11,0x3e,0xd5]
+// CHECK: mrs x9, {{hstr_el2|HSTR_EL2}} // encoding: [0x69,0x11,0x3c,0xd5]
+// CHECK: mrs x9, {{hacr_el2|HACR_EL2}} // encoding: [0xe9,0x11,0x3c,0xd5]
+// CHECK: mrs x9, {{mdcr_el3|MDCR_EL3}} // encoding: [0x29,0x13,0x3e,0xd5]
+// CHECK: mrs x9, {{ttbr0_el1|TTBR0_EL1}} // encoding: [0x09,0x20,0x38,0xd5]
+// CHECK: mrs x9, {{ttbr0_el2|TTBR0_EL2}} // encoding: [0x09,0x20,0x3c,0xd5]
+// CHECK: mrs x9, {{ttbr0_el3|TTBR0_EL3}} // encoding: [0x09,0x20,0x3e,0xd5]
+// CHECK: mrs x9, {{ttbr1_el1|TTBR1_EL1}} // encoding: [0x29,0x20,0x38,0xd5]
+// CHECK: mrs x9, {{tcr_el1|TCR_EL1}} // encoding: [0x49,0x20,0x38,0xd5]
+// CHECK: mrs x9, {{tcr_el2|TCR_EL2}} // encoding: [0x49,0x20,0x3c,0xd5]
+// CHECK: mrs x9, {{tcr_el3|TCR_EL3}} // encoding: [0x49,0x20,0x3e,0xd5]
+// CHECK: mrs x9, {{vttbr_el2|VTTBR_EL2}} // encoding: [0x09,0x21,0x3c,0xd5]
+// CHECK: mrs x9, {{vtcr_el2|VTCR_EL2}} // encoding: [0x49,0x21,0x3c,0xd5]
+// CHECK: mrs x9, {{dacr32_el2|DACR32_EL2}} // encoding: [0x09,0x30,0x3c,0xd5]
+// CHECK: mrs x9, {{spsr_el1|SPSR_EL1}} // encoding: [0x09,0x40,0x38,0xd5]
+// CHECK: mrs x9, {{spsr_el2|SPSR_EL2}} // encoding: [0x09,0x40,0x3c,0xd5]
+// CHECK: mrs x9, {{spsr_el3|SPSR_EL3}} // encoding: [0x09,0x40,0x3e,0xd5]
+// CHECK: mrs x9, {{elr_el1|ELR_EL1}} // encoding: [0x29,0x40,0x38,0xd5]
+// CHECK: mrs x9, {{elr_el2|ELR_EL2}} // encoding: [0x29,0x40,0x3c,0xd5]
+// CHECK: mrs x9, {{elr_el3|ELR_EL3}} // encoding: [0x29,0x40,0x3e,0xd5]
+// CHECK: mrs x9, {{sp_el0|SP_EL0}} // encoding: [0x09,0x41,0x38,0xd5]
+// CHECK: mrs x9, {{sp_el1|SP_EL1}} // encoding: [0x09,0x41,0x3c,0xd5]
+// CHECK: mrs x9, {{sp_el2|SP_EL2}} // encoding: [0x09,0x41,0x3e,0xd5]
+// CHECK: mrs x9, {{spsel|SPSEL}} // encoding: [0x09,0x42,0x38,0xd5]
+// CHECK: mrs x9, {{nzcv|NZCV}} // encoding: [0x09,0x42,0x3b,0xd5]
+// CHECK: mrs x9, {{daif|DAIF}} // encoding: [0x29,0x42,0x3b,0xd5]
+// CHECK: mrs x9, {{currentel|CURRENTEL}} // encoding: [0x49,0x42,0x38,0xd5]
+// CHECK: mrs x9, {{spsr_irq|SPSR_IRQ}} // encoding: [0x09,0x43,0x3c,0xd5]
+// CHECK: mrs x9, {{spsr_abt|SPSR_ABT}} // encoding: [0x29,0x43,0x3c,0xd5]
+// CHECK: mrs x9, {{spsr_und|SPSR_UND}} // encoding: [0x49,0x43,0x3c,0xd5]
+// CHECK: mrs x9, {{spsr_fiq|SPSR_FIQ}} // encoding: [0x69,0x43,0x3c,0xd5]
+// CHECK: mrs x9, {{fpcr|FPCR}} // encoding: [0x09,0x44,0x3b,0xd5]
+// CHECK: mrs x9, {{fpsr|FPSR}} // encoding: [0x29,0x44,0x3b,0xd5]
+// CHECK: mrs x9, {{dspsr_el0|DSPSR_EL0}} // encoding: [0x09,0x45,0x3b,0xd5]
+// CHECK: mrs x9, {{dlr_el0|DLR_EL0}} // encoding: [0x29,0x45,0x3b,0xd5]
+// CHECK: mrs x9, {{ifsr32_el2|IFSR32_EL2}} // encoding: [0x29,0x50,0x3c,0xd5]
+// CHECK: mrs x9, {{afsr0_el1|AFSR0_EL1}} // encoding: [0x09,0x51,0x38,0xd5]
+// CHECK: mrs x9, {{afsr0_el2|AFSR0_EL2}} // encoding: [0x09,0x51,0x3c,0xd5]
+// CHECK: mrs x9, {{afsr0_el3|AFSR0_EL3}} // encoding: [0x09,0x51,0x3e,0xd5]
+// CHECK: mrs x9, {{afsr1_el1|AFSR1_EL1}} // encoding: [0x29,0x51,0x38,0xd5]
+// CHECK: mrs x9, {{afsr1_el2|AFSR1_EL2}} // encoding: [0x29,0x51,0x3c,0xd5]
+// CHECK: mrs x9, {{afsr1_el3|AFSR1_EL3}} // encoding: [0x29,0x51,0x3e,0xd5]
+// CHECK: mrs x9, {{esr_el1|ESR_EL1}} // encoding: [0x09,0x52,0x38,0xd5]
+// CHECK: mrs x9, {{esr_el2|ESR_EL2}} // encoding: [0x09,0x52,0x3c,0xd5]
+// CHECK: mrs x9, {{esr_el3|ESR_EL3}} // encoding: [0x09,0x52,0x3e,0xd5]
+// CHECK: mrs x9, {{fpexc32_el2|FPEXC32_EL2}} // encoding: [0x09,0x53,0x3c,0xd5]
+// CHECK: mrs x9, {{far_el1|FAR_EL1}} // encoding: [0x09,0x60,0x38,0xd5]
+// CHECK: mrs x9, {{far_el2|FAR_EL2}} // encoding: [0x09,0x60,0x3c,0xd5]
+// CHECK: mrs x9, {{far_el3|FAR_EL3}} // encoding: [0x09,0x60,0x3e,0xd5]
+// CHECK: mrs x9, {{hpfar_el2|HPFAR_EL2}} // encoding: [0x89,0x60,0x3c,0xd5]
+// CHECK: mrs x9, {{par_el1|PAR_EL1}} // encoding: [0x09,0x74,0x38,0xd5]
+// CHECK: mrs x9, {{pmcr_el0|PMCR_EL0}} // encoding: [0x09,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmcntenset_el0|PMCNTENSET_EL0}} // encoding: [0x29,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmcntenclr_el0|PMCNTENCLR_EL0}} // encoding: [0x49,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmovsclr_el0|PMOVSCLR_EL0}} // encoding: [0x69,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmselr_el0|PMSELR_EL0}} // encoding: [0xa9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmceid0_el0|PMCEID0_EL0}} // encoding: [0xc9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmceid1_el0|PMCEID1_EL0}} // encoding: [0xe9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, {{pmccntr_el0|PMCCNTR_EL0}} // encoding: [0x09,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, {{pmxevtyper_el0|PMXEVTYPER_EL0}} // encoding: [0x29,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, {{pmxevcntr_el0|PMXEVCNTR_EL0}} // encoding: [0x49,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, {{pmuserenr_el0|PMUSERENR_EL0}} // encoding: [0x09,0x9e,0x3b,0xd5]
+// CHECK: mrs x9, {{pmintenset_el1|PMINTENSET_EL1}} // encoding: [0x29,0x9e,0x38,0xd5]
+// CHECK: mrs x9, {{pmintenclr_el1|PMINTENCLR_EL1}} // encoding: [0x49,0x9e,0x38,0xd5]
+// CHECK: mrs x9, {{pmovsset_el0|PMOVSSET_EL0}} // encoding: [0x69,0x9e,0x3b,0xd5]
+// CHECK: mrs x9, {{mair_el1|MAIR_EL1}} // encoding: [0x09,0xa2,0x38,0xd5]
+// CHECK: mrs x9, {{mair_el2|MAIR_EL2}} // encoding: [0x09,0xa2,0x3c,0xd5]
+// CHECK: mrs x9, {{mair_el3|MAIR_EL3}} // encoding: [0x09,0xa2,0x3e,0xd5]
+// CHECK: mrs x9, {{amair_el1|AMAIR_EL1}} // encoding: [0x09,0xa3,0x38,0xd5]
+// CHECK: mrs x9, {{amair_el2|AMAIR_EL2}} // encoding: [0x09,0xa3,0x3c,0xd5]
+// CHECK: mrs x9, {{amair_el3|AMAIR_EL3}} // encoding: [0x09,0xa3,0x3e,0xd5]
+// CHECK: mrs x9, {{vbar_el1|VBAR_EL1}} // encoding: [0x09,0xc0,0x38,0xd5]
+// CHECK: mrs x9, {{vbar_el2|VBAR_EL2}} // encoding: [0x09,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, {{vbar_el3|VBAR_EL3}} // encoding: [0x09,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, {{rvbar_el1|RVBAR_EL1}} // encoding: [0x29,0xc0,0x38,0xd5]
+// CHECK: mrs x9, {{rvbar_el2|RVBAR_EL2}} // encoding: [0x29,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, {{rvbar_el3|RVBAR_EL3}} // encoding: [0x29,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, {{rmr_el1|RMR_EL1}} // encoding: [0x49,0xc0,0x38,0xd5]
+// CHECK: mrs x9, {{rmr_el2|RMR_EL2}} // encoding: [0x49,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, {{rmr_el3|RMR_EL3}} // encoding: [0x49,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, {{isr_el1|ISR_EL1}} // encoding: [0x09,0xc1,0x38,0xd5]
+// CHECK: mrs x9, {{contextidr_el1|CONTEXTIDR_EL1}} // encoding: [0x29,0xd0,0x38,0xd5]
+// CHECK: mrs x9, {{tpidr_el0|TPIDR_EL0}} // encoding: [0x49,0xd0,0x3b,0xd5]
+// CHECK: mrs x9, {{tpidr_el2|TPIDR_EL2}} // encoding: [0x49,0xd0,0x3c,0xd5]
+// CHECK: mrs x9, {{tpidr_el3|TPIDR_EL3}} // encoding: [0x49,0xd0,0x3e,0xd5]
+// CHECK: mrs x9, {{tpidrro_el0|TPIDRRO_EL0}} // encoding: [0x69,0xd0,0x3b,0xd5]
+// CHECK: mrs x9, {{tpidr_el1|TPIDR_EL1}} // encoding: [0x89,0xd0,0x38,0xd5]
+// CHECK: mrs x9, {{cntfrq_el0|CNTFRQ_EL0}} // encoding: [0x09,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, {{cntpct_el0|CNTPCT_EL0}} // encoding: [0x29,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, {{cntvct_el0|CNTVCT_EL0}} // encoding: [0x49,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, {{cntvoff_el2|CNTVOFF_EL2}} // encoding: [0x69,0xe0,0x3c,0xd5]
+// CHECK: mrs x9, {{cntkctl_el1|CNTKCTL_EL1}} // encoding: [0x09,0xe1,0x38,0xd5]
+// CHECK: mrs x9, {{cnthctl_el2|CNTHCTL_EL2}} // encoding: [0x09,0xe1,0x3c,0xd5]
+// CHECK: mrs x9, {{cntp_tval_el0|CNTP_TVAL_EL0}} // encoding: [0x09,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, {{cnthp_tval_el2|CNTHP_TVAL_EL2}} // encoding: [0x09,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, {{cntps_tval_el1|CNTPS_TVAL_EL1}} // encoding: [0x09,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, {{cntp_ctl_el0|CNTP_CTL_EL0}} // encoding: [0x29,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, {{cnthp_ctl_el2|CNTHP_CTL_EL2}} // encoding: [0x29,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, {{cntps_ctl_el1|CNTPS_CTL_EL1}} // encoding: [0x29,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, {{cntp_cval_el0|CNTP_CVAL_EL0}} // encoding: [0x49,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, {{cnthp_cval_el2|CNTHP_CVAL_EL2}} // encoding: [0x49,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, {{cntps_cval_el1|CNTPS_CVAL_EL1}} // encoding: [0x49,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, {{cntv_tval_el0|CNTV_TVAL_EL0}} // encoding: [0x09,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, {{cntv_ctl_el0|CNTV_CTL_EL0}} // encoding: [0x29,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, {{cntv_cval_el0|CNTV_CVAL_EL0}} // encoding: [0x49,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr0_el0|PMEVCNTR0_EL0}} // encoding: [0x09,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr1_el0|PMEVCNTR1_EL0}} // encoding: [0x29,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr2_el0|PMEVCNTR2_EL0}} // encoding: [0x49,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr3_el0|PMEVCNTR3_EL0}} // encoding: [0x69,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr4_el0|PMEVCNTR4_EL0}} // encoding: [0x89,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr5_el0|PMEVCNTR5_EL0}} // encoding: [0xa9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr6_el0|PMEVCNTR6_EL0}} // encoding: [0xc9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr7_el0|PMEVCNTR7_EL0}} // encoding: [0xe9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr8_el0|PMEVCNTR8_EL0}} // encoding: [0x09,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr9_el0|PMEVCNTR9_EL0}} // encoding: [0x29,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr10_el0|PMEVCNTR10_EL0}} // encoding: [0x49,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr11_el0|PMEVCNTR11_EL0}} // encoding: [0x69,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr12_el0|PMEVCNTR12_EL0}} // encoding: [0x89,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr13_el0|PMEVCNTR13_EL0}} // encoding: [0xa9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr14_el0|PMEVCNTR14_EL0}} // encoding: [0xc9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr15_el0|PMEVCNTR15_EL0}} // encoding: [0xe9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr16_el0|PMEVCNTR16_EL0}} // encoding: [0x09,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr17_el0|PMEVCNTR17_EL0}} // encoding: [0x29,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr18_el0|PMEVCNTR18_EL0}} // encoding: [0x49,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr19_el0|PMEVCNTR19_EL0}} // encoding: [0x69,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr20_el0|PMEVCNTR20_EL0}} // encoding: [0x89,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr21_el0|PMEVCNTR21_EL0}} // encoding: [0xa9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr22_el0|PMEVCNTR22_EL0}} // encoding: [0xc9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr23_el0|PMEVCNTR23_EL0}} // encoding: [0xe9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr24_el0|PMEVCNTR24_EL0}} // encoding: [0x09,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr25_el0|PMEVCNTR25_EL0}} // encoding: [0x29,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr26_el0|PMEVCNTR26_EL0}} // encoding: [0x49,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr27_el0|PMEVCNTR27_EL0}} // encoding: [0x69,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr28_el0|PMEVCNTR28_EL0}} // encoding: [0x89,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr29_el0|PMEVCNTR29_EL0}} // encoding: [0xa9,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevcntr30_el0|PMEVCNTR30_EL0}} // encoding: [0xc9,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, {{pmccfiltr_el0|PMCCFILTR_EL0}} // encoding: [0xe9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper0_el0|PMEVTYPER0_EL0}} // encoding: [0x09,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper1_el0|PMEVTYPER1_EL0}} // encoding: [0x29,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper2_el0|PMEVTYPER2_EL0}} // encoding: [0x49,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper3_el0|PMEVTYPER3_EL0}} // encoding: [0x69,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper4_el0|PMEVTYPER4_EL0}} // encoding: [0x89,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper5_el0|PMEVTYPER5_EL0}} // encoding: [0xa9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper6_el0|PMEVTYPER6_EL0}} // encoding: [0xc9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper7_el0|PMEVTYPER7_EL0}} // encoding: [0xe9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper8_el0|PMEVTYPER8_EL0}} // encoding: [0x09,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper9_el0|PMEVTYPER9_EL0}} // encoding: [0x29,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper10_el0|PMEVTYPER10_EL0}} // encoding: [0x49,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper11_el0|PMEVTYPER11_EL0}} // encoding: [0x69,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper12_el0|PMEVTYPER12_EL0}} // encoding: [0x89,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper13_el0|PMEVTYPER13_EL0}} // encoding: [0xa9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper14_el0|PMEVTYPER14_EL0}} // encoding: [0xc9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper15_el0|PMEVTYPER15_EL0}} // encoding: [0xe9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper16_el0|PMEVTYPER16_EL0}} // encoding: [0x09,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper17_el0|PMEVTYPER17_EL0}} // encoding: [0x29,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper18_el0|PMEVTYPER18_EL0}} // encoding: [0x49,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper19_el0|PMEVTYPER19_EL0}} // encoding: [0x69,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper20_el0|PMEVTYPER20_EL0}} // encoding: [0x89,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper21_el0|PMEVTYPER21_EL0}} // encoding: [0xa9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper22_el0|PMEVTYPER22_EL0}} // encoding: [0xc9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper23_el0|PMEVTYPER23_EL0}} // encoding: [0xe9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper24_el0|PMEVTYPER24_EL0}} // encoding: [0x09,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper25_el0|PMEVTYPER25_EL0}} // encoding: [0x29,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper26_el0|PMEVTYPER26_EL0}} // encoding: [0x49,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper27_el0|PMEVTYPER27_EL0}} // encoding: [0x69,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper28_el0|PMEVTYPER28_EL0}} // encoding: [0x89,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper29_el0|PMEVTYPER29_EL0}} // encoding: [0xa9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, {{pmevtyper30_el0|PMEVTYPER30_EL0}} // encoding: [0xc9,0xef,0x3b,0xd5]
mrs x12, s3_7_c15_c1_5
mrs x13, s3_2_c11_c15_7
msr s3_0_c15_c0_0, x12
msr s3_7_c11_c13_7, x5
-// CHECK: mrs x12, s3_7_c15_c1_5 // encoding: [0xac,0xf1,0x3f,0xd5]
-// CHECK: mrs x13, s3_2_c11_c15_7 // encoding: [0xed,0xbf,0x3a,0xd5]
-// CHECK: msr s3_0_c15_c0_0, x12 // encoding: [0x0c,0xf0,0x18,0xd5]
-// CHECK: msr s3_7_c11_c13_7, x5 // encoding: [0xe5,0xbd,0x1f,0xd5]
+// CHECK: mrs x12, {{s3_7_c15_c1_5|S3_7_C15_C1_5}} // encoding: [0xac,0xf1,0x3f,0xd5]
+// CHECK: mrs x13, {{s3_2_c11_c15_7|S3_2_C11_C15_7}} // encoding: [0xed,0xbf,0x3a,0xd5]
+// CHECK: msr {{s3_0_c15_c0_0|S3_0_C15_C0_0}}, x12 // encoding: [0x0c,0xf0,0x18,0xd5]
+// CHECK: msr {{s3_7_c11_c13_7|S3_7_C11_C13_7}}, x5 // encoding: [0xe5,0xbd,0x1f,0xd5]
//------------------------------------------------------------------------------
// Test & branch (immediate)
@@ -4814,22 +4781,25 @@ _func:
tbz x5, #0, somewhere
tbz xzr, #63, elsewhere
tbnz x5, #45, nowhere
-// CHECK: tbz x5, #0, somewhere // encoding: [0x05'A',A,A,0x36'A']
-// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_tstbr
-// CHECK: tbz xzr, #63, elsewhere // encoding: [0x1f'A',A,0xf8'A',0xb6'A']
-// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_tstbr
-// CHECK: tbnz x5, #45, nowhere // encoding: [0x05'A',A,0x68'A',0xb7'A']
-// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+
+// CHECK: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36]
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_pcrel_branch14
+// CHECK: tbz xzr, #63, elsewhere // encoding: [0bAAA11111,A,0b11111AAA,0xb6]
+// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_aarch64_pcrel_branch14
+// CHECK: tbnz x5, #45, nowhere // encoding: [0bAAA00101,A,0b01101AAA,0xb7]
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_pcrel_branch14
+
tbnz w3, #2, there
tbnz wzr, #31, nowhere
tbz w5, #12, anywhere
-// CHECK: tbnz w3, #2, there // encoding: [0x03'A',A,0x10'A',0x37'A']
-// CHECK: // fixup A - offset: 0, value: there, kind: fixup_a64_tstbr
-// CHECK: tbnz wzr, #31, nowhere // encoding: [0x1f'A',A,0xf8'A',0x37'A']
-// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
-// CHECK: tbz w5, #12, anywhere // encoding: [0x05'A',A,0x60'A',0x36'A']
-// CHECK: // fixup A - offset: 0, value: anywhere, kind: fixup_a64_tstbr
+
+// CHECK: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37]
+// CHECK: // fixup A - offset: 0, value: there, kind: fixup_aarch64_pcrel_branch14
+// CHECK: tbnz wzr, #31, nowhere // encoding: [0bAAA11111,A,0b11111AAA,0x37]
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_aarch64_pcrel_branch14
+// CHECK: tbz w5, #12, anywhere // encoding: [0bAAA00101,A,0b01100AAA,0x36]
+// CHECK: // fixup A - offset: 0, value: anywhere, kind: fixup_aarch64_pcrel_branch14
//------------------------------------------------------------------------------
// Unconditional branch (immediate)
@@ -4837,10 +4807,11 @@ _func:
b somewhere
bl elsewhere
-// CHECK: b somewhere // encoding: [A,A,A,0x14'A']
-// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_uncondbr
-// CHECK: bl elsewhere // encoding: [A,A,A,0x94'A']
-// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_call
+
+// CHECK: b somewhere // encoding: [A,A,A,0b000101AA]
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_aarch64_pcrel_branch26
+// CHECK: bl elsewhere // encoding: [A,A,A,0b100101AA]
+// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_aarch64_pcrel_call26
b #4
bl #0
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
index bc43113..7d031e6 100644
--- a/test/MC/AArch64/elf-globaladdress.ll
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -3,7 +3,7 @@
; Also take it on a round-trip through llvm-mc to stretch assembly-parsing's legs:
;; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | \
-;; RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -o - | \
+;; RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -o - | \
;; RUN: llvm-readobj -h -r | FileCheck -check-prefix=OBJ %s
@var8 = global i8 0
diff --git a/test/MC/AArch64/elf-reloc-addend.s b/test/MC/AArch64/elf-reloc-addend.s
deleted file mode 100644
index 0e7e2ca..0000000
--- a/test/MC/AArch64/elf-reloc-addend.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=aarch64-linux-gnu -r - | FileCheck %s
-
- add x0, x4, #:lo12:sym
-// CHECK: 0 R_AARCH64_ADD_ABS_LO12_NC sym
- add x3, x5, #:lo12:sym+1
-// CHECK: 4 R_AARCH64_ADD_ABS_LO12_NC sym+1
- add x3, x5, #:lo12:sym-1
-// CHECK: 8 R_AARCH64_ADD_ABS_LO12_NC sym-1
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
deleted file mode 100644
index b70dfa7..0000000
--- a/test/MC/AArch64/elf-reloc-condbr.s
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
-// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s
-
- b.eq somewhere
-
-// OBJ: Relocations [
-// OBJ-NEXT: Section (2) .rela.text {
-// OBJ-NEXT: 0x0 R_AARCH64_CONDBR19 somewhere 0x0
-// OBJ-NEXT: }
-// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s
index f777651..0f5742e 100644
--- a/test/MC/AArch64/gicv3-regs.s
+++ b/test/MC/AArch64/gicv3-regs.s
@@ -56,62 +56,62 @@
mrs x8, ich_lr13_el2
mrs x2, ich_lr14_el2
mrs x8, ich_lr15_el2
-// CHECK: mrs x8, icc_iar1_el1 // encoding: [0x08,0xcc,0x38,0xd5]
-// CHECK: mrs x26, icc_iar0_el1 // encoding: [0x1a,0xc8,0x38,0xd5]
-// CHECK: mrs x2, icc_hppir1_el1 // encoding: [0x42,0xcc,0x38,0xd5]
-// CHECK: mrs x17, icc_hppir0_el1 // encoding: [0x51,0xc8,0x38,0xd5]
-// CHECK: mrs x29, icc_rpr_el1 // encoding: [0x7d,0xcb,0x38,0xd5]
-// CHECK: mrs x4, ich_vtr_el2 // encoding: [0x24,0xcb,0x3c,0xd5]
-// CHECK: mrs x24, ich_eisr_el2 // encoding: [0x78,0xcb,0x3c,0xd5]
-// CHECK: mrs x9, ich_elsr_el2 // encoding: [0xa9,0xcb,0x3c,0xd5]
-// CHECK: mrs x24, icc_bpr1_el1 // encoding: [0x78,0xcc,0x38,0xd5]
-// CHECK: mrs x14, icc_bpr0_el1 // encoding: [0x6e,0xc8,0x38,0xd5]
-// CHECK: mrs x19, icc_pmr_el1 // encoding: [0x13,0x46,0x38,0xd5]
-// CHECK: mrs x23, icc_ctlr_el1 // encoding: [0x97,0xcc,0x38,0xd5]
-// CHECK: mrs x20, icc_ctlr_el3 // encoding: [0x94,0xcc,0x3e,0xd5]
-// CHECK: mrs x28, icc_sre_el1 // encoding: [0xbc,0xcc,0x38,0xd5]
-// CHECK: mrs x25, icc_sre_el2 // encoding: [0xb9,0xc9,0x3c,0xd5]
-// CHECK: mrs x8, icc_sre_el3 // encoding: [0xa8,0xcc,0x3e,0xd5]
-// CHECK: mrs x22, icc_igrpen0_el1 // encoding: [0xd6,0xcc,0x38,0xd5]
-// CHECK: mrs x5, icc_igrpen1_el1 // encoding: [0xe5,0xcc,0x38,0xd5]
-// CHECK: mrs x7, icc_igrpen1_el3 // encoding: [0xe7,0xcc,0x3e,0xd5]
-// CHECK: mrs x22, icc_seien_el1 // encoding: [0x16,0xcd,0x38,0xd5]
-// CHECK: mrs x4, icc_ap0r0_el1 // encoding: [0x84,0xc8,0x38,0xd5]
-// CHECK: mrs x11, icc_ap0r1_el1 // encoding: [0xab,0xc8,0x38,0xd5]
-// CHECK: mrs x27, icc_ap0r2_el1 // encoding: [0xdb,0xc8,0x38,0xd5]
-// CHECK: mrs x21, icc_ap0r3_el1 // encoding: [0xf5,0xc8,0x38,0xd5]
-// CHECK: mrs x2, icc_ap1r0_el1 // encoding: [0x02,0xc9,0x38,0xd5]
-// CHECK: mrs x21, icc_ap1r1_el1 // encoding: [0x35,0xc9,0x38,0xd5]
-// CHECK: mrs x10, icc_ap1r2_el1 // encoding: [0x4a,0xc9,0x38,0xd5]
-// CHECK: mrs x27, icc_ap1r3_el1 // encoding: [0x7b,0xc9,0x38,0xd5]
-// CHECK: mrs x20, ich_ap0r0_el2 // encoding: [0x14,0xc8,0x3c,0xd5]
-// CHECK: mrs x21, ich_ap0r1_el2 // encoding: [0x35,0xc8,0x3c,0xd5]
-// CHECK: mrs x5, ich_ap0r2_el2 // encoding: [0x45,0xc8,0x3c,0xd5]
-// CHECK: mrs x4, ich_ap0r3_el2 // encoding: [0x64,0xc8,0x3c,0xd5]
-// CHECK: mrs x15, ich_ap1r0_el2 // encoding: [0x0f,0xc9,0x3c,0xd5]
-// CHECK: mrs x12, ich_ap1r1_el2 // encoding: [0x2c,0xc9,0x3c,0xd5]
-// CHECK: mrs x27, ich_ap1r2_el2 // encoding: [0x5b,0xc9,0x3c,0xd5]
-// CHECK: mrs x20, ich_ap1r3_el2 // encoding: [0x74,0xc9,0x3c,0xd5]
-// CHECK: mrs x10, ich_hcr_el2 // encoding: [0x0a,0xcb,0x3c,0xd5]
-// CHECK: mrs x27, ich_misr_el2 // encoding: [0x5b,0xcb,0x3c,0xd5]
-// CHECK: mrs x6, ich_vmcr_el2 // encoding: [0xe6,0xcb,0x3c,0xd5]
-// CHECK: mrs x19, ich_vseir_el2 // encoding: [0x93,0xc9,0x3c,0xd5]
-// CHECK: mrs x3, ich_lr0_el2 // encoding: [0x03,0xcc,0x3c,0xd5]
-// CHECK: mrs x1, ich_lr1_el2 // encoding: [0x21,0xcc,0x3c,0xd5]
-// CHECK: mrs x22, ich_lr2_el2 // encoding: [0x56,0xcc,0x3c,0xd5]
-// CHECK: mrs x21, ich_lr3_el2 // encoding: [0x75,0xcc,0x3c,0xd5]
-// CHECK: mrs x6, ich_lr4_el2 // encoding: [0x86,0xcc,0x3c,0xd5]
-// CHECK: mrs x10, ich_lr5_el2 // encoding: [0xaa,0xcc,0x3c,0xd5]
-// CHECK: mrs x11, ich_lr6_el2 // encoding: [0xcb,0xcc,0x3c,0xd5]
-// CHECK: mrs x12, ich_lr7_el2 // encoding: [0xec,0xcc,0x3c,0xd5]
-// CHECK: mrs x0, ich_lr8_el2 // encoding: [0x00,0xcd,0x3c,0xd5]
-// CHECK: mrs x21, ich_lr9_el2 // encoding: [0x35,0xcd,0x3c,0xd5]
-// CHECK: mrs x13, ich_lr10_el2 // encoding: [0x4d,0xcd,0x3c,0xd5]
-// CHECK: mrs x26, ich_lr11_el2 // encoding: [0x7a,0xcd,0x3c,0xd5]
-// CHECK: mrs x1, ich_lr12_el2 // encoding: [0x81,0xcd,0x3c,0xd5]
-// CHECK: mrs x8, ich_lr13_el2 // encoding: [0xa8,0xcd,0x3c,0xd5]
-// CHECK: mrs x2, ich_lr14_el2 // encoding: [0xc2,0xcd,0x3c,0xd5]
-// CHECK: mrs x8, ich_lr15_el2 // encoding: [0xe8,0xcd,0x3c,0xd5]
+// CHECK: mrs x8, {{icc_iar1_el1|ICC_IAR1_EL1}} // encoding: [0x08,0xcc,0x38,0xd5]
+// CHECK: mrs x26, {{icc_iar0_el1|ICC_IAR0_EL1}} // encoding: [0x1a,0xc8,0x38,0xd5]
+// CHECK: mrs x2, {{icc_hppir1_el1|ICC_HPPIR1_EL1}} // encoding: [0x42,0xcc,0x38,0xd5]
+// CHECK: mrs x17, {{icc_hppir0_el1|ICC_HPPIR0_EL1}} // encoding: [0x51,0xc8,0x38,0xd5]
+// CHECK: mrs x29, {{icc_rpr_el1|ICC_RPR_EL1}} // encoding: [0x7d,0xcb,0x38,0xd5]
+// CHECK: mrs x4, {{ich_vtr_el2|ICH_VTR_EL2}} // encoding: [0x24,0xcb,0x3c,0xd5]
+// CHECK: mrs x24, {{ich_eisr_el2|ICH_EISR_EL2}} // encoding: [0x78,0xcb,0x3c,0xd5]
+// CHECK: mrs x9, {{ich_elsr_el2|ICH_ELSR_EL2}} // encoding: [0xa9,0xcb,0x3c,0xd5]
+// CHECK: mrs x24, {{icc_bpr1_el1|ICC_BPR1_EL1}} // encoding: [0x78,0xcc,0x38,0xd5]
+// CHECK: mrs x14, {{icc_bpr0_el1|ICC_BPR0_EL1}} // encoding: [0x6e,0xc8,0x38,0xd5]
+// CHECK: mrs x19, {{icc_pmr_el1|ICC_PMR_EL1}} // encoding: [0x13,0x46,0x38,0xd5]
+// CHECK: mrs x23, {{icc_ctlr_el1|ICC_CTLR_EL1}} // encoding: [0x97,0xcc,0x38,0xd5]
+// CHECK: mrs x20, {{icc_ctlr_el3|ICC_CTLR_EL3}} // encoding: [0x94,0xcc,0x3e,0xd5]
+// CHECK: mrs x28, {{icc_sre_el1|ICC_SRE_EL1}} // encoding: [0xbc,0xcc,0x38,0xd5]
+// CHECK: mrs x25, {{icc_sre_el2|ICC_SRE_EL2}} // encoding: [0xb9,0xc9,0x3c,0xd5]
+// CHECK: mrs x8, {{icc_sre_el3|ICC_SRE_EL3}} // encoding: [0xa8,0xcc,0x3e,0xd5]
+// CHECK: mrs x22, {{icc_igrpen0_el1|ICC_IGRPEN0_EL1}} // encoding: [0xd6,0xcc,0x38,0xd5]
+// CHECK: mrs x5, {{icc_igrpen1_el1|ICC_IGRPEN1_EL1}} // encoding: [0xe5,0xcc,0x38,0xd5]
+// CHECK: mrs x7, {{icc_igrpen1_el3|ICC_IGRPEN1_EL3}} // encoding: [0xe7,0xcc,0x3e,0xd5]
+// CHECK: mrs x22, {{icc_seien_el1|ICC_SEIEN_EL1}} // encoding: [0x16,0xcd,0x38,0xd5]
+// CHECK: mrs x4, {{icc_ap0r0_el1|ICC_AP0R0_EL1}} // encoding: [0x84,0xc8,0x38,0xd5]
+// CHECK: mrs x11, {{icc_ap0r1_el1|ICC_AP0R1_EL1}} // encoding: [0xab,0xc8,0x38,0xd5]
+// CHECK: mrs x27, {{icc_ap0r2_el1|ICC_AP0R2_EL1}} // encoding: [0xdb,0xc8,0x38,0xd5]
+// CHECK: mrs x21, {{icc_ap0r3_el1|ICC_AP0R3_EL1}} // encoding: [0xf5,0xc8,0x38,0xd5]
+// CHECK: mrs x2, {{icc_ap1r0_el1|ICC_AP1R0_EL1}} // encoding: [0x02,0xc9,0x38,0xd5]
+// CHECK: mrs x21, {{icc_ap1r1_el1|ICC_AP1R1_EL1}} // encoding: [0x35,0xc9,0x38,0xd5]
+// CHECK: mrs x10, {{icc_ap1r2_el1|ICC_AP1R2_EL1}} // encoding: [0x4a,0xc9,0x38,0xd5]
+// CHECK: mrs x27, {{icc_ap1r3_el1|ICC_AP1R3_EL1}} // encoding: [0x7b,0xc9,0x38,0xd5]
+// CHECK: mrs x20, {{ich_ap0r0_el2|ICH_AP0R0_EL2}} // encoding: [0x14,0xc8,0x3c,0xd5]
+// CHECK: mrs x21, {{ich_ap0r1_el2|ICH_AP0R1_EL2}} // encoding: [0x35,0xc8,0x3c,0xd5]
+// CHECK: mrs x5, {{ich_ap0r2_el2|ICH_AP0R2_EL2}} // encoding: [0x45,0xc8,0x3c,0xd5]
+// CHECK: mrs x4, {{ich_ap0r3_el2|ICH_AP0R3_EL2}} // encoding: [0x64,0xc8,0x3c,0xd5]
+// CHECK: mrs x15, {{ich_ap1r0_el2|ICH_AP1R0_EL2}} // encoding: [0x0f,0xc9,0x3c,0xd5]
+// CHECK: mrs x12, {{ich_ap1r1_el2|ICH_AP1R1_EL2}} // encoding: [0x2c,0xc9,0x3c,0xd5]
+// CHECK: mrs x27, {{ich_ap1r2_el2|ICH_AP1R2_EL2}} // encoding: [0x5b,0xc9,0x3c,0xd5]
+// CHECK: mrs x20, {{ich_ap1r3_el2|ICH_AP1R3_EL2}} // encoding: [0x74,0xc9,0x3c,0xd5]
+// CHECK: mrs x10, {{ich_hcr_el2|ICH_HCR_EL2}} // encoding: [0x0a,0xcb,0x3c,0xd5]
+// CHECK: mrs x27, {{ich_misr_el2|ICH_MISR_EL2}} // encoding: [0x5b,0xcb,0x3c,0xd5]
+// CHECK: mrs x6, {{ich_vmcr_el2|ICH_VMCR_EL2}} // encoding: [0xe6,0xcb,0x3c,0xd5]
+// CHECK: mrs x19, {{ich_vseir_el2|ICH_VSEIR_EL2}} // encoding: [0x93,0xc9,0x3c,0xd5]
+// CHECK: mrs x3, {{ich_lr0_el2|ICH_LR0_EL2}} // encoding: [0x03,0xcc,0x3c,0xd5]
+// CHECK: mrs x1, {{ich_lr1_el2|ICH_LR1_EL2}} // encoding: [0x21,0xcc,0x3c,0xd5]
+// CHECK: mrs x22, {{ich_lr2_el2|ICH_LR2_EL2}} // encoding: [0x56,0xcc,0x3c,0xd5]
+// CHECK: mrs x21, {{ich_lr3_el2|ICH_LR3_EL2}} // encoding: [0x75,0xcc,0x3c,0xd5]
+// CHECK: mrs x6, {{ich_lr4_el2|ICH_LR4_EL2}} // encoding: [0x86,0xcc,0x3c,0xd5]
+// CHECK: mrs x10, {{ich_lr5_el2|ICH_LR5_EL2}} // encoding: [0xaa,0xcc,0x3c,0xd5]
+// CHECK: mrs x11, {{ich_lr6_el2|ICH_LR6_EL2}} // encoding: [0xcb,0xcc,0x3c,0xd5]
+// CHECK: mrs x12, {{ich_lr7_el2|ICH_LR7_EL2}} // encoding: [0xec,0xcc,0x3c,0xd5]
+// CHECK: mrs x0, {{ich_lr8_el2|ICH_LR8_EL2}} // encoding: [0x00,0xcd,0x3c,0xd5]
+// CHECK: mrs x21, {{ich_lr9_el2|ICH_LR9_EL2}} // encoding: [0x35,0xcd,0x3c,0xd5]
+// CHECK: mrs x13, {{ich_lr10_el2|ICH_LR10_EL2}} // encoding: [0x4d,0xcd,0x3c,0xd5]
+// CHECK: mrs x26, {{ich_lr11_el2|ICH_LR11_EL2}} // encoding: [0x7a,0xcd,0x3c,0xd5]
+// CHECK: mrs x1, {{ich_lr12_el2|ICH_LR12_EL2}} // encoding: [0x81,0xcd,0x3c,0xd5]
+// CHECK: mrs x8, {{ich_lr13_el2|ICH_LR13_EL2}} // encoding: [0xa8,0xcd,0x3c,0xd5]
+// CHECK: mrs x2, {{ich_lr14_el2|ICH_LR14_EL2}} // encoding: [0xc2,0xcd,0x3c,0xd5]
+// CHECK: mrs x8, {{ich_lr15_el2|ICH_LR15_EL2}} // encoding: [0xe8,0xcd,0x3c,0xd5]
msr icc_eoir1_el1, x27
msr icc_eoir0_el1, x5
@@ -167,57 +167,57 @@
msr ich_lr13_el2, x2
msr ich_lr14_el2, x13
msr ich_lr15_el2, x27
-// CHECK: msr icc_eoir1_el1, x27 // encoding: [0x3b,0xcc,0x18,0xd5]
-// CHECK: msr icc_eoir0_el1, x5 // encoding: [0x25,0xc8,0x18,0xd5]
-// CHECK: msr icc_dir_el1, x13 // encoding: [0x2d,0xcb,0x18,0xd5]
-// CHECK: msr icc_sgi1r_el1, x21 // encoding: [0xb5,0xcb,0x18,0xd5]
-// CHECK: msr icc_asgi1r_el1, x25 // encoding: [0xd9,0xcb,0x18,0xd5]
-// CHECK: msr icc_sgi0r_el1, x28 // encoding: [0xfc,0xcb,0x18,0xd5]
-// CHECK: msr icc_bpr1_el1, x7 // encoding: [0x67,0xcc,0x18,0xd5]
-// CHECK: msr icc_bpr0_el1, x9 // encoding: [0x69,0xc8,0x18,0xd5]
-// CHECK: msr icc_pmr_el1, x29 // encoding: [0x1d,0x46,0x18,0xd5]
-// CHECK: msr icc_ctlr_el1, x24 // encoding: [0x98,0xcc,0x18,0xd5]
-// CHECK: msr icc_ctlr_el3, x0 // encoding: [0x80,0xcc,0x1e,0xd5]
-// CHECK: msr icc_sre_el1, x2 // encoding: [0xa2,0xcc,0x18,0xd5]
-// CHECK: msr icc_sre_el2, x5 // encoding: [0xa5,0xc9,0x1c,0xd5]
-// CHECK: msr icc_sre_el3, x10 // encoding: [0xaa,0xcc,0x1e,0xd5]
-// CHECK: msr icc_igrpen0_el1, x22 // encoding: [0xd6,0xcc,0x18,0xd5]
-// CHECK: msr icc_igrpen1_el1, x11 // encoding: [0xeb,0xcc,0x18,0xd5]
-// CHECK: msr icc_igrpen1_el3, x8 // encoding: [0xe8,0xcc,0x1e,0xd5]
-// CHECK: msr icc_seien_el1, x4 // encoding: [0x04,0xcd,0x18,0xd5]
-// CHECK: msr icc_ap0r0_el1, x27 // encoding: [0x9b,0xc8,0x18,0xd5]
-// CHECK: msr icc_ap0r1_el1, x5 // encoding: [0xa5,0xc8,0x18,0xd5]
-// CHECK: msr icc_ap0r2_el1, x20 // encoding: [0xd4,0xc8,0x18,0xd5]
-// CHECK: msr icc_ap0r3_el1, x0 // encoding: [0xe0,0xc8,0x18,0xd5]
-// CHECK: msr icc_ap1r0_el1, x2 // encoding: [0x02,0xc9,0x18,0xd5]
-// CHECK: msr icc_ap1r1_el1, x29 // encoding: [0x3d,0xc9,0x18,0xd5]
-// CHECK: msr icc_ap1r2_el1, x23 // encoding: [0x57,0xc9,0x18,0xd5]
-// CHECK: msr icc_ap1r3_el1, x11 // encoding: [0x6b,0xc9,0x18,0xd5]
-// CHECK: msr ich_ap0r0_el2, x2 // encoding: [0x02,0xc8,0x1c,0xd5]
-// CHECK: msr ich_ap0r1_el2, x27 // encoding: [0x3b,0xc8,0x1c,0xd5]
-// CHECK: msr ich_ap0r2_el2, x7 // encoding: [0x47,0xc8,0x1c,0xd5]
-// CHECK: msr ich_ap0r3_el2, x1 // encoding: [0x61,0xc8,0x1c,0xd5]
-// CHECK: msr ich_ap1r0_el2, x7 // encoding: [0x07,0xc9,0x1c,0xd5]
-// CHECK: msr ich_ap1r1_el2, x12 // encoding: [0x2c,0xc9,0x1c,0xd5]
-// CHECK: msr ich_ap1r2_el2, x14 // encoding: [0x4e,0xc9,0x1c,0xd5]
-// CHECK: msr ich_ap1r3_el2, x13 // encoding: [0x6d,0xc9,0x1c,0xd5]
-// CHECK: msr ich_hcr_el2, x1 // encoding: [0x01,0xcb,0x1c,0xd5]
-// CHECK: msr ich_misr_el2, x10 // encoding: [0x4a,0xcb,0x1c,0xd5]
-// CHECK: msr ich_vmcr_el2, x24 // encoding: [0xf8,0xcb,0x1c,0xd5]
-// CHECK: msr ich_vseir_el2, x29 // encoding: [0x9d,0xc9,0x1c,0xd5]
-// CHECK: msr ich_lr0_el2, x26 // encoding: [0x1a,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr1_el2, x9 // encoding: [0x29,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr2_el2, x18 // encoding: [0x52,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr3_el2, x26 // encoding: [0x7a,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr4_el2, x22 // encoding: [0x96,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr5_el2, x26 // encoding: [0xba,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr6_el2, x27 // encoding: [0xdb,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr7_el2, x8 // encoding: [0xe8,0xcc,0x1c,0xd5]
-// CHECK: msr ich_lr8_el2, x17 // encoding: [0x11,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr9_el2, x19 // encoding: [0x33,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr10_el2, x17 // encoding: [0x51,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr11_el2, x5 // encoding: [0x65,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr12_el2, x29 // encoding: [0x9d,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr13_el2, x2 // encoding: [0xa2,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr14_el2, x13 // encoding: [0xcd,0xcd,0x1c,0xd5]
-// CHECK: msr ich_lr15_el2, x27 // encoding: [0xfb,0xcd,0x1c,0xd5]
+// CHECK: msr {{icc_eoir1_el1|ICC_EOIR1_EL1}}, x27 // encoding: [0x3b,0xcc,0x18,0xd5]
+// CHECK: msr {{icc_eoir0_el1|ICC_EOIR0_EL1}}, x5 // encoding: [0x25,0xc8,0x18,0xd5]
+// CHECK: msr {{icc_dir_el1|ICC_DIR_EL1}}, x13 // encoding: [0x2d,0xcb,0x18,0xd5]
+// CHECK: msr {{icc_sgi1r_el1|ICC_SGI1R_EL1}}, x21 // encoding: [0xb5,0xcb,0x18,0xd5]
+// CHECK: msr {{icc_asgi1r_el1|ICC_ASGI1R_EL1}}, x25 // encoding: [0xd9,0xcb,0x18,0xd5]
+// CHECK: msr {{icc_sgi0r_el1|ICC_SGI0R_EL1}}, x28 // encoding: [0xfc,0xcb,0x18,0xd5]
+// CHECK: msr {{icc_bpr1_el1|ICC_BPR1_EL1}}, x7 // encoding: [0x67,0xcc,0x18,0xd5]
+// CHECK: msr {{icc_bpr0_el1|ICC_BPR0_EL1}}, x9 // encoding: [0x69,0xc8,0x18,0xd5]
+// CHECK: msr {{icc_pmr_el1|ICC_PMR_EL1}}, x29 // encoding: [0x1d,0x46,0x18,0xd5]
+// CHECK: msr {{icc_ctlr_el1|ICC_CTLR_EL1}}, x24 // encoding: [0x98,0xcc,0x18,0xd5]
+// CHECK: msr {{icc_ctlr_el3|ICC_CTLR_EL3}}, x0 // encoding: [0x80,0xcc,0x1e,0xd5]
+// CHECK: msr {{icc_sre_el1|ICC_SRE_EL1}}, x2 // encoding: [0xa2,0xcc,0x18,0xd5]
+// CHECK: msr {{icc_sre_el2|ICC_SRE_EL2}}, x5 // encoding: [0xa5,0xc9,0x1c,0xd5]
+// CHECK: msr {{icc_sre_el3|ICC_SRE_EL3}}, x10 // encoding: [0xaa,0xcc,0x1e,0xd5]
+// CHECK: msr {{icc_igrpen0_el1|ICC_IGRPEN0_EL1}}, x22 // encoding: [0xd6,0xcc,0x18,0xd5]
+// CHECK: msr {{icc_igrpen1_el1|ICC_IGRPEN1_EL1}}, x11 // encoding: [0xeb,0xcc,0x18,0xd5]
+// CHECK: msr {{icc_igrpen1_el3|ICC_IGRPEN1_EL3}}, x8 // encoding: [0xe8,0xcc,0x1e,0xd5]
+// CHECK: msr {{icc_seien_el1|ICC_SEIEN_EL1}}, x4 // encoding: [0x04,0xcd,0x18,0xd5]
+// CHECK: msr {{icc_ap0r0_el1|ICC_AP0R0_EL1}}, x27 // encoding: [0x9b,0xc8,0x18,0xd5]
+// CHECK: msr {{icc_ap0r1_el1|ICC_AP0R1_EL1}}, x5 // encoding: [0xa5,0xc8,0x18,0xd5]
+// CHECK: msr {{icc_ap0r2_el1|ICC_AP0R2_EL1}}, x20 // encoding: [0xd4,0xc8,0x18,0xd5]
+// CHECK: msr {{icc_ap0r3_el1|ICC_AP0R3_EL1}}, x0 // encoding: [0xe0,0xc8,0x18,0xd5]
+// CHECK: msr {{icc_ap1r0_el1|ICC_AP1R0_EL1}}, x2 // encoding: [0x02,0xc9,0x18,0xd5]
+// CHECK: msr {{icc_ap1r1_el1|ICC_AP1R1_EL1}}, x29 // encoding: [0x3d,0xc9,0x18,0xd5]
+// CHECK: msr {{icc_ap1r2_el1|ICC_AP1R2_EL1}}, x23 // encoding: [0x57,0xc9,0x18,0xd5]
+// CHECK: msr {{icc_ap1r3_el1|ICC_AP1R3_EL1}}, x11 // encoding: [0x6b,0xc9,0x18,0xd5]
+// CHECK: msr {{ich_ap0r0_el2|ICH_AP0R0_EL2}}, x2 // encoding: [0x02,0xc8,0x1c,0xd5]
+// CHECK: msr {{ich_ap0r1_el2|ICH_AP0R1_EL2}}, x27 // encoding: [0x3b,0xc8,0x1c,0xd5]
+// CHECK: msr {{ich_ap0r2_el2|ICH_AP0R2_EL2}}, x7 // encoding: [0x47,0xc8,0x1c,0xd5]
+// CHECK: msr {{ich_ap0r3_el2|ICH_AP0R3_EL2}}, x1 // encoding: [0x61,0xc8,0x1c,0xd5]
+// CHECK: msr {{ich_ap1r0_el2|ICH_AP1R0_EL2}}, x7 // encoding: [0x07,0xc9,0x1c,0xd5]
+// CHECK: msr {{ich_ap1r1_el2|ICH_AP1R1_EL2}}, x12 // encoding: [0x2c,0xc9,0x1c,0xd5]
+// CHECK: msr {{ich_ap1r2_el2|ICH_AP1R2_EL2}}, x14 // encoding: [0x4e,0xc9,0x1c,0xd5]
+// CHECK: msr {{ich_ap1r3_el2|ICH_AP1R3_EL2}}, x13 // encoding: [0x6d,0xc9,0x1c,0xd5]
+// CHECK: msr {{ich_hcr_el2|ICH_HCR_EL2}}, x1 // encoding: [0x01,0xcb,0x1c,0xd5]
+// CHECK: msr {{ich_misr_el2|ICH_MISR_EL2}}, x10 // encoding: [0x4a,0xcb,0x1c,0xd5]
+// CHECK: msr {{ich_vmcr_el2|ICH_VMCR_EL2}}, x24 // encoding: [0xf8,0xcb,0x1c,0xd5]
+// CHECK: msr {{ich_vseir_el2|ICH_VSEIR_EL2}}, x29 // encoding: [0x9d,0xc9,0x1c,0xd5]
+// CHECK: msr {{ich_lr0_el2|ICH_LR0_EL2}}, x26 // encoding: [0x1a,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr1_el2|ICH_LR1_EL2}}, x9 // encoding: [0x29,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr2_el2|ICH_LR2_EL2}}, x18 // encoding: [0x52,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr3_el2|ICH_LR3_EL2}}, x26 // encoding: [0x7a,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr4_el2|ICH_LR4_EL2}}, x22 // encoding: [0x96,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr5_el2|ICH_LR5_EL2}}, x26 // encoding: [0xba,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr6_el2|ICH_LR6_EL2}}, x27 // encoding: [0xdb,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr7_el2|ICH_LR7_EL2}}, x8 // encoding: [0xe8,0xcc,0x1c,0xd5]
+// CHECK: msr {{ich_lr8_el2|ICH_LR8_EL2}}, x17 // encoding: [0x11,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr9_el2|ICH_LR9_EL2}}, x19 // encoding: [0x33,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr10_el2|ICH_LR10_EL2}}, x17 // encoding: [0x51,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr11_el2|ICH_LR11_EL2}}, x5 // encoding: [0x65,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr12_el2|ICH_LR12_EL2}}, x29 // encoding: [0x9d,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr13_el2|ICH_LR13_EL2}}, x2 // encoding: [0xa2,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr14_el2|ICH_LR14_EL2}}, x13 // encoding: [0xcd,0xcd,0x1c,0xd5]
+// CHECK: msr {{ich_lr15_el2|ICH_LR15_EL2}}, x27 // encoding: [0xfb,0xcd,0x1c,0xd5]
diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg
index 75dba81..1be70c0 100644
--- a/test/MC/AArch64/lit.local.cfg
+++ b/test/MC/AArch64/lit.local.cfg
@@ -1,3 +1,3 @@
targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
- config.unsupported = True
\ No newline at end of file
+if 'AArch64' not in targets:
+ config.unsupported = True
diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s
index cde792a..04841d0 100644
--- a/test/MC/AArch64/neon-2velem.s
+++ b/test/MC/AArch64/neon-2velem.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s
index 3ff86bf..fc3215b 100644
--- a/test/MC/AArch64/neon-3vdiff.s
+++ b/test/MC/AArch64/neon-3vdiff.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+crypto -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s
index 8b1c2d4..60b766d 100644
--- a/test/MC/AArch64/neon-across.s
+++ b/test/MC/AArch64/neon-across.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s
index d4e3ef5..19cfaf1 100644
--- a/test/MC/AArch64/neon-compare-instructions.s
+++ b/test/MC/AArch64/neon-compare-instructions.s
@@ -255,13 +255,13 @@
cmeq v9.4s, v7.4s, #0
cmeq v3.2d, v31.2d, #0
-// CHECK: cmeq v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x99,0x20,0x0e]
-// CHECK: cmeq v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x9b,0x20,0x4e]
-// CHECK: cmeq v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x9a,0x60,0x0e]
-// CHECK: cmeq v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x98,0x60,0x4e]
-// CHECK: cmeq v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x9b,0xa0,0x0e]
-// CHECK: cmeq v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x98,0xa0,0x4e]
-// CHECK: cmeq v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x9b,0xe0,0x4e]
+// CHECK: cmeq v0.8b, v15.8b, #{{0x0|0}} // encoding: [0xe0,0x99,0x20,0x0e]
+// CHECK: cmeq v1.16b, v31.16b, #{{0x0|0}} // encoding: [0xe1,0x9b,0x20,0x4e]
+// CHECK: cmeq v15.4h, v16.4h, #{{0x0|0}} // encoding: [0x0f,0x9a,0x60,0x0e]
+// CHECK: cmeq v5.8h, v6.8h, #{{0x0|0}} // encoding: [0xc5,0x98,0x60,0x4e]
+// CHECK: cmeq v29.2s, v27.2s, #{{0x0|0}} // encoding: [0x7d,0x9b,0xa0,0x0e]
+// CHECK: cmeq v9.4s, v7.4s, #{{0x0|0}} // encoding: [0xe9,0x98,0xa0,0x4e]
+// CHECK: cmeq v3.2d, v31.2d, #{{0x0|0}} // encoding: [0xe3,0x9b,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
@@ -274,13 +274,13 @@
cmge v17.4s, v20.4s, #0
cmge v3.2d, v31.2d, #0
-// CHECK: cmge v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x89,0x20,0x2e]
-// CHECK: cmge v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x8b,0x20,0x6e]
-// CHECK: cmge v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x8a,0x60,0x2e]
-// CHECK: cmge v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x88,0x60,0x6e]
-// CHECK: cmge v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x8b,0xa0,0x2e]
-// CHECK: cmge v17.4s, v20.4s, #0x0 // encoding: [0x91,0x8a,0xa0,0x6e]
-// CHECK: cmge v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x8b,0xe0,0x6e]
+// CHECK: cmge v0.8b, v15.8b, #{{0x0|0}} // encoding: [0xe0,0x89,0x20,0x2e]
+// CHECK: cmge v1.16b, v31.16b, #{{0x0|0}} // encoding: [0xe1,0x8b,0x20,0x6e]
+// CHECK: cmge v15.4h, v16.4h, #{{0x0|0}} // encoding: [0x0f,0x8a,0x60,0x2e]
+// CHECK: cmge v5.8h, v6.8h, #{{0x0|0}} // encoding: [0xc5,0x88,0x60,0x6e]
+// CHECK: cmge v29.2s, v27.2s, #{{0x0|0}} // encoding: [0x7d,0x8b,0xa0,0x2e]
+// CHECK: cmge v17.4s, v20.4s, #{{0x0|0}} // encoding: [0x91,0x8a,0xa0,0x6e]
+// CHECK: cmge v3.2d, v31.2d, #{{0x0|0}} // encoding: [0xe3,0x8b,0xe0,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than Zero (Signed Integer)
@@ -294,13 +294,13 @@
cmgt v9.4s, v7.4s, #0
cmgt v3.2d, v31.2d, #0
-// CHECK: cmgt v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x89,0x20,0x0e]
-// CHECK: cmgt v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x8b,0x20,0x4e]
-// CHECK: cmgt v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x8a,0x60,0x0e]
-// CHECK: cmgt v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x88,0x60,0x4e]
-// CHECK: cmgt v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x8b,0xa0,0x0e]
-// CHECK: cmgt v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x88,0xa0,0x4e]
-// CHECK: cmgt v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x8b,0xe0,0x4e]
+// CHECK: cmgt v0.8b, v15.8b, #{{0x0|0}} // encoding: [0xe0,0x89,0x20,0x0e]
+// CHECK: cmgt v1.16b, v31.16b, #{{0x0|0}} // encoding: [0xe1,0x8b,0x20,0x4e]
+// CHECK: cmgt v15.4h, v16.4h, #{{0x0|0}} // encoding: [0x0f,0x8a,0x60,0x0e]
+// CHECK: cmgt v5.8h, v6.8h, #{{0x0|0}} // encoding: [0xc5,0x88,0x60,0x4e]
+// CHECK: cmgt v29.2s, v27.2s, #{{0x0|0}} // encoding: [0x7d,0x8b,0xa0,0x0e]
+// CHECK: cmgt v9.4s, v7.4s, #{{0x0|0}} // encoding: [0xe9,0x88,0xa0,0x4e]
+// CHECK: cmgt v3.2d, v31.2d, #{{0x0|0}} // encoding: [0xe3,0x8b,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
@@ -313,13 +313,13 @@
cmle v9.4s, v7.4s, #0
cmle v3.2d, v31.2d, #0
-// CHECK: cmle v0.8b, v15.8b, #0x0 // encoding: [0xe0,0x99,0x20,0x2e]
-// CHECK: cmle v1.16b, v31.16b, #0x0 // encoding: [0xe1,0x9b,0x20,0x6e]
-// CHECK: cmle v15.4h, v16.4h, #0x0 // encoding: [0x0f,0x9a,0x60,0x2e]
-// CHECK: cmle v5.8h, v6.8h, #0x0 // encoding: [0xc5,0x98,0x60,0x6e]
-// CHECK: cmle v29.2s, v27.2s, #0x0 // encoding: [0x7d,0x9b,0xa0,0x2e]
-// CHECK: cmle v9.4s, v7.4s, #0x0 // encoding: [0xe9,0x98,0xa0,0x6e]
-// CHECK: cmle v3.2d, v31.2d, #0x0 // encoding: [0xe3,0x9b,0xe0,0x6e]
+// CHECK: cmle v0.8b, v15.8b, #{{0x0|0}} // encoding: [0xe0,0x99,0x20,0x2e]
+// CHECK: cmle v1.16b, v31.16b, #{{0x0|0}} // encoding: [0xe1,0x9b,0x20,0x6e]
+// CHECK: cmle v15.4h, v16.4h, #{{0x0|0}} // encoding: [0x0f,0x9a,0x60,0x2e]
+// CHECK: cmle v5.8h, v6.8h, #{{0x0|0}} // encoding: [0xc5,0x98,0x60,0x6e]
+// CHECK: cmle v29.2s, v27.2s, #{{0x0|0}} // encoding: [0x7d,0x9b,0xa0,0x2e]
+// CHECK: cmle v9.4s, v7.4s, #{{0x0|0}} // encoding: [0xe9,0x98,0xa0,0x6e]
+// CHECK: cmle v3.2d, v31.2d, #{{0x0|0}} // encoding: [0xe3,0x9b,0xe0,0x6e]
//----------------------------------------------------------------------
// Vector Compare Mask Less Than Zero (Signed Integer)
@@ -332,13 +332,13 @@
cmlt v9.4s, v7.4s, #0
cmlt v3.2d, v31.2d, #0
-// CHECK: cmlt v0.8b, v15.8b, #0x0 // encoding: [0xe0,0xa9,0x20,0x0e]
-// CHECK: cmlt v1.16b, v31.16b, #0x0 // encoding: [0xe1,0xab,0x20,0x4e]
-// CHECK: cmlt v15.4h, v16.4h, #0x0 // encoding: [0x0f,0xaa,0x60,0x0e]
-// CHECK: cmlt v5.8h, v6.8h, #0x0 // encoding: [0xc5,0xa8,0x60,0x4e]
-// CHECK: cmlt v29.2s, v27.2s, #0x0 // encoding: [0x7d,0xab,0xa0,0x0e]
-// CHECK: cmlt v9.4s, v7.4s, #0x0 // encoding: [0xe9,0xa8,0xa0,0x4e]
-// CHECK: cmlt v3.2d, v31.2d, #0x0 // encoding: [0xe3,0xab,0xe0,0x4e]
+// CHECK: cmlt v0.8b, v15.8b, #{{0x0|0}} // encoding: [0xe0,0xa9,0x20,0x0e]
+// CHECK: cmlt v1.16b, v31.16b, #{{0x0|0}} // encoding: [0xe1,0xab,0x20,0x4e]
+// CHECK: cmlt v15.4h, v16.4h, #{{0x0|0}} // encoding: [0x0f,0xaa,0x60,0x0e]
+// CHECK: cmlt v5.8h, v6.8h, #{{0x0|0}} // encoding: [0xc5,0xa8,0x60,0x4e]
+// CHECK: cmlt v29.2s, v27.2s, #{{0x0|0}} // encoding: [0x7d,0xab,0xa0,0x0e]
+// CHECK: cmlt v9.4s, v7.4s, #{{0x0|0}} // encoding: [0xe9,0xa8,0xa0,0x4e]
+// CHECK: cmlt v3.2d, v31.2d, #{{0x0|0}} // encoding: [0xe3,0xab,0xe0,0x4e]
//----------------------------------------------------------------------
// Vector Compare Mask Equal to Zero (Floating Point)
diff --git a/test/MC/AArch64/neon-crypto.s b/test/MC/AArch64/neon-crypto.s
index 2952dd5..ed1bf88 100644
--- a/test/MC/AArch64/neon-crypto.s
+++ b/test/MC/AArch64/neon-crypto.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s
-// RUN: not llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s
+// RUN: not llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO-ARM64 %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -13,6 +13,7 @@
aesimc v0.16b, v1.16b
// CHECK-NO-CRYPTO: error: instruction requires a CPU feature not currently enabled
+// CHECK-NO-CRYPTO-ARM64: error: instruction requires: crypto
// CHECK: aese v0.16b, v1.16b // encoding: [0x20,0x48,0x28,0x4e]
// CHECK: aesd v0.16b, v1.16b // encoding: [0x20,0x58,0x28,0x4e]
// CHECK: aesmc v0.16b, v1.16b // encoding: [0x20,0x68,0x28,0x4e]
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index aa08857..fa1f3ca 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -587,10 +587,11 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmgt v0.2d, v31.2s, v16.2s
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmgt v4.4s, v7.4s, v15.4h
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected floating-point constant #0.0 or invalid register type
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmlt v29.2d, v5.2d, v2.16b
// CHECK-ERROR: ^
@@ -680,12 +681,15 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmeq v0.16b, v1.16b, #0.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+
+
+// CHECK-ERROR: error: expected floating-point constant #0.0
// CHECK-ERROR: fcmeq v0.8b, v1.4h, #1.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmeq v0.8b, v1.4h, #1
// CHECK-ERROR: ^
+
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
//----------------------------------------------------------------------
@@ -702,12 +706,15 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmge v3.8b, v8.2s, #0.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+
+
+// CHECK-ERROR: error: expected floating-point constant #0.0
// CHECK-ERROR: fcmle v17.8h, v15.2d, #-1.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmle v17.8h, v15.2d, #2
// CHECK-ERROR: ^
+
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than Zero (Floating Point)
//----------------------------------------------------------------------
@@ -723,10 +730,12 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmgt v4.4s, v7.4h, #0.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+
+
+// CHECK-ERROR: error: expected floating-point constant #0.0
// CHECK-ERROR: fcmlt v29.2d, v5.2d, #255.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmlt v29.2d, v5.2d, #255
// CHECK-ERROR: ^
@@ -745,10 +754,12 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmge v3.8b, v8.2s, #0.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+
+
+// CHECK-ERROR: error: expected floating-point constant #0.0
// CHECK-ERROR: fcmle v17.2d, v15.2d, #15.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmle v17.2d, v15.2d, #15
// CHECK-ERROR: ^
@@ -767,10 +778,12 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmgt v4.4s, v7.4h, #0.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+
+
+// CHECK-ERROR: error: expected floating-point constant #0.0
// CHECK-ERROR: fcmlt v29.2d, v5.2d, #16.0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: only #0.0 is acceptable as immediate
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcmlt v29.2d, v5.2d, #2
// CHECK-ERROR: ^
@@ -1285,22 +1298,24 @@
shl v0.4s, v21.4s, #32
shl v0.2d, v1.2d, #64
-// CHECK-ERROR: error: expected comma before next operand
+
+// CHECK-ERROR: error: unexpected token in argument list
// CHECK-ERROR: shl v0.4s, v15,2s, #3
// CHECK-ERROR: ^
+
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: shl v0.2d, v17.4s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: shl v0.8b, v31.8b, #-1
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: shl v0.8b, v31.8b, #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: shl v0.4s, v21.4s, #32
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: shl v0.2d, v1.2d, #64
// CHECK-ERROR: ^
@@ -1334,25 +1349,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ushll2 v1.4s, v25.4s, #7
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sshll v0.8h, v1.8b, #-1
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sshll v0.8h, v1.8b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: ushll v0.4s, v1.4h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: ushll v0.2d, v1.2s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sshll2 v0.8h, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: sshll2 v0.4s, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: ushll2 v0.2d, v1.4s, #33
// CHECK-ERROR: ^
@@ -1377,16 +1392,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sshr v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sshr v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sshr v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sshr v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: sshr v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1410,16 +1425,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ushr v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: ushr v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: ushr v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: ushr v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ushr v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1443,16 +1458,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ssra v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: ssra v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: ssra v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: ssra v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ssra v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1476,16 +1491,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: usra v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: usra v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: usra v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: usra v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: usra v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1509,16 +1524,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: srshr v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: srshr v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: srshr v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: srshr v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: srshr v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1542,16 +1557,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: urshr v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: urshr v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: urshr v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: urshr v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: urshr v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1575,16 +1590,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: srsra v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: srsra v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: srsra v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: srsra v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: srsra v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1608,16 +1623,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ursra v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: ursra v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: ursra v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: ursra v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ursra v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1641,16 +1656,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sri v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sri v0.16b, v1.16b, #9
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sri v0.8h, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sri v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: sri v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -1674,16 +1689,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sli v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sli v0.16b, v1.16b, #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: sli v0.8h, v1.8h, #16
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: sli v0.4s, v1.4s, #32
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: sli v0.2d, v1.2d, #64
// CHECK-ERROR: ^
@@ -1707,16 +1722,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqshlu v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sqshlu v0.16b, v1.16b, #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: sqshlu v0.8h, v1.8h, #16
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: sqshlu v0.4s, v1.4s, #32
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: sqshlu v0.2d, v1.2d, #64
// CHECK-ERROR: ^
@@ -1740,16 +1755,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqshl v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sqshl v0.16b, v1.16b, #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: sqshl v0.8h, v1.8h, #16
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: sqshl v0.4s, v1.4s, #32
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: sqshl v0.2d, v1.2d, #64
// CHECK-ERROR: ^
@@ -1773,16 +1788,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: uqshl v0.2s, v1.2d, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: uqshl v0.16b, v1.16b, #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: uqshl v0.8h, v1.8h, #16
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: uqshl v0.4s, v1.4s, #32
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: uqshl v0.2d, v1.2d, #64
// CHECK-ERROR: ^
@@ -1805,13 +1820,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: shrn v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: shrn2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: shrn2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: shrn2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -1834,13 +1849,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqshrun v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqshrun2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqshrun2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqshrun2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -1863,13 +1878,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: rshrn v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: rshrn2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: rshrn2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: rshrn2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -1892,13 +1907,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrshrun v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqrshrun2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqrshrun2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqrshrun2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -1921,13 +1936,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqshrn v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqshrn2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqshrn2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqshrn2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -1950,13 +1965,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: uqshrn v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: uqshrn2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: uqshrn2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: uqshrn2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -1979,13 +1994,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrshrn v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqrshrn2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqrshrn2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqrshrn2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -2008,13 +2023,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: uqrshrn v0.2s, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: uqrshrn2 v0.16b, v1.8h, #17
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: uqrshrn2 v0.8h, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: uqrshrn2 v0.4s, v1.2d, #65
// CHECK-ERROR: ^
@@ -2037,13 +2052,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: scvtf v0.2d, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: ucvtf v0.2s, v1.2s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: ucvtf v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ucvtf v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -2066,13 +2081,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs v0.2d, v1.2s, #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: fcvtzu v0.2s, v1.2s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: fcvtzu v0.4s, v1.4s, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: fcvtzu v0.2d, v1.2d, #65
// CHECK-ERROR: ^
@@ -2616,9 +2631,11 @@
pmull2 v0.4s, v1.8h v2.8h
pmull2 v0.2d, v1.4s, v2.4s
-// CHECK-ERROR: error: expected comma before next operand
+
+// CHECK-ERROR: error: unexpected token in argument list
// CHECK-ERROR: pmull2 v0.4s, v1.8h v2.8h
// CHECK-ERROR: ^
+
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: pmull2 v0.2d, v1.4s, v2.4s
// CHECK-ERROR: ^
@@ -2941,19 +2958,19 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mla v0.2d, v1.2d, v16.d[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mla v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mla v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mla v0.2h, v1.2h, v2.h[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mla v0.4h, v1.4h, v2.h[8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mla v0.8h, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -2975,19 +2992,19 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mls v0.2d, v1.2d, v16.d[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mls v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mls v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mls v0.2h, v1.2h, v2.h[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mls v0.4h, v1.4h, v2.h[8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mls v0.8h, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3012,22 +3029,22 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla v3.4s, v8.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla v3.4s, v8.4s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla v0.2d, v1.2d, v2.d[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla v0.2d, v1.2d, v22.d[2]
// CHECK-ERROR: ^
@@ -3046,29 +3063,29 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmls v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmls v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmls v3.4s, v8.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmls v3.4s, v8.4s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmls v0.2d, v1.2d, v2.d[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmls v0.2d, v1.2d, v22.d[2]
// CHECK-ERROR: ^
smlal v0.4h, v1.4h, v2.h[2]
smlal v0.4s, v1.4h, v2.h[8]
smlal v0.4s, v1.4h, v16.h[2]
- smlal v0.2s, v1.2s, v2.s[4]
+ smlal v0.2s, v1.2s, v2.s[1]
smlal v0.2d, v1.2s, v2.s[4]
smlal v0.2d, v1.2s, v22.s[4]
smlal2 v0.4h, v1.8h, v1.h[2]
@@ -3081,25 +3098,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlal v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlal v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlal v0.4s, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlal v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlal v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlal2 v0.4h, v1.8h, v1.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlal2 v0.4s, v1.8h, v1.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3108,17 +3125,17 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlal2 v0.2s, v1.4s, v1.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlal2 v0.2d, v1.4s, v1.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlal2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
smlsl v0.4h, v1.4h, v2.h[2]
smlsl v0.4s, v1.4h, v2.h[8]
smlsl v0.4s, v1.4h, v16.h[2]
- smlsl v0.2s, v1.2s, v2.s[4]
+ smlsl v0.2s, v1.2s, v2.s[1]
smlsl v0.2d, v1.2s, v2.s[4]
smlsl v0.2d, v1.2s, v22.s[4]
smlsl2 v0.4h, v1.8h, v1.h[2]
@@ -3131,25 +3148,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlsl v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlsl v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlsl v0.4s, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlsl v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlsl v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlsl2 v0.4h, v1.8h, v1.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlsl2 v0.4s, v1.8h, v1.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3158,17 +3175,17 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smlsl2 v0.2s, v1.4s, v1.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlsl2 v0.2d, v1.4s, v1.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smlsl2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
umlal v0.4h, v1.4h, v2.h[2]
umlal v0.4s, v1.4h, v2.h[8]
umlal v0.4s, v1.4h, v16.h[2]
- umlal v0.2s, v1.2s, v2.s[4]
+ umlal v0.2s, v1.2s, v2.s[1]
umlal v0.2d, v1.2s, v2.s[4]
umlal v0.2d, v1.2s, v22.s[4]
umlal2 v0.4h, v1.8h, v1.h[2]
@@ -3181,25 +3198,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlal v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlal v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlal v0.4s, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlal v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlal v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlal2 v0.4h, v1.8h, v1.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlal2 v0.4s, v1.8h, v1.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3208,17 +3225,17 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlal2 v0.2s, v1.4s, v1.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlal2 v0.2d, v1.4s, v1.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlal2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
umlsl v0.4h, v1.4h, v2.h[2]
umlsl v0.4s, v1.4h, v2.h[8]
umlsl v0.4s, v1.4h, v16.h[2]
- umlsl v0.2s, v1.2s, v2.s[4]
+ umlsl v0.2s, v1.2s, v2.s[3]
umlsl v0.2d, v1.2s, v2.s[4]
umlsl v0.2d, v1.2s, v22.s[4]
umlsl2 v0.4h, v1.8h, v1.h[2]
@@ -3231,25 +3248,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlsl v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlsl v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlsl v0.4s, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[3]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlsl v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlsl v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlsl2 v0.4h, v1.8h, v1.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlsl2 v0.4s, v1.8h, v1.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3258,17 +3275,17 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umlsl2 v0.2s, v1.4s, v1.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlsl2 v0.2d, v1.4s, v1.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umlsl2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
sqdmlal v0.4h, v1.4h, v2.h[2]
sqdmlal v0.4s, v1.4h, v2.h[8]
sqdmlal v0.4s, v1.4h, v16.h[2]
- sqdmlal v0.2s, v1.2s, v2.s[4]
+ sqdmlal v0.2s, v1.2s, v2.s[3]
sqdmlal v0.2d, v1.2s, v2.s[4]
sqdmlal v0.2d, v1.2s, v22.s[4]
sqdmlal2 v0.4h, v1.8h, v1.h[2]
@@ -3281,25 +3298,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlal v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal v0.4s, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[3]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlal v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlal v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal2 v0.4h, v1.8h, v1.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v1.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3308,17 +3325,17 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal2 v0.2s, v1.4s, v1.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v1.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
sqdmlsl v0.4h, v1.4h, v2.h[2]
sqdmlsl v0.4s, v1.4h, v2.h[8]
sqdmlsl v0.4s, v1.4h, v16.h[2]
- sqdmlsl v0.2s, v1.2s, v2.s[4]
+ sqdmlsl v0.2s, v1.2s, v2.s[3]
sqdmlsl v0.2d, v1.2s, v2.s[4]
sqdmlsl v0.2d, v1.2s, v22.s[4]
sqdmlsl2 v0.4h, v1.8h, v1.h[2]
@@ -3331,25 +3348,25 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[3]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl2 v0.4h, v1.8h, v1.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v1.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3358,10 +3375,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl2 v0.2s, v1.4s, v1.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v1.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
@@ -3375,28 +3392,28 @@
mul v0.4s, v1.4s, v22.s[4]
mul v0.2d, v1.2d, v2.d[1]
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mul v0.4h, v1.4h, v2.h[8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mul v0.4h, v1.4h, v16.h[8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mul v0.8h, v1.8h, v2.h[8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mul v0.8h, v1.8h, v16.h[8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mul v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mul v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mul v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: mul v0.4s, v1.4s, v22.s[4]
// CHECK-ERROR: ^
@@ -3414,22 +3431,22 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul v0.4s, v1.4s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul v0.2d, v1.2d, v2.d[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul v0.2d, v1.2d, v22.d[2]
// CHECK-ERROR: ^
@@ -3444,22 +3461,22 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx v0.4s, v1.4s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx v0.2d, v1.2d, v2.d[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx v0.2d, v1.2d, v22.d[2]
// CHECK-ERROR: ^
@@ -3479,7 +3496,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smull v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smull v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3488,16 +3505,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smull v0.2s, v1.2s, v2.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smull v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smull v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smull2 v0.4h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smull2 v0.4s, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3506,10 +3523,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: smull2 v0.2s, v1.4s, v2.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smull2 v0.2d, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: smull2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
@@ -3529,7 +3546,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umull v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umull v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3538,16 +3555,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umull v0.2s, v1.2s, v2.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umull v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umull v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umull2 v0.4h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umull2 v0.4s, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3556,10 +3573,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: umull2 v0.2s, v1.4s, v2.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umull2 v0.2d, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: umull2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
@@ -3579,7 +3596,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull v0.4s, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3588,16 +3605,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull v0.2s, v1.2s, v2.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull v0.2d, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull v0.2d, v1.2s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull2 v0.4h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull2 v0.4s, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3606,10 +3623,10 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull2 v0.2s, v1.4s, v2.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v22.s[4]
// CHECK-ERROR: ^
@@ -3623,28 +3640,28 @@
sqdmulh v0.4s, v1.4s, v22.s[4]
sqdmulh v0.2d, v1.2d, v22.d[1]
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh v0.4h, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmulh v0.4h, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh v0.8h, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmulh v0.8h, v1.8h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh v0.4s, v1.4s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3661,28 +3678,28 @@
sqrdmulh v0.4s, v1.4s, v22.s[4]
sqrdmulh v0.2d, v1.2d, v22.d[1]
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v2.h[8]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v16.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v22.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v2.s[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v22.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3900,13 +3917,13 @@
ld1 {v4}, [x0]
ld1 {v32.16b}, [x0]
ld1 {v15.8h}, [x32]
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: vector register expected
// CHECK-ERROR: ld1 {x3}, [x2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld1 {v4}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: vector register expected
// CHECK-ERROR: ld1 {v32.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3920,13 +3937,13 @@
ld1 {v1.8h-v1.8h}, [x0]
ld1 {v15.8h-v17.4h}, [x15]
ld1 {v0.8b-v2.8b, [x0]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: '{' expected
+// CHECK-ERROR: error: unexpected token in argument list
// CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
@@ -3935,7 +3952,7 @@
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld1 {v1.8h-v1.8h}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: '}' expected
@@ -3947,16 +3964,15 @@
ld2 {v15.4h, v16.4h, v17.4h}, [x32]
ld2 {v15.8h-v16.4h}, [x15]
ld2 {v0.2d-v2.2d}, [x0]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3968,16 +3984,16 @@
ld3 {v0.8b, v2.8b, v3.8b}, [x0]
ld3 {v15.8h-v17.4h}, [x15]
ld3 {v31.4s-v2.4s}, [sp]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -3989,16 +4005,16 @@
ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
ld4 {v15.8h-v18.4h}, [x15]
ld4 {v31.2s-v1.2s}, [x31]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4009,13 +4025,13 @@
st1 {v4}, [x0]
st1 {v32.16b}, [x0]
st1 {v15.8h}, [x32]
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: vector register expected
// CHECK-ERROR: st1 {x3}, [x2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st1 {v4}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: vector register expected
// CHECK-ERROR: st1 {v32.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4029,13 +4045,13 @@
st1 {v1.8h-v1.8h}, [x0]
st1 {v15.8h-v17.4h}, [x15]
st1 {v0.8b-v2.8b, [x0]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: '{' expected
+// CHECK-ERROR: error: unexpected token in argument list
// CHECK-ERROR: st1 v0.8b, v1.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
@@ -4044,7 +4060,7 @@
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: '}' expected
@@ -4056,16 +4072,16 @@
st2 {v15.4h, v16.4h, v17.4h}, [x30]
st2 {v15.8h-v16.4h}, [x15]
st2 {v0.2d-v2.2d}, [x0]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4077,16 +4093,16 @@
st3 {v0.8b, v2.8b, v3.8b}, [x0]
st3 {v15.8h-v17.4h}, [x15]
st3 {v31.4s-v2.4s}, [sp]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4098,16 +4114,16 @@
st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
st4 {v15.8h-v18.4h}, [x15]
st4 {v31.2s-v1.2s}, [x31]
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: registers must be sequential
// CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4124,7 +4140,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld1 {v0.16b}, [x0], #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: ld1 {v0.8h, v1.16h}, [x0], x1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4140,7 +4156,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1
// CHECK-ERROR: ^
@@ -4150,7 +4166,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st1 {v0.16b}, [x0], #8
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: st1 {v0.8h, v1.16h}, [x0], x1
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4166,7 +4182,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st3 {v5.2s, v6.2s, v7.2s}, [x1], #48
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1
// CHECK-ERROR: ^
@@ -4178,16 +4194,16 @@
ld2r {v31.4s, v0.2s}, [sp]
ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp]
-// CHECK-ERROR: error: expected vector type register
+// CHECK-ERROR: error: vector register expected
// CHECK-ERROR: ld1r {x1}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid space between two vectors
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp]
// CHECK-ERROR: ^
@@ -4199,16 +4215,16 @@
ld2 {v15.h, v16.h}[8], [x15]
ld3 {v31.s, v0.s, v1.s}[-1], [sp]
ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
-// CHECK-ERROR:: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ld1 {v0.b}[16], [x0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected lane number
+// CHECK-ERROR: error: vector lane must be an integer in range
// CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
// CHECK-ERROR: ^
@@ -4216,16 +4232,16 @@
st2 {v31.s, v0.s}[3], [8]
st3 {v15.h, v16.h, v17.h}[-1], [x15]
st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
-// CHECK-ERROR:: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: st1 {v0.d}[16], [x0]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: st2 {v31.s, v0.s}[3], [8]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected lane number
+// CHECK-ERROR: error: vector lane must be an integer in range
// CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15]
// CHECK-ERROR: ^
-// CHECK-ERROR: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0]
// CHECK-ERROR: ^
@@ -4264,7 +4280,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected the same vector layout
+// CHECK-ERROR: error: mismatched register size suffix
// CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4298,16 +4314,16 @@
ins v20.s[1], s30
ins v1.d[0], d7
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ins v2.b[16], w1
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ins v7.h[8], w14
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ins v20.s[5], w30
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: ins v1.d[2], x7
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -4334,19 +4350,19 @@
smov x14, v6.d[1]
smov x20, v9.d[0]
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR smov w1, v0.b[16]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR smov w14, v6.h[8]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR smov x1, v0.b[16]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR smov x14, v6.h[8]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR smov x20, v9.s[5]
// CHECK-ERROR ^
// CHECK-ERROR error: invalid operand for instruction
@@ -4373,16 +4389,16 @@
umov s20, v9.s[2]
umov d7, v18.d[1]
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR umov w1, v0.b[16]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR umov w14, v6.h[8]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR umov w20, v9.s[5]
// CHECK-ERROR ^
-// CHECK-ERROR error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR umov x7, v18.d[3]
// CHECK-ERROR ^
// CHECK-ERROR error: invalid operand for instruction
@@ -4798,7 +4814,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s17, h27, s12
// CHECK-ERROR: ^
-// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal d19, s24, d12
// CHECK-ERROR: ^
@@ -4812,7 +4828,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl s14, h12, s25
// CHECK-ERROR: ^
-// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl d12, s23, d13
// CHECK-ERROR: ^
@@ -4826,7 +4842,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s12, h22, s12
// CHECK-ERROR: ^
-// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull d15, s22, d12
// CHECK-ERROR: ^
@@ -4890,7 +4906,7 @@
//----------------------------------------------------------------------
sshr d15, d16, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: sshr d15, d16, #99
// CHECK-ERROR: ^
@@ -4906,7 +4922,7 @@
ushr d10, d17, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ushr d10, d17, #99
// CHECK-ERROR: ^
@@ -4916,7 +4932,7 @@
srshr d19, d18, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: srshr d19, d18, #99
// CHECK-ERROR: ^
@@ -4926,7 +4942,7 @@
urshr d20, d23, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: urshr d20, d23, #99
// CHECK-ERROR: ^
@@ -4936,7 +4952,7 @@
ssra d18, d12, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ssra d18, d12, #99
// CHECK-ERROR: ^
@@ -4946,7 +4962,7 @@
usra d20, d13, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: usra d20, d13, #99
// CHECK-ERROR: ^
@@ -4956,7 +4972,7 @@
srsra d15, d11, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: srsra d15, d11, #99
// CHECK-ERROR: ^
@@ -4966,7 +4982,7 @@
ursra d18, d10, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ursra d18, d10, #99
// CHECK-ERROR: ^
@@ -4976,7 +4992,7 @@
shl d7, d10, #99
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: shl d7, d10, #99
// CHECK-ERROR: ^
@@ -4995,16 +5011,16 @@
sqshl s14, s17, #99
sqshl d15, d16, #99
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sqshl b11, b19, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: sqshl h13, h18, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: sqshl s14, s17, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: sqshl d15, d16, #99
// CHECK-ERROR: ^
@@ -5017,16 +5033,16 @@
uqshl s14, s19, #99
uqshl d15, d12, #99
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: uqshl b18, b15, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: uqshl h11, h18, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: uqshl s14, s19, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: uqshl d15, d12, #99
// CHECK-ERROR: ^
@@ -5039,16 +5055,16 @@
sqshlu s16, s14, #99
sqshlu d11, d13, #99
-// CHECK-ERROR: error: expected integer in range [0, 7]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 7]
// CHECK-ERROR: sqshlu b15, b18, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 15]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 15]
// CHECK-ERROR: sqshlu h19, h17, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 31]
// CHECK-ERROR: sqshlu s16, s14, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: sqshlu d11, d13, #99
// CHECK-ERROR: ^
@@ -5058,7 +5074,7 @@
sri d10, d12, #99
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: sri d10, d12, #99
// CHECK-ERROR: ^
@@ -5068,7 +5084,7 @@
sli d10, d14, #99
-// CHECK-ERROR: error: expected integer in range [0, 63]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [0, 63]
// CHECK-ERROR: sli d10, d14, #99
// CHECK-ERROR: ^
@@ -5080,13 +5096,13 @@
sqshrn h17, s10, #99
sqshrn s18, d10, #99
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqshrn b10, h15, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqshrn h17, s10, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqshrn s18, d10, #99
// CHECK-ERROR: ^
@@ -5098,13 +5114,13 @@
uqshrn h10, s14, #99
uqshrn s10, d12, #99
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: uqshrn b12, h10, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: uqshrn h10, s14, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: uqshrn s10, d12, #99
// CHECK-ERROR: ^
@@ -5116,13 +5132,13 @@
sqrshrn h15, s10, #99
sqrshrn s15, d12, #99
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqrshrn b10, h13, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqrshrn h15, s10, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqrshrn s15, d12, #99
// CHECK-ERROR: ^
@@ -5134,13 +5150,13 @@
uqrshrn h12, s10, #99
uqrshrn s10, d10, #99
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: uqrshrn b10, h12, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: uqrshrn h12, s10, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: uqrshrn s10, d10, #99
// CHECK-ERROR: ^
@@ -5152,13 +5168,13 @@
sqshrun h20, s14, #99
sqshrun s10, d15, #99
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqshrun b15, h10, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqshrun h20, s14, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqshrun s10, d15, #99
// CHECK-ERROR: ^
@@ -5170,13 +5186,13 @@
sqrshrun h10, s13, #99
sqrshrun s22, d16, #99
-// CHECK-ERROR: error: expected integer in range [1, 8]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 8]
// CHECK-ERROR: sqrshrun b17, h10, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 16]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 16]
// CHECK-ERROR: sqrshrun h10, s13, #99
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: sqrshrun s22, d16, #99
// CHECK-ERROR: ^
@@ -5189,13 +5205,13 @@
scvtf d21, d12, #65
scvtf d21, s12, #31
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: scvtf s22, s13, #0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: scvtf s22, s13, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: scvtf d21, d12, #65
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -5210,10 +5226,10 @@
ucvtf d21, d14, #65
ucvtf d21, s14, #64
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: ucvtf s22, s13, #34
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: ucvtf d21, d14, #65
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -6262,10 +6278,10 @@
fcvtzs d21, d12, #65
fcvtzs s21, d12, #1
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: fcvtzs s21, s12, #0
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: fcvtzs d21, d12, #65
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -6280,10 +6296,10 @@
fcvtzu d21, d12, #0
fcvtzu s21, d12, #1
-// CHECK-ERROR: error: expected integer in range [1, 32]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 32]
// CHECK-ERROR: fcvtzu s21, s12, #33
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected integer in range [1, 64]
+// CHECK-ERROR: error: {{expected|immediate must be an}} integer in range [1, 64]
// CHECK-ERROR: fcvtzu d21, d12, #0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -6868,7 +6884,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmul h0, h1, v1.s[0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmul s2, s29, v10.s[4]
// CHECK-ERROR: ^
@@ -6887,7 +6903,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmulx h0, h1, v1.d[0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmulx d2, d29, v10.d[3]
// CHECK-ERROR: ^
@@ -6906,7 +6922,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmla d30, s11, v1.d[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: fmla s16, s22, v16.s[5]
// CHECK-ERROR: ^
@@ -6925,7 +6941,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmls h7, h17, v26.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: expected lane number
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 1]
// CHECK-ERROR: fmls d16, d22, v16.d[-1]
// CHECK-ERROR: ^
@@ -6937,7 +6953,7 @@
sqdmlal s0, h0, v0.s[0]
sqdmlal s8, s9, v14.s[1]
// invalid lane
- sqdmlal s4, s5, v1.s[5]
+ sqdmlal d4, s5, v1.s[5]
// invalid vector index
sqdmlal s0, h0, v17.h[0]
@@ -6947,8 +6963,8 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s8, s9, v14.s[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
-// CHECK-ERROR: sqdmlal s4, s5, v1.s[5]
+// CHECK-ERROR: vector lane must be an integer in range
+// CHECK-ERROR: sqdmlal d4, s5, v1.s[5]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlal s0, h0, v17.h[0]
@@ -6972,7 +6988,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmlsl d1, h1, v13.s[0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmlsl d1, s1, v13.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -6999,7 +7015,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull s1, s1, v4.s[0]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmull s12, h17, v9.h[9]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -7024,7 +7040,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmulh s25, s26, v27.h[3]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqdmulh s25, s26, v27.s[4]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -7049,7 +7065,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmulh s5, h6, v7.s[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: sqrdmulh h31, h30, v14.h[9]
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -7081,16 +7097,16 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: dup d0, v17.s[3]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: dup d0, v17.d[4]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: dup s0, v1.s[7]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: dup h0, v31.h[16]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: lane number incompatible with layout
+// CHECK-ERROR: vector lane must be an integer in range
// CHECK-ERROR: dup b1, v3.b[16]
// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/neon-extract.s b/test/MC/AArch64/neon-extract.s
index 2d58a75..1daa46d 100644
--- a/test/MC/AArch64/neon-extract.s
+++ b/test/MC/AArch64/neon-extract.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -9,5 +9,5 @@
ext v0.8b, v1.8b, v2.8b, #0x3
ext v0.16b, v1.16b, v2.16b, #0x3
-// CHECK: ext v0.8b, v1.8b, v2.8b, #0x3 // encoding: [0x20,0x18,0x02,0x2e]
-// CHECK: ext v0.16b, v1.16b, v2.16b, #0x3 // encoding: [0x20,0x18,0x02,0x6e]
+// CHECK: ext v0.8b, v1.8b, v2.8b, #{{0x3|3}} // encoding: [0x20,0x18,0x02,0x2e]
+// CHECK: ext v0.16b, v1.16b, v2.16b, #{{0x3|3}} // encoding: [0x20,0x18,0x02,0x6e]
diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s
index c2ca803..567a5ec 100644
--- a/test/MC/AArch64/neon-mov.s
+++ b/test/MC/AArch64/neon-mov.s
@@ -20,19 +20,19 @@
movi v0.8h, #1
movi v0.8h, #1, lsl #8
-// CHECK: movi v0.2s, #0x1 // encoding: [0x20,0x04,0x00,0x0f]
-// CHECK: movi v1.2s, #0x0 // encoding: [0x01,0x04,0x00,0x0f]
-// CHECK: movi v15.2s, #0x1, lsl #8 // encoding: [0x2f,0x24,0x00,0x0f]
-// CHECK: movi v16.2s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x0f]
-// CHECK: movi v31.2s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x0f]
-// CHECK: movi v0.4s, #0x1 // encoding: [0x20,0x04,0x00,0x4f]
-// CHECK: movi v0.4s, #0x1, lsl #8 // encoding: [0x20,0x24,0x00,0x4f]
-// CHECK: movi v0.4s, #0x1, lsl #16 // encoding: [0x20,0x44,0x00,0x4f]
-// CHECK: movi v0.4s, #0x1, lsl #24 // encoding: [0x20,0x64,0x00,0x4f]
-// CHECK: movi v0.4h, #0x1 // encoding: [0x20,0x84,0x00,0x0f]
-// CHECK: movi v0.4h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x0f]
-// CHECK: movi v0.8h, #0x1 // encoding: [0x20,0x84,0x00,0x4f]
-// CHECK: movi v0.8h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x4f]
+// CHECK: movi v0.2s, #{{0x1|1}} // encoding: [0x20,0x04,0x00,0x0f]
+// CHECK: movi v1.2s, #{{0x0|0}} // encoding: [0x01,0x04,0x00,0x0f]
+// CHECK: movi v15.2s, #{{0x1|1}}, lsl #8 // encoding: [0x2f,0x24,0x00,0x0f]
+// CHECK: movi v16.2s, #{{0x1|1}}, lsl #16 // encoding: [0x30,0x44,0x00,0x0f]
+// CHECK: movi v31.2s, #{{0x1|1}}, lsl #24 // encoding: [0x3f,0x64,0x00,0x0f]
+// CHECK: movi v0.4s, #{{0x1|1}} // encoding: [0x20,0x04,0x00,0x4f]
+// CHECK: movi v0.4s, #{{0x1|1}}, lsl #8 // encoding: [0x20,0x24,0x00,0x4f]
+// CHECK: movi v0.4s, #{{0x1|1}}, lsl #16 // encoding: [0x20,0x44,0x00,0x4f]
+// CHECK: movi v0.4s, #{{0x1|1}}, lsl #24 // encoding: [0x20,0x64,0x00,0x4f]
+// CHECK: movi v0.4h, #{{0x1|1}} // encoding: [0x20,0x84,0x00,0x0f]
+// CHECK: movi v0.4h, #{{0x1|1}}, lsl #8 // encoding: [0x20,0xa4,0x00,0x0f]
+// CHECK: movi v0.8h, #{{0x1|1}} // encoding: [0x20,0x84,0x00,0x4f]
+// CHECK: movi v0.8h, #{{0x1|1}}, lsl #8 // encoding: [0x20,0xa4,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Inverted Immediate Shifted
@@ -51,19 +51,19 @@
mvni v0.8h, #1
mvni v0.8h, #1, lsl #8
-// CHECK: mvni v0.2s, #0x1 // encoding: [0x20,0x04,0x00,0x2f]
-// CHECK: mvni v1.2s, #0x0 // encoding: [0x01,0x04,0x00,0x2f]
-// CHECK: mvni v0.2s, #0x1, lsl #8 // encoding: [0x20,0x24,0x00,0x2f]
-// CHECK: mvni v0.2s, #0x1, lsl #16 // encoding: [0x20,0x44,0x00,0x2f]
-// CHECK: mvni v0.2s, #0x1, lsl #24 // encoding: [0x20,0x64,0x00,0x2f]
-// CHECK: mvni v0.4s, #0x1 // encoding: [0x20,0x04,0x00,0x6f]
-// CHECK: mvni v15.4s, #0x1, lsl #8 // encoding: [0x2f,0x24,0x00,0x6f]
-// CHECK: mvni v16.4s, #0x1, lsl #16 // encoding: [0x30,0x44,0x00,0x6f]
-// CHECK: mvni v31.4s, #0x1, lsl #24 // encoding: [0x3f,0x64,0x00,0x6f]
-// CHECK: mvni v0.4h, #0x1 // encoding: [0x20,0x84,0x00,0x2f]
-// CHECK: mvni v0.4h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x2f]
-// CHECK: mvni v0.8h, #0x1 // encoding: [0x20,0x84,0x00,0x6f]
-// CHECK: mvni v0.8h, #0x1, lsl #8 // encoding: [0x20,0xa4,0x00,0x6f]
+// CHECK: mvni v0.2s, #{{0x1|1}} // encoding: [0x20,0x04,0x00,0x2f]
+// CHECK: mvni v1.2s, #{{0x0|0}} // encoding: [0x01,0x04,0x00,0x2f]
+// CHECK: mvni v0.2s, #{{0x1|1}}, lsl #8 // encoding: [0x20,0x24,0x00,0x2f]
+// CHECK: mvni v0.2s, #{{0x1|1}}, lsl #16 // encoding: [0x20,0x44,0x00,0x2f]
+// CHECK: mvni v0.2s, #{{0x1|1}}, lsl #24 // encoding: [0x20,0x64,0x00,0x2f]
+// CHECK: mvni v0.4s, #{{0x1|1}} // encoding: [0x20,0x04,0x00,0x6f]
+// CHECK: mvni v15.4s, #{{0x1|1}}, lsl #8 // encoding: [0x2f,0x24,0x00,0x6f]
+// CHECK: mvni v16.4s, #{{0x1|1}}, lsl #16 // encoding: [0x30,0x44,0x00,0x6f]
+// CHECK: mvni v31.4s, #{{0x1|1}}, lsl #24 // encoding: [0x3f,0x64,0x00,0x6f]
+// CHECK: mvni v0.4h, #{{0x1|1}} // encoding: [0x20,0x84,0x00,0x2f]
+// CHECK: mvni v0.4h, #{{0x1|1}}, lsl #8 // encoding: [0x20,0xa4,0x00,0x2f]
+// CHECK: mvni v0.8h, #{{0x1|1}} // encoding: [0x20,0x84,0x00,0x6f]
+// CHECK: mvni v0.8h, #{{0x1|1}}, lsl #8 // encoding: [0x20,0xa4,0x00,0x6f]
//----------------------------------------------------------------------
// Vector Bitwise Bit Clear (AND NOT) - immediate
@@ -82,19 +82,19 @@
bic v0.8h, #1
bic v31.8h, #1, lsl #8
-// CHECK: bic v0.2s, #0x1 // encoding: [0x20,0x14,0x00,0x2f]
-// CHECK: bic v1.2s, #0x0 // encoding: [0x01,0x14,0x00,0x2f]
-// CHECK: bic v0.2s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x2f]
-// CHECK: bic v0.2s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x2f]
-// CHECK: bic v0.2s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x2f]
-// CHECK: bic v0.4s, #0x1 // encoding: [0x20,0x14,0x00,0x6f]
-// CHECK: bic v0.4s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x6f]
-// CHECK: bic v0.4s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x6f]
-// CHECK: bic v0.4s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x6f]
-// CHECK: bic v15.4h, #0x1 // encoding: [0x2f,0x94,0x00,0x2f]
-// CHECK: bic v16.4h, #0x1, lsl #8 // encoding: [0x30,0xb4,0x00,0x2f]
-// CHECK: bic v0.8h, #0x1 // encoding: [0x20,0x94,0x00,0x6f]
-// CHECK: bic v31.8h, #0x1, lsl #8 // encoding: [0x3f,0xb4,0x00,0x6f]
+// CHECK: bic v0.2s, #{{0x1|1}} // encoding: [0x20,0x14,0x00,0x2f]
+// CHECK: bic v1.2s, #{{0x0|0}} // encoding: [0x01,0x14,0x00,0x2f]
+// CHECK: bic v0.2s, #{{0x1|1}}, lsl #8 // encoding: [0x20,0x34,0x00,0x2f]
+// CHECK: bic v0.2s, #{{0x1|1}}, lsl #16 // encoding: [0x20,0x54,0x00,0x2f]
+// CHECK: bic v0.2s, #{{0x1|1}}, lsl #24 // encoding: [0x20,0x74,0x00,0x2f]
+// CHECK: bic v0.4s, #{{0x1|1}} // encoding: [0x20,0x14,0x00,0x6f]
+// CHECK: bic v0.4s, #{{0x1|1}}, lsl #8 // encoding: [0x20,0x34,0x00,0x6f]
+// CHECK: bic v0.4s, #{{0x1|1}}, lsl #16 // encoding: [0x20,0x54,0x00,0x6f]
+// CHECK: bic v0.4s, #{{0x1|1}}, lsl #24 // encoding: [0x20,0x74,0x00,0x6f]
+// CHECK: bic v15.4h, #{{0x1|1}} // encoding: [0x2f,0x94,0x00,0x2f]
+// CHECK: bic v16.4h, #{{0x1|1}}, lsl #8 // encoding: [0x30,0xb4,0x00,0x2f]
+// CHECK: bic v0.8h, #{{0x1|1}} // encoding: [0x20,0x94,0x00,0x6f]
+// CHECK: bic v31.8h, #{{0x1|1}}, lsl #8 // encoding: [0x3f,0xb4,0x00,0x6f]
//----------------------------------------------------------------------
// Vector Bitwise OR - immediate
@@ -113,19 +113,19 @@
orr v0.8h, #1
orr v16.8h, #1, lsl #8
-// CHECK: orr v0.2s, #0x1 // encoding: [0x20,0x14,0x00,0x0f]
-// CHECK: orr v1.2s, #0x0 // encoding: [0x01,0x14,0x00,0x0f]
-// CHECK: orr v0.2s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x0f]
-// CHECK: orr v0.2s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x0f]
-// CHECK: orr v0.2s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x0f]
-// CHECK: orr v0.4s, #0x1 // encoding: [0x20,0x14,0x00,0x4f]
-// CHECK: orr v0.4s, #0x1, lsl #8 // encoding: [0x20,0x34,0x00,0x4f]
-// CHECK: orr v0.4s, #0x1, lsl #16 // encoding: [0x20,0x54,0x00,0x4f]
-// CHECK: orr v0.4s, #0x1, lsl #24 // encoding: [0x20,0x74,0x00,0x4f]
-// CHECK: orr v31.4h, #0x1 // encoding: [0x3f,0x94,0x00,0x0f]
-// CHECK: orr v15.4h, #0x1, lsl #8 // encoding: [0x2f,0xb4,0x00,0x0f]
-// CHECK: orr v0.8h, #0x1 // encoding: [0x20,0x94,0x00,0x4f]
-// CHECK: orr v16.8h, #0x1, lsl #8 // encoding: [0x30,0xb4,0x00,0x4f]
+// CHECK: orr v0.2s, #{{0x1|1}} // encoding: [0x20,0x14,0x00,0x0f]
+// CHECK: orr v1.2s, #{{0x0|0}} // encoding: [0x01,0x14,0x00,0x0f]
+// CHECK: orr v0.2s, #{{0x1|1}}, lsl #8 // encoding: [0x20,0x34,0x00,0x0f]
+// CHECK: orr v0.2s, #{{0x1|1}}, lsl #16 // encoding: [0x20,0x54,0x00,0x0f]
+// CHECK: orr v0.2s, #{{0x1|1}}, lsl #24 // encoding: [0x20,0x74,0x00,0x0f]
+// CHECK: orr v0.4s, #{{0x1|1}} // encoding: [0x20,0x14,0x00,0x4f]
+// CHECK: orr v0.4s, #{{0x1|1}}, lsl #8 // encoding: [0x20,0x34,0x00,0x4f]
+// CHECK: orr v0.4s, #{{0x1|1}}, lsl #16 // encoding: [0x20,0x54,0x00,0x4f]
+// CHECK: orr v0.4s, #{{0x1|1}}, lsl #24 // encoding: [0x20,0x74,0x00,0x4f]
+// CHECK: orr v31.4h, #{{0x1|1}} // encoding: [0x3f,0x94,0x00,0x0f]
+// CHECK: orr v15.4h, #{{0x1|1}}, lsl #8 // encoding: [0x2f,0xb4,0x00,0x0f]
+// CHECK: orr v0.8h, #{{0x1|1}} // encoding: [0x20,0x94,0x00,0x4f]
+// CHECK: orr v16.8h, #{{0x1|1}}, lsl #8 // encoding: [0x30,0xb4,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Immediate Masked
@@ -135,10 +135,10 @@
movi v0.4s, #1, msl #8
movi v31.4s, #1, msl #16
-// CHECK: movi v0.2s, #0x1, msl #8 // encoding: [0x20,0xc4,0x00,0x0f]
-// CHECK: movi v1.2s, #0x1, msl #16 // encoding: [0x21,0xd4,0x00,0x0f]
-// CHECK: movi v0.4s, #0x1, msl #8 // encoding: [0x20,0xc4,0x00,0x4f]
-// CHECK: movi v31.4s, #0x1, msl #16 // encoding: [0x3f,0xd4,0x00,0x4f]
+// CHECK: movi v0.2s, #{{0x1|1}}, msl #8 // encoding: [0x20,0xc4,0x00,0x0f]
+// CHECK: movi v1.2s, #{{0x1|1}}, msl #16 // encoding: [0x21,0xd4,0x00,0x0f]
+// CHECK: movi v0.4s, #{{0x1|1}}, msl #8 // encoding: [0x20,0xc4,0x00,0x4f]
+// CHECK: movi v31.4s, #{{0x1|1}}, msl #16 // encoding: [0x3f,0xd4,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Inverted Immediate Masked
@@ -148,10 +148,10 @@
mvni v31.4s, #0x1, msl #8
mvni v0.4s, #0x1, msl #16
-// CHECK: mvni v1.2s, #0x1, msl #8 // encoding: [0x21,0xc4,0x00,0x2f]
-// CHECK: mvni v0.2s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x2f]
-// CHECK: mvni v31.4s, #0x1, msl #8 // encoding: [0x3f,0xc4,0x00,0x6f]
-// CHECK: mvni v0.4s, #0x1, msl #16 // encoding: [0x20,0xd4,0x00,0x6f]
+// CHECK: mvni v1.2s, #{{0x1|1}}, msl #8 // encoding: [0x21,0xc4,0x00,0x2f]
+// CHECK: mvni v0.2s, #{{0x1|1}}, msl #16 // encoding: [0x20,0xd4,0x00,0x2f]
+// CHECK: mvni v31.4s, #{{0x1|1}}, msl #8 // encoding: [0x3f,0xc4,0x00,0x6f]
+// CHECK: mvni v0.4s, #{{0x1|1}}, msl #16 // encoding: [0x20,0xd4,0x00,0x6f]
//----------------------------------------------------------------------
// Vector Move Immediate - per byte
@@ -161,10 +161,10 @@
movi v15.16b, #0xf
movi v31.16b, #0x1f
-// CHECK: movi v0.8b, #0x0 // encoding: [0x00,0xe4,0x00,0x0f]
-// CHECK: movi v31.8b, #0xff // encoding: [0xff,0xe7,0x07,0x0f]
-// CHECK: movi v15.16b, #0xf // encoding: [0xef,0xe5,0x00,0x4f]
-// CHECK: movi v31.16b, #0x1f // encoding: [0xff,0xe7,0x00,0x4f]
+// CHECK: movi v0.8b, #{{0x0|0}} // encoding: [0x00,0xe4,0x00,0x0f]
+// CHECK: movi v31.8b, #{{0xff|255}} // encoding: [0xff,0xe7,0x07,0x0f]
+// CHECK: movi v15.16b, #{{0xf|15}} // encoding: [0xef,0xe5,0x00,0x4f]
+// CHECK: movi v31.16b, #{{0x1f|31}} // encoding: [0xff,0xe7,0x00,0x4f]
//----------------------------------------------------------------------
// Vector Move Immediate - bytemask, per doubleword
@@ -187,23 +187,22 @@
fmov v15.4s, #1.0
fmov v31.2d, #1.0
-// CHECK: fmov v1.2s, #1.00000000 // encoding: [0x01,0xf6,0x03,0x0f]
-// CHECK: fmov v15.4s, #1.00000000 // encoding: [0x0f,0xf6,0x03,0x4f]
-// CHECK: fmov v31.2d, #1.00000000 // encoding: [0x1f,0xf6,0x03,0x6f]
+// CHECK: fmov v1.2s, #{{1.00000000|1.000000e\+00}} // encoding: [0x01,0xf6,0x03,0x0f]
+// CHECK: fmov v15.4s, #{{1.00000000|1.000000e\+00}} // encoding: [0x0f,0xf6,0x03,0x4f]
+// CHECK: fmov v31.2d, #{{1.00000000|1.000000e\+00}} // encoding: [0x1f,0xf6,0x03,0x6f]
//----------------------------------------------------------------------
// Vector Move - register
//----------------------------------------------------------------------
- // FIXME: these should all print with the "mov" syntax.
mov v0.8b, v31.8b
mov v15.16b, v16.16b
orr v0.8b, v31.8b, v31.8b
orr v15.16b, v16.16b, v16.16b
-// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
-// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
-// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
-// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
+// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
+// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
+// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
+// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s
index 20a4acde..4b28dd0 100644
--- a/test/MC/AArch64/neon-perm.s
+++ b/test/MC/AArch64/neon-perm.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s
index 55ade0e..28de46a 100644
--- a/test/MC/AArch64/neon-scalar-compare.s
+++ b/test/MC/AArch64/neon-scalar-compare.s
@@ -16,7 +16,7 @@
cmeq d20, d21, #0x0
-// CHECK: cmeq d20, d21, #0x0 // encoding: [0xb4,0x9a,0xe0,0x5e]
+// CHECK: cmeq d20, d21, #{{0x0|0}} // encoding: [0xb4,0x9a,0xe0,0x5e]
//----------------------------------------------------------------------
// Scalar Compare Unsigned Higher Or Same
@@ -40,7 +40,7 @@
cmge d20, d21, #0x0
-// CHECK: cmge d20, d21, #0x0 // encoding: [0xb4,0x8a,0xe0,0x7e]
+// CHECK: cmge d20, d21, #{{0x0|0}} // encoding: [0xb4,0x8a,0xe0,0x7e]
//----------------------------------------------------------------------
// Scalar Compare Unsigned Higher
@@ -63,7 +63,7 @@
cmgt d20, d21, #0x0
-// CHECK: cmgt d20, d21, #0x0 // encoding: [0xb4,0x8a,0xe0,0x5e]
+// CHECK: cmgt d20, d21, #{{0x0|0}} // encoding: [0xb4,0x8a,0xe0,0x5e]
//----------------------------------------------------------------------
// Scalar Compare Signed Less Than Or Equal To Zero
@@ -71,7 +71,7 @@
cmle d20, d21, #0x0
-// CHECK: cmle d20, d21, #0x0 // encoding: [0xb4,0x9a,0xe0,0x7e]
+// CHECK: cmle d20, d21, #{{0x0|0}} // encoding: [0xb4,0x9a,0xe0,0x7e]
//----------------------------------------------------------------------
// Scalar Compare Less Than Zero
@@ -79,7 +79,7 @@
cmlt d20, d21, #0x0
-// CHECK: cmlt d20, d21, #0x0 // encoding: [0xb4,0xaa,0xe0,0x5e]
+// CHECK: cmlt d20, d21, #{{0x0|0}} // encoding: [0xb4,0xaa,0xe0,0x5e]
//----------------------------------------------------------------------
// Scalar Compare Bitwise Test Bits
diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s
index 77c638d..db11ea2 100644
--- a/test/MC/AArch64/neon-scalar-dup.s
+++ b/test/MC/AArch64/neon-scalar-dup.s
@@ -15,17 +15,17 @@
dup d3, v5.d[0]
dup d6, v5.d[1]
-// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
-// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
-// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
-// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
-// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
-// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
-// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
-// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
-// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
-// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
-// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]
+// CHECK: {{dup|mov}} b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
+// CHECK: {{dup|mov}} b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
+// CHECK: {{dup|mov}} b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
+// CHECK: {{dup|mov}} h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
+// CHECK: {{dup|mov}} h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
+// CHECK: {{dup|mov}} h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
+// CHECK: {{dup|mov}} s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
+// CHECK: {{dup|mov}} s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
+// CHECK: {{dup|mov}} s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
+// CHECK: {{dup|mov}} d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
+// CHECK: {{dup|mov}} d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]
//------------------------------------------------------------------------------
// Aliases for Duplicate element (scalar)
@@ -42,14 +42,14 @@
mov d3, v5.d[0]
mov d6, v5.d[1]
-// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
-// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
-// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
-// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
-// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
-// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
-// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
-// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
-// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
-// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
-// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]
+// CHECK: {{dup|mov}} b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e]
+// CHECK: {{dup|mov}} b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e]
+// CHECK: {{dup|mov}} b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e]
+// CHECK: {{dup|mov}} h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e]
+// CHECK: {{dup|mov}} h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e]
+// CHECK: {{dup|mov}} h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e]
+// CHECK: {{dup|mov}} s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e]
+// CHECK: {{dup|mov}} s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e]
+// CHECK: {{dup|mov}} s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e]
+// CHECK: {{dup|mov}} d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e]
+// CHECK: {{dup|mov}} d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]
diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s
index f254d65..4837a4c 100644
--- a/test/MC/AArch64/neon-simd-copy.s
+++ b/test/MC/AArch64/neon-simd-copy.s
@@ -16,15 +16,15 @@
mov v20.s[0], w30
mov v1.d[1], x7
-// CHECK: ins v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e]
-// CHECK: ins v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e]
-// CHECK: ins v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e]
-// CHECK: ins v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e]
+// CHECK: {{mov|ins}} v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e]
+// CHECK: {{mov|ins}} v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e]
+// CHECK: {{mov|ins}} v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e]
+// CHECK: {{mov|ins}} v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e]
-// CHECK: ins v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e]
-// CHECK: ins v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e]
-// CHECK: ins v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e]
-// CHECK: ins v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e]
+// CHECK: {{mov|ins}} v2.b[2], w1 // encoding: [0x22,0x1c,0x05,0x4e]
+// CHECK: {{mov|ins}} v7.h[7], w14 // encoding: [0xc7,0x1d,0x1e,0x4e]
+// CHECK: {{mov|ins}} v20.s[0], w30 // encoding: [0xd4,0x1f,0x04,0x4e]
+// CHECK: {{mov|ins}} v1.d[1], x7 // encoding: [0xe1,0x1c,0x18,0x4e]
//------------------------------------------------------------------------------
@@ -54,13 +54,13 @@
mov w20, v9.s[2]
mov x7, v18.d[1]
-// CHECK: umov w1, v0.b[15] // encoding: [0x01,0x3c,0x1f,0x0e]
-// CHECK: umov w14, v6.h[4] // encoding: [0xce,0x3c,0x12,0x0e]
-// CHECK: umov w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e]
-// CHECK: umov x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e]
+// CHECK: {{mov|umov}} w1, v0.b[15] // encoding: [0x01,0x3c,0x1f,0x0e]
+// CHECK: {{mov|umov}} w14, v6.h[4] // encoding: [0xce,0x3c,0x12,0x0e]
+// CHECK: {{mov|umov}} w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e]
+// CHECK: {{mov|umov}} x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e]
-// CHECK: umov w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e]
-// CHECK: umov x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e]
+// CHECK: {{mov|umov}} w20, v9.s[2] // encoding: [0x34,0x3d,0x14,0x0e]
+// CHECK: {{mov|umov}} x7, v18.d[1] // encoding: [0x47,0x3e,0x18,0x4e]
//------------------------------------------------------------------------------
// Insert element (vector, from element)
@@ -76,15 +76,15 @@
mov v15.s[3], v22.s[2]
mov v0.d[0], v4.d[1]
-// CHECK: ins v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e]
-// CHECK: ins v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e]
-// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x46,0x1c,0x6e]
-// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
+// CHECK: {{mov|ins}} v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e]
+// CHECK: {{mov|ins}} v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e]
+// CHECK: {{mov|ins}} v15.s[3], v22.s[2] // encoding: [0xcf,0x46,0x1c,0x6e]
+// CHECK: {{mov|ins}} v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
-// CHECK: ins v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e]
-// CHECK: ins v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e]
-// CHECK: ins v15.s[3], v22.s[2] // encoding: [0xcf,0x46,0x1c,0x6e]
-// CHECK: ins v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
+// CHECK: {{mov|ins}} v1.b[14], v3.b[6] // encoding: [0x61,0x34,0x1d,0x6e]
+// CHECK: {{mov|ins}} v6.h[7], v7.h[5] // encoding: [0xe6,0x54,0x1e,0x6e]
+// CHECK: {{mov|ins}} v15.s[3], v22.s[2] // encoding: [0xcf,0x46,0x1c,0x6e]
+// CHECK: {{mov|ins}} v0.d[0], v4.d[1] // encoding: [0x80,0x44,0x08,0x6e]
//------------------------------------------------------------------------------
// Duplicate to all lanes (vector, from element)
@@ -97,13 +97,13 @@
dup v17.4s, v20.s[0]
dup v5.2d, v1.d[1]
-// CHECK: dup v1.8b, v2.b[2] // encoding: [0x41,0x04,0x05,0x0e]
-// CHECK: dup v11.4h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x0e]
-// CHECK: dup v17.2s, v20.s[0] // encoding: [0x91,0x06,0x04,0x0e]
-// CHECK: dup v1.16b, v2.b[2] // encoding: [0x41,0x04,0x05,0x4e]
-// CHECK: dup v11.8h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x4e]
-// CHECK: dup v17.4s, v20.s[0] // encoding: [0x91,0x06,0x04,0x4e]
-// CHECK: dup v5.2d, v1.d[1] // encoding: [0x25,0x04,0x18,0x4e]
+// CHECK: {{mov|dup}} v1.8b, v2.b[2] // encoding: [0x41,0x04,0x05,0x0e]
+// CHECK: {{mov|dup}} v11.4h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x0e]
+// CHECK: {{mov|dup}} v17.2s, v20.s[0] // encoding: [0x91,0x06,0x04,0x0e]
+// CHECK: {{mov|dup}} v1.16b, v2.b[2] // encoding: [0x41,0x04,0x05,0x4e]
+// CHECK: {{mov|dup}} v11.8h, v7.h[7] // encoding: [0xeb,0x04,0x1e,0x4e]
+// CHECK: {{mov|dup}} v17.4s, v20.s[0] // encoding: [0x91,0x06,0x04,0x4e]
+// CHECK: {{mov|dup}} v5.2d, v1.d[1] // encoding: [0x25,0x04,0x18,0x4e]
//------------------------------------------------------------------------------
// Duplicate to all lanes (vector, from main)
@@ -116,13 +116,13 @@
dup v17.4s, w28
dup v5.2d, x0
-// CHECK: dup v1.8b, w1 // encoding: [0x21,0x0c,0x01,0x0e]
-// CHECK: dup v11.4h, w14 // encoding: [0xcb,0x0d,0x02,0x0e]
-// CHECK: dup v17.2s, w30 // encoding: [0xd1,0x0f,0x04,0x0e]
-// CHECK: dup v1.16b, w2 // encoding: [0x41,0x0c,0x01,0x4e]
-// CHECK: dup v11.8h, w16 // encoding: [0x0b,0x0e,0x02,0x4e]
-// CHECK: dup v17.4s, w28 // encoding: [0x91,0x0f,0x04,0x4e]
-// CHECK: dup v5.2d, x0 // encoding: [0x05,0x0c,0x08,0x4e]
+// CHECK: {{mov|dup}} v1.8b, w1 // encoding: [0x21,0x0c,0x01,0x0e]
+// CHECK: {{mov|dup}} v11.4h, w14 // encoding: [0xcb,0x0d,0x02,0x0e]
+// CHECK: {{mov|dup}} v17.2s, w30 // encoding: [0xd1,0x0f,0x04,0x0e]
+// CHECK: {{mov|dup}} v1.16b, w2 // encoding: [0x41,0x0c,0x01,0x4e]
+// CHECK: {{mov|dup}} v11.8h, w16 // encoding: [0x0b,0x0e,0x02,0x4e]
+// CHECK: {{mov|dup}} v17.4s, w28 // encoding: [0x91,0x0f,0x04,0x4e]
+// CHECK: {{mov|dup}} v5.2d, x0 // encoding: [0x05,0x0c,0x08,0x4e]
diff --git a/test/MC/AArch64/neon-simd-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-ldst-multi-elem.s
index 05fe4da..b8b3e72 100644
--- a/test/MC/AArch64/neon-simd-ldst-multi-elem.s
+++ b/test/MC/AArch64/neon-simd-ldst-multi-elem.s
@@ -1,463 +1,463 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Store multiple 1-element structures from one register
//------------------------------------------------------------------------------
- st1 {v0.16b}, [x0]
- st1 {v15.8h}, [x15]
- st1 {v31.4s}, [sp]
- st1 {v0.2d}, [x0]
- st1 {v0.8b}, [x0]
- st1 {v15.4h}, [x15]
- st1 {v31.2s}, [sp]
- st1 {v0.1d}, [x0]
-// CHECK: st1 {v0.16b}, [x0] // encoding: [0x00,0x70,0x00,0x4c]
-// CHECK: st1 {v15.8h}, [x15] // encoding: [0xef,0x75,0x00,0x4c]
-// CHECK: st1 {v31.4s}, [sp] // encoding: [0xff,0x7b,0x00,0x4c]
-// CHECK: st1 {v0.2d}, [x0] // encoding: [0x00,0x7c,0x00,0x4c]
-// CHECK: st1 {v0.8b}, [x0] // encoding: [0x00,0x70,0x00,0x0c]
-// CHECK: st1 {v15.4h}, [x15] // encoding: [0xef,0x75,0x00,0x0c]
-// CHECK: st1 {v31.2s}, [sp] // encoding: [0xff,0x7b,0x00,0x0c]
-// CHECK: st1 {v0.1d}, [x0] // encoding: [0x00,0x7c,0x00,0x0c]
+ st1 { v0.16b }, [x0]
+ st1 { v15.8h }, [x15]
+ st1 { v31.4s }, [sp]
+ st1 { v0.2d }, [x0]
+ st1 { v0.8b }, [x0]
+ st1 { v15.4h }, [x15]
+ st1 { v31.2s }, [sp]
+ st1 { v0.1d }, [x0]
+// CHECK: st1 { v0.16b }, [x0] // encoding: [0x00,0x70,0x00,0x4c]
+// CHECK: st1 { v15.8h }, [x15] // encoding: [0xef,0x75,0x00,0x4c]
+// CHECK: st1 { v31.4s }, [sp] // encoding: [0xff,0x7b,0x00,0x4c]
+// CHECK: st1 { v0.2d }, [x0] // encoding: [0x00,0x7c,0x00,0x4c]
+// CHECK: st1 { v0.8b }, [x0] // encoding: [0x00,0x70,0x00,0x0c]
+// CHECK: st1 { v15.4h }, [x15] // encoding: [0xef,0x75,0x00,0x0c]
+// CHECK: st1 { v31.2s }, [sp] // encoding: [0xff,0x7b,0x00,0x0c]
+// CHECK: st1 { v0.1d }, [x0] // encoding: [0x00,0x7c,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from two consecutive registers
//------------------------------------------------------------------------------
- st1 {v0.16b, v1.16b}, [x0]
- st1 {v15.8h, v16.8h}, [x15]
- st1 {v31.4s, v0.4s}, [sp]
- st1 {v0.2d, v1.2d}, [x0]
- st1 {v0.8b, v1.8b}, [x0]
- st1 {v15.4h, v16.4h}, [x15]
- st1 {v31.2s, v0.2s}, [sp]
- st1 {v0.1d, v1.1d}, [x0]
-// CHECK: st1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x00,0x4c]
-// CHECK: st1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
-// CHECK: st1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x00,0x4c]
-// CHECK: st1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x00,0x4c]
-// CHECK: st1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x00,0x0c]
-// CHECK: st1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
-// CHECK: st1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x00,0x0c]
-// CHECK: st1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x00,0x0c]
+ st1 { v0.16b, v1.16b }, [x0]
+ st1 { v15.8h, v16.8h }, [x15]
+ st1 { v31.4s, v0.4s }, [sp]
+ st1 { v0.2d, v1.2d }, [x0]
+ st1 { v0.8b, v1.8b }, [x0]
+ st1 { v15.4h, v16.4h }, [x15]
+ st1 { v31.2s, v0.2s }, [sp]
+ st1 { v0.1d, v1.1d }, [x0]
+// CHECK: st1 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0xa0,0x00,0x4c]
+// CHECK: st1 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
+// CHECK: st1 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0xab,0x00,0x4c]
+// CHECK: st1 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0xac,0x00,0x4c]
+// CHECK: st1 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0xa0,0x00,0x0c]
+// CHECK: st1 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
+// CHECK: st1 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0xab,0x00,0x0c]
+// CHECK: st1 { v0.1d, v1.1d }, [x0] // encoding: [0x00,0xac,0x00,0x0c]
- st1 {v0.16b-v1.16b}, [x0]
- st1 {v15.8h-v16.8h}, [x15]
- st1 {v31.4s-v0.4s}, [sp]
- st1 {v0.2d-v1.2d}, [x0]
- st1 {v0.8b-v1.8b}, [x0]
- st1 {v15.4h-v16.4h}, [x15]
- st1 {v31.2s-v0.2s}, [sp]
- st1 {v0.1d-v1.1d}, [x0]
-// CHECK: st1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x00,0x4c]
-// CHECK: st1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
-// CHECK: st1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x00,0x4c]
-// CHECK: st1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x00,0x4c]
-// CHECK: st1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x00,0x0c]
-// CHECK: st1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
-// CHECK: st1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x00,0x0c]
-// CHECK: st1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x00,0x0c]
+ st1 { v0.16b-v1.16b }, [x0]
+ st1 { v15.8h-v16.8h }, [x15]
+ st1 { v31.4s-v0.4s }, [sp]
+ st1 { v0.2d-v1.2d }, [x0]
+ st1 { v0.8b-v1.8b }, [x0]
+ st1 { v15.4h-v16.4h }, [x15]
+ st1 { v31.2s-v0.2s }, [sp]
+ st1 { v0.1d-v1.1d }, [x0]
+// CHECK: st1 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0xa0,0x00,0x4c]
+// CHECK: st1 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0xa5,0x00,0x4c]
+// CHECK: st1 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0xab,0x00,0x4c]
+// CHECK: st1 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0xac,0x00,0x4c]
+// CHECK: st1 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0xa0,0x00,0x0c]
+// CHECK: st1 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0xa5,0x00,0x0c]
+// CHECK: st1 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0xab,0x00,0x0c]
+// CHECK: st1 { v0.1d, v1.1d }, [x0] // encoding: [0x00,0xac,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from three consecutive registers
//------------------------------------------------------------------------------
- st1 {v0.16b, v1.16b, v2.16b}, [x0]
- st1 {v15.8h, v16.8h, v17.8h}, [x15]
- st1 {v31.4s, v0.4s, v1.4s}, [sp]
- st1 {v0.2d, v1.2d, v2.2d}, [x0]
- st1 {v0.8b, v1.8b, v2.8b}, [x0]
- st1 {v15.4h, v16.4h, v17.4h}, [x15]
- st1 {v31.2s, v0.2s, v1.2s}, [sp]
- st1 {v0.1d, v1.1d, v2.1d}, [x0]
-// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c]
-// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c]
-// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
-// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
-// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c]
-// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c]
-// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
-// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
+ st1 { v0.16b, v1.16b, v2.16b }, [x0]
+ st1 { v15.8h, v16.8h, v17.8h }, [x15]
+ st1 { v31.4s, v0.4s, v1.4s }, [sp]
+ st1 { v0.2d, v1.2d, v2.2d }, [x0]
+ st1 { v0.8b, v1.8b, v2.8b }, [x0]
+ st1 { v15.4h, v16.4h, v17.4h }, [x15]
+ st1 { v31.2s, v0.2s, v1.2s }, [sp]
+ st1 { v0.1d, v1.1d, v2.1d }, [x0]
+// CHECK: st1 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x60,0x00,0x4c]
+// CHECK: st1 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x65,0x00,0x4c]
+// CHECK: st1 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
+// CHECK: st1 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
+// CHECK: st1 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x60,0x00,0x0c]
+// CHECK: st1 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x65,0x00,0x0c]
+// CHECK: st1 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
+// CHECK: st1 { v0.1d, v1.1d, v2.1d }, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
- st1 {v0.16b-v2.16b}, [x0]
- st1 {v15.8h-v17.8h}, [x15]
- st1 {v31.4s-v1.4s}, [sp]
- st1 {v0.2d-v2.2d}, [x0]
- st1 {v0.8b-v2.8b}, [x0]
- st1 {v15.4h-v17.4h}, [x15]
- st1 {v31.2s-v1.2s}, [sp]
- st1 {v0.1d-v2.1d}, [x0]
-// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c]
-// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c]
-// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
-// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
-// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c]
-// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c]
-// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
-// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
+ st1 { v0.16b-v2.16b }, [x0]
+ st1 { v15.8h-v17.8h }, [x15]
+ st1 { v31.4s-v1.4s }, [sp]
+ st1 { v0.2d-v2.2d }, [x0]
+ st1 { v0.8b-v2.8b }, [x0]
+ st1 { v15.4h-v17.4h }, [x15]
+ st1 { v31.2s-v1.2s }, [sp]
+ st1 { v0.1d-v2.1d }, [x0]
+// CHECK: st1 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x60,0x00,0x4c]
+// CHECK: st1 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x65,0x00,0x4c]
+// CHECK: st1 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x6b,0x00,0x4c]
+// CHECK: st1 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x6c,0x00,0x4c]
+// CHECK: st1 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x60,0x00,0x0c]
+// CHECK: st1 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x65,0x00,0x0c]
+// CHECK: st1 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x6b,0x00,0x0c]
+// CHECK: st1 { v0.1d, v1.1d, v2.1d }, [x0] // encoding: [0x00,0x6c,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from four consecutive registers
//------------------------------------------------------------------------------
- st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
- st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
- st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
- st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
- st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
- st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
- st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
- st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]
-// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c]
-// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c]
-// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
-// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
-// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c]
-// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c]
-// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
-// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
+ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
+ st1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15]
+ st1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp]
+ st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
+ st1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15]
+ st1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
+ st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0]
+// CHECK: st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x20,0x00,0x4c]
+// CHECK: st1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x25,0x00,0x4c]
+// CHECK: st1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
+// CHECK: st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
+// CHECK: st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x20,0x00,0x0c]
+// CHECK: st1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x25,0x00,0x0c]
+// CHECK: st1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
+// CHECK: st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
- st1 {v0.16b-v3.16b}, [x0]
- st1 {v15.8h-v18.8h}, [x15]
- st1 {v31.4s-v2.4s}, [sp]
- st1 {v0.2d-v3.2d}, [x0]
- st1 {v0.8b-v3.8b}, [x0]
- st1 {v15.4h-v18.4h}, [x15]
- st1 {v31.2s-v2.2s}, [sp]
- st1 {v0.1d-v3.1d}, [x0]
-// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c]
-// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c]
-// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
-// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
-// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c]
-// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c]
-// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
-// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
+ st1 { v0.16b-v3.16b }, [x0]
+ st1 { v15.8h-v18.8h }, [x15]
+ st1 { v31.4s-v2.4s }, [sp]
+ st1 { v0.2d-v3.2d }, [x0]
+ st1 { v0.8b-v3.8b }, [x0]
+ st1 { v15.4h-v18.4h }, [x15]
+ st1 { v31.2s-v2.2s }, [sp]
+ st1 { v0.1d-v3.1d }, [x0]
+// CHECK: st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x20,0x00,0x4c]
+// CHECK: st1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x25,0x00,0x4c]
+// CHECK: st1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x2b,0x00,0x4c]
+// CHECK: st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x2c,0x00,0x4c]
+// CHECK: st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x20,0x00,0x0c]
+// CHECK: st1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x25,0x00,0x0c]
+// CHECK: st1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x2b,0x00,0x0c]
+// CHECK: st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0] // encoding: [0x00,0x2c,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 2-element structures from two consecutive registers
//------------------------------------------------------------------------------
- st2 {v0.16b, v1.16b}, [x0]
- st2 {v15.8h, v16.8h}, [x15]
- st2 {v31.4s, v0.4s}, [sp]
- st2 {v0.2d, v1.2d}, [x0]
- st2 {v0.8b, v1.8b}, [x0]
- st2 {v15.4h, v16.4h}, [x15]
- st2 {v31.2s, v0.2s}, [sp]
-// CHECK: st2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x00,0x4c]
-// CHECK: st2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c]
-// CHECK: st2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x00,0x4c]
-// CHECK: st2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x00,0x4c]
-// CHECK: st2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x00,0x0c]
-// CHECK: st2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c]
-// CHECK: st2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x00,0x0c]
+ st2 { v0.16b, v1.16b }, [x0]
+ st2 { v15.8h, v16.8h }, [x15]
+ st2 { v31.4s, v0.4s }, [sp]
+ st2 { v0.2d, v1.2d }, [x0]
+ st2 { v0.8b, v1.8b }, [x0]
+ st2 { v15.4h, v16.4h }, [x15]
+ st2 { v31.2s, v0.2s }, [sp]
+// CHECK: st2 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0x80,0x00,0x4c]
+// CHECK: st2 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0x85,0x00,0x4c]
+// CHECK: st2 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0x8b,0x00,0x4c]
+// CHECK: st2 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0x8c,0x00,0x4c]
+// CHECK: st2 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0x80,0x00,0x0c]
+// CHECK: st2 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0x85,0x00,0x0c]
+// CHECK: st2 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0x8b,0x00,0x0c]
- st2 {v0.16b-v1.16b}, [x0]
- st2 {v15.8h-v16.8h}, [x15]
- st2 {v31.4s-v0.4s}, [sp]
- st2 {v0.2d-v1.2d}, [x0]
- st2 {v0.8b-v1.8b}, [x0]
- st2 {v15.4h-v16.4h}, [x15]
- st2 {v31.2s-v0.2s}, [sp]
-// CHECK: st2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x00,0x4c]
-// CHECK: st2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c]
-// CHECK: st2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x00,0x4c]
-// CHECK: st2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x00,0x4c]
-// CHECK: st2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x00,0x0c]
-// CHECK: st2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c]
-// CHECK: st2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x00,0x0c]
+ st2 { v0.16b-v1.16b }, [x0]
+ st2 { v15.8h-v16.8h }, [x15]
+ st2 { v31.4s-v0.4s }, [sp]
+ st2 { v0.2d-v1.2d }, [x0]
+ st2 { v0.8b-v1.8b }, [x0]
+ st2 { v15.4h-v16.4h }, [x15]
+ st2 { v31.2s-v0.2s }, [sp]
+// CHECK: st2 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0x80,0x00,0x4c]
+// CHECK: st2 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0x85,0x00,0x4c]
+// CHECK: st2 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0x8b,0x00,0x4c]
+// CHECK: st2 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0x8c,0x00,0x4c]
+// CHECK: st2 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0x80,0x00,0x0c]
+// CHECK: st2 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0x85,0x00,0x0c]
+// CHECK: st2 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0x8b,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 3-element structures from three consecutive registers
//------------------------------------------------------------------------------
- st3 {v0.16b, v1.16b, v2.16b}, [x0]
- st3 {v15.8h, v16.8h, v17.8h}, [x15]
- st3 {v31.4s, v0.4s, v1.4s}, [sp]
- st3 {v0.2d, v1.2d, v2.2d}, [x0]
- st3 {v0.8b, v1.8b, v2.8b}, [x0]
- st3 {v15.4h, v16.4h, v17.4h}, [x15]
- st3 {v31.2s, v0.2s, v1.2s}, [sp]
-// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c]
-// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c]
-// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
-// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
-// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c]
-// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c]
-// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
+ st3 { v0.16b, v1.16b, v2.16b }, [x0]
+ st3 { v15.8h, v16.8h, v17.8h }, [x15]
+ st3 { v31.4s, v0.4s, v1.4s }, [sp]
+ st3 { v0.2d, v1.2d, v2.2d }, [x0]
+ st3 { v0.8b, v1.8b, v2.8b }, [x0]
+ st3 { v15.4h, v16.4h, v17.4h }, [x15]
+ st3 { v31.2s, v0.2s, v1.2s }, [sp]
+// CHECK: st3 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x40,0x00,0x4c]
+// CHECK: st3 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x45,0x00,0x4c]
+// CHECK: st3 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
+// CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
+// CHECK: st3 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x40,0x00,0x0c]
+// CHECK: st3 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x45,0x00,0x0c]
+// CHECK: st3 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
- st3 {v0.16b-v2.16b}, [x0]
- st3 {v15.8h-v17.8h}, [x15]
- st3 {v31.4s-v1.4s}, [sp]
- st3 {v0.2d-v2.2d}, [x0]
- st3 {v0.8b-v2.8b}, [x0]
- st3 {v15.4h-v17.4h}, [x15]
- st3 {v31.2s-v1.2s}, [sp]
-// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c]
-// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c]
-// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
-// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
-// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c]
-// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c]
-// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
+ st3 { v0.16b-v2.16b }, [x0]
+ st3 { v15.8h-v17.8h }, [x15]
+ st3 { v31.4s-v1.4s }, [sp]
+ st3 { v0.2d-v2.2d }, [x0]
+ st3 { v0.8b-v2.8b }, [x0]
+ st3 { v15.4h-v17.4h }, [x15]
+ st3 { v31.2s-v1.2s }, [sp]
+// CHECK: st3 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x40,0x00,0x4c]
+// CHECK: st3 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x45,0x00,0x4c]
+// CHECK: st3 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x4b,0x00,0x4c]
+// CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x4c,0x00,0x4c]
+// CHECK: st3 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x40,0x00,0x0c]
+// CHECK: st3 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x45,0x00,0x0c]
+// CHECK: st3 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x4b,0x00,0x0c]
//------------------------------------------------------------------------------
// Store multiple 4-element structures from four consecutive registers
//------------------------------------------------------------------------------
- st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
- st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
- st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
- st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
- st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
- st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
- st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
-// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c]
-// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c]
-// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
-// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
-// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c]
-// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c]
-// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
+ st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
+ st4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15]
+ st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp]
+ st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
+ st4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15]
+ st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
+// CHECK: st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x00,0x00,0x4c]
+// CHECK: st4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x05,0x00,0x4c]
+// CHECK: st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
+// CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
+// CHECK: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x00,0x00,0x0c]
+// CHECK: st4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x05,0x00,0x0c]
+// CHECK: st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
- st4 {v0.16b-v3.16b}, [x0]
- st4 {v15.8h-v18.8h}, [x15]
- st4 {v31.4s-v2.4s}, [sp]
- st4 {v0.2d-v3.2d}, [x0]
- st4 {v0.8b-v3.8b}, [x0]
- st4 {v15.4h-v18.4h}, [x15]
- st4 {v31.2s-v2.2s}, [sp]
-// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c]
-// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c]
-// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
-// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
-// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c]
-// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c]
-// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
+ st4 { v0.16b-v3.16b }, [x0]
+ st4 { v15.8h-v18.8h }, [x15]
+ st4 { v31.4s-v2.4s }, [sp]
+ st4 { v0.2d-v3.2d }, [x0]
+ st4 { v0.8b-v3.8b }, [x0]
+ st4 { v15.4h-v18.4h }, [x15]
+ st4 { v31.2s-v2.2s }, [sp]
+// CHECK: st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x00,0x00,0x4c]
+// CHECK: st4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x05,0x00,0x4c]
+// CHECK: st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x0b,0x00,0x4c]
+// CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x0c,0x00,0x4c]
+// CHECK: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x00,0x00,0x0c]
+// CHECK: st4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x05,0x00,0x0c]
+// CHECK: st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x0b,0x00,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to one register
//------------------------------------------------------------------------------
- ld1 {v0.16b}, [x0]
- ld1 {v15.8h}, [x15]
- ld1 {v31.4s}, [sp]
- ld1 {v0.2d}, [x0]
- ld1 {v0.8b}, [x0]
- ld1 {v15.4h}, [x15]
- ld1 {v31.2s}, [sp]
- ld1 {v0.1d}, [x0]
-// CHECK: ld1 {v0.16b}, [x0] // encoding: [0x00,0x70,0x40,0x4c]
-// CHECK: ld1 {v15.8h}, [x15] // encoding: [0xef,0x75,0x40,0x4c]
-// CHECK: ld1 {v31.4s}, [sp] // encoding: [0xff,0x7b,0x40,0x4c]
-// CHECK: ld1 {v0.2d}, [x0] // encoding: [0x00,0x7c,0x40,0x4c]
-// CHECK: ld1 {v0.8b}, [x0] // encoding: [0x00,0x70,0x40,0x0c]
-// CHECK: ld1 {v15.4h}, [x15] // encoding: [0xef,0x75,0x40,0x0c]
-// CHECK: ld1 {v31.2s}, [sp] // encoding: [0xff,0x7b,0x40,0x0c]
-// CHECK: ld1 {v0.1d}, [x0] // encoding: [0x00,0x7c,0x40,0x0c]
+ ld1 { v0.16b }, [x0]
+ ld1 { v15.8h }, [x15]
+ ld1 { v31.4s }, [sp]
+ ld1 { v0.2d }, [x0]
+ ld1 { v0.8b }, [x0]
+ ld1 { v15.4h }, [x15]
+ ld1 { v31.2s }, [sp]
+ ld1 { v0.1d }, [x0]
+// CHECK: ld1 { v0.16b }, [x0] // encoding: [0x00,0x70,0x40,0x4c]
+// CHECK: ld1 { v15.8h }, [x15] // encoding: [0xef,0x75,0x40,0x4c]
+// CHECK: ld1 { v31.4s }, [sp] // encoding: [0xff,0x7b,0x40,0x4c]
+// CHECK: ld1 { v0.2d }, [x0] // encoding: [0x00,0x7c,0x40,0x4c]
+// CHECK: ld1 { v0.8b }, [x0] // encoding: [0x00,0x70,0x40,0x0c]
+// CHECK: ld1 { v15.4h }, [x15] // encoding: [0xef,0x75,0x40,0x0c]
+// CHECK: ld1 { v31.2s }, [sp] // encoding: [0xff,0x7b,0x40,0x0c]
+// CHECK: ld1 { v0.1d }, [x0] // encoding: [0x00,0x7c,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to two consecutive registers
//------------------------------------------------------------------------------
- ld1 {v0.16b, v1.16b}, [x0]
- ld1 {v15.8h, v16.8h}, [x15]
- ld1 {v31.4s, v0.4s}, [sp]
- ld1 {v0.2d, v1.2d}, [x0]
- ld1 {v0.8b, v1.8b}, [x0]
- ld1 {v15.4h, v16.4h}, [x15]
- ld1 {v31.2s, v0.2s}, [sp]
- ld1 {v0.1d, v1.1d}, [x0]
-// CHECK: ld1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x40,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x40,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x40,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x40,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x40,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x40,0x0c]
+ ld1 { v0.16b, v1.16b }, [x0]
+ ld1 { v15.8h, v16.8h }, [x15]
+ ld1 { v31.4s, v0.4s }, [sp]
+ ld1 { v0.2d, v1.2d }, [x0]
+ ld1 { v0.8b, v1.8b }, [x0]
+ ld1 { v15.4h, v16.4h }, [x15]
+ ld1 { v31.2s, v0.2s }, [sp]
+ ld1 { v0.1d, v1.1d }, [x0]
+// CHECK: ld1 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0xa0,0x40,0x4c]
+// CHECK: ld1 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
+// CHECK: ld1 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0xab,0x40,0x4c]
+// CHECK: ld1 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0xac,0x40,0x4c]
+// CHECK: ld1 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0xa0,0x40,0x0c]
+// CHECK: ld1 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
+// CHECK: ld1 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0xab,0x40,0x0c]
+// CHECK: ld1 { v0.1d, v1.1d }, [x0] // encoding: [0x00,0xac,0x40,0x0c]
- ld1 {v0.16b-v1.16b}, [x0]
- ld1 {v15.8h-v16.8h}, [x15]
- ld1 {v31.4s-v0.4s}, [sp]
- ld1 {v0.2d-v1.2d}, [x0]
- ld1 {v0.8b-v1.8b}, [x0]
- ld1 {v15.4h-v16.4h}, [x15]
- ld1 {v31.2s-v0.2s}, [sp]
- ld1 {v0.1d-v1.1d}, [x0]
-// CHECK: ld1 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xa0,0x40,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xab,0x40,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xac,0x40,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xa0,0x40,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xab,0x40,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d}, [x0] // encoding: [0x00,0xac,0x40,0x0c]
+ ld1 { v0.16b-v1.16b }, [x0]
+ ld1 { v15.8h-v16.8h }, [x15]
+ ld1 { v31.4s-v0.4s }, [sp]
+ ld1 { v0.2d-v1.2d }, [x0]
+ ld1 { v0.8b-v1.8b }, [x0]
+ ld1 { v15.4h-v16.4h }, [x15]
+ ld1 { v31.2s-v0.2s }, [sp]
+ ld1 { v0.1d-v1.1d }, [x0]
+// CHECK: ld1 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0xa0,0x40,0x4c]
+// CHECK: ld1 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0xa5,0x40,0x4c]
+// CHECK: ld1 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0xab,0x40,0x4c]
+// CHECK: ld1 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0xac,0x40,0x4c]
+// CHECK: ld1 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0xa0,0x40,0x0c]
+// CHECK: ld1 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0xa5,0x40,0x0c]
+// CHECK: ld1 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0xab,0x40,0x0c]
+// CHECK: ld1 { v0.1d, v1.1d }, [x0] // encoding: [0x00,0xac,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to three consecutive registers
//------------------------------------------------------------------------------
- ld1 {v0.16b, v1.16b, v2.16b}, [x0]
- ld1 {v15.8h, v16.8h, v17.8h}, [x15]
- ld1 {v31.4s, v0.4s, v1.4s}, [sp]
- ld1 {v0.2d, v1.2d, v2.2d}, [x0]
- ld1 {v0.8b, v1.8b, v2.8b}, [x0]
- ld1 {v15.4h, v16.4h, v17.4h}, [x15]
- ld1 {v31.2s, v0.2s, v1.2s}, [sp]
- ld1 {v0.1d, v1.1d, v2.1d}, [x0]
-// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
+ ld1 { v0.16b, v1.16b, v2.16b }, [x0]
+ ld1 { v15.8h, v16.8h, v17.8h }, [x15]
+ ld1 { v31.4s, v0.4s, v1.4s }, [sp]
+ ld1 { v0.2d, v1.2d, v2.2d }, [x0]
+ ld1 { v0.8b, v1.8b, v2.8b }, [x0]
+ ld1 { v15.4h, v16.4h, v17.4h }, [x15]
+ ld1 { v31.2s, v0.2s, v1.2s }, [sp]
+ ld1 { v0.1d, v1.1d, v2.1d }, [x0]
+// CHECK: ld1 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x60,0x40,0x4c]
+// CHECK: ld1 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x65,0x40,0x4c]
+// CHECK: ld1 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
+// CHECK: ld1 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
+// CHECK: ld1 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x60,0x40,0x0c]
+// CHECK: ld1 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x65,0x40,0x0c]
+// CHECK: ld1 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
+// CHECK: ld1 { v0.1d, v1.1d, v2.1d }, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
- ld1 {v0.16b-v2.16b}, [x0]
- ld1 {v15.8h-v17.8h}, [x15]
- ld1 {v31.4s-v1.4s}, [sp]
- ld1 {v0.2d-v2.2d}, [x0]
- ld1 {v0.8b-v2.8b}, [x0]
- ld1 {v15.4h-v17.4h}, [x15]
- ld1 {v31.2s-v1.2s}, [sp]
- ld1 {v0.1d-v2.1d}, [x0]
-// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
+ ld1 { v0.16b-v2.16b }, [x0]
+ ld1 { v15.8h-v17.8h }, [x15]
+ ld1 { v31.4s-v1.4s }, [sp]
+ ld1 { v0.2d-v2.2d }, [x0]
+ ld1 { v0.8b-v2.8b }, [x0]
+ ld1 { v15.4h-v17.4h }, [x15]
+ ld1 { v31.2s-v1.2s }, [sp]
+ ld1 { v0.1d-v2.1d }, [x0]
+// CHECK: ld1 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x60,0x40,0x4c]
+// CHECK: ld1 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x65,0x40,0x4c]
+// CHECK: ld1 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x6b,0x40,0x4c]
+// CHECK: ld1 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x6c,0x40,0x4c]
+// CHECK: ld1 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x60,0x40,0x0c]
+// CHECK: ld1 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x65,0x40,0x0c]
+// CHECK: ld1 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x6b,0x40,0x0c]
+// CHECK: ld1 { v0.1d, v1.1d, v2.1d }, [x0] // encoding: [0x00,0x6c,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures to four consecutive registers
//------------------------------------------------------------------------------
- ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
- ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
- ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
- ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
- ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
- ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
- ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
- ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]
-// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
+ ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
+ ld1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15]
+ ld1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp]
+ ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
+ ld1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15]
+ ld1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
+ ld1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0]
+// CHECK: ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x20,0x40,0x4c]
+// CHECK: ld1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x25,0x40,0x4c]
+// CHECK: ld1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
+// CHECK: ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
+// CHECK: ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x20,0x40,0x0c]
+// CHECK: ld1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x25,0x40,0x0c]
+// CHECK: ld1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
+// CHECK: ld1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
- ld1 {v0.16b-v3.16b}, [x0]
- ld1 {v15.8h-v18.8h}, [x15]
- ld1 {v31.4s-v2.4s}, [sp]
- ld1 {v0.2d-v3.2d}, [x0]
- ld1 {v0.8b-v3.8b}, [x0]
- ld1 {v15.4h-v18.4h}, [x15]
- ld1 {v31.2s-v2.2s}, [sp]
- ld1 {v0.1d-v3.1d}, [x0]
-// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
+ ld1 { v0.16b-v3.16b }, [x0]
+ ld1 { v15.8h-v18.8h }, [x15]
+ ld1 { v31.4s-v2.4s }, [sp]
+ ld1 { v0.2d-v3.2d }, [x0]
+ ld1 { v0.8b-v3.8b }, [x0]
+ ld1 { v15.4h-v18.4h }, [x15]
+ ld1 { v31.2s-v2.2s }, [sp]
+ ld1 { v0.1d-v3.1d }, [x0]
+// CHECK: ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x20,0x40,0x4c]
+// CHECK: ld1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x25,0x40,0x4c]
+// CHECK: ld1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x2b,0x40,0x4c]
+// CHECK: ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x2c,0x40,0x4c]
+// CHECK: ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x20,0x40,0x0c]
+// CHECK: ld1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x25,0x40,0x0c]
+// CHECK: ld1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x2b,0x40,0x0c]
+// CHECK: ld1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0] // encoding: [0x00,0x2c,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 2-element structures to two consecutive registers
//------------------------------------------------------------------------------
- ld2 {v0.16b, v1.16b}, [x0]
- ld2 {v15.8h, v16.8h}, [x15]
- ld2 {v31.4s, v0.4s}, [sp]
- ld2 {v0.2d, v1.2d}, [x0]
- ld2 {v0.8b, v1.8b}, [x0]
- ld2 {v15.4h, v16.4h}, [x15]
- ld2 {v31.2s, v0.2s}, [sp]
-// CHECK: ld2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x40,0x4c]
-// CHECK: ld2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c]
-// CHECK: ld2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x40,0x4c]
-// CHECK: ld2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x40,0x4c]
-// CHECK: ld2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x40,0x0c]
-// CHECK: ld2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c]
-// CHECK: ld2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x40,0x0c]
+ ld2 { v0.16b, v1.16b }, [x0]
+ ld2 { v15.8h, v16.8h }, [x15]
+ ld2 { v31.4s, v0.4s }, [sp]
+ ld2 { v0.2d, v1.2d }, [x0]
+ ld2 { v0.8b, v1.8b }, [x0]
+ ld2 { v15.4h, v16.4h }, [x15]
+ ld2 { v31.2s, v0.2s }, [sp]
+// CHECK: ld2 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0x80,0x40,0x4c]
+// CHECK: ld2 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0x85,0x40,0x4c]
+// CHECK: ld2 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0x8b,0x40,0x4c]
+// CHECK: ld2 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0x8c,0x40,0x4c]
+// CHECK: ld2 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0x80,0x40,0x0c]
+// CHECK: ld2 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0x85,0x40,0x0c]
+// CHECK: ld2 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0x8b,0x40,0x0c]
- ld2 {v0.16b-v1.16b}, [x0]
- ld2 {v15.8h-v16.8h}, [x15]
- ld2 {v31.4s-v0.4s}, [sp]
- ld2 {v0.2d-v1.2d}, [x0]
- ld2 {v0.8b-v1.8b}, [x0]
- ld2 {v15.4h-v16.4h}, [x15]
- ld2 {v31.2s-v0.2s}, [sp]
-// CHECK: ld2 {v0.16b, v1.16b}, [x0] // encoding: [0x00,0x80,0x40,0x4c]
-// CHECK: ld2 {v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c]
-// CHECK: ld2 {v31.4s, v0.4s}, [sp] // encoding: [0xff,0x8b,0x40,0x4c]
-// CHECK: ld2 {v0.2d, v1.2d}, [x0] // encoding: [0x00,0x8c,0x40,0x4c]
-// CHECK: ld2 {v0.8b, v1.8b}, [x0] // encoding: [0x00,0x80,0x40,0x0c]
-// CHECK: ld2 {v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c]
-// CHECK: ld2 {v31.2s, v0.2s}, [sp] // encoding: [0xff,0x8b,0x40,0x0c]
+ ld2 { v0.16b-v1.16b }, [x0]
+ ld2 { v15.8h-v16.8h }, [x15]
+ ld2 { v31.4s-v0.4s }, [sp]
+ ld2 { v0.2d-v1.2d }, [x0]
+ ld2 { v0.8b-v1.8b }, [x0]
+ ld2 { v15.4h-v16.4h }, [x15]
+ ld2 { v31.2s-v0.2s }, [sp]
+// CHECK: ld2 { v0.16b, v1.16b }, [x0] // encoding: [0x00,0x80,0x40,0x4c]
+// CHECK: ld2 { v15.8h, v16.8h }, [x15] // encoding: [0xef,0x85,0x40,0x4c]
+// CHECK: ld2 { v31.4s, v0.4s }, [sp] // encoding: [0xff,0x8b,0x40,0x4c]
+// CHECK: ld2 { v0.2d, v1.2d }, [x0] // encoding: [0x00,0x8c,0x40,0x4c]
+// CHECK: ld2 { v0.8b, v1.8b }, [x0] // encoding: [0x00,0x80,0x40,0x0c]
+// CHECK: ld2 { v15.4h, v16.4h }, [x15] // encoding: [0xef,0x85,0x40,0x0c]
+// CHECK: ld2 { v31.2s, v0.2s }, [sp] // encoding: [0xff,0x8b,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 3-element structures to three consecutive registers
//------------------------------------------------------------------------------
- ld3 {v0.16b, v1.16b, v2.16b}, [x0]
- ld3 {v15.8h, v16.8h, v17.8h}, [x15]
- ld3 {v31.4s, v0.4s, v1.4s}, [sp]
- ld3 {v0.2d, v1.2d, v2.2d}, [x0]
- ld3 {v0.8b, v1.8b, v2.8b}, [x0]
- ld3 {v15.4h, v16.4h, v17.4h}, [x15]
- ld3 {v31.2s, v0.2s, v1.2s}, [sp]
-// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c]
-// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c]
-// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
-// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
-// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c]
-// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c]
-// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
+ ld3 { v0.16b, v1.16b, v2.16b }, [x0]
+ ld3 { v15.8h, v16.8h, v17.8h }, [x15]
+ ld3 { v31.4s, v0.4s, v1.4s }, [sp]
+ ld3 { v0.2d, v1.2d, v2.2d }, [x0]
+ ld3 { v0.8b, v1.8b, v2.8b }, [x0]
+ ld3 { v15.4h, v16.4h, v17.4h }, [x15]
+ ld3 { v31.2s, v0.2s, v1.2s }, [sp]
+// CHECK: ld3 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x40,0x40,0x4c]
+// CHECK: ld3 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x45,0x40,0x4c]
+// CHECK: ld3 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
+// CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
+// CHECK: ld3 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x40,0x40,0x0c]
+// CHECK: ld3 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x45,0x40,0x0c]
+// CHECK: ld3 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
- ld3 {v0.16b-v2.16b}, [x0]
- ld3 {v15.8h-v17.8h}, [x15]
- ld3 {v31.4s-v1.4s}, [sp]
- ld3 {v0.2d-v2.2d}, [x0]
- ld3 {v0.8b-v2.8b}, [x0]
- ld3 {v15.4h-v17.4h}, [x15]
- ld3 {v31.2s-v1.2s}, [sp]
-// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c]
-// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c]
-// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
-// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
-// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c]
-// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c]
-// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
+ ld3 { v0.16b-v2.16b }, [x0]
+ ld3 { v15.8h-v17.8h }, [x15]
+ ld3 { v31.4s-v1.4s }, [sp]
+ ld3 { v0.2d-v2.2d }, [x0]
+ ld3 { v0.8b-v2.8b }, [x0]
+ ld3 { v15.4h-v17.4h }, [x15]
+ ld3 { v31.2s-v1.2s }, [sp]
+// CHECK: ld3 { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0x40,0x40,0x4c]
+// CHECK: ld3 { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0x45,0x40,0x4c]
+// CHECK: ld3 { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0x4b,0x40,0x4c]
+// CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0x4c,0x40,0x4c]
+// CHECK: ld3 { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0x40,0x40,0x0c]
+// CHECK: ld3 { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0x45,0x40,0x0c]
+// CHECK: ld3 { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0x4b,0x40,0x0c]
//------------------------------------------------------------------------------
// Load multiple 4-element structures to four consecutive registers
//------------------------------------------------------------------------------
- ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
- ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
- ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
- ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
- ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
- ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
- ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
-// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c]
-// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c]
-// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
-// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
-// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c]
-// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c]
-// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
+ ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
+ ld4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15]
+ ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp]
+ ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
+ ld4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15]
+ ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
+// CHECK: ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x00,0x40,0x4c]
+// CHECK: ld4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x05,0x40,0x4c]
+// CHECK: ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
+// CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
+// CHECK: ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x00,0x40,0x0c]
+// CHECK: ld4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x05,0x40,0x0c]
+// CHECK: ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
- ld4 {v0.16b-v3.16b}, [x0]
- ld4 {v15.8h-v18.8h}, [x15]
- ld4 {v31.4s-v2.4s}, [sp]
- ld4 {v0.2d-v3.2d}, [x0]
- ld4 {v0.8b-v3.8b}, [x0]
- ld4 {v15.4h-v18.4h}, [x15]
- ld4 {v31.2s-v2.2s}, [sp]
-// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c]
-// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c]
-// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
-// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
-// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c]
-// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c]
-// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
+ ld4 { v0.16b-v3.16b }, [x0]
+ ld4 { v15.8h-v18.8h }, [x15]
+ ld4 { v31.4s-v2.4s }, [sp]
+ ld4 { v0.2d-v3.2d }, [x0]
+ ld4 { v0.8b-v3.8b }, [x0]
+ ld4 { v15.4h-v18.4h }, [x15]
+ ld4 { v31.2s-v2.2s }, [sp]
+// CHECK: ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0x00,0x40,0x4c]
+// CHECK: ld4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0x05,0x40,0x4c]
+// CHECK: ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0x0b,0x40,0x4c]
+// CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0x0c,0x40,0x4c]
+// CHECK: ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0x00,0x40,0x0c]
+// CHECK: ld4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0x05,0x40,0x0c]
+// CHECK: ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0x0b,0x40,0x0c]
diff --git a/test/MC/AArch64/neon-simd-ldst-one-elem.s b/test/MC/AArch64/neon-simd-ldst-one-elem.s
index 140d752..4febf6d 100644
--- a/test/MC/AArch64/neon-simd-ldst-one-elem.s
+++ b/test/MC/AArch64/neon-simd-ldst-one-elem.s
@@ -1,325 +1,325 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Load single 1-element structure to all lanes of 1 register
//------------------------------------------------------------------------------
- ld1r {v0.16b}, [x0]
- ld1r {v15.8h}, [x15]
- ld1r {v31.4s}, [sp]
- ld1r {v0.2d}, [x0]
- ld1r {v0.8b}, [x0]
- ld1r {v15.4h}, [x15]
- ld1r {v31.2s}, [sp]
- ld1r {v0.1d}, [x0]
-// CHECK: ld1r {v0.16b}, [x0] // encoding: [0x00,0xc0,0x40,0x4d]
-// CHECK: ld1r {v15.8h}, [x15] // encoding: [0xef,0xc5,0x40,0x4d]
-// CHECK: ld1r {v31.4s}, [sp] // encoding: [0xff,0xcb,0x40,0x4d]
-// CHECK: ld1r {v0.2d}, [x0] // encoding: [0x00,0xcc,0x40,0x4d]
-// CHECK: ld1r {v0.8b}, [x0] // encoding: [0x00,0xc0,0x40,0x0d]
-// CHECK: ld1r {v15.4h}, [x15] // encoding: [0xef,0xc5,0x40,0x0d]
-// CHECK: ld1r {v31.2s}, [sp] // encoding: [0xff,0xcb,0x40,0x0d]
-// CHECK: ld1r {v0.1d}, [x0] // encoding: [0x00,0xcc,0x40,0x0d]
+ ld1r { v0.16b }, [x0]
+ ld1r { v15.8h }, [x15]
+ ld1r { v31.4s }, [sp]
+ ld1r { v0.2d }, [x0]
+ ld1r { v0.8b }, [x0]
+ ld1r { v15.4h }, [x15]
+ ld1r { v31.2s }, [sp]
+ ld1r { v0.1d }, [x0]
+// CHECK: ld1r { v0.16b }, [x0] // encoding: [0x00,0xc0,0x40,0x4d]
+// CHECK: ld1r { v15.8h }, [x15] // encoding: [0xef,0xc5,0x40,0x4d]
+// CHECK: ld1r { v31.4s }, [sp] // encoding: [0xff,0xcb,0x40,0x4d]
+// CHECK: ld1r { v0.2d }, [x0] // encoding: [0x00,0xcc,0x40,0x4d]
+// CHECK: ld1r { v0.8b }, [x0] // encoding: [0x00,0xc0,0x40,0x0d]
+// CHECK: ld1r { v15.4h }, [x15] // encoding: [0xef,0xc5,0x40,0x0d]
+// CHECK: ld1r { v31.2s }, [sp] // encoding: [0xff,0xcb,0x40,0x0d]
+// CHECK: ld1r { v0.1d }, [x0] // encoding: [0x00,0xcc,0x40,0x0d]
//------------------------------------------------------------------------------
// Load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4)
//------------------------------------------------------------------------------
- ld2r {v0.16b, v1.16b}, [x0]
- ld2r {v15.8h, v16.8h}, [x15]
- ld2r {v31.4s, v0.4s}, [sp]
- ld2r {v0.2d, v1.2d}, [x0]
- ld2r {v0.8b, v1.8b}, [x0]
- ld2r {v15.4h, v16.4h}, [x15]
- ld2r {v31.2s, v0.2s}, [sp]
- ld2r {v31.1d, v0.1d}, [sp]
-// CHECK: ld2r {v0.16b, v1.16b}, [x0] // encoding: [0x00,0xc0,0x60,0x4d]
-// CHECK: ld2r {v15.8h, v16.8h}, [x15] // encoding: [0xef,0xc5,0x60,0x4d]
-// CHECK: ld2r {v31.4s, v0.4s}, [sp] // encoding: [0xff,0xcb,0x60,0x4d]
-// CHECK: ld2r {v0.2d, v1.2d}, [x0] // encoding: [0x00,0xcc,0x60,0x4d]
-// CHECK: ld2r {v0.8b, v1.8b}, [x0] // encoding: [0x00,0xc0,0x60,0x0d]
-// CHECK: ld2r {v15.4h, v16.4h}, [x15] // encoding: [0xef,0xc5,0x60,0x0d]
-// CHECK: ld2r {v31.2s, v0.2s}, [sp] // encoding: [0xff,0xcb,0x60,0x0d]
-// CHECK: ld2r {v31.1d, v0.1d}, [sp] // encoding: [0xff,0xcf,0x60,0x0d]
+ ld2r { v0.16b, v1.16b }, [x0]
+ ld2r { v15.8h, v16.8h }, [x15]
+ ld2r { v31.4s, v0.4s }, [sp]
+ ld2r { v0.2d, v1.2d }, [x0]
+ ld2r { v0.8b, v1.8b }, [x0]
+ ld2r { v15.4h, v16.4h }, [x15]
+ ld2r { v31.2s, v0.2s }, [sp]
+ ld2r { v31.1d, v0.1d }, [sp]
+// CHECK: ld2r { v0.16b, v1.16b }, [x0] // encoding: [0x00,0xc0,0x60,0x4d]
+// CHECK: ld2r { v15.8h, v16.8h }, [x15] // encoding: [0xef,0xc5,0x60,0x4d]
+// CHECK: ld2r { v31.4s, v0.4s }, [sp] // encoding: [0xff,0xcb,0x60,0x4d]
+// CHECK: ld2r { v0.2d, v1.2d }, [x0] // encoding: [0x00,0xcc,0x60,0x4d]
+// CHECK: ld2r { v0.8b, v1.8b }, [x0] // encoding: [0x00,0xc0,0x60,0x0d]
+// CHECK: ld2r { v15.4h, v16.4h }, [x15] // encoding: [0xef,0xc5,0x60,0x0d]
+// CHECK: ld2r { v31.2s, v0.2s }, [sp] // encoding: [0xff,0xcb,0x60,0x0d]
+// CHECK: ld2r { v31.1d, v0.1d }, [sp] // encoding: [0xff,0xcf,0x60,0x0d]
- ld3r {v0.16b, v1.16b, v2.16b}, [x0]
- ld3r {v15.8h, v16.8h, v17.8h}, [x15]
- ld3r {v31.4s, v0.4s, v1.4s}, [sp]
- ld3r {v0.2d, v1.2d, v2.2d}, [x0]
- ld3r {v0.8b, v1.8b, v2.8b}, [x0]
- ld3r {v15.4h, v16.4h, v17.4h}, [x15]
- ld3r {v31.2s, v0.2s, v1.2s}, [sp]
- ld3r {v31.1d, v0.1d, v1.1d}, [sp]
-// CHECK: ld3r {v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0xe0,0x40,0x4d]
-// CHECK: ld3r {v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0xe5,0x40,0x4d]
-// CHECK: ld3r {v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0xeb,0x40,0x4d]
-// CHECK: ld3r {v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0xec,0x40,0x4d]
-// CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0xe0,0x40,0x0d]
-// CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0xe5,0x40,0x0d]
-// CHECK: ld3r {v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0xeb,0x40,0x0d]
-// CHECK: ld3r {v31.1d, v0.1d, v1.1d}, [sp] // encoding: [0xff,0xef,0x40,0x0d]
+ ld3r { v0.16b, v1.16b, v2.16b }, [x0]
+ ld3r { v15.8h, v16.8h, v17.8h }, [x15]
+ ld3r { v31.4s, v0.4s, v1.4s }, [sp]
+ ld3r { v0.2d, v1.2d, v2.2d }, [x0]
+ ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+ ld3r { v15.4h, v16.4h, v17.4h }, [x15]
+ ld3r { v31.2s, v0.2s, v1.2s }, [sp]
+ ld3r { v31.1d, v0.1d, v1.1d }, [sp]
+// CHECK: ld3r { v0.16b, v1.16b, v2.16b }, [x0] // encoding: [0x00,0xe0,0x40,0x4d]
+// CHECK: ld3r { v15.8h, v16.8h, v17.8h }, [x15] // encoding: [0xef,0xe5,0x40,0x4d]
+// CHECK: ld3r { v31.4s, v0.4s, v1.4s }, [sp] // encoding: [0xff,0xeb,0x40,0x4d]
+// CHECK: ld3r { v0.2d, v1.2d, v2.2d }, [x0] // encoding: [0x00,0xec,0x40,0x4d]
+// CHECK: ld3r { v0.8b, v1.8b, v2.8b }, [x0] // encoding: [0x00,0xe0,0x40,0x0d]
+// CHECK: ld3r { v15.4h, v16.4h, v17.4h }, [x15] // encoding: [0xef,0xe5,0x40,0x0d]
+// CHECK: ld3r { v31.2s, v0.2s, v1.2s }, [sp] // encoding: [0xff,0xeb,0x40,0x0d]
+// CHECK: ld3r { v31.1d, v0.1d, v1.1d }, [sp] // encoding: [0xff,0xef,0x40,0x0d]
- ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
- ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15]
- ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp]
- ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
- ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
- ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15]
- ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
- ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp]
-// CHECK: ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0xe0,0x60,0x4d]
-// CHECK: ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0xe5,0x60,0x4d]
-// CHECK: ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0xeb,0x60,0x4d]
-// CHECK: ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0xec,0x60,0x4d]
-// CHECK: ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0xe0,0x60,0x0d]
-// CHECK: ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0xe5,0x60,0x0d]
-// CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0xeb,0x60,0x0d]
-// CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp] // encoding: [0xff,0xef,0x60,0x0d]
+ ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x0]
+ ld4r { v15.8h, v16.8h, v17.8h, v18.8h }, [x15]
+ ld4r { v31.4s, v0.4s, v1.4s, v2.4s }, [sp]
+ ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x0]
+ ld4r { v15.4h, v16.4h, v17.4h, v18.4h }, [x15]
+ ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
+ ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp]
+// CHECK: ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] // encoding: [0x00,0xe0,0x60,0x4d]
+// CHECK: ld4r { v15.8h, v16.8h, v17.8h, v18.8h }, [x15] // encoding: [0xef,0xe5,0x60,0x4d]
+// CHECK: ld4r { v31.4s, v0.4s, v1.4s, v2.4s }, [sp] // encoding: [0xff,0xeb,0x60,0x4d]
+// CHECK: ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] // encoding: [0x00,0xec,0x60,0x4d]
+// CHECK: ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] // encoding: [0x00,0xe0,0x60,0x0d]
+// CHECK: ld4r { v15.4h, v16.4h, v17.4h, v18.4h }, [x15] // encoding: [0xef,0xe5,0x60,0x0d]
+// CHECK: ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] // encoding: [0xff,0xeb,0x60,0x0d]
+// CHECK: ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] // encoding: [0xff,0xef,0x60,0x0d]
//------------------------------------------------------------------------------
// Load single 1-element structure to one lane of 1 register.
//------------------------------------------------------------------------------
- ld1 {v0.b}[9], [x0]
- ld1 {v15.h}[7], [x15]
- ld1 {v31.s}[3], [sp]
- ld1 {v0.d}[1], [x0]
-// CHECK: ld1 {v0.b}[9], [x0] // encoding: [0x00,0x04,0x40,0x4d]
-// CHECK: ld1 {v15.h}[7], [x15] // encoding: [0xef,0x59,0x40,0x4d]
-// CHECK: ld1 {v31.s}[3], [sp] // encoding: [0xff,0x93,0x40,0x4d]
-// CHECK: ld1 {v0.d}[1], [x0] // encoding: [0x00,0x84,0x40,0x4d]
+ ld1 { v0.b }[9], [x0]
+ ld1 { v15.h }[7], [x15]
+ ld1 { v31.s }[3], [sp]
+ ld1 { v0.d }[1], [x0]
+// CHECK: ld1 { v0.b }[9], [x0] // encoding: [0x00,0x04,0x40,0x4d]
+// CHECK: ld1 { v15.h }[7], [x15] // encoding: [0xef,0x59,0x40,0x4d]
+// CHECK: ld1 { v31.s }[3], [sp] // encoding: [0xff,0x93,0x40,0x4d]
+// CHECK: ld1 { v0.d }[1], [x0] // encoding: [0x00,0x84,0x40,0x4d]
//------------------------------------------------------------------------------
// Load single N-element structure to one lane of N consecutive registers
// (N = 2,3,4)
//------------------------------------------------------------------------------
- ld2 {v0.b, v1.b}[9], [x0]
- ld2 {v15.h, v16.h}[7], [x15]
- ld2 {v31.s, v0.s}[3], [sp]
- ld2 {v0.d, v1.d}[1], [x0]
-// CHECK: ld2 {v0.b, v1.b}[9], [x0] // encoding: [0x00,0x04,0x60,0x4d]
-// CHECK: ld2 {v15.h, v16.h}[7], [x15] // encoding: [0xef,0x59,0x60,0x4d]
-// CHECK: ld2 {v31.s, v0.s}[3], [sp] // encoding: [0xff,0x93,0x60,0x4d]
-// CHECK: ld2 {v0.d, v1.d}[1], [x0] // encoding: [0x00,0x84,0x60,0x4d]
+ ld2 { v0.b, v1.b }[9], [x0]
+ ld2 { v15.h, v16.h }[7], [x15]
+ ld2 { v31.s, v0.s }[3], [sp]
+ ld2 { v0.d, v1.d }[1], [x0]
+// CHECK: ld2 { v0.b, v1.b }[9], [x0] // encoding: [0x00,0x04,0x60,0x4d]
+// CHECK: ld2 { v15.h, v16.h }[7], [x15] // encoding: [0xef,0x59,0x60,0x4d]
+// CHECK: ld2 { v31.s, v0.s }[3], [sp] // encoding: [0xff,0x93,0x60,0x4d]
+// CHECK: ld2 { v0.d, v1.d }[1], [x0] // encoding: [0x00,0x84,0x60,0x4d]
- ld3 {v0.b, v1.b, v2.b}[9], [x0]
- ld3 {v15.h, v16.h, v17.h}[7], [x15]
- ld3 {v31.s, v0.s, v1.s}[3], [sp]
- ld3 {v0.d, v1.d, v2.d}[1], [x0]
-// CHECK: ld3 {v0.b, v1.b, v2.b}[9], [x0] // encoding: [0x00,0x24,0x40,0x4d]
-// CHECK: ld3 {v15.h, v16.h, v17.h}[7], [x15] // encoding: [0xef,0x79,0x40,0x4d]
-// CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp] // encoding: [0xff,0xb3,0x40,0x4d]
-// CHECK: ld3 {v0.d, v1.d, v2.d}[1], [x0] // encoding: [0x00,0xa4,0x40,0x4d]
+ ld3 { v0.b, v1.b, v2.b }[9], [x0]
+ ld3 { v15.h, v16.h, v17.h }[7], [x15]
+ ld3 { v31.s, v0.s, v1.s }[3], [sp]
+ ld3 { v0.d, v1.d, v2.d }[1], [x0]
+// CHECK: ld3 { v0.b, v1.b, v2.b }[9], [x0] // encoding: [0x00,0x24,0x40,0x4d]
+// CHECK: ld3 { v15.h, v16.h, v17.h }[7], [x15] // encoding: [0xef,0x79,0x40,0x4d]
+// CHECK: ld3 { v31.s, v0.s, v1.s }[3], [sp] // encoding: [0xff,0xb3,0x40,0x4d]
+// CHECK: ld3 { v0.d, v1.d, v2.d }[1], [x0] // encoding: [0x00,0xa4,0x40,0x4d]
- ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0]
- ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15]
- ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp]
- ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0]
-// CHECK: ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] // encoding: [0x00,0x24,0x60,0x4d]
-// CHECK: ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] // encoding: [0xef,0x79,0x60,0x4d]
-// CHECK: ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] // encoding: [0xff,0xb3,0x60,0x4d]
-// CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] // encoding: [0x00,0xa4,0x60,0x4d]
+ ld4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+ ld4 { v15.h, v16.h, v17.h, v18.h }[7], [x15]
+ ld4 { v31.s, v0.s, v1.s, v2.s }[3], [sp]
+ ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+// CHECK: ld4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] // encoding: [0x00,0x24,0x60,0x4d]
+// CHECK: ld4 { v15.h, v16.h, v17.h, v18.h }[7], [x15] // encoding: [0xef,0x79,0x60,0x4d]
+// CHECK: ld4 { v31.s, v0.s, v1.s, v2.s }[3], [sp] // encoding: [0xff,0xb3,0x60,0x4d]
+// CHECK: ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] // encoding: [0x00,0xa4,0x60,0x4d]
//------------------------------------------------------------------------------
// Store single 1-element structure from one lane of 1 register.
//------------------------------------------------------------------------------
- st1 {v0.b}[9], [x0]
- st1 {v15.h}[7], [x15]
- st1 {v31.s}[3], [sp]
- st1 {v0.d}[1], [x0]
-// CHECK: st1 {v0.b}[9], [x0] // encoding: [0x00,0x04,0x00,0x4d]
-// CHECK: st1 {v15.h}[7], [x15] // encoding: [0xef,0x59,0x00,0x4d]
-// CHECK: st1 {v31.s}[3], [sp] // encoding: [0xff,0x93,0x00,0x4d]
-// CHECK: st1 {v0.d}[1], [x0] // encoding: [0x00,0x84,0x00,0x4d]
+ st1 { v0.b }[9], [x0]
+ st1 { v15.h }[7], [x15]
+ st1 { v31.s }[3], [sp]
+ st1 { v0.d }[1], [x0]
+// CHECK: st1 { v0.b }[9], [x0] // encoding: [0x00,0x04,0x00,0x4d]
+// CHECK: st1 { v15.h }[7], [x15] // encoding: [0xef,0x59,0x00,0x4d]
+// CHECK: st1 { v31.s }[3], [sp] // encoding: [0xff,0x93,0x00,0x4d]
+// CHECK: st1 { v0.d }[1], [x0] // encoding: [0x00,0x84,0x00,0x4d]
//------------------------------------------------------------------------------
// Store single N-element structure from one lane of N consecutive registers
// (N = 2,3,4)
//------------------------------------------------------------------------------
- st2 {v0.b, v1.b}[9], [x0]
- st2 {v15.h, v16.h}[7], [x15]
- st2 {v31.s, v0.s}[3], [sp]
- st2 {v0.d, v1.d}[1], [x0]
-// CHECK: st2 {v0.b, v1.b}[9], [x0] // encoding: [0x00,0x04,0x20,0x4d]
-// CHECK: st2 {v15.h, v16.h}[7], [x15] // encoding: [0xef,0x59,0x20,0x4d]
-// CHECK: st2 {v31.s, v0.s}[3], [sp] // encoding: [0xff,0x93,0x20,0x4d]
-// CHECK: st2 {v0.d, v1.d}[1], [x0] // encoding: [0x00,0x84,0x20,0x4d]
+ st2 { v0.b, v1.b }[9], [x0]
+ st2 { v15.h, v16.h }[7], [x15]
+ st2 { v31.s, v0.s }[3], [sp]
+ st2 { v0.d, v1.d }[1], [x0]
+// CHECK: st2 { v0.b, v1.b }[9], [x0] // encoding: [0x00,0x04,0x20,0x4d]
+// CHECK: st2 { v15.h, v16.h }[7], [x15] // encoding: [0xef,0x59,0x20,0x4d]
+// CHECK: st2 { v31.s, v0.s }[3], [sp] // encoding: [0xff,0x93,0x20,0x4d]
+// CHECK: st2 { v0.d, v1.d }[1], [x0] // encoding: [0x00,0x84,0x20,0x4d]
- st3 {v0.b, v1.b, v2.b}[9], [x0]
- st3 {v15.h, v16.h, v17.h}[7], [x15]
- st3 {v31.s, v0.s, v1.s}[3], [sp]
- st3 {v0.d, v1.d, v2.d}[1], [x0]
-// CHECK: st3 {v0.b, v1.b, v2.b}[9], [x0] // encoding: [0x00,0x24,0x00,0x4d]
-// CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15] // encoding: [0xef,0x79,0x00,0x4d]
-// CHECK: st3 {v31.s, v0.s, v1.s}[3], [sp] // encoding: [0xff,0xb3,0x00,0x4d]
-// CHECK: st3 {v0.d, v1.d, v2.d}[1], [x0] // encoding: [0x00,0xa4,0x00,0x4d]
+ st3 { v0.b, v1.b, v2.b }[9], [x0]
+ st3 { v15.h, v16.h, v17.h }[7], [x15]
+ st3 { v31.s, v0.s, v1.s }[3], [sp]
+ st3 { v0.d, v1.d, v2.d }[1], [x0]
+// CHECK: st3 { v0.b, v1.b, v2.b }[9], [x0] // encoding: [0x00,0x24,0x00,0x4d]
+// CHECK: st3 { v15.h, v16.h, v17.h }[7], [x15] // encoding: [0xef,0x79,0x00,0x4d]
+// CHECK: st3 { v31.s, v0.s, v1.s }[3], [sp] // encoding: [0xff,0xb3,0x00,0x4d]
+// CHECK: st3 { v0.d, v1.d, v2.d }[1], [x0] // encoding: [0x00,0xa4,0x00,0x4d]
- st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0]
- st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15]
- st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp]
- st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0]
-// CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0] // encoding: [0x00,0x24,0x20,0x4d]
-// CHECK: st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15] // encoding: [0xef,0x79,0x20,0x4d]
-// CHECK: st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp] // encoding: [0xff,0xb3,0x20,0x4d]
-// CHECK: st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0] // encoding: [0x00,0xa4,0x20,0x4d]
+ st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+ st4 { v15.h, v16.h, v17.h, v18.h }[7], [x15]
+ st4 { v31.s, v0.s, v1.s, v2.s }[3], [sp]
+ st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+// CHECK: st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] // encoding: [0x00,0x24,0x20,0x4d]
+// CHECK: st4 { v15.h, v16.h, v17.h, v18.h }[7], [x15] // encoding: [0xef,0x79,0x20,0x4d]
+// CHECK: st4 { v31.s, v0.s, v1.s, v2.s }[3], [sp] // encoding: [0xff,0xb3,0x20,0x4d]
+// CHECK: st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] // encoding: [0x00,0xa4,0x20,0x4d]
//------------------------------------------------------------------------------
// Post-index load single 1-element structure to all lanes of 1 register
//------------------------------------------------------------------------------
- ld1r {v0.16b}, [x0], #1
- ld1r {v15.8h}, [x15], #2
- ld1r {v31.4s}, [sp], #4
- ld1r {v0.2d}, [x0], #8
- ld1r {v0.8b}, [x0], x0
- ld1r {v15.4h}, [x15], x1
- ld1r {v31.2s}, [sp], x2
- ld1r {v0.1d}, [x0], x3
-// CHECK: ld1r {v0.16b}, [x0], #1 // encoding: [0x00,0xc0,0xdf,0x4d]
-// CHECK: ld1r {v15.8h}, [x15], #2 // encoding: [0xef,0xc5,0xdf,0x4d]
-// CHECK: ld1r {v31.4s}, [sp], #4 // encoding: [0xff,0xcb,0xdf,0x4d]
-// CHECK: ld1r {v0.2d}, [x0], #8 // encoding: [0x00,0xcc,0xdf,0x4d]
-// CHECK: ld1r {v0.8b}, [x0], x0 // encoding: [0x00,0xc0,0xc0,0x0d]
-// CHECK: ld1r {v15.4h}, [x15], x1 // encoding: [0xef,0xc5,0xc1,0x0d]
-// CHECK: ld1r {v31.2s}, [sp], x2 // encoding: [0xff,0xcb,0xc2,0x0d]
-// CHECK: ld1r {v0.1d}, [x0], x3 // encoding: [0x00,0xcc,0xc3,0x0d]
+ ld1r { v0.16b }, [x0], #1
+ ld1r { v15.8h }, [x15], #2
+ ld1r { v31.4s }, [sp], #4
+ ld1r { v0.2d }, [x0], #8
+ ld1r { v0.8b }, [x0], x0
+ ld1r { v15.4h }, [x15], x1
+ ld1r { v31.2s }, [sp], x2
+ ld1r { v0.1d }, [x0], x3
+// CHECK: ld1r { v0.16b }, [x0], #1 // encoding: [0x00,0xc0,0xdf,0x4d]
+// CHECK: ld1r { v15.8h }, [x15], #2 // encoding: [0xef,0xc5,0xdf,0x4d]
+// CHECK: ld1r { v31.4s }, [sp], #4 // encoding: [0xff,0xcb,0xdf,0x4d]
+// CHECK: ld1r { v0.2d }, [x0], #8 // encoding: [0x00,0xcc,0xdf,0x4d]
+// CHECK: ld1r { v0.8b }, [x0], x0 // encoding: [0x00,0xc0,0xc0,0x0d]
+// CHECK: ld1r { v15.4h }, [x15], x1 // encoding: [0xef,0xc5,0xc1,0x0d]
+// CHECK: ld1r { v31.2s }, [sp], x2 // encoding: [0xff,0xcb,0xc2,0x0d]
+// CHECK: ld1r { v0.1d }, [x0], x3 // encoding: [0x00,0xcc,0xc3,0x0d]
//------------------------------------------------------------------------------
// Post-index load single N-element structure to all lanes of N consecutive
// registers (N = 2,3,4)
//------------------------------------------------------------------------------
- ld2r {v0.16b, v1.16b}, [x0], #2
- ld2r {v15.8h, v16.8h}, [x15], #4
- ld2r {v31.4s, v0.4s}, [sp], #8
- ld2r {v0.2d, v1.2d}, [x0], #16
- ld2r {v0.8b, v1.8b}, [x0], x6
- ld2r {v15.4h, v16.4h}, [x15], x7
- ld2r {v31.2s, v0.2s}, [sp], x9
- ld2r {v31.1d, v0.1d}, [x0], x5
-// CHECK: ld2r {v0.16b, v1.16b}, [x0], #2 // encoding: [0x00,0xc0,0xff,0x4d]
-// CHECK: ld2r {v15.8h, v16.8h}, [x15], #4 // encoding: [0xef,0xc5,0xff,0x4d]
-// CHECK: ld2r {v31.4s, v0.4s}, [sp], #8 // encoding: [0xff,0xcb,0xff,0x4d]
-// CHECK: ld2r {v0.2d, v1.2d}, [x0], #16 // encoding: [0x00,0xcc,0xff,0x4d]
-// CHECK: ld2r {v0.8b, v1.8b}, [x0], x6 // encoding: [0x00,0xc0,0xe6,0x0d]
-// CHECK: ld2r {v15.4h, v16.4h}, [x15], x7 // encoding: [0xef,0xc5,0xe7,0x0d]
-// CHECK: ld2r {v31.2s, v0.2s}, [sp], x9 // encoding: [0xff,0xcb,0xe9,0x0d]
-// CHECK: ld2r {v31.1d, v0.1d}, [x0], x5 // encoding: [0x1f,0xcc,0xe5,0x0d]
+ ld2r { v0.16b, v1.16b }, [x0], #2
+ ld2r { v15.8h, v16.8h }, [x15], #4
+ ld2r { v31.4s, v0.4s }, [sp], #8
+ ld2r { v0.2d, v1.2d }, [x0], #16
+ ld2r { v0.8b, v1.8b }, [x0], x6
+ ld2r { v15.4h, v16.4h }, [x15], x7
+ ld2r { v31.2s, v0.2s }, [sp], x9
+ ld2r { v31.1d, v0.1d }, [x0], x5
+// CHECK: ld2r { v0.16b, v1.16b }, [x0], #2 // encoding: [0x00,0xc0,0xff,0x4d]
+// CHECK: ld2r { v15.8h, v16.8h }, [x15], #4 // encoding: [0xef,0xc5,0xff,0x4d]
+// CHECK: ld2r { v31.4s, v0.4s }, [sp], #8 // encoding: [0xff,0xcb,0xff,0x4d]
+// CHECK: ld2r { v0.2d, v1.2d }, [x0], #16 // encoding: [0x00,0xcc,0xff,0x4d]
+// CHECK: ld2r { v0.8b, v1.8b }, [x0], x6 // encoding: [0x00,0xc0,0xe6,0x0d]
+// CHECK: ld2r { v15.4h, v16.4h }, [x15], x7 // encoding: [0xef,0xc5,0xe7,0x0d]
+// CHECK: ld2r { v31.2s, v0.2s }, [sp], x9 // encoding: [0xff,0xcb,0xe9,0x0d]
+// CHECK: ld2r { v31.1d, v0.1d }, [x0], x5 // encoding: [0x1f,0xcc,0xe5,0x0d]
- ld3r {v0.16b, v1.16b, v2.16b}, [x0], x9
- ld3r {v15.8h, v16.8h, v17.8h}, [x15], x6
- ld3r {v31.4s, v0.4s, v1.4s}, [sp], x7
- ld3r {v0.2d, v1.2d, v2.2d}, [x0], x5
- ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3
- ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6
- ld3r {v31.2s, v0.2s, v1.2s}, [sp], #12
- ld3r {v31.1d, v0.1d, v1.1d}, [sp], #24
-// CHECK: ld3r {v0.16b, v1.16b, v2.16b}, [x0], x9 // encoding: [0x00,0xe0,0xc9,0x4d]
-// CHECK: ld3r {v15.8h, v16.8h, v17.8h}, [x15], x6 // encoding: [0xef,0xe5,0xc6,0x4d]
-// CHECK: ld3r {v31.4s, v0.4s, v1.4s}, [sp], x7 // encoding: [0xff,0xeb,0xc7,0x4d]
-// CHECK: ld3r {v0.2d, v1.2d, v2.2d}, [x0], x5 // encoding: [0x00,0xec,0xc5,0x4d]
-// CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3 // encoding: [0x00,0xe0,0xdf,0x0d]
-// CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6 // encoding: [0xef,0xe5,0xdf,0x0d]
-// CHECK: ld3r {v31.2s, v0.2s, v1.2s}, [sp], #12 // encoding: [0xff,0xeb,0xdf,0x0d]
-// CHECK: ld3r {v31.1d, v0.1d, v1.1d}, [sp], #24 // encoding: [0xff,0xef,0xdf,0x0d]
+ ld3r { v0.16b, v1.16b, v2.16b }, [x0], x9
+ ld3r { v15.8h, v16.8h, v17.8h }, [x15], x6
+ ld3r { v31.4s, v0.4s, v1.4s }, [sp], x7
+ ld3r { v0.2d, v1.2d, v2.2d }, [x0], x5
+ ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+ ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6
+ ld3r { v31.2s, v0.2s, v1.2s }, [sp], #12
+ ld3r { v31.1d, v0.1d, v1.1d }, [sp], #24
+// CHECK: ld3r { v0.16b, v1.16b, v2.16b }, [x0], x9 // encoding: [0x00,0xe0,0xc9,0x4d]
+// CHECK: ld3r { v15.8h, v16.8h, v17.8h }, [x15], x6 // encoding: [0xef,0xe5,0xc6,0x4d]
+// CHECK: ld3r { v31.4s, v0.4s, v1.4s }, [sp], x7 // encoding: [0xff,0xeb,0xc7,0x4d]
+// CHECK: ld3r { v0.2d, v1.2d, v2.2d }, [x0], x5 // encoding: [0x00,0xec,0xc5,0x4d]
+// CHECK: ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 // encoding: [0x00,0xe0,0xdf,0x0d]
+// CHECK: ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 // encoding: [0xef,0xe5,0xdf,0x0d]
+// CHECK: ld3r { v31.2s, v0.2s, v1.2s }, [sp], #12 // encoding: [0xff,0xeb,0xdf,0x0d]
+// CHECK: ld3r { v31.1d, v0.1d, v1.1d }, [sp], #24 // encoding: [0xff,0xef,0xdf,0x0d]
- ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #4
- ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], #8
- ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #16
- ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #32
- ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x5
- ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x9
- ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30
- ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7
-// CHECK: ld4r {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #4 // encoding: [0x00,0xe0,0xff,0x4d]
-// CHECK: ld4r {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], #8 // encoding: [0xef,0xe5,0xff,0x4d]
-// CHECK: ld4r {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #16 // encoding: [0xff,0xeb,0xff,0x4d]
-// CHECK: ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #32 // encoding: [0x00,0xec,0xff,0x4d]
-// CHECK: ld4r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x5 // encoding: [0x00,0xe0,0xe5,0x0d]
-// CHECK: ld4r {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x9 // encoding: [0xef,0xe5,0xe9,0x0d]
-// CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30 // encoding: [0xff,0xeb,0xfe,0x0d]
-// CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7 // encoding: [0xff,0xef,0xe7,0x0d]
+ ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #4
+ ld4r { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], #8
+ ld4r { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #16
+ ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #32
+ ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x5
+ ld4r { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x9
+ ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30
+ ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7
+// CHECK: ld4r { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #4 // encoding: [0x00,0xe0,0xff,0x4d]
+// CHECK: ld4r { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], #8 // encoding: [0xef,0xe5,0xff,0x4d]
+// CHECK: ld4r { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #16 // encoding: [0xff,0xeb,0xff,0x4d]
+// CHECK: ld4r { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #32 // encoding: [0x00,0xec,0xff,0x4d]
+// CHECK: ld4r { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x5 // encoding: [0x00,0xe0,0xe5,0x0d]
+// CHECK: ld4r { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x9 // encoding: [0xef,0xe5,0xe9,0x0d]
+// CHECK: ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 // encoding: [0xff,0xeb,0xfe,0x0d]
+// CHECK: ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 // encoding: [0xff,0xef,0xe7,0x0d]
//------------------------------------------------------------------------------
// Post-index load single 1-element structure to one lane of 1 register.
//------------------------------------------------------------------------------
- ld1 {v0.b}[9], [x0], #1
- ld1 {v15.h}[7], [x15], x9
- ld1 {v31.s}[3], [sp], x6
- ld1 {v0.d}[1], [x0], #8
-// CHECK: ld1 {v0.b}[9], [x0], #1 // encoding: [0x00,0x04,0xdf,0x4d]
-// CHECK: ld1 {v15.h}[7], [x15], x9 // encoding: [0xef,0x59,0xc9,0x4d]
-// CHECK: ld1 {v31.s}[3], [sp], x6 // encoding: [0xff,0x93,0xc6,0x4d]
-// CHECK: ld1 {v0.d}[1], [x0], #8 // encoding: [0x00,0x84,0xdf,0x4d]
+ ld1 { v0.b }[9], [x0], #1
+ ld1 { v15.h }[7], [x15], x9
+ ld1 { v31.s }[3], [sp], x6
+ ld1 { v0.d }[1], [x0], #8
+// CHECK: ld1 { v0.b }[9], [x0], #1 // encoding: [0x00,0x04,0xdf,0x4d]
+// CHECK: ld1 { v15.h }[7], [x15], x9 // encoding: [0xef,0x59,0xc9,0x4d]
+// CHECK: ld1 { v31.s }[3], [sp], x6 // encoding: [0xff,0x93,0xc6,0x4d]
+// CHECK: ld1 { v0.d }[1], [x0], #8 // encoding: [0x00,0x84,0xdf,0x4d]
//------------------------------------------------------------------------------
// Post-index load single N-element structure to one lane of N consecutive
// registers (N = 2,3,4)
//------------------------------------------------------------------------------
- ld2 {v0.b, v1.b}[9], [x0], x3
- ld2 {v15.h, v16.h}[7], [x15], #4
- ld2 {v31.s, v0.s}[3], [sp], #8
- ld2 {v0.d, v1.d}[1], [x0], x0
-// CHECK: ld2 {v0.b, v1.b}[9], [x0], x3 // encoding: [0x00,0x04,0xe3,0x4d]
-// CHECK: ld2 {v15.h, v16.h}[7], [x15], #4 // encoding: [0xef,0x59,0xff,0x4d]
-// CHECK: ld2 {v31.s, v0.s}[3], [sp], #8 // encoding: [0xff,0x93,0xff,0x4d]
-// CHECK: ld2 {v0.d, v1.d}[1], [x0], x0 // encoding: [0x00,0x84,0xe0,0x4d]
+ ld2 { v0.b, v1.b }[9], [x0], x3
+ ld2 { v15.h, v16.h }[7], [x15], #4
+ ld2 { v31.s, v0.s }[3], [sp], #8
+ ld2 { v0.d, v1.d }[1], [x0], x0
+// CHECK: ld2 { v0.b, v1.b }[9], [x0], x3 // encoding: [0x00,0x04,0xe3,0x4d]
+// CHECK: ld2 { v15.h, v16.h }[7], [x15], #4 // encoding: [0xef,0x59,0xff,0x4d]
+// CHECK: ld2 { v31.s, v0.s }[3], [sp], #8 // encoding: [0xff,0x93,0xff,0x4d]
+// CHECK: ld2 { v0.d, v1.d }[1], [x0], x0 // encoding: [0x00,0x84,0xe0,0x4d]
- ld3 {v0.b, v1.b, v2.b}[9], [x0], #3
- ld3 {v15.h, v16.h, v17.h}[7], [x15], #6
- ld3 {v31.s, v0.s, v1.s}[3], [sp], x3
- ld3 {v0.d, v1.d, v2.d}[1], [x0], x6
-// CHECK: ld3 {v0.b, v1.b, v2.b}[9], [x0], #3 // encoding: [0x00,0x24,0xdf,0x4d]
-// CHECK: ld3 {v15.h, v16.h, v17.h}[7], [x15], #6 // encoding: [0xef,0x79,0xdf,0x4d]
-// CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3 // encoding: [0xff,0xb3,0xc3,0x4d]
-// CHECK: ld3 {v0.d, v1.d, v2.d}[1], [x0], x6 // encoding: [0x00,0xa4,0xc6,0x4d]
+ ld3 { v0.b, v1.b, v2.b }[9], [x0], #3
+ ld3 { v15.h, v16.h, v17.h }[7], [x15], #6
+ ld3 { v31.s, v0.s, v1.s }[3], [sp], x3
+ ld3 { v0.d, v1.d, v2.d }[1], [x0], x6
+// CHECK: ld3 { v0.b, v1.b, v2.b }[9], [x0], #3 // encoding: [0x00,0x24,0xdf,0x4d]
+// CHECK: ld3 { v15.h, v16.h, v17.h }[7], [x15], #6 // encoding: [0xef,0x79,0xdf,0x4d]
+// CHECK: ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 // encoding: [0xff,0xb3,0xc3,0x4d]
+// CHECK: ld3 { v0.d, v1.d, v2.d }[1], [x0], x6 // encoding: [0x00,0xa4,0xc6,0x4d]
- ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5
- ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7
- ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16
- ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
-// CHECK: ld4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 // encoding: [0x00,0x24,0xe5,0x4d]
-// CHECK: ld4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 // encoding: [0xef,0x79,0xe7,0x4d]
-// CHECK: ld4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 // encoding: [0xff,0xb3,0xff,0x4d]
-// CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 // encoding: [0x00,0xa4,0xff,0x4d]
+ ld4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5
+ ld4 { v15.h, v16.h, v17.h, v18.h }[7], [x15], x7
+ ld4 { v31.s, v0.s, v1.s, v2.s }[3], [sp], #16
+ ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+// CHECK: ld4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 // encoding: [0x00,0x24,0xe5,0x4d]
+// CHECK: ld4 { v15.h, v16.h, v17.h, v18.h }[7], [x15], x7 // encoding: [0xef,0x79,0xe7,0x4d]
+// CHECK: ld4 { v31.s, v0.s, v1.s, v2.s }[3], [sp], #16 // encoding: [0xff,0xb3,0xff,0x4d]
+// CHECK: ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 // encoding: [0x00,0xa4,0xff,0x4d]
//------------------------------------------------------------------------------
// Post-index store single 1-element structure from one lane of 1 register.
//------------------------------------------------------------------------------
- st1 {v0.b}[9], [x0], #1
- st1 {v15.h}[7], [x15], x9
- st1 {v31.s}[3], [sp], x6
- st1 {v0.d}[1], [x0], #8
-// CHECK: st1 {v0.b}[9], [x0], #1 // encoding: [0x00,0x04,0x9f,0x4d]
-// CHECK: st1 {v15.h}[7], [x15], x9 // encoding: [0xef,0x59,0x89,0x4d]
-// CHECK: st1 {v31.s}[3], [sp], x6 // encoding: [0xff,0x93,0x86,0x4d]
-// CHECK: st1 {v0.d}[1], [x0], #8 // encoding: [0x00,0x84,0x9f,0x4d]
+ st1 { v0.b }[9], [x0], #1
+ st1 { v15.h }[7], [x15], x9
+ st1 { v31.s }[3], [sp], x6
+ st1 { v0.d }[1], [x0], #8
+// CHECK: st1 { v0.b }[9], [x0], #1 // encoding: [0x00,0x04,0x9f,0x4d]
+// CHECK: st1 { v15.h }[7], [x15], x9 // encoding: [0xef,0x59,0x89,0x4d]
+// CHECK: st1 { v31.s }[3], [sp], x6 // encoding: [0xff,0x93,0x86,0x4d]
+// CHECK: st1 { v0.d }[1], [x0], #8 // encoding: [0x00,0x84,0x9f,0x4d]
//------------------------------------------------------------------------------
// Post-index store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4)
//------------------------------------------------------------------------------
- st2 {v0.b, v1.b}[9], [x0], x3
- st2 {v15.h, v16.h}[7], [x15], #4
- st2 {v31.s, v0.s}[3], [sp], #8
- st2 {v0.d, v1.d}[1], [x0], x0
-// CHECK: st2 {v0.b, v1.b}[9], [x0], x3 // encoding: [0x00,0x04,0xa3,0x4d]
-// CHECK: st2 {v15.h, v16.h}[7], [x15], #4 // encoding: [0xef,0x59,0xbf,0x4d]
-// CHECK: st2 {v31.s, v0.s}[3], [sp], #8 // encoding: [0xff,0x93,0xbf,0x4d]
-// CHECK: st2 {v0.d, v1.d}[1], [x0], x0 // encoding: [0x00,0x84,0xa0,0x4d]
+ st2 { v0.b, v1.b }[9], [x0], x3
+ st2 { v15.h, v16.h }[7], [x15], #4
+ st2 { v31.s, v0.s }[3], [sp], #8
+ st2 { v0.d, v1.d }[1], [x0], x0
+// CHECK: st2 { v0.b, v1.b }[9], [x0], x3 // encoding: [0x00,0x04,0xa3,0x4d]
+// CHECK: st2 { v15.h, v16.h }[7], [x15], #4 // encoding: [0xef,0x59,0xbf,0x4d]
+// CHECK: st2 { v31.s, v0.s }[3], [sp], #8 // encoding: [0xff,0x93,0xbf,0x4d]
+// CHECK: st2 { v0.d, v1.d }[1], [x0], x0 // encoding: [0x00,0x84,0xa0,0x4d]
- st3 {v0.b, v1.b, v2.b}[9], [x0], #3
- st3 {v15.h, v16.h, v17.h}[7], [x15], #6
- st3 {v31.s, v0.s, v1.s}[3], [sp], x3
- st3 {v0.d, v1.d, v2.d}[1], [x0], x6
-// CHECK: st3 {v0.b, v1.b, v2.b}[9], [x0], #3 // encoding: [0x00,0x24,0x9f,0x4d]
-// CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6 // encoding: [0xef,0x79,0x9f,0x4d]
-// CHECK: st3 {v31.s, v0.s, v1.s}[3], [sp], x3 // encoding: [0xff,0xb3,0x83,0x4d]
-// CHECK: st3 {v0.d, v1.d, v2.d}[1], [x0], x6 // encoding: [0x00,0xa4,0x86,0x4d]
+ st3 { v0.b, v1.b, v2.b }[9], [x0], #3
+ st3 { v15.h, v16.h, v17.h }[7], [x15], #6
+ st3 { v31.s, v0.s, v1.s }[3], [sp], x3
+ st3 { v0.d, v1.d, v2.d }[1], [x0], x6
+// CHECK: st3 { v0.b, v1.b, v2.b }[9], [x0], #3 // encoding: [0x00,0x24,0x9f,0x4d]
+// CHECK: st3 { v15.h, v16.h, v17.h }[7], [x15], #6 // encoding: [0xef,0x79,0x9f,0x4d]
+// CHECK: st3 { v31.s, v0.s, v1.s }[3], [sp], x3 // encoding: [0xff,0xb3,0x83,0x4d]
+// CHECK: st3 { v0.d, v1.d, v2.d }[1], [x0], x6 // encoding: [0x00,0xa4,0x86,0x4d]
- st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5
- st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7
- st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16
- st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
-// CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5 // encoding: [0x00,0x24,0xa5,0x4d]
-// CHECK: st4 {v15.h, v16.h, v17.h, v18.h}[7], [x15], x7 // encoding: [0xef,0x79,0xa7,0x4d]
-// CHECK: st4 {v31.s, v0.s, v1.s, v2.s}[3], [sp], #16 // encoding: [0xff,0xb3,0xbf,0x4d]
-// CHECK: st4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32 // encoding: [0x00,0xa4,0xbf,0x4d]
+ st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5
+ st4 { v15.h, v16.h, v17.h, v18.h }[7], [x15], x7
+ st4 { v31.s, v0.s, v1.s, v2.s }[3], [sp], #16
+ st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+// CHECK: st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 // encoding: [0x00,0x24,0xa5,0x4d]
+// CHECK: st4 { v15.h, v16.h, v17.h, v18.h }[7], [x15], x7 // encoding: [0xef,0x79,0xa7,0x4d]
+// CHECK: st4 { v31.s, v0.s, v1.s, v2.s }[3], [sp], #16 // encoding: [0xff,0xb3,0xbf,0x4d]
+// CHECK: st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 // encoding: [0x00,0xa4,0xbf,0x4d]
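For reference, a minimal sketch of reproducing one of the updated checks by hand, assuming llvm-mc from an LLVM build with the arm64 backend is on PATH (the leading .text directive in the output is elided here); the invocation mirrors the RUN line of the test above, and the encoding is the one the test expects:

  $ echo 'ld1r { v0.16b }, [x0]' | llvm-mc -triple=arm64 -mattr=+neon -show-encoding
          ld1r    { v0.16b }, [x0]        // encoding: [0x00,0xc0,0x40,0x4d]

FileCheck then matches each // CHECK: line of the test, in order, against this assembler output.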
diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s
index 9e0f9c5..6d1aafd 100644
--- a/test/MC/AArch64/neon-simd-misc.s
+++ b/test/MC/AArch64/neon-simd-misc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -281,8 +281,8 @@
not v0.16b, v31.16b
not v1.8b, v9.8b
-// CHECK: not v0.16b, v31.16b // encoding: [0xe0,0x5b,0x20,0x6e]
-// CHECK: not v1.8b, v9.8b // encoding: [0x21,0x59,0x20,0x2e]
+// CHECK: {{mvn|not}} v0.16b, v31.16b // encoding: [0xe0,0x5b,0x20,0x6e]
+// CHECK: {{mvn|not}} v1.8b, v9.8b // encoding: [0x21,0x59,0x20,0x2e]
//------------------------------------------------------------------------------
// Bitwise reverse
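The {{mvn|not}} patterns above rely on FileCheck's regular-expression escape: anything inside {{ }} is matched as a regex rather than literal text, so the loosened check accepts whichever spelling the instruction printer chooses for this encoding, presumably because the arm64 printer emits the mvn alias where the old printer emitted not. A single check line thus covers both outputs:

  // CHECK: {{mvn|not}} v0.16b, v31.16b   (matches "mvn v0.16b, v31.16b" or "not v0.16b, v31.16b")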
diff --git a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
index 8dc271e..c57a122 100644
--- a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
+++ b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s
@@ -1,389 +1,389 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Load multiple 1-element structures from one register (post-index)
//------------------------------------------------------------------------------
- ld1 {v0.16b}, [x0], x1
- ld1 {v15.8h}, [x15], x2
- ld1 {v31.4s}, [sp], #16
- ld1 {v0.2d}, [x0], #16
- ld1 {v0.8b}, [x0], x2
- ld1 {v15.4h}, [x15], x3
- ld1 {v31.2s}, [sp], #8
- ld1 {v0.1d}, [x0], #8
-// CHECK: ld1 {v0.16b}, [x0], x1
+ ld1 { v0.16b }, [x0], x1
+ ld1 { v15.8h }, [x15], x2
+ ld1 { v31.4s }, [sp], #16
+ ld1 { v0.2d }, [x0], #16
+ ld1 { v0.8b }, [x0], x2
+ ld1 { v15.4h }, [x15], x3
+ ld1 { v31.2s }, [sp], #8
+ ld1 { v0.1d }, [x0], #8
+// CHECK: ld1 { v0.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x70,0xc1,0x4c]
-// CHECK: ld1 {v15.8h}, [x15], x2
+// CHECK: ld1 { v15.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x75,0xc2,0x4c]
-// CHECK: ld1 {v31.4s}, [sp], #16
+// CHECK: ld1 { v31.4s }, [sp], #16
// CHECK: // encoding: [0xff,0x7b,0xdf,0x4c]
-// CHECK: ld1 {v0.2d}, [x0], #16
+// CHECK: ld1 { v0.2d }, [x0], #16
// CHECK: // encoding: [0x00,0x7c,0xdf,0x4c]
-// CHECK: ld1 {v0.8b}, [x0], x2
+// CHECK: ld1 { v0.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x70,0xc2,0x0c]
-// CHECK: ld1 {v15.4h}, [x15], x3
+// CHECK: ld1 { v15.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x75,0xc3,0x0c]
-// CHECK: ld1 {v31.2s}, [sp], #8
+// CHECK: ld1 { v31.2s }, [sp], #8
// CHECK: // encoding: [0xff,0x7b,0xdf,0x0c]
-// CHECK: ld1 {v0.1d}, [x0], #8
+// CHECK: ld1 { v0.1d }, [x0], #8
// CHECK: // encoding: [0x00,0x7c,0xdf,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures from two consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- ld1 {v0.16b, v1.16b}, [x0], x1
- ld1 {v15.8h, v16.8h}, [x15], x2
- ld1 {v31.4s, v0.4s}, [sp], #32
- ld1 {v0.2d, v1.2d}, [x0], #32
- ld1 {v0.8b, v1.8b}, [x0], x2
- ld1 {v15.4h, v16.4h}, [x15], x3
- ld1 {v31.2s, v0.2s}, [sp], #16
- ld1 {v0.1d, v1.1d}, [x0], #16
-// CHECK: ld1 {v0.16b, v1.16b}, [x0], x1
+ ld1 { v0.16b, v1.16b }, [x0], x1
+ ld1 { v15.8h, v16.8h }, [x15], x2
+ ld1 { v31.4s, v0.4s }, [sp], #32
+ ld1 { v0.2d, v1.2d }, [x0], #32
+ ld1 { v0.8b, v1.8b }, [x0], x2
+ ld1 { v15.4h, v16.4h }, [x15], x3
+ ld1 { v31.2s, v0.2s }, [sp], #16
+ ld1 { v0.1d, v1.1d }, [x0], #16
+// CHECK: ld1 { v0.16b, v1.16b }, [x0], x1
// CHECK: // encoding: [0x00,0xa0,0xc1,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h}, [x15], x2
+// CHECK: ld1 { v15.8h, v16.8h }, [x15], x2
// CHECK: // encoding: [0xef,0xa5,0xc2,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s}, [sp], #32
+// CHECK: ld1 { v31.4s, v0.4s }, [sp], #32
// CHECK: // encoding: [0xff,0xab,0xdf,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d}, [x0], #32
+// CHECK: ld1 { v0.2d, v1.2d }, [x0], #32
// CHECK: // encoding: [0x00,0xac,0xdf,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b}, [x0], x2
+// CHECK: ld1 { v0.8b, v1.8b }, [x0], x2
// CHECK: // encoding: [0x00,0xa0,0xc2,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h}, [x15], x3
+// CHECK: ld1 { v15.4h, v16.4h }, [x15], x3
// CHECK: // encoding: [0xef,0xa5,0xc3,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s}, [sp], #16
+// CHECK: ld1 { v31.2s, v0.2s }, [sp], #16
// CHECK: // encoding: [0xff,0xab,0xdf,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d}, [x0], #16
+// CHECK: ld1 { v0.1d, v1.1d }, [x0], #16
// CHECK: // encoding: [0x00,0xac,0xdf,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures from three consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- ld1 {v0.16b, v1.16b, v2.16b}, [x0], x1
- ld1 {v15.8h, v16.8h, v17.8h}, [x15], x2
- ld1 {v31.4s, v0.4s, v1.4s}, [sp], #48
- ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48
- ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2
- ld1 {v15.4h, v16.4h, v17.4h}, [x15], x3
- ld1 {v31.2s, v0.2s, v1.2s}, [sp], #24
- ld1 {v0.1d, v1.1d, v2.1d}, [x0], #24
-// CHECK: ld1 {v0.16b, v1.16b, v2.16b}, [x0], x1
+ ld1 { v0.16b, v1.16b, v2.16b }, [x0], x1
+ ld1 { v15.8h, v16.8h, v17.8h }, [x15], x2
+ ld1 { v31.4s, v0.4s, v1.4s }, [sp], #48
+ ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+ ld1 { v0.8b, v1.8b, v2.8b }, [x0], x2
+ ld1 { v15.4h, v16.4h, v17.4h }, [x15], x3
+ ld1 { v31.2s, v0.2s, v1.2s }, [sp], #24
+ ld1 { v0.1d, v1.1d, v2.1d }, [x0], #24
+// CHECK: ld1 { v0.16b, v1.16b, v2.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x60,0xc1,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK: ld1 { v15.8h, v16.8h, v17.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x65,0xc2,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK: ld1 { v31.4s, v0.4s, v1.4s }, [sp], #48
// CHECK: // encoding: [0xff,0x6b,0xdf,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK: ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
// CHECK: // encoding: [0x00,0x6c,0xdf,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK: ld1 { v0.8b, v1.8b, v2.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x60,0xc2,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK: ld1 { v15.4h, v16.4h, v17.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x65,0xc3,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK: ld1 { v31.2s, v0.2s, v1.2s }, [sp], #24
// CHECK: // encoding: [0xff,0x6b,0xdf,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d, v2.1d}, [x0], #24
+// CHECK: ld1 { v0.1d, v1.1d, v2.1d }, [x0], #24
// CHECK: // encoding: [0x00,0x6c,0xdf,0x0c]
//------------------------------------------------------------------------------
// Load multiple 1-element structures from four consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
- ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
- ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
- ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
- ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
- ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
- ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
- ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
-// CHECK: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+ ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
+ ld1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
+ ld1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
+ ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
+ ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+ ld1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
+ ld1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
+ ld1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0], #32
+// CHECK: ld1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x20,0xc1,0x4c]
-// CHECK: ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK: ld1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x25,0xc2,0x4c]
-// CHECK: ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK: ld1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
// CHECK: // encoding: [0xff,0x2b,0xdf,0x4c]
-// CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK: ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
// CHECK: // encoding: [0x00,0x2c,0xdf,0x4c]
-// CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK: ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
// CHECK: // encoding: [0x00,0x20,0xc3,0x0c]
-// CHECK: ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK: ld1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
// CHECK: // encoding: [0xef,0x25,0xc4,0x0c]
-// CHECK: ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK: ld1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
// CHECK: // encoding: [0xff,0x2b,0xdf,0x0c]
-// CHECK: ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
+// CHECK: ld1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0], #32
// CHECK: // encoding: [0x00,0x2c,0xdf,0x0c]
//------------------------------------------------------------------------------
// Load multiple 2-element structures from two consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- ld2 {v0.16b, v1.16b}, [x0], x1
- ld2 {v15.8h, v16.8h}, [x15], x2
- ld2 {v31.4s, v0.4s}, [sp], #32
- ld2 {v0.2d, v1.2d}, [x0], #32
- ld2 {v0.8b, v1.8b}, [x0], x2
- ld2 {v15.4h, v16.4h}, [x15], x3
- ld2 {v31.2s, v0.2s}, [sp], #16
-// CHECK: ld2 {v0.16b, v1.16b}, [x0], x1
+ ld2 { v0.16b, v1.16b }, [x0], x1
+ ld2 { v15.8h, v16.8h }, [x15], x2
+ ld2 { v31.4s, v0.4s }, [sp], #32
+ ld2 { v0.2d, v1.2d }, [x0], #32
+ ld2 { v0.8b, v1.8b }, [x0], x2
+ ld2 { v15.4h, v16.4h }, [x15], x3
+ ld2 { v31.2s, v0.2s }, [sp], #16
+// CHECK: ld2 { v0.16b, v1.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x80,0xc1,0x4c]
-// CHECK: ld2 {v15.8h, v16.8h}, [x15], x2
+// CHECK: ld2 { v15.8h, v16.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x85,0xc2,0x4c]
-// CHECK: ld2 {v31.4s, v0.4s}, [sp], #32
+// CHECK: ld2 { v31.4s, v0.4s }, [sp], #32
// CHECK: // encoding: [0xff,0x8b,0xdf,0x4c]
-// CHECK: ld2 {v0.2d, v1.2d}, [x0], #32
+// CHECK: ld2 { v0.2d, v1.2d }, [x0], #32
// CHECK: // encoding: [0x00,0x8c,0xdf,0x4c]
-// CHECK: ld2 {v0.8b, v1.8b}, [x0], x2
+// CHECK: ld2 { v0.8b, v1.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x80,0xc2,0x0c]
-// CHECK: ld2 {v15.4h, v16.4h}, [x15], x3
+// CHECK: ld2 { v15.4h, v16.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x85,0xc3,0x0c]
-// CHECK: ld2 {v31.2s, v0.2s}, [sp], #16
+// CHECK: ld2 { v31.2s, v0.2s }, [sp], #16
// CHECK: // encoding: [0xff,0x8b,0xdf,0x0c]
//------------------------------------------------------------------------------
// Load multiple 3-element structures from three consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- ld3 {v0.16b, v1.16b, v2.16b}, [x0], x1
- ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2
- ld3 {v31.4s, v0.4s, v1.4s}, [sp], #48
- ld3 {v0.2d, v1.2d, v2.2d}, [x0], #48
- ld3 {v0.8b, v1.8b, v2.8b}, [x0], x2
- ld3 {v15.4h, v16.4h, v17.4h}, [x15], x3
- ld3 {v31.2s, v0.2s, v1.2s}, [sp], #24
-// CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0], x1
+ ld3 { v0.16b, v1.16b, v2.16b }, [x0], x1
+ ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2
+ ld3 { v31.4s, v0.4s, v1.4s }, [sp], #48
+ ld3 { v0.2d, v1.2d, v2.2d }, [x0], #48
+ ld3 { v0.8b, v1.8b, v2.8b }, [x0], x2
+ ld3 { v15.4h, v16.4h, v17.4h }, [x15], x3
+ ld3 { v31.2s, v0.2s, v1.2s }, [sp], #24
+// CHECK: ld3 { v0.16b, v1.16b, v2.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x40,0xc1,0x4c]
-// CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK: ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x45,0xc2,0x4c]
-// CHECK: ld3 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK: ld3 { v31.4s, v0.4s, v1.4s }, [sp], #48
// CHECK: // encoding: [0xff,0x4b,0xdf,0x4c]
-// CHECK: ld3 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0], #48
// CHECK: // encoding: [0x00,0x4c,0xdf,0x4c]
-// CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK: ld3 { v0.8b, v1.8b, v2.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x40,0xc2,0x0c]
-// CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK: ld3 { v15.4h, v16.4h, v17.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x45,0xc3,0x0c]
-// CHECK: ld3 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK: ld3 { v31.2s, v0.2s, v1.2s }, [sp], #24
// CHECK: // encoding: [0xff,0x4b,0xdf,0x0c]
//------------------------------------------------------------------------------
// Load multiple 4-element structures from four consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
- ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
- ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
- ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
- ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
- ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
- ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
-// CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+ ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
+ ld4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
+ ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
+ ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
+ ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+ ld4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
+ ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
+// CHECK: ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x00,0xc1,0x4c]
-// CHECK: ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK: ld4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x05,0xc2,0x4c]
-// CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK: ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
// CHECK: // encoding: [0xff,0x0b,0xdf,0x4c]
-// CHECK: ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
// CHECK: // encoding: [0x00,0x0c,0xdf,0x4c]
-// CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK: ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
// CHECK: // encoding: [0x00,0x00,0xc3,0x0c]
-// CHECK: ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK: ld4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
// CHECK: // encoding: [0xef,0x05,0xc4,0x0c]
-// CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK: ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
// CHECK: // encoding: [0xff,0x0b,0xdf,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from one register (post-index)
//------------------------------------------------------------------------------
- st1 {v0.16b}, [x0], x1
- st1 {v15.8h}, [x15], x2
- st1 {v31.4s}, [sp], #16
- st1 {v0.2d}, [x0], #16
- st1 {v0.8b}, [x0], x2
- st1 {v15.4h}, [x15], x3
- st1 {v31.2s}, [sp], #8
- st1 {v0.1d}, [x0], #8
-// CHECK: st1 {v0.16b}, [x0], x1
+ st1 { v0.16b }, [x0], x1
+ st1 { v15.8h }, [x15], x2
+ st1 { v31.4s }, [sp], #16
+ st1 { v0.2d }, [x0], #16
+ st1 { v0.8b }, [x0], x2
+ st1 { v15.4h }, [x15], x3
+ st1 { v31.2s }, [sp], #8
+ st1 { v0.1d }, [x0], #8
+// CHECK: st1 { v0.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x70,0x81,0x4c]
-// CHECK: st1 {v15.8h}, [x15], x2
+// CHECK: st1 { v15.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x75,0x82,0x4c]
-// CHECK: st1 {v31.4s}, [sp], #16
+// CHECK: st1 { v31.4s }, [sp], #16
// CHECK: // encoding: [0xff,0x7b,0x9f,0x4c]
-// CHECK: st1 {v0.2d}, [x0], #16
+// CHECK: st1 { v0.2d }, [x0], #16
// CHECK: // encoding: [0x00,0x7c,0x9f,0x4c]
-// CHECK: st1 {v0.8b}, [x0], x2
+// CHECK: st1 { v0.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x70,0x82,0x0c]
-// CHECK: st1 {v15.4h}, [x15], x3
+// CHECK: st1 { v15.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x75,0x83,0x0c]
-// CHECK: st1 {v31.2s}, [sp], #8
+// CHECK: st1 { v31.2s }, [sp], #8
// CHECK: // encoding: [0xff,0x7b,0x9f,0x0c]
-// CHECK: st1 {v0.1d}, [x0], #8
+// CHECK: st1 { v0.1d }, [x0], #8
// CHECK: // encoding: [0x00,0x7c,0x9f,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from two consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- st1 {v0.16b, v1.16b}, [x0], x1
- st1 {v15.8h, v16.8h}, [x15], x2
- st1 {v31.4s, v0.4s}, [sp], #32
- st1 {v0.2d, v1.2d}, [x0], #32
- st1 {v0.8b, v1.8b}, [x0], x2
- st1 {v15.4h, v16.4h}, [x15], x3
- st1 {v31.2s, v0.2s}, [sp], #16
- st1 {v0.1d, v1.1d}, [x0], #16
-// CHECK: st1 {v0.16b, v1.16b}, [x0], x1
+ st1 { v0.16b, v1.16b }, [x0], x1
+ st1 { v15.8h, v16.8h }, [x15], x2
+ st1 { v31.4s, v0.4s }, [sp], #32
+ st1 { v0.2d, v1.2d }, [x0], #32
+ st1 { v0.8b, v1.8b }, [x0], x2
+ st1 { v15.4h, v16.4h }, [x15], x3
+ st1 { v31.2s, v0.2s }, [sp], #16
+ st1 { v0.1d, v1.1d }, [x0], #16
+// CHECK: st1 { v0.16b, v1.16b }, [x0], x1
// CHECK: // encoding: [0x00,0xa0,0x81,0x4c]
-// CHECK: st1 {v15.8h, v16.8h}, [x15], x2
+// CHECK: st1 { v15.8h, v16.8h }, [x15], x2
// CHECK: // encoding: [0xef,0xa5,0x82,0x4c]
-// CHECK: st1 {v31.4s, v0.4s}, [sp], #32
+// CHECK: st1 { v31.4s, v0.4s }, [sp], #32
// CHECK: // encoding: [0xff,0xab,0x9f,0x4c]
-// CHECK: st1 {v0.2d, v1.2d}, [x0], #32
+// CHECK: st1 { v0.2d, v1.2d }, [x0], #32
// CHECK: // encoding: [0x00,0xac,0x9f,0x4c]
-// CHECK: st1 {v0.8b, v1.8b}, [x0], x2
+// CHECK: st1 { v0.8b, v1.8b }, [x0], x2
// CHECK: // encoding: [0x00,0xa0,0x82,0x0c]
-// CHECK: st1 {v15.4h, v16.4h}, [x15], x3
+// CHECK: st1 { v15.4h, v16.4h }, [x15], x3
// CHECK: // encoding: [0xef,0xa5,0x83,0x0c]
-// CHECK: st1 {v31.2s, v0.2s}, [sp], #16
+// CHECK: st1 { v31.2s, v0.2s }, [sp], #16
// CHECK: // encoding: [0xff,0xab,0x9f,0x0c]
-// CHECK: st1 {v0.1d, v1.1d}, [x0], #16
+// CHECK: st1 { v0.1d, v1.1d }, [x0], #16
// CHECK: // encoding: [0x00,0xac,0x9f,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from three consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- st1 {v0.16b, v1.16b, v2.16b}, [x0], x1
- st1 {v15.8h, v16.8h, v17.8h}, [x15], x2
- st1 {v31.4s, v0.4s, v1.4s}, [sp], #48
- st1 {v0.2d, v1.2d, v2.2d}, [x0], #48
- st1 {v0.8b, v1.8b, v2.8b}, [x0], x2
- st1 {v15.4h, v16.4h, v17.4h}, [x15], x3
- st1 {v31.2s, v0.2s, v1.2s}, [sp], #24
- st1 {v0.1d, v1.1d, v2.1d}, [x0], #24
-// CHECK: st1 {v0.16b, v1.16b, v2.16b}, [x0], x1
+ st1 { v0.16b, v1.16b, v2.16b }, [x0], x1
+ st1 { v15.8h, v16.8h, v17.8h }, [x15], x2
+ st1 { v31.4s, v0.4s, v1.4s }, [sp], #48
+ st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+ st1 { v0.8b, v1.8b, v2.8b }, [x0], x2
+ st1 { v15.4h, v16.4h, v17.4h }, [x15], x3
+ st1 { v31.2s, v0.2s, v1.2s }, [sp], #24
+ st1 { v0.1d, v1.1d, v2.1d }, [x0], #24
+// CHECK: st1 { v0.16b, v1.16b, v2.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x60,0x81,0x4c]
-// CHECK: st1 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK: st1 { v15.8h, v16.8h, v17.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x65,0x82,0x4c]
-// CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK: st1 { v31.4s, v0.4s, v1.4s }, [sp], #48
// CHECK: // encoding: [0xff,0x6b,0x9f,0x4c]
-// CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK: st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
// CHECK: // encoding: [0x00,0x6c,0x9f,0x4c]
-// CHECK: st1 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK: st1 { v0.8b, v1.8b, v2.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x60,0x82,0x0c]
-// CHECK: st1 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK: st1 { v15.4h, v16.4h, v17.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x65,0x83,0x0c]
-// CHECK: st1 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK: st1 { v31.2s, v0.2s, v1.2s }, [sp], #24
// CHECK: // encoding: [0xff,0x6b,0x9f,0x0c]
-// CHECK: st1 {v0.1d, v1.1d, v2.1d}, [x0], #24
+// CHECK: st1 { v0.1d, v1.1d, v2.1d }, [x0], #24
// CHECK: // encoding: [0x00,0x6c,0x9f,0x0c]
//------------------------------------------------------------------------------
// Store multiple 1-element structures from four consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
- st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
- st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
- st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
- st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
- st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
- st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
- st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
-// CHECK: st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
+ st1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
+ st1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
+ st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
+ st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+ st1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
+ st1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
+ st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0], #32
+// CHECK: st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x20,0x81,0x4c]
-// CHECK: st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK: st1 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x25,0x82,0x4c]
-// CHECK: st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK: st1 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
// CHECK: // encoding: [0xff,0x2b,0x9f,0x4c]
-// CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK: st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
// CHECK: // encoding: [0x00,0x2c,0x9f,0x4c]
-// CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK: st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
// CHECK: // encoding: [0x00,0x20,0x83,0x0c]
-// CHECK: st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK: st1 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
// CHECK: // encoding: [0xef,0x25,0x84,0x0c]
-// CHECK: st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK: st1 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
// CHECK: // encoding: [0xff,0x2b,0x9f,0x0c]
-// CHECK: st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0], #32
+// CHECK: st1 { v0.1d, v1.1d, v2.1d, v3.1d }, [x0], #32
// CHECK: // encoding: [0x00,0x2c,0x9f,0x0c]
//------------------------------------------------------------------------------
// Store multiple 2-element structures from two consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- st2 {v0.16b, v1.16b}, [x0], x1
- st2 {v15.8h, v16.8h}, [x15], x2
- st2 {v31.4s, v0.4s}, [sp], #32
- st2 {v0.2d, v1.2d}, [x0], #32
- st2 {v0.8b, v1.8b}, [x0], x2
- st2 {v15.4h, v16.4h}, [x15], x3
- st2 {v31.2s, v0.2s}, [sp], #16
-// CHECK: st2 {v0.16b, v1.16b}, [x0], x1
+ st2 { v0.16b, v1.16b }, [x0], x1
+ st2 { v15.8h, v16.8h }, [x15], x2
+ st2 { v31.4s, v0.4s }, [sp], #32
+ st2 { v0.2d, v1.2d }, [x0], #32
+ st2 { v0.8b, v1.8b }, [x0], x2
+ st2 { v15.4h, v16.4h }, [x15], x3
+ st2 { v31.2s, v0.2s }, [sp], #16
+// CHECK: st2 { v0.16b, v1.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x80,0x81,0x4c]
-// CHECK: st2 {v15.8h, v16.8h}, [x15], x2
+// CHECK: st2 { v15.8h, v16.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x85,0x82,0x4c]
-// CHECK: st2 {v31.4s, v0.4s}, [sp], #32
+// CHECK: st2 { v31.4s, v0.4s }, [sp], #32
// CHECK: // encoding: [0xff,0x8b,0x9f,0x4c]
-// CHECK: st2 {v0.2d, v1.2d}, [x0], #32
+// CHECK: st2 { v0.2d, v1.2d }, [x0], #32
// CHECK: // encoding: [0x00,0x8c,0x9f,0x4c]
-// CHECK: st2 {v0.8b, v1.8b}, [x0], x2
+// CHECK: st2 { v0.8b, v1.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x80,0x82,0x0c]
-// CHECK: st2 {v15.4h, v16.4h}, [x15], x3
+// CHECK: st2 { v15.4h, v16.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x85,0x83,0x0c]
-// CHECK: st2 {v31.2s, v0.2s}, [sp], #16
+// CHECK: st2 { v31.2s, v0.2s }, [sp], #16
// CHECK: // encoding: [0xff,0x8b,0x9f,0x0c]
//------------------------------------------------------------------------------
// Store multiple 3-element structures from three consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- st3 {v0.16b, v1.16b, v2.16b}, [x0], x1
- st3 {v15.8h, v16.8h, v17.8h}, [x15], x2
- st3 {v31.4s, v0.4s, v1.4s}, [sp], #48
- st3 {v0.2d, v1.2d, v2.2d}, [x0], #48
- st3 {v0.8b, v1.8b, v2.8b}, [x0], x2
- st3 {v15.4h, v16.4h, v17.4h}, [x15], x3
- st3 {v31.2s, v0.2s, v1.2s}, [sp], #24
-// CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0], x1
+ st3 { v0.16b, v1.16b, v2.16b }, [x0], x1
+ st3 { v15.8h, v16.8h, v17.8h }, [x15], x2
+ st3 { v31.4s, v0.4s, v1.4s }, [sp], #48
+ st3 { v0.2d, v1.2d, v2.2d }, [x0], #48
+ st3 { v0.8b, v1.8b, v2.8b }, [x0], x2
+ st3 { v15.4h, v16.4h, v17.4h }, [x15], x3
+ st3 { v31.2s, v0.2s, v1.2s }, [sp], #24
+// CHECK: st3 { v0.16b, v1.16b, v2.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x40,0x81,0x4c]
-// CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15], x2
+// CHECK: st3 { v15.8h, v16.8h, v17.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x45,0x82,0x4c]
-// CHECK: st3 {v31.4s, v0.4s, v1.4s}, [sp], #48
+// CHECK: st3 { v31.4s, v0.4s, v1.4s }, [sp], #48
// CHECK: // encoding: [0xff,0x4b,0x9f,0x4c]
-// CHECK: st3 {v0.2d, v1.2d, v2.2d}, [x0], #48
+// CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0], #48
// CHECK: // encoding: [0x00,0x4c,0x9f,0x4c]
-// CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0], x2
+// CHECK: st3 { v0.8b, v1.8b, v2.8b }, [x0], x2
// CHECK: // encoding: [0x00,0x40,0x82,0x0c]
-// CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15], x3
+// CHECK: st3 { v15.4h, v16.4h, v17.4h }, [x15], x3
// CHECK: // encoding: [0xef,0x45,0x83,0x0c]
-// CHECK: st3 {v31.2s, v0.2s, v1.2s}, [sp], #24
+// CHECK: st3 { v31.2s, v0.2s, v1.2s }, [sp], #24
// CHECK: // encoding: [0xff,0x4b,0x9f,0x0c]
//------------------------------------------------------------------------------
// Store multiple 4-element structures from four consecutive registers
// (post-index)
//------------------------------------------------------------------------------
- st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
- st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
- st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
- st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
- st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
- st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
- st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
-// CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+ st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
+ st4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
+ st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
+ st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
+ st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+ st4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
+ st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
+// CHECK: st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], x1
// CHECK: // encoding: [0x00,0x00,0x81,0x4c]
-// CHECK: st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15], x2
+// CHECK: st4 { v15.8h, v16.8h, v17.8h, v18.8h }, [x15], x2
// CHECK: // encoding: [0xef,0x05,0x82,0x4c]
-// CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+// CHECK: st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
// CHECK: // encoding: [0xff,0x0b,0x9f,0x4c]
-// CHECK: st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0], #64
+// CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0], #64
// CHECK: // encoding: [0x00,0x0c,0x9f,0x4c]
-// CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+// CHECK: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
// CHECK: // encoding: [0x00,0x00,0x83,0x0c]
-// CHECK: st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15], x4
+// CHECK: st4 { v15.4h, v16.4h, v17.4h, v18.4h }, [x15], x4
// CHECK: // encoding: [0xef,0x05,0x84,0x0c]
-// CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], #32
+// CHECK: st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], #32
// CHECK: // encoding: [0xff,0x0b,0x9f,0x0c]
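The st1/st2/st3/st4 hunks above change only the printed form of the vector register list: the instruction printer now emits spaces inside the braces ({ v0.16b } rather than {v0.16b}), and the test inputs were updated to match; every encoding is unchanged. A minimal standalone check of the same behavior — hypothetical, not part of this patch, and assuming the parser still accepts the unspaced spelling as it did before:

// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
        st1 {v0.16b}, [x0], x1     // unspaced input form
        st1 { v0.16b }, [x0], x1   // canonical spelling used by the printer
// CHECK: st1 { v0.16b }, [x0], x1 // encoding: [0x00,0x70,0x81,0x4c]
// CHECK: st1 { v0.16b }, [x0], x1 // encoding: [0x00,0x70,0x81,0x4c]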
diff --git a/test/MC/AArch64/neon-tbl.s b/test/MC/AArch64/neon-tbl.s
index ff3e86b..bb39fa9 100644
--- a/test/MC/AArch64/neon-tbl.s
+++ b/test/MC/AArch64/neon-tbl.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -6,51 +6,50 @@
// Instructions across vector registers
//------------------------------------------------------------------------------
- tbl v0.8b, {v1.16b}, v2.8b
- tbl v0.8b, {v1.16b, v2.16b}, v2.8b
- tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
- tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
- tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
-
-// CHECK: tbl v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x00,0x02,0x0e]
-// CHECK: tbl v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x20,0x02,0x0e]
-// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x40,0x02,0x0e]
-// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x60,0x02,0x0e]
-// CHECK: tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x63,0x02,0x0e]
-
- tbl v0.16b, {v1.16b}, v2.16b
- tbl v0.16b, {v1.16b, v2.16b}, v2.16b
- tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
- tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
- tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
-
-// CHECK: tbl v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x00,0x02,0x4e]
-// CHECK: tbl v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x20,0x02,0x4e]
-// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x40,0x02,0x4e]
-// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x60,0x02,0x4e]
-// CHECK: tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x63,0x02,0x4e]
-
- tbx v0.8b, {v1.16b}, v2.8b
- tbx v0.8b, {v1.16b, v2.16b}, v2.8b
- tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
- tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
- tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
-
-// CHECK: tbx v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x10,0x02,0x0e]
-// CHECK: tbx v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x30,0x02,0x0e]
-// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x50,0x02,0x0e]
-// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x70,0x02,0x0e]
-// CHECK: tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x73,0x02,0x0e]
-
- tbx v0.16b, {v1.16b}, v2.16b
- tbx v0.16b, {v1.16b, v2.16b}, v2.16b
- tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
- tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
- tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
-
-// CHECK: tbx v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x10,0x02,0x4e]
-// CHECK: tbx v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x30,0x02,0x4e]
-// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x50,0x02,0x4e]
-// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x70,0x02,0x4e]
-// CHECK: tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x73,0x02,0x4e]
-
+ tbl v0.8b, { v1.16b }, v2.8b
+ tbl v0.8b, { v1.16b, v2.16b }, v2.8b
+ tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b
+ tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.8b
+ tbl v0.8b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.8b
+
+// CHECK: tbl v0.8b, { v1.16b }, v2.8b // encoding: [0x20,0x00,0x02,0x0e]
+// CHECK: tbl v0.8b, { v1.16b, v2.16b }, v2.8b // encoding: [0x20,0x20,0x02,0x0e]
+// CHECK: tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b // encoding: [0x20,0x40,0x02,0x0e]
+// CHECK: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.8b // encoding: [0x20,0x60,0x02,0x0e]
+// CHECK: tbl v0.8b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.8b // encoding: [0xe0,0x63,0x02,0x0e]
+
+ tbl v0.16b, { v1.16b }, v2.16b
+ tbl v0.16b, { v1.16b, v2.16b }, v2.16b
+ tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b
+ tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.16b
+ tbl v0.16b, { v30.16b, v31.16b, v0.16b, v1.16b }, v2.16b
+
+// CHECK: tbl v0.16b, { v1.16b }, v2.16b // encoding: [0x20,0x00,0x02,0x4e]
+// CHECK: tbl v0.16b, { v1.16b, v2.16b }, v2.16b // encoding: [0x20,0x20,0x02,0x4e]
+// CHECK: tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b // encoding: [0x20,0x40,0x02,0x4e]
+// CHECK: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.16b // encoding: [0x20,0x60,0x02,0x4e]
+// CHECK: tbl v0.16b, { v30.16b, v31.16b, v0.16b, v1.16b }, v2.16b // encoding: [0xc0,0x63,0x02,0x4e]
+
+ tbx v0.8b, { v1.16b }, v2.8b
+ tbx v0.8b, { v1.16b, v2.16b }, v2.8b
+ tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b
+ tbx v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.8b
+ tbx v0.8b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.8b
+
+// CHECK: tbx v0.8b, { v1.16b }, v2.8b // encoding: [0x20,0x10,0x02,0x0e]
+// CHECK: tbx v0.8b, { v1.16b, v2.16b }, v2.8b // encoding: [0x20,0x30,0x02,0x0e]
+// CHECK: tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b // encoding: [0x20,0x50,0x02,0x0e]
+// CHECK: tbx v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.8b // encoding: [0x20,0x70,0x02,0x0e]
+// CHECK: tbx v0.8b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.8b // encoding: [0xe0,0x73,0x02,0x0e]
+
+ tbx v0.16b, { v1.16b }, v2.16b
+ tbx v0.16b, { v1.16b, v2.16b }, v2.16b
+ tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b
+ tbx v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.16b
+ tbx v0.16b, { v30.16b, v31.16b, v0.16b, v1.16b }, v2.16b
+
+// CHECK: tbx v0.16b, { v1.16b }, v2.16b // encoding: [0x20,0x10,0x02,0x4e]
+// CHECK: tbx v0.16b, { v1.16b, v2.16b }, v2.16b // encoding: [0x20,0x30,0x02,0x4e]
+// CHECK: tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b // encoding: [0x20,0x50,0x02,0x4e]
+// CHECK: tbx v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v2.16b // encoding: [0x20,0x70,0x02,0x4e]
+// CHECK: tbx v0.16b, { v30.16b, v31.16b, v0.16b, v1.16b }, v2.16b // encoding: [0xc0,0x73,0x02,0x4e]
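The tbl/tbx updates follow the same brace-spacing convention, and the hunks also show that the table register list may wrap past v31 back to v0. A minimal sketch (hypothetical file, reusing the arm64 triple from the updated RUN line and an encoding from the hunk above):

// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
        tbl v0.8b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.8b   // list wraps v31 -> v0
// CHECK: tbl v0.8b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.8b // encoding: [0xe0,0x63,0x02,0x0e]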
diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s
index ea786c0..60a5fd2 100644
--- a/test/MC/AArch64/noneon-diagnostics.s
+++ b/test/MC/AArch64/noneon-diagnostics.s
@@ -4,25 +4,26 @@
fmla v3.4s, v12.4s, v17.4s
fmla v1.2d, v30.2d, v20.2d
fmla v9.2s, v9.2s, v0.2s
-// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmla v3.4s, v12.4s, v17.4s
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmla v1.2d, v30.2d, v20.2d
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmla v9.2s, v9.2s, v0.2s
// CHECK-ERROR-NEXT: ^
fmls v3.4s, v12.4s, v17.4s
fmls v1.2d, v30.2d, v20.2d
fmls v9.2s, v9.2s, v0.2s
-// CHECK-ERROR: error: instruction requires a CPU feature not currently enabled
+
+// CHECK-ERROR: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmls v3.4s, v12.4s, v17.4s
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmls v1.2d, v30.2d, v20.2d
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: instruction requires a CPU feature not currently enabled
+// CHECK-ERROR-NEXT: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s
// CHECK-ERROR-NEXT: ^
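In noneon-diagnostics.s the diagnostic becomes more specific: "instruction requires: neon" replaces the generic "instruction requires a CPU feature not currently enabled". A minimal reproduction — hypothetical, and assuming -mattr=-neon is how the feature is disabled in the real RUN line:

// RUN: not llvm-mc -triple=aarch64-none-linux-gnu -mattr=-neon < %s 2>&1 | FileCheck %s
        fmla v3.4s, v12.4s, v17.4s
// CHECK: error: instruction requires: neon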
diff --git a/test/MC/AArch64/optional-hash.s b/test/MC/AArch64/optional-hash.s
index 54b6fb3..3922b5b 100644
--- a/test/MC/AArch64/optional-hash.s
+++ b/test/MC/AArch64/optional-hash.s
@@ -1,6 +1,6 @@
// PR18929
// RUN: llvm-mc < %s -triple=aarch64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \
-// RUN: | llvm-objdump --disassemble -arch=aarch64 -mattr=+fp-armv8,+neon - | FileCheck %s
+// RUN: | llvm-objdump --disassemble -arch=arm64 -mattr=+fp-armv8,+neon - | FileCheck %s
.text
// CHECK: cmp w0, #123
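optional-hash.s (PR18929) verifies that immediates may be written without the leading '#'; the only change in this patch is the objdump arch in the RUN line. A sketch of the syntax the test exercises — hypothetical input, with the disassembler printing the canonical '#' form either way:

        cmp w0, #123
        cmp w0, 123        // '#' is optional on immediates (PR18929)
// CHECK: cmp w0, #123
// CHECK: cmp w0, #123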
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index f99cb41..ebf0216 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -7,14 +7,15 @@
movn x2, #:dtprel_g2:var
movz x3, #:dtprel_g2:var
movn x4, #:dtprel_g2:var
-// CHECK: movz x1, #:dtprel_g2:var // encoding: [0x01'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK: movn x2, #:dtprel_g2:var // encoding: [0x02'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK: movz x3, #:dtprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK: movn x4, #:dtprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+
+// CHECK: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
+// CHECK: movn x2, #:dtprel_g2:var // encoding: [0bAAA00010,A,0b110AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
+// CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
+// CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw
// CHECK-ELF: Relocations [
// CHECK-ELF-NEXT: Section (2) .rela.text {
@@ -28,14 +29,15 @@
movn x6, #:dtprel_g1:var
movz w7, #:dtprel_g1:var
movn w8, #:dtprel_g1:var
-// CHECK: movz x5, #:dtprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
-// CHECK: movn x6, #:dtprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
-// CHECK: movz w7, #:dtprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
-// CHECK: movn w8, #:dtprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+
+// CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movn w8, #:dtprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x10 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
// CHECK-ELF-NEXT: 0x14 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
@@ -45,10 +47,11 @@
movk x9, #:dtprel_g1_nc:var
movk w10, #:dtprel_g1_nc:var
-// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
-// CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+
+// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x20 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0x24 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
@@ -58,13 +61,15 @@
movn x12, #:dtprel_g0:var
movz w13, #:dtprel_g0:var
movn w14, #:dtprel_g0:var
-// CHECK: movz x11, #:dtprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK: movn x12, #:dtprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK: movz w13, #:dtprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK: movn w14, #:dtprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A']
+
+// CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movn w14, #:dtprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x28 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
// CHECK-ELF-NEXT: 0x2C R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
@@ -74,10 +79,11 @@
movk x15, #:dtprel_g0_nc:var
movk w16, #:dtprel_g0_nc:var
-// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
-// CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+
+// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
+// CHECK: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x38 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0x3C R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
@@ -85,10 +91,11 @@
add x17, x18, #:dtprel_hi12:var, lsl #12
add w19, w20, #:dtprel_hi12:var, lsl #12
-// CHECK: add x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
-// CHECK: add w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+
+// CHECK: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_aarch64_add_imm12
+// CHECK: add w19, w20, :dtprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11]
+// CHECK: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_aarch64_add_imm12
// CHECK-ELF-NEXT: 0x40 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x44 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]]
@@ -96,10 +103,11 @@
add x21, x22, #:dtprel_lo12:var
add w23, w24, #:dtprel_lo12:var
-// CHECK: add x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
-// CHECK: add w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+
+// CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12
+// CHECK: add w23, w24, :dtprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12
// CHECK-ELF-NEXT: 0x48 R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x4C R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
@@ -107,10 +115,11 @@
add x25, x26, #:dtprel_lo12_nc:var
add w27, w28, #:dtprel_lo12_nc:var
-// CHECK: add x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
-// CHECK: add w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+
+// CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
+// CHECK: add w27, w28, :dtprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
// CHECK-ELF-NEXT: 0x50 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0x54 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
@@ -118,10 +127,11 @@
ldrb w29, [x30, #:dtprel_lo12:var]
ldrsb x29, [x28, #:dtprel_lo12_nc:var]
-// CHECK: ldrb w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12
-// CHECK: ldrsb x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc
+
+// CHECK: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
// CHECK-ELF-NEXT: 0x58 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x5C R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
@@ -129,10 +139,11 @@
strh w27, [x26, #:dtprel_lo12:var]
ldrsh x25, [x24, #:dtprel_lo12_nc:var]
-// CHECK: strh w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12
-// CHECK: ldrsh x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_n
+
+// CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
// CHECK-ELF-NEXT: 0x60 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
@@ -140,10 +151,11 @@
ldr w23, [x22, #:dtprel_lo12:var]
ldrsw x21, [x20, #:dtprel_lo12_nc:var]
-// CHECK: ldr w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12
-// CHECK: ldrsw x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_n
+
+// CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
// CHECK-ELF-NEXT: 0x68 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x6C R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
@@ -151,11 +163,11 @@
ldr x19, [x18, #:dtprel_lo12:var]
str x17, [x16, #:dtprel_lo12_nc:var]
-// CHECK: ldr x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12
-// CHECK: str x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc
+// CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
// CHECK-ELF-NEXT: 0x70 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x74 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
@@ -164,10 +176,11 @@
// TLS initial-exec forms
movz x15, #:gottprel_g1:var
movz w14, #:gottprel_g1:var
-// CHECK: movz x15, #:gottprel_g1:var // encoding: [0x0f'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
-// CHECK: movz w14, #:gottprel_g1:var // encoding: [0x0e'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+
+// CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movz w14, #:gottprel_g1:var // encoding: [0bAAA01110,A,0b101AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x78 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]]
// CHECK-ELF-NEXT: 0x7C R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]]
@@ -175,10 +188,11 @@
movk x13, #:gottprel_g0_nc:var
movk w12, #:gottprel_g0_nc:var
-// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
-// CHECK: movk w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+
+// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2]
+// CHECK: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w12, #:gottprel_g0_nc:var // encoding: [0bAAA01100,A,0b100AAAAA,0x72]
+// CHECK: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x80 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0x84 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
@@ -187,12 +201,13 @@
adrp x11, :gottprel:var
ldr x10, [x0, #:gottprel_lo12:var]
ldr x9, :gottprel:var
+
// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page
-// CHECK: ldr x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc
-// CHECK: ldr x9, :gottprel:var // encoding: [0x09'A',A,A,0x58'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19
+// CHECK: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58]
+// CHECK: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19
// CHECK-ELF-NEXT: 0x88 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]]
// CHECK-ELF-NEXT: 0x8C R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]]
@@ -202,10 +217,11 @@
// TLS local-exec forms
movz x3, #:tprel_g2:var
movn x4, #:tprel_g2:var
-// CHECK: movz x3, #:tprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
-// CHECK: movn x4, #:tprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+
+// CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw
+// CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x94 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
// CHECK-ELF-NEXT: 0x98 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
@@ -215,14 +231,15 @@
movn x6, #:tprel_g1:var
movz w7, #:tprel_g1:var
movn w8, #:tprel_g1:var
-// CHECK: movz x5, #:tprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK: movn x6, #:tprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK: movz w7, #:tprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK: movn w8, #:tprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+
+// CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
+// CHECK: movn w8, #:tprel_g1:var // encoding: [0bAAA01000,A,0b101AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0x9C R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
// CHECK-ELF-NEXT: 0xA0 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
@@ -232,10 +249,11 @@
movk x9, #:tprel_g1_nc:var
movk w10, #:tprel_g1_nc:var
-// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
-// CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+
+// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2]
+// CHECK: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72]
+// CHECK: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0xAC R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0xB0 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
@@ -245,14 +263,15 @@
movn x12, #:tprel_g0:var
movz w13, #:tprel_g0:var
movn w14, #:tprel_g0:var
-// CHECK: movz x11, #:tprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK: movn x12, #:tprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK: movz w13, #:tprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK: movn w14, #:tprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+
+// CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
+// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
+// CHECK: movn w14, #:tprel_g0:var // encoding: [0bAAA01110,A,0b100AAAAA,0x12]
+// CHECK: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0xB4 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
// CHECK-ELF-NEXT: 0xB8 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
@@ -262,10 +281,11 @@
movk x15, #:tprel_g0_nc:var
movk w16, #:tprel_g0_nc:var
-// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
-// CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+
+// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
+// CHECK: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
+// CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
+// CHECK: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
// CHECK-ELF-NEXT: 0xC4 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0xC8 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
@@ -273,10 +293,11 @@
add x17, x18, #:tprel_hi12:var, lsl #12
add w19, w20, #:tprel_hi12:var, lsl #12
-// CHECK: add x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
-// CHECK: add w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+
+// CHECK: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_aarch64_add_imm12
+// CHECK: add w19, w20, :tprel_hi12:var, lsl #12 // encoding: [0x93,0bAAAAAA10,0b00AAAAAA,0x11]
+// CHECK: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_aarch64_add_imm12
// CHECK-ELF-NEXT: 0xCC R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]]
// CHECK-ELF-NEXT: 0xD0 R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]]
@@ -284,10 +305,11 @@
add x21, x22, #:tprel_lo12:var
add w23, w24, #:tprel_lo12:var
-// CHECK: add x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
-// CHECK: add w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+
+// CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12
+// CHECK: add w23, w24, :tprel_lo12:var // encoding: [0x17,0bAAAAAA11,0b00AAAAAA,0x11]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12
// CHECK-ELF-NEXT: 0xD4 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0xD8 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
@@ -295,10 +317,11 @@
add x25, x26, #:tprel_lo12_nc:var
add w27, w28, #:tprel_lo12_nc:var
-// CHECK: add x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
-// CHECK: add w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+
+// CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
+// CHECK: add w27, w28, :tprel_lo12_nc:var // encoding: [0x9b,0bAAAAAA11,0b00AAAAAA,0x11]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
// CHECK-ELF-NEXT: 0xDC R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
// CHECK-ELF-NEXT: 0xE0 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
@@ -306,10 +329,11 @@
ldrb w29, [x30, #:tprel_lo12:var]
ldrsb x29, [x28, #:tprel_lo12_nc:var]
-// CHECK: ldrb w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12
-// CHECK: ldrsb x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc
+
+// CHECK: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
+// CHECK: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
// CHECK-ELF-NEXT: 0xE4 R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0xE8 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
@@ -317,10 +341,11 @@
strh w27, [x26, #:tprel_lo12:var]
ldrsh x25, [x24, #:tprel_lo12_nc:var]
-// CHECK: strh w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12
-// CHECK: ldrsh x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_n
+
+// CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
+// CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
// CHECK-ELF-NEXT: 0xEC R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0xF0 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
@@ -328,20 +353,22 @@
ldr w23, [x22, #:tprel_lo12:var]
ldrsw x21, [x20, #:tprel_lo12_nc:var]
-// CHECK: ldr w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12
-// CHECK: ldrsw x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_n
+
+// CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
+// CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
// CHECK-ELF-NEXT: 0xF4 R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0xF8 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
ldr x19, [x18, #:tprel_lo12:var]
str x17, [x16, #:tprel_lo12_nc:var]
-// CHECK: ldr x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12
-// CHECK: str x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc
+
+// CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
// CHECK-ELF-NEXT: 0xFC R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
// CHECK-ELF-NEXT: 0x100 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
@@ -353,16 +380,16 @@
.tlsdesccall var
blr x3
+
// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page
-// CHECK: ldr x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc
-// CHECK: add x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc
+// CHECK: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21
+// CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9]
+// CHECK: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
+// CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91]
+// CHECK: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12
// CHECK: .tlsdesccall var // encoding: []
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call
-// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6]
-
+// CHECK: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call
+// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6]
// CHECK-ELF-NEXT: 0x104 R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
// CHECK-ELF-NEXT: 0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
@@ -374,7 +401,7 @@
// CHECK-ELF: Symbols [
// CHECK-ELF: Symbol {
-// CHECK-ELF: Name: var (6)
+// CHECK-ELF: Name: var
// CHECK-ELF-NEXT: Value:
// CHECK-ELF-NEXT: Size:
// CHECK-ELF-NEXT: Binding: Global
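Two printer changes run through all of the tls-relocs.s hunks: the many per-relocation fixup kinds (fixup_a64_movw_dtprel_g2 and friends) collapse into a few generic kinds such as fixup_aarch64_movw, and unresolved instruction fields are now rendered as 0b binary masks instead of 'A'-suffixed hex bytes. A minimal sketch of one check under the new scheme, hypothetical but copying the encoding from the first hunk:

// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
        movz x1, #:dtprel_g2:var
// CHECK: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92]
// CHECK: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_aarch64_movw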
diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s
index f9ab4c9..92f16cd 100644
--- a/test/MC/AArch64/trace-regs.s
+++ b/test/MC/AArch64/trace-regs.s
@@ -1,4 +1,5 @@
// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+
mrs x8, trcstatr
mrs x9, trcidr8
mrs x11, trcidr9
@@ -207,214 +208,214 @@
mrs x22, trcitctrl
mrs x23, trcclaimset
mrs x14, trcclaimclr
-// CHECK: mrs x8, trcstatr // encoding: [0x08,0x03,0x31,0xd5]
-// CHECK: mrs x9, trcidr8 // encoding: [0xc9,0x00,0x31,0xd5]
-// CHECK: mrs x11, trcidr9 // encoding: [0xcb,0x01,0x31,0xd5]
-// CHECK: mrs x25, trcidr10 // encoding: [0xd9,0x02,0x31,0xd5]
-// CHECK: mrs x7, trcidr11 // encoding: [0xc7,0x03,0x31,0xd5]
-// CHECK: mrs x7, trcidr12 // encoding: [0xc7,0x04,0x31,0xd5]
-// CHECK: mrs x6, trcidr13 // encoding: [0xc6,0x05,0x31,0xd5]
-// CHECK: mrs x27, trcidr0 // encoding: [0xfb,0x08,0x31,0xd5]
-// CHECK: mrs x29, trcidr1 // encoding: [0xfd,0x09,0x31,0xd5]
-// CHECK: mrs x4, trcidr2 // encoding: [0xe4,0x0a,0x31,0xd5]
-// CHECK: mrs x8, trcidr3 // encoding: [0xe8,0x0b,0x31,0xd5]
-// CHECK: mrs x15, trcidr4 // encoding: [0xef,0x0c,0x31,0xd5]
-// CHECK: mrs x20, trcidr5 // encoding: [0xf4,0x0d,0x31,0xd5]
-// CHECK: mrs x6, trcidr6 // encoding: [0xe6,0x0e,0x31,0xd5]
-// CHECK: mrs x6, trcidr7 // encoding: [0xe6,0x0f,0x31,0xd5]
-// CHECK: mrs x24, trcoslsr // encoding: [0x98,0x11,0x31,0xd5]
-// CHECK: mrs x18, trcpdsr // encoding: [0x92,0x15,0x31,0xd5]
-// CHECK: mrs x28, trcdevaff0 // encoding: [0xdc,0x7a,0x31,0xd5]
-// CHECK: mrs x5, trcdevaff1 // encoding: [0xc5,0x7b,0x31,0xd5]
-// CHECK: mrs x5, trclsr // encoding: [0xc5,0x7d,0x31,0xd5]
-// CHECK: mrs x11, trcauthstatus // encoding: [0xcb,0x7e,0x31,0xd5]
-// CHECK: mrs x13, trcdevarch // encoding: [0xcd,0x7f,0x31,0xd5]
-// CHECK: mrs x18, trcdevid // encoding: [0xf2,0x72,0x31,0xd5]
-// CHECK: mrs x22, trcdevtype // encoding: [0xf6,0x73,0x31,0xd5]
-// CHECK: mrs x14, trcpidr4 // encoding: [0xee,0x74,0x31,0xd5]
-// CHECK: mrs x5, trcpidr5 // encoding: [0xe5,0x75,0x31,0xd5]
-// CHECK: mrs x5, trcpidr6 // encoding: [0xe5,0x76,0x31,0xd5]
-// CHECK: mrs x9, trcpidr7 // encoding: [0xe9,0x77,0x31,0xd5]
-// CHECK: mrs x15, trcpidr0 // encoding: [0xef,0x78,0x31,0xd5]
-// CHECK: mrs x6, trcpidr1 // encoding: [0xe6,0x79,0x31,0xd5]
-// CHECK: mrs x11, trcpidr2 // encoding: [0xeb,0x7a,0x31,0xd5]
-// CHECK: mrs x20, trcpidr3 // encoding: [0xf4,0x7b,0x31,0xd5]
-// CHECK: mrs x17, trccidr0 // encoding: [0xf1,0x7c,0x31,0xd5]
-// CHECK: mrs x2, trccidr1 // encoding: [0xe2,0x7d,0x31,0xd5]
-// CHECK: mrs x20, trccidr2 // encoding: [0xf4,0x7e,0x31,0xd5]
-// CHECK: mrs x4, trccidr3 // encoding: [0xe4,0x7f,0x31,0xd5]
-// CHECK: mrs x11, trcprgctlr // encoding: [0x0b,0x01,0x31,0xd5]
-// CHECK: mrs x23, trcprocselr // encoding: [0x17,0x02,0x31,0xd5]
-// CHECK: mrs x13, trcconfigr // encoding: [0x0d,0x04,0x31,0xd5]
-// CHECK: mrs x23, trcauxctlr // encoding: [0x17,0x06,0x31,0xd5]
-// CHECK: mrs x9, trceventctl0r // encoding: [0x09,0x08,0x31,0xd5]
-// CHECK: mrs x16, trceventctl1r // encoding: [0x10,0x09,0x31,0xd5]
-// CHECK: mrs x4, trcstallctlr // encoding: [0x04,0x0b,0x31,0xd5]
-// CHECK: mrs x14, trctsctlr // encoding: [0x0e,0x0c,0x31,0xd5]
-// CHECK: mrs x24, trcsyncpr // encoding: [0x18,0x0d,0x31,0xd5]
-// CHECK: mrs x28, trcccctlr // encoding: [0x1c,0x0e,0x31,0xd5]
-// CHECK: mrs x15, trcbbctlr // encoding: [0x0f,0x0f,0x31,0xd5]
-// CHECK: mrs x1, trctraceidr // encoding: [0x21,0x00,0x31,0xd5]
-// CHECK: mrs x20, trcqctlr // encoding: [0x34,0x01,0x31,0xd5]
-// CHECK: mrs x2, trcvictlr // encoding: [0x42,0x00,0x31,0xd5]
-// CHECK: mrs x12, trcviiectlr // encoding: [0x4c,0x01,0x31,0xd5]
-// CHECK: mrs x16, trcvissctlr // encoding: [0x50,0x02,0x31,0xd5]
-// CHECK: mrs x8, trcvipcssctlr // encoding: [0x48,0x03,0x31,0xd5]
-// CHECK: mrs x27, trcvdctlr // encoding: [0x5b,0x08,0x31,0xd5]
-// CHECK: mrs x9, trcvdsacctlr // encoding: [0x49,0x09,0x31,0xd5]
-// CHECK: mrs x0, trcvdarcctlr // encoding: [0x40,0x0a,0x31,0xd5]
-// CHECK: mrs x13, trcseqevr0 // encoding: [0x8d,0x00,0x31,0xd5]
-// CHECK: mrs x11, trcseqevr1 // encoding: [0x8b,0x01,0x31,0xd5]
-// CHECK: mrs x26, trcseqevr2 // encoding: [0x9a,0x02,0x31,0xd5]
-// CHECK: mrs x14, trcseqrstevr // encoding: [0x8e,0x06,0x31,0xd5]
-// CHECK: mrs x4, trcseqstr // encoding: [0x84,0x07,0x31,0xd5]
-// CHECK: mrs x17, trcextinselr // encoding: [0x91,0x08,0x31,0xd5]
-// CHECK: mrs x21, trccntrldvr0 // encoding: [0xb5,0x00,0x31,0xd5]
-// CHECK: mrs x10, trccntrldvr1 // encoding: [0xaa,0x01,0x31,0xd5]
-// CHECK: mrs x20, trccntrldvr2 // encoding: [0xb4,0x02,0x31,0xd5]
-// CHECK: mrs x5, trccntrldvr3 // encoding: [0xa5,0x03,0x31,0xd5]
-// CHECK: mrs x17, trccntctlr0 // encoding: [0xb1,0x04,0x31,0xd5]
-// CHECK: mrs x1, trccntctlr1 // encoding: [0xa1,0x05,0x31,0xd5]
-// CHECK: mrs x17, trccntctlr2 // encoding: [0xb1,0x06,0x31,0xd5]
-// CHECK: mrs x6, trccntctlr3 // encoding: [0xa6,0x07,0x31,0xd5]
-// CHECK: mrs x28, trccntvr0 // encoding: [0xbc,0x08,0x31,0xd5]
-// CHECK: mrs x23, trccntvr1 // encoding: [0xb7,0x09,0x31,0xd5]
-// CHECK: mrs x9, trccntvr2 // encoding: [0xa9,0x0a,0x31,0xd5]
-// CHECK: mrs x6, trccntvr3 // encoding: [0xa6,0x0b,0x31,0xd5]
-// CHECK: mrs x24, trcimspec0 // encoding: [0xf8,0x00,0x31,0xd5]
-// CHECK: mrs x24, trcimspec1 // encoding: [0xf8,0x01,0x31,0xd5]
-// CHECK: mrs x15, trcimspec2 // encoding: [0xef,0x02,0x31,0xd5]
-// CHECK: mrs x10, trcimspec3 // encoding: [0xea,0x03,0x31,0xd5]
-// CHECK: mrs x29, trcimspec4 // encoding: [0xfd,0x04,0x31,0xd5]
-// CHECK: mrs x18, trcimspec5 // encoding: [0xf2,0x05,0x31,0xd5]
-// CHECK: mrs x29, trcimspec6 // encoding: [0xfd,0x06,0x31,0xd5]
-// CHECK: mrs x2, trcimspec7 // encoding: [0xe2,0x07,0x31,0xd5]
-// CHECK: mrs x8, trcrsctlr2 // encoding: [0x08,0x12,0x31,0xd5]
-// CHECK: mrs x0, trcrsctlr3 // encoding: [0x00,0x13,0x31,0xd5]
-// CHECK: mrs x12, trcrsctlr4 // encoding: [0x0c,0x14,0x31,0xd5]
-// CHECK: mrs x26, trcrsctlr5 // encoding: [0x1a,0x15,0x31,0xd5]
-// CHECK: mrs x29, trcrsctlr6 // encoding: [0x1d,0x16,0x31,0xd5]
-// CHECK: mrs x17, trcrsctlr7 // encoding: [0x11,0x17,0x31,0xd5]
-// CHECK: mrs x0, trcrsctlr8 // encoding: [0x00,0x18,0x31,0xd5]
-// CHECK: mrs x1, trcrsctlr9 // encoding: [0x01,0x19,0x31,0xd5]
-// CHECK: mrs x17, trcrsctlr10 // encoding: [0x11,0x1a,0x31,0xd5]
-// CHECK: mrs x21, trcrsctlr11 // encoding: [0x15,0x1b,0x31,0xd5]
-// CHECK: mrs x1, trcrsctlr12 // encoding: [0x01,0x1c,0x31,0xd5]
-// CHECK: mrs x8, trcrsctlr13 // encoding: [0x08,0x1d,0x31,0xd5]
-// CHECK: mrs x24, trcrsctlr14 // encoding: [0x18,0x1e,0x31,0xd5]
-// CHECK: mrs x0, trcrsctlr15 // encoding: [0x00,0x1f,0x31,0xd5]
-// CHECK: mrs x2, trcrsctlr16 // encoding: [0x22,0x10,0x31,0xd5]
-// CHECK: mrs x29, trcrsctlr17 // encoding: [0x3d,0x11,0x31,0xd5]
-// CHECK: mrs x22, trcrsctlr18 // encoding: [0x36,0x12,0x31,0xd5]
-// CHECK: mrs x6, trcrsctlr19 // encoding: [0x26,0x13,0x31,0xd5]
-// CHECK: mrs x26, trcrsctlr20 // encoding: [0x3a,0x14,0x31,0xd5]
-// CHECK: mrs x26, trcrsctlr21 // encoding: [0x3a,0x15,0x31,0xd5]
-// CHECK: mrs x4, trcrsctlr22 // encoding: [0x24,0x16,0x31,0xd5]
-// CHECK: mrs x12, trcrsctlr23 // encoding: [0x2c,0x17,0x31,0xd5]
-// CHECK: mrs x1, trcrsctlr24 // encoding: [0x21,0x18,0x31,0xd5]
-// CHECK: mrs x0, trcrsctlr25 // encoding: [0x20,0x19,0x31,0xd5]
-// CHECK: mrs x17, trcrsctlr26 // encoding: [0x31,0x1a,0x31,0xd5]
-// CHECK: mrs x8, trcrsctlr27 // encoding: [0x28,0x1b,0x31,0xd5]
-// CHECK: mrs x10, trcrsctlr28 // encoding: [0x2a,0x1c,0x31,0xd5]
-// CHECK: mrs x25, trcrsctlr29 // encoding: [0x39,0x1d,0x31,0xd5]
-// CHECK: mrs x12, trcrsctlr30 // encoding: [0x2c,0x1e,0x31,0xd5]
-// CHECK: mrs x11, trcrsctlr31 // encoding: [0x2b,0x1f,0x31,0xd5]
-// CHECK: mrs x18, trcssccr0 // encoding: [0x52,0x10,0x31,0xd5]
-// CHECK: mrs x12, trcssccr1 // encoding: [0x4c,0x11,0x31,0xd5]
-// CHECK: mrs x3, trcssccr2 // encoding: [0x43,0x12,0x31,0xd5]
-// CHECK: mrs x2, trcssccr3 // encoding: [0x42,0x13,0x31,0xd5]
-// CHECK: mrs x21, trcssccr4 // encoding: [0x55,0x14,0x31,0xd5]
-// CHECK: mrs x10, trcssccr5 // encoding: [0x4a,0x15,0x31,0xd5]
-// CHECK: mrs x22, trcssccr6 // encoding: [0x56,0x16,0x31,0xd5]
-// CHECK: mrs x23, trcssccr7 // encoding: [0x57,0x17,0x31,0xd5]
-// CHECK: mrs x23, trcsscsr0 // encoding: [0x57,0x18,0x31,0xd5]
-// CHECK: mrs x19, trcsscsr1 // encoding: [0x53,0x19,0x31,0xd5]
-// CHECK: mrs x25, trcsscsr2 // encoding: [0x59,0x1a,0x31,0xd5]
-// CHECK: mrs x17, trcsscsr3 // encoding: [0x51,0x1b,0x31,0xd5]
-// CHECK: mrs x19, trcsscsr4 // encoding: [0x53,0x1c,0x31,0xd5]
-// CHECK: mrs x11, trcsscsr5 // encoding: [0x4b,0x1d,0x31,0xd5]
-// CHECK: mrs x5, trcsscsr6 // encoding: [0x45,0x1e,0x31,0xd5]
-// CHECK: mrs x9, trcsscsr7 // encoding: [0x49,0x1f,0x31,0xd5]
-// CHECK: mrs x1, trcsspcicr0 // encoding: [0x61,0x10,0x31,0xd5]
-// CHECK: mrs x12, trcsspcicr1 // encoding: [0x6c,0x11,0x31,0xd5]
-// CHECK: mrs x21, trcsspcicr2 // encoding: [0x75,0x12,0x31,0xd5]
-// CHECK: mrs x11, trcsspcicr3 // encoding: [0x6b,0x13,0x31,0xd5]
-// CHECK: mrs x3, trcsspcicr4 // encoding: [0x63,0x14,0x31,0xd5]
-// CHECK: mrs x9, trcsspcicr5 // encoding: [0x69,0x15,0x31,0xd5]
-// CHECK: mrs x5, trcsspcicr6 // encoding: [0x65,0x16,0x31,0xd5]
-// CHECK: mrs x2, trcsspcicr7 // encoding: [0x62,0x17,0x31,0xd5]
-// CHECK: mrs x26, trcpdcr // encoding: [0x9a,0x14,0x31,0xd5]
-// CHECK: mrs x8, trcacvr0 // encoding: [0x08,0x20,0x31,0xd5]
-// CHECK: mrs x15, trcacvr1 // encoding: [0x0f,0x22,0x31,0xd5]
-// CHECK: mrs x19, trcacvr2 // encoding: [0x13,0x24,0x31,0xd5]
-// CHECK: mrs x8, trcacvr3 // encoding: [0x08,0x26,0x31,0xd5]
-// CHECK: mrs x28, trcacvr4 // encoding: [0x1c,0x28,0x31,0xd5]
-// CHECK: mrs x3, trcacvr5 // encoding: [0x03,0x2a,0x31,0xd5]
-// CHECK: mrs x25, trcacvr6 // encoding: [0x19,0x2c,0x31,0xd5]
-// CHECK: mrs x24, trcacvr7 // encoding: [0x18,0x2e,0x31,0xd5]
-// CHECK: mrs x6, trcacvr8 // encoding: [0x26,0x20,0x31,0xd5]
-// CHECK: mrs x3, trcacvr9 // encoding: [0x23,0x22,0x31,0xd5]
-// CHECK: mrs x24, trcacvr10 // encoding: [0x38,0x24,0x31,0xd5]
-// CHECK: mrs x3, trcacvr11 // encoding: [0x23,0x26,0x31,0xd5]
-// CHECK: mrs x12, trcacvr12 // encoding: [0x2c,0x28,0x31,0xd5]
-// CHECK: mrs x9, trcacvr13 // encoding: [0x29,0x2a,0x31,0xd5]
-// CHECK: mrs x14, trcacvr14 // encoding: [0x2e,0x2c,0x31,0xd5]
-// CHECK: mrs x3, trcacvr15 // encoding: [0x23,0x2e,0x31,0xd5]
-// CHECK: mrs x21, trcacatr0 // encoding: [0x55,0x20,0x31,0xd5]
-// CHECK: mrs x26, trcacatr1 // encoding: [0x5a,0x22,0x31,0xd5]
-// CHECK: mrs x8, trcacatr2 // encoding: [0x48,0x24,0x31,0xd5]
-// CHECK: mrs x22, trcacatr3 // encoding: [0x56,0x26,0x31,0xd5]
-// CHECK: mrs x6, trcacatr4 // encoding: [0x46,0x28,0x31,0xd5]
-// CHECK: mrs x29, trcacatr5 // encoding: [0x5d,0x2a,0x31,0xd5]
-// CHECK: mrs x5, trcacatr6 // encoding: [0x45,0x2c,0x31,0xd5]
-// CHECK: mrs x18, trcacatr7 // encoding: [0x52,0x2e,0x31,0xd5]
-// CHECK: mrs x2, trcacatr8 // encoding: [0x62,0x20,0x31,0xd5]
-// CHECK: mrs x19, trcacatr9 // encoding: [0x73,0x22,0x31,0xd5]
-// CHECK: mrs x13, trcacatr10 // encoding: [0x6d,0x24,0x31,0xd5]
-// CHECK: mrs x25, trcacatr11 // encoding: [0x79,0x26,0x31,0xd5]
-// CHECK: mrs x18, trcacatr12 // encoding: [0x72,0x28,0x31,0xd5]
-// CHECK: mrs x29, trcacatr13 // encoding: [0x7d,0x2a,0x31,0xd5]
-// CHECK: mrs x9, trcacatr14 // encoding: [0x69,0x2c,0x31,0xd5]
-// CHECK: mrs x18, trcacatr15 // encoding: [0x72,0x2e,0x31,0xd5]
-// CHECK: mrs x29, trcdvcvr0 // encoding: [0x9d,0x20,0x31,0xd5]
-// CHECK: mrs x15, trcdvcvr1 // encoding: [0x8f,0x24,0x31,0xd5]
-// CHECK: mrs x15, trcdvcvr2 // encoding: [0x8f,0x28,0x31,0xd5]
-// CHECK: mrs x15, trcdvcvr3 // encoding: [0x8f,0x2c,0x31,0xd5]
-// CHECK: mrs x19, trcdvcvr4 // encoding: [0xb3,0x20,0x31,0xd5]
-// CHECK: mrs x22, trcdvcvr5 // encoding: [0xb6,0x24,0x31,0xd5]
-// CHECK: mrs x27, trcdvcvr6 // encoding: [0xbb,0x28,0x31,0xd5]
-// CHECK: mrs x1, trcdvcvr7 // encoding: [0xa1,0x2c,0x31,0xd5]
-// CHECK: mrs x29, trcdvcmr0 // encoding: [0xdd,0x20,0x31,0xd5]
-// CHECK: mrs x9, trcdvcmr1 // encoding: [0xc9,0x24,0x31,0xd5]
-// CHECK: mrs x1, trcdvcmr2 // encoding: [0xc1,0x28,0x31,0xd5]
-// CHECK: mrs x2, trcdvcmr3 // encoding: [0xc2,0x2c,0x31,0xd5]
-// CHECK: mrs x5, trcdvcmr4 // encoding: [0xe5,0x20,0x31,0xd5]
-// CHECK: mrs x21, trcdvcmr5 // encoding: [0xf5,0x24,0x31,0xd5]
-// CHECK: mrs x5, trcdvcmr6 // encoding: [0xe5,0x28,0x31,0xd5]
-// CHECK: mrs x1, trcdvcmr7 // encoding: [0xe1,0x2c,0x31,0xd5]
-// CHECK: mrs x21, trccidcvr0 // encoding: [0x15,0x30,0x31,0xd5]
-// CHECK: mrs x24, trccidcvr1 // encoding: [0x18,0x32,0x31,0xd5]
-// CHECK: mrs x24, trccidcvr2 // encoding: [0x18,0x34,0x31,0xd5]
-// CHECK: mrs x12, trccidcvr3 // encoding: [0x0c,0x36,0x31,0xd5]
-// CHECK: mrs x10, trccidcvr4 // encoding: [0x0a,0x38,0x31,0xd5]
-// CHECK: mrs x9, trccidcvr5 // encoding: [0x09,0x3a,0x31,0xd5]
-// CHECK: mrs x6, trccidcvr6 // encoding: [0x06,0x3c,0x31,0xd5]
-// CHECK: mrs x20, trccidcvr7 // encoding: [0x14,0x3e,0x31,0xd5]
-// CHECK: mrs x20, trcvmidcvr0 // encoding: [0x34,0x30,0x31,0xd5]
-// CHECK: mrs x20, trcvmidcvr1 // encoding: [0x34,0x32,0x31,0xd5]
-// CHECK: mrs x26, trcvmidcvr2 // encoding: [0x3a,0x34,0x31,0xd5]
-// CHECK: mrs x1, trcvmidcvr3 // encoding: [0x21,0x36,0x31,0xd5]
-// CHECK: mrs x14, trcvmidcvr4 // encoding: [0x2e,0x38,0x31,0xd5]
-// CHECK: mrs x27, trcvmidcvr5 // encoding: [0x3b,0x3a,0x31,0xd5]
-// CHECK: mrs x29, trcvmidcvr6 // encoding: [0x3d,0x3c,0x31,0xd5]
-// CHECK: mrs x17, trcvmidcvr7 // encoding: [0x31,0x3e,0x31,0xd5]
-// CHECK: mrs x10, trccidcctlr0 // encoding: [0x4a,0x30,0x31,0xd5]
-// CHECK: mrs x4, trccidcctlr1 // encoding: [0x44,0x31,0x31,0xd5]
-// CHECK: mrs x9, trcvmidcctlr0 // encoding: [0x49,0x32,0x31,0xd5]
-// CHECK: mrs x11, trcvmidcctlr1 // encoding: [0x4b,0x33,0x31,0xd5]
-// CHECK: mrs x22, trcitctrl // encoding: [0x96,0x70,0x31,0xd5]
-// CHECK: mrs x23, trcclaimset // encoding: [0xd7,0x78,0x31,0xd5]
-// CHECK: mrs x14, trcclaimclr // encoding: [0xce,0x79,0x31,0xd5]
+// CHECK: mrs x8, {{trcstatr|TRCSTATR}} // encoding: [0x08,0x03,0x31,0xd5]
+// CHECK: mrs x9, {{trcidr8|TRCIDR8}} // encoding: [0xc9,0x00,0x31,0xd5]
+// CHECK: mrs x11, {{trcidr9|TRCIDR9}} // encoding: [0xcb,0x01,0x31,0xd5]
+// CHECK: mrs x25, {{trcidr10|TRCIDR10}} // encoding: [0xd9,0x02,0x31,0xd5]
+// CHECK: mrs x7, {{trcidr11|TRCIDR11}} // encoding: [0xc7,0x03,0x31,0xd5]
+// CHECK: mrs x7, {{trcidr12|TRCIDR12}} // encoding: [0xc7,0x04,0x31,0xd5]
+// CHECK: mrs x6, {{trcidr13|TRCIDR13}} // encoding: [0xc6,0x05,0x31,0xd5]
+// CHECK: mrs x27, {{trcidr0|TRCIDR0}} // encoding: [0xfb,0x08,0x31,0xd5]
+// CHECK: mrs x29, {{trcidr1|TRCIDR1}} // encoding: [0xfd,0x09,0x31,0xd5]
+// CHECK: mrs x4, {{trcidr2|TRCIDR2}} // encoding: [0xe4,0x0a,0x31,0xd5]
+// CHECK: mrs x8, {{trcidr3|TRCIDR3}} // encoding: [0xe8,0x0b,0x31,0xd5]
+// CHECK: mrs x15, {{trcidr4|TRCIDR4}} // encoding: [0xef,0x0c,0x31,0xd5]
+// CHECK: mrs x20, {{trcidr5|TRCIDR5}} // encoding: [0xf4,0x0d,0x31,0xd5]
+// CHECK: mrs x6, {{trcidr6|TRCIDR6}} // encoding: [0xe6,0x0e,0x31,0xd5]
+// CHECK: mrs x6, {{trcidr7|TRCIDR7}} // encoding: [0xe6,0x0f,0x31,0xd5]
+// CHECK: mrs x24, {{trcoslsr|TRCOSLSR}} // encoding: [0x98,0x11,0x31,0xd5]
+// CHECK: mrs x18, {{trcpdsr|TRCPDSR}} // encoding: [0x92,0x15,0x31,0xd5]
+// CHECK: mrs x28, {{trcdevaff0|TRCDEVAFF0}} // encoding: [0xdc,0x7a,0x31,0xd5]
+// CHECK: mrs x5, {{trcdevaff1|TRCDEVAFF1}} // encoding: [0xc5,0x7b,0x31,0xd5]
+// CHECK: mrs x5, {{trclsr|TRCLSR}} // encoding: [0xc5,0x7d,0x31,0xd5]
+// CHECK: mrs x11, {{trcauthstatus|TRCAUTHSTATUS}} // encoding: [0xcb,0x7e,0x31,0xd5]
+// CHECK: mrs x13, {{trcdevarch|TRCDEVARCH}} // encoding: [0xcd,0x7f,0x31,0xd5]
+// CHECK: mrs x18, {{trcdevid|TRCDEVID}} // encoding: [0xf2,0x72,0x31,0xd5]
+// CHECK: mrs x22, {{trcdevtype|TRCDEVTYPE}} // encoding: [0xf6,0x73,0x31,0xd5]
+// CHECK: mrs x14, {{trcpidr4|TRCPIDR4}} // encoding: [0xee,0x74,0x31,0xd5]
+// CHECK: mrs x5, {{trcpidr5|TRCPIDR5}} // encoding: [0xe5,0x75,0x31,0xd5]
+// CHECK: mrs x5, {{trcpidr6|TRCPIDR6}} // encoding: [0xe5,0x76,0x31,0xd5]
+// CHECK: mrs x9, {{trcpidr7|TRCPIDR7}} // encoding: [0xe9,0x77,0x31,0xd5]
+// CHECK: mrs x15, {{trcpidr0|TRCPIDR0}} // encoding: [0xef,0x78,0x31,0xd5]
+// CHECK: mrs x6, {{trcpidr1|TRCPIDR1}} // encoding: [0xe6,0x79,0x31,0xd5]
+// CHECK: mrs x11, {{trcpidr2|TRCPIDR2}} // encoding: [0xeb,0x7a,0x31,0xd5]
+// CHECK: mrs x20, {{trcpidr3|TRCPIDR3}} // encoding: [0xf4,0x7b,0x31,0xd5]
+// CHECK: mrs x17, {{trccidr0|TRCCIDR0}} // encoding: [0xf1,0x7c,0x31,0xd5]
+// CHECK: mrs x2, {{trccidr1|TRCCIDR1}} // encoding: [0xe2,0x7d,0x31,0xd5]
+// CHECK: mrs x20, {{trccidr2|TRCCIDR2}} // encoding: [0xf4,0x7e,0x31,0xd5]
+// CHECK: mrs x4, {{trccidr3|TRCCIDR3}} // encoding: [0xe4,0x7f,0x31,0xd5]
+// CHECK: mrs x11, {{trcprgctlr|TRCPRGCTLR}} // encoding: [0x0b,0x01,0x31,0xd5]
+// CHECK: mrs x23, {{trcprocselr|TRCPROCSELR}} // encoding: [0x17,0x02,0x31,0xd5]
+// CHECK: mrs x13, {{trcconfigr|TRCCONFIGR}} // encoding: [0x0d,0x04,0x31,0xd5]
+// CHECK: mrs x23, {{trcauxctlr|TRCAUXCTLR}} // encoding: [0x17,0x06,0x31,0xd5]
+// CHECK: mrs x9, {{trceventctl0r|TRCEVENTCTL0R}} // encoding: [0x09,0x08,0x31,0xd5]
+// CHECK: mrs x16, {{trceventctl1r|TRCEVENTCTL1R}} // encoding: [0x10,0x09,0x31,0xd5]
+// CHECK: mrs x4, {{trcstallctlr|TRCSTALLCTLR}} // encoding: [0x04,0x0b,0x31,0xd5]
+// CHECK: mrs x14, {{trctsctlr|TRCTSCTLR}} // encoding: [0x0e,0x0c,0x31,0xd5]
+// CHECK: mrs x24, {{trcsyncpr|TRCSYNCPR}} // encoding: [0x18,0x0d,0x31,0xd5]
+// CHECK: mrs x28, {{trcccctlr|TRCCCCTLR}} // encoding: [0x1c,0x0e,0x31,0xd5]
+// CHECK: mrs x15, {{trcbbctlr|TRCBBCTLR}} // encoding: [0x0f,0x0f,0x31,0xd5]
+// CHECK: mrs x1, {{trctraceidr|TRCTRACEIDR}} // encoding: [0x21,0x00,0x31,0xd5]
+// CHECK: mrs x20, {{trcqctlr|TRCQCTLR}} // encoding: [0x34,0x01,0x31,0xd5]
+// CHECK: mrs x2, {{trcvictlr|TRCVICTLR}} // encoding: [0x42,0x00,0x31,0xd5]
+// CHECK: mrs x12, {{trcviiectlr|TRCVIIECTLR}} // encoding: [0x4c,0x01,0x31,0xd5]
+// CHECK: mrs x16, {{trcvissctlr|TRCVISSCTLR}} // encoding: [0x50,0x02,0x31,0xd5]
+// CHECK: mrs x8, {{trcvipcssctlr|TRCVIPCSSCTLR}} // encoding: [0x48,0x03,0x31,0xd5]
+// CHECK: mrs x27, {{trcvdctlr|TRCVDCTLR}} // encoding: [0x5b,0x08,0x31,0xd5]
+// CHECK: mrs x9, {{trcvdsacctlr|TRCVDSACCTLR}} // encoding: [0x49,0x09,0x31,0xd5]
+// CHECK: mrs x0, {{trcvdarcctlr|TRCVDARCCTLR}} // encoding: [0x40,0x0a,0x31,0xd5]
+// CHECK: mrs x13, {{trcseqevr0|TRCSEQEVR0}} // encoding: [0x8d,0x00,0x31,0xd5]
+// CHECK: mrs x11, {{trcseqevr1|TRCSEQEVR1}} // encoding: [0x8b,0x01,0x31,0xd5]
+// CHECK: mrs x26, {{trcseqevr2|TRCSEQEVR2}} // encoding: [0x9a,0x02,0x31,0xd5]
+// CHECK: mrs x14, {{trcseqrstevr|TRCSEQRSTEVR}} // encoding: [0x8e,0x06,0x31,0xd5]
+// CHECK: mrs x4, {{trcseqstr|TRCSEQSTR}} // encoding: [0x84,0x07,0x31,0xd5]
+// CHECK: mrs x17, {{trcextinselr|TRCEXTINSELR}} // encoding: [0x91,0x08,0x31,0xd5]
+// CHECK: mrs x21, {{trccntrldvr0|TRCCNTRLDVR0}} // encoding: [0xb5,0x00,0x31,0xd5]
+// CHECK: mrs x10, {{trccntrldvr1|TRCCNTRLDVR1}} // encoding: [0xaa,0x01,0x31,0xd5]
+// CHECK: mrs x20, {{trccntrldvr2|TRCCNTRLDVR2}} // encoding: [0xb4,0x02,0x31,0xd5]
+// CHECK: mrs x5, {{trccntrldvr3|TRCCNTRLDVR3}} // encoding: [0xa5,0x03,0x31,0xd5]
+// CHECK: mrs x17, {{trccntctlr0|TRCCNTCTLR0}} // encoding: [0xb1,0x04,0x31,0xd5]
+// CHECK: mrs x1, {{trccntctlr1|TRCCNTCTLR1}} // encoding: [0xa1,0x05,0x31,0xd5]
+// CHECK: mrs x17, {{trccntctlr2|TRCCNTCTLR2}} // encoding: [0xb1,0x06,0x31,0xd5]
+// CHECK: mrs x6, {{trccntctlr3|TRCCNTCTLR3}} // encoding: [0xa6,0x07,0x31,0xd5]
+// CHECK: mrs x28, {{trccntvr0|TRCCNTVR0}} // encoding: [0xbc,0x08,0x31,0xd5]
+// CHECK: mrs x23, {{trccntvr1|TRCCNTVR1}} // encoding: [0xb7,0x09,0x31,0xd5]
+// CHECK: mrs x9, {{trccntvr2|TRCCNTVR2}} // encoding: [0xa9,0x0a,0x31,0xd5]
+// CHECK: mrs x6, {{trccntvr3|TRCCNTVR3}} // encoding: [0xa6,0x0b,0x31,0xd5]
+// CHECK: mrs x24, {{trcimspec0|TRCIMSPEC0}} // encoding: [0xf8,0x00,0x31,0xd5]
+// CHECK: mrs x24, {{trcimspec1|TRCIMSPEC1}} // encoding: [0xf8,0x01,0x31,0xd5]
+// CHECK: mrs x15, {{trcimspec2|TRCIMSPEC2}} // encoding: [0xef,0x02,0x31,0xd5]
+// CHECK: mrs x10, {{trcimspec3|TRCIMSPEC3}} // encoding: [0xea,0x03,0x31,0xd5]
+// CHECK: mrs x29, {{trcimspec4|TRCIMSPEC4}} // encoding: [0xfd,0x04,0x31,0xd5]
+// CHECK: mrs x18, {{trcimspec5|TRCIMSPEC5}} // encoding: [0xf2,0x05,0x31,0xd5]
+// CHECK: mrs x29, {{trcimspec6|TRCIMSPEC6}} // encoding: [0xfd,0x06,0x31,0xd5]
+// CHECK: mrs x2, {{trcimspec7|TRCIMSPEC7}} // encoding: [0xe2,0x07,0x31,0xd5]
+// CHECK: mrs x8, {{trcrsctlr2|TRCRSCTLR2}} // encoding: [0x08,0x12,0x31,0xd5]
+// CHECK: mrs x0, {{trcrsctlr3|TRCRSCTLR3}} // encoding: [0x00,0x13,0x31,0xd5]
+// CHECK: mrs x12, {{trcrsctlr4|TRCRSCTLR4}} // encoding: [0x0c,0x14,0x31,0xd5]
+// CHECK: mrs x26, {{trcrsctlr5|TRCRSCTLR5}} // encoding: [0x1a,0x15,0x31,0xd5]
+// CHECK: mrs x29, {{trcrsctlr6|TRCRSCTLR6}} // encoding: [0x1d,0x16,0x31,0xd5]
+// CHECK: mrs x17, {{trcrsctlr7|TRCRSCTLR7}} // encoding: [0x11,0x17,0x31,0xd5]
+// CHECK: mrs x0, {{trcrsctlr8|TRCRSCTLR8}} // encoding: [0x00,0x18,0x31,0xd5]
+// CHECK: mrs x1, {{trcrsctlr9|TRCRSCTLR9}} // encoding: [0x01,0x19,0x31,0xd5]
+// CHECK: mrs x17, {{trcrsctlr10|TRCRSCTLR10}} // encoding: [0x11,0x1a,0x31,0xd5]
+// CHECK: mrs x21, {{trcrsctlr11|TRCRSCTLR11}} // encoding: [0x15,0x1b,0x31,0xd5]
+// CHECK: mrs x1, {{trcrsctlr12|TRCRSCTLR12}} // encoding: [0x01,0x1c,0x31,0xd5]
+// CHECK: mrs x8, {{trcrsctlr13|TRCRSCTLR13}} // encoding: [0x08,0x1d,0x31,0xd5]
+// CHECK: mrs x24, {{trcrsctlr14|TRCRSCTLR14}} // encoding: [0x18,0x1e,0x31,0xd5]
+// CHECK: mrs x0, {{trcrsctlr15|TRCRSCTLR15}} // encoding: [0x00,0x1f,0x31,0xd5]
+// CHECK: mrs x2, {{trcrsctlr16|TRCRSCTLR16}} // encoding: [0x22,0x10,0x31,0xd5]
+// CHECK: mrs x29, {{trcrsctlr17|TRCRSCTLR17}} // encoding: [0x3d,0x11,0x31,0xd5]
+// CHECK: mrs x22, {{trcrsctlr18|TRCRSCTLR18}} // encoding: [0x36,0x12,0x31,0xd5]
+// CHECK: mrs x6, {{trcrsctlr19|TRCRSCTLR19}} // encoding: [0x26,0x13,0x31,0xd5]
+// CHECK: mrs x26, {{trcrsctlr20|TRCRSCTLR20}} // encoding: [0x3a,0x14,0x31,0xd5]
+// CHECK: mrs x26, {{trcrsctlr21|TRCRSCTLR21}} // encoding: [0x3a,0x15,0x31,0xd5]
+// CHECK: mrs x4, {{trcrsctlr22|TRCRSCTLR22}} // encoding: [0x24,0x16,0x31,0xd5]
+// CHECK: mrs x12, {{trcrsctlr23|TRCRSCTLR23}} // encoding: [0x2c,0x17,0x31,0xd5]
+// CHECK: mrs x1, {{trcrsctlr24|TRCRSCTLR24}} // encoding: [0x21,0x18,0x31,0xd5]
+// CHECK: mrs x0, {{trcrsctlr25|TRCRSCTLR25}} // encoding: [0x20,0x19,0x31,0xd5]
+// CHECK: mrs x17, {{trcrsctlr26|TRCRSCTLR26}} // encoding: [0x31,0x1a,0x31,0xd5]
+// CHECK: mrs x8, {{trcrsctlr27|TRCRSCTLR27}} // encoding: [0x28,0x1b,0x31,0xd5]
+// CHECK: mrs x10, {{trcrsctlr28|TRCRSCTLR28}} // encoding: [0x2a,0x1c,0x31,0xd5]
+// CHECK: mrs x25, {{trcrsctlr29|TRCRSCTLR29}} // encoding: [0x39,0x1d,0x31,0xd5]
+// CHECK: mrs x12, {{trcrsctlr30|TRCRSCTLR30}} // encoding: [0x2c,0x1e,0x31,0xd5]
+// CHECK: mrs x11, {{trcrsctlr31|TRCRSCTLR31}} // encoding: [0x2b,0x1f,0x31,0xd5]
+// CHECK: mrs x18, {{trcssccr0|TRCSSCCR0}} // encoding: [0x52,0x10,0x31,0xd5]
+// CHECK: mrs x12, {{trcssccr1|TRCSSCCR1}} // encoding: [0x4c,0x11,0x31,0xd5]
+// CHECK: mrs x3, {{trcssccr2|TRCSSCCR2}} // encoding: [0x43,0x12,0x31,0xd5]
+// CHECK: mrs x2, {{trcssccr3|TRCSSCCR3}} // encoding: [0x42,0x13,0x31,0xd5]
+// CHECK: mrs x21, {{trcssccr4|TRCSSCCR4}} // encoding: [0x55,0x14,0x31,0xd5]
+// CHECK: mrs x10, {{trcssccr5|TRCSSCCR5}} // encoding: [0x4a,0x15,0x31,0xd5]
+// CHECK: mrs x22, {{trcssccr6|TRCSSCCR6}} // encoding: [0x56,0x16,0x31,0xd5]
+// CHECK: mrs x23, {{trcssccr7|TRCSSCCR7}} // encoding: [0x57,0x17,0x31,0xd5]
+// CHECK: mrs x23, {{trcsscsr0|TRCSSCSR0}} // encoding: [0x57,0x18,0x31,0xd5]
+// CHECK: mrs x19, {{trcsscsr1|TRCSSCSR1}} // encoding: [0x53,0x19,0x31,0xd5]
+// CHECK: mrs x25, {{trcsscsr2|TRCSSCSR2}} // encoding: [0x59,0x1a,0x31,0xd5]
+// CHECK: mrs x17, {{trcsscsr3|TRCSSCSR3}} // encoding: [0x51,0x1b,0x31,0xd5]
+// CHECK: mrs x19, {{trcsscsr4|TRCSSCSR4}} // encoding: [0x53,0x1c,0x31,0xd5]
+// CHECK: mrs x11, {{trcsscsr5|TRCSSCSR5}} // encoding: [0x4b,0x1d,0x31,0xd5]
+// CHECK: mrs x5, {{trcsscsr6|TRCSSCSR6}} // encoding: [0x45,0x1e,0x31,0xd5]
+// CHECK: mrs x9, {{trcsscsr7|TRCSSCSR7}} // encoding: [0x49,0x1f,0x31,0xd5]
+// CHECK: mrs x1, {{trcsspcicr0|TRCSSPCICR0}} // encoding: [0x61,0x10,0x31,0xd5]
+// CHECK: mrs x12, {{trcsspcicr1|TRCSSPCICR1}} // encoding: [0x6c,0x11,0x31,0xd5]
+// CHECK: mrs x21, {{trcsspcicr2|TRCSSPCICR2}} // encoding: [0x75,0x12,0x31,0xd5]
+// CHECK: mrs x11, {{trcsspcicr3|TRCSSPCICR3}} // encoding: [0x6b,0x13,0x31,0xd5]
+// CHECK: mrs x3, {{trcsspcicr4|TRCSSPCICR4}} // encoding: [0x63,0x14,0x31,0xd5]
+// CHECK: mrs x9, {{trcsspcicr5|TRCSSPCICR5}} // encoding: [0x69,0x15,0x31,0xd5]
+// CHECK: mrs x5, {{trcsspcicr6|TRCSSPCICR6}} // encoding: [0x65,0x16,0x31,0xd5]
+// CHECK: mrs x2, {{trcsspcicr7|TRCSSPCICR7}} // encoding: [0x62,0x17,0x31,0xd5]
+// CHECK: mrs x26, {{trcpdcr|TRCPDCR}} // encoding: [0x9a,0x14,0x31,0xd5]
+// CHECK: mrs x8, {{trcacvr0|TRCACVR0}} // encoding: [0x08,0x20,0x31,0xd5]
+// CHECK: mrs x15, {{trcacvr1|TRCACVR1}} // encoding: [0x0f,0x22,0x31,0xd5]
+// CHECK: mrs x19, {{trcacvr2|TRCACVR2}} // encoding: [0x13,0x24,0x31,0xd5]
+// CHECK: mrs x8, {{trcacvr3|TRCACVR3}} // encoding: [0x08,0x26,0x31,0xd5]
+// CHECK: mrs x28, {{trcacvr4|TRCACVR4}} // encoding: [0x1c,0x28,0x31,0xd5]
+// CHECK: mrs x3, {{trcacvr5|TRCACVR5}} // encoding: [0x03,0x2a,0x31,0xd5]
+// CHECK: mrs x25, {{trcacvr6|TRCACVR6}} // encoding: [0x19,0x2c,0x31,0xd5]
+// CHECK: mrs x24, {{trcacvr7|TRCACVR7}} // encoding: [0x18,0x2e,0x31,0xd5]
+// CHECK: mrs x6, {{trcacvr8|TRCACVR8}} // encoding: [0x26,0x20,0x31,0xd5]
+// CHECK: mrs x3, {{trcacvr9|TRCACVR9}} // encoding: [0x23,0x22,0x31,0xd5]
+// CHECK: mrs x24, {{trcacvr10|TRCACVR10}} // encoding: [0x38,0x24,0x31,0xd5]
+// CHECK: mrs x3, {{trcacvr11|TRCACVR11}} // encoding: [0x23,0x26,0x31,0xd5]
+// CHECK: mrs x12, {{trcacvr12|TRCACVR12}} // encoding: [0x2c,0x28,0x31,0xd5]
+// CHECK: mrs x9, {{trcacvr13|TRCACVR13}} // encoding: [0x29,0x2a,0x31,0xd5]
+// CHECK: mrs x14, {{trcacvr14|TRCACVR14}} // encoding: [0x2e,0x2c,0x31,0xd5]
+// CHECK: mrs x3, {{trcacvr15|TRCACVR15}} // encoding: [0x23,0x2e,0x31,0xd5]
+// CHECK: mrs x21, {{trcacatr0|TRCACATR0}} // encoding: [0x55,0x20,0x31,0xd5]
+// CHECK: mrs x26, {{trcacatr1|TRCACATR1}} // encoding: [0x5a,0x22,0x31,0xd5]
+// CHECK: mrs x8, {{trcacatr2|TRCACATR2}} // encoding: [0x48,0x24,0x31,0xd5]
+// CHECK: mrs x22, {{trcacatr3|TRCACATR3}} // encoding: [0x56,0x26,0x31,0xd5]
+// CHECK: mrs x6, {{trcacatr4|TRCACATR4}} // encoding: [0x46,0x28,0x31,0xd5]
+// CHECK: mrs x29, {{trcacatr5|TRCACATR5}} // encoding: [0x5d,0x2a,0x31,0xd5]
+// CHECK: mrs x5, {{trcacatr6|TRCACATR6}} // encoding: [0x45,0x2c,0x31,0xd5]
+// CHECK: mrs x18, {{trcacatr7|TRCACATR7}} // encoding: [0x52,0x2e,0x31,0xd5]
+// CHECK: mrs x2, {{trcacatr8|TRCACATR8}} // encoding: [0x62,0x20,0x31,0xd5]
+// CHECK: mrs x19, {{trcacatr9|TRCACATR9}} // encoding: [0x73,0x22,0x31,0xd5]
+// CHECK: mrs x13, {{trcacatr10|TRCACATR10}} // encoding: [0x6d,0x24,0x31,0xd5]
+// CHECK: mrs x25, {{trcacatr11|TRCACATR11}} // encoding: [0x79,0x26,0x31,0xd5]
+// CHECK: mrs x18, {{trcacatr12|TRCACATR12}} // encoding: [0x72,0x28,0x31,0xd5]
+// CHECK: mrs x29, {{trcacatr13|TRCACATR13}} // encoding: [0x7d,0x2a,0x31,0xd5]
+// CHECK: mrs x9, {{trcacatr14|TRCACATR14}} // encoding: [0x69,0x2c,0x31,0xd5]
+// CHECK: mrs x18, {{trcacatr15|TRCACATR15}} // encoding: [0x72,0x2e,0x31,0xd5]
+// CHECK: mrs x29, {{trcdvcvr0|TRCDVCVR0}} // encoding: [0x9d,0x20,0x31,0xd5]
+// CHECK: mrs x15, {{trcdvcvr1|TRCDVCVR1}} // encoding: [0x8f,0x24,0x31,0xd5]
+// CHECK: mrs x15, {{trcdvcvr2|TRCDVCVR2}} // encoding: [0x8f,0x28,0x31,0xd5]
+// CHECK: mrs x15, {{trcdvcvr3|TRCDVCVR3}} // encoding: [0x8f,0x2c,0x31,0xd5]
+// CHECK: mrs x19, {{trcdvcvr4|TRCDVCVR4}} // encoding: [0xb3,0x20,0x31,0xd5]
+// CHECK: mrs x22, {{trcdvcvr5|TRCDVCVR5}} // encoding: [0xb6,0x24,0x31,0xd5]
+// CHECK: mrs x27, {{trcdvcvr6|TRCDVCVR6}} // encoding: [0xbb,0x28,0x31,0xd5]
+// CHECK: mrs x1, {{trcdvcvr7|TRCDVCVR7}} // encoding: [0xa1,0x2c,0x31,0xd5]
+// CHECK: mrs x29, {{trcdvcmr0|TRCDVCMR0}} // encoding: [0xdd,0x20,0x31,0xd5]
+// CHECK: mrs x9, {{trcdvcmr1|TRCDVCMR1}} // encoding: [0xc9,0x24,0x31,0xd5]
+// CHECK: mrs x1, {{trcdvcmr2|TRCDVCMR2}} // encoding: [0xc1,0x28,0x31,0xd5]
+// CHECK: mrs x2, {{trcdvcmr3|TRCDVCMR3}} // encoding: [0xc2,0x2c,0x31,0xd5]
+// CHECK: mrs x5, {{trcdvcmr4|TRCDVCMR4}} // encoding: [0xe5,0x20,0x31,0xd5]
+// CHECK: mrs x21, {{trcdvcmr5|TRCDVCMR5}} // encoding: [0xf5,0x24,0x31,0xd5]
+// CHECK: mrs x5, {{trcdvcmr6|TRCDVCMR6}} // encoding: [0xe5,0x28,0x31,0xd5]
+// CHECK: mrs x1, {{trcdvcmr7|TRCDVCMR7}} // encoding: [0xe1,0x2c,0x31,0xd5]
+// CHECK: mrs x21, {{trccidcvr0|TRCCIDCVR0}} // encoding: [0x15,0x30,0x31,0xd5]
+// CHECK: mrs x24, {{trccidcvr1|TRCCIDCVR1}} // encoding: [0x18,0x32,0x31,0xd5]
+// CHECK: mrs x24, {{trccidcvr2|TRCCIDCVR2}} // encoding: [0x18,0x34,0x31,0xd5]
+// CHECK: mrs x12, {{trccidcvr3|TRCCIDCVR3}} // encoding: [0x0c,0x36,0x31,0xd5]
+// CHECK: mrs x10, {{trccidcvr4|TRCCIDCVR4}} // encoding: [0x0a,0x38,0x31,0xd5]
+// CHECK: mrs x9, {{trccidcvr5|TRCCIDCVR5}} // encoding: [0x09,0x3a,0x31,0xd5]
+// CHECK: mrs x6, {{trccidcvr6|TRCCIDCVR6}} // encoding: [0x06,0x3c,0x31,0xd5]
+// CHECK: mrs x20, {{trccidcvr7|TRCCIDCVR7}} // encoding: [0x14,0x3e,0x31,0xd5]
+// CHECK: mrs x20, {{trcvmidcvr0|TRCVMIDCVR0}} // encoding: [0x34,0x30,0x31,0xd5]
+// CHECK: mrs x20, {{trcvmidcvr1|TRCVMIDCVR1}} // encoding: [0x34,0x32,0x31,0xd5]
+// CHECK: mrs x26, {{trcvmidcvr2|TRCVMIDCVR2}} // encoding: [0x3a,0x34,0x31,0xd5]
+// CHECK: mrs x1, {{trcvmidcvr3|TRCVMIDCVR3}} // encoding: [0x21,0x36,0x31,0xd5]
+// CHECK: mrs x14, {{trcvmidcvr4|TRCVMIDCVR4}} // encoding: [0x2e,0x38,0x31,0xd5]
+// CHECK: mrs x27, {{trcvmidcvr5|TRCVMIDCVR5}} // encoding: [0x3b,0x3a,0x31,0xd5]
+// CHECK: mrs x29, {{trcvmidcvr6|TRCVMIDCVR6}} // encoding: [0x3d,0x3c,0x31,0xd5]
+// CHECK: mrs x17, {{trcvmidcvr7|TRCVMIDCVR7}} // encoding: [0x31,0x3e,0x31,0xd5]
+// CHECK: mrs x10, {{trccidcctlr0|TRCCIDCCTLR0}} // encoding: [0x4a,0x30,0x31,0xd5]
+// CHECK: mrs x4, {{trccidcctlr1|TRCCIDCCTLR1}} // encoding: [0x44,0x31,0x31,0xd5]
+// CHECK: mrs x9, {{trcvmidcctlr0|TRCVMIDCCTLR0}} // encoding: [0x49,0x32,0x31,0xd5]
+// CHECK: mrs x11, {{trcvmidcctlr1|TRCVMIDCCTLR1}} // encoding: [0x4b,0x33,0x31,0xd5]
+// CHECK: mrs x22, {{trcitctrl|TRCITCTRL}} // encoding: [0x96,0x70,0x31,0xd5]
+// CHECK: mrs x23, {{trcclaimset|TRCCLAIMSET}} // encoding: [0xd7,0x78,0x31,0xd5]
+// CHECK: mrs x14, {{trcclaimclr|TRCCLAIMCLR}} // encoding: [0xce,0x79,0x31,0xd5]
msr trcoslar, x28
msr trclar, x14
@@ -590,177 +591,177 @@
msr trcitctrl, x1
msr trcclaimset, x7
msr trcclaimclr, x29
-// CHECK: msr trcoslar, x28 // encoding: [0x9c,0x10,0x11,0xd5]
-// CHECK: msr trclar, x14 // encoding: [0xce,0x7c,0x11,0xd5]
-// CHECK: msr trcprgctlr, x10 // encoding: [0x0a,0x01,0x11,0xd5]
-// CHECK: msr trcprocselr, x27 // encoding: [0x1b,0x02,0x11,0xd5]
-// CHECK: msr trcconfigr, x24 // encoding: [0x18,0x04,0x11,0xd5]
-// CHECK: msr trcauxctlr, x8 // encoding: [0x08,0x06,0x11,0xd5]
-// CHECK: msr trceventctl0r, x16 // encoding: [0x10,0x08,0x11,0xd5]
-// CHECK: msr trceventctl1r, x27 // encoding: [0x1b,0x09,0x11,0xd5]
-// CHECK: msr trcstallctlr, x26 // encoding: [0x1a,0x0b,0x11,0xd5]
-// CHECK: msr trctsctlr, x0 // encoding: [0x00,0x0c,0x11,0xd5]
-// CHECK: msr trcsyncpr, x14 // encoding: [0x0e,0x0d,0x11,0xd5]
-// CHECK: msr trcccctlr, x8 // encoding: [0x08,0x0e,0x11,0xd5]
-// CHECK: msr trcbbctlr, x6 // encoding: [0x06,0x0f,0x11,0xd5]
-// CHECK: msr trctraceidr, x23 // encoding: [0x37,0x00,0x11,0xd5]
-// CHECK: msr trcqctlr, x5 // encoding: [0x25,0x01,0x11,0xd5]
-// CHECK: msr trcvictlr, x0 // encoding: [0x40,0x00,0x11,0xd5]
-// CHECK: msr trcviiectlr, x0 // encoding: [0x40,0x01,0x11,0xd5]
-// CHECK: msr trcvissctlr, x1 // encoding: [0x41,0x02,0x11,0xd5]
-// CHECK: msr trcvipcssctlr, x0 // encoding: [0x40,0x03,0x11,0xd5]
-// CHECK: msr trcvdctlr, x7 // encoding: [0x47,0x08,0x11,0xd5]
-// CHECK: msr trcvdsacctlr, x18 // encoding: [0x52,0x09,0x11,0xd5]
-// CHECK: msr trcvdarcctlr, x24 // encoding: [0x58,0x0a,0x11,0xd5]
-// CHECK: msr trcseqevr0, x28 // encoding: [0x9c,0x00,0x11,0xd5]
-// CHECK: msr trcseqevr1, x21 // encoding: [0x95,0x01,0x11,0xd5]
-// CHECK: msr trcseqevr2, x16 // encoding: [0x90,0x02,0x11,0xd5]
-// CHECK: msr trcseqrstevr, x16 // encoding: [0x90,0x06,0x11,0xd5]
-// CHECK: msr trcseqstr, x25 // encoding: [0x99,0x07,0x11,0xd5]
-// CHECK: msr trcextinselr, x29 // encoding: [0x9d,0x08,0x11,0xd5]
-// CHECK: msr trccntrldvr0, x20 // encoding: [0xb4,0x00,0x11,0xd5]
-// CHECK: msr trccntrldvr1, x20 // encoding: [0xb4,0x01,0x11,0xd5]
-// CHECK: msr trccntrldvr2, x22 // encoding: [0xb6,0x02,0x11,0xd5]
-// CHECK: msr trccntrldvr3, x12 // encoding: [0xac,0x03,0x11,0xd5]
-// CHECK: msr trccntctlr0, x20 // encoding: [0xb4,0x04,0x11,0xd5]
-// CHECK: msr trccntctlr1, x4 // encoding: [0xa4,0x05,0x11,0xd5]
-// CHECK: msr trccntctlr2, x8 // encoding: [0xa8,0x06,0x11,0xd5]
-// CHECK: msr trccntctlr3, x16 // encoding: [0xb0,0x07,0x11,0xd5]
-// CHECK: msr trccntvr0, x5 // encoding: [0xa5,0x08,0x11,0xd5]
-// CHECK: msr trccntvr1, x27 // encoding: [0xbb,0x09,0x11,0xd5]
-// CHECK: msr trccntvr2, x21 // encoding: [0xb5,0x0a,0x11,0xd5]
-// CHECK: msr trccntvr3, x8 // encoding: [0xa8,0x0b,0x11,0xd5]
-// CHECK: msr trcimspec0, x6 // encoding: [0xe6,0x00,0x11,0xd5]
-// CHECK: msr trcimspec1, x27 // encoding: [0xfb,0x01,0x11,0xd5]
-// CHECK: msr trcimspec2, x23 // encoding: [0xf7,0x02,0x11,0xd5]
-// CHECK: msr trcimspec3, x15 // encoding: [0xef,0x03,0x11,0xd5]
-// CHECK: msr trcimspec4, x13 // encoding: [0xed,0x04,0x11,0xd5]
-// CHECK: msr trcimspec5, x25 // encoding: [0xf9,0x05,0x11,0xd5]
-// CHECK: msr trcimspec6, x19 // encoding: [0xf3,0x06,0x11,0xd5]
-// CHECK: msr trcimspec7, x27 // encoding: [0xfb,0x07,0x11,0xd5]
-// CHECK: msr trcrsctlr2, x4 // encoding: [0x04,0x12,0x11,0xd5]
-// CHECK: msr trcrsctlr3, x0 // encoding: [0x00,0x13,0x11,0xd5]
-// CHECK: msr trcrsctlr4, x21 // encoding: [0x15,0x14,0x11,0xd5]
-// CHECK: msr trcrsctlr5, x8 // encoding: [0x08,0x15,0x11,0xd5]
-// CHECK: msr trcrsctlr6, x20 // encoding: [0x14,0x16,0x11,0xd5]
-// CHECK: msr trcrsctlr7, x11 // encoding: [0x0b,0x17,0x11,0xd5]
-// CHECK: msr trcrsctlr8, x18 // encoding: [0x12,0x18,0x11,0xd5]
-// CHECK: msr trcrsctlr9, x24 // encoding: [0x18,0x19,0x11,0xd5]
-// CHECK: msr trcrsctlr10, x15 // encoding: [0x0f,0x1a,0x11,0xd5]
-// CHECK: msr trcrsctlr11, x21 // encoding: [0x15,0x1b,0x11,0xd5]
-// CHECK: msr trcrsctlr12, x4 // encoding: [0x04,0x1c,0x11,0xd5]
-// CHECK: msr trcrsctlr13, x28 // encoding: [0x1c,0x1d,0x11,0xd5]
-// CHECK: msr trcrsctlr14, x3 // encoding: [0x03,0x1e,0x11,0xd5]
-// CHECK: msr trcrsctlr15, x20 // encoding: [0x14,0x1f,0x11,0xd5]
-// CHECK: msr trcrsctlr16, x12 // encoding: [0x2c,0x10,0x11,0xd5]
-// CHECK: msr trcrsctlr17, x17 // encoding: [0x31,0x11,0x11,0xd5]
-// CHECK: msr trcrsctlr18, x10 // encoding: [0x2a,0x12,0x11,0xd5]
-// CHECK: msr trcrsctlr19, x11 // encoding: [0x2b,0x13,0x11,0xd5]
-// CHECK: msr trcrsctlr20, x3 // encoding: [0x23,0x14,0x11,0xd5]
-// CHECK: msr trcrsctlr21, x18 // encoding: [0x32,0x15,0x11,0xd5]
-// CHECK: msr trcrsctlr22, x26 // encoding: [0x3a,0x16,0x11,0xd5]
-// CHECK: msr trcrsctlr23, x5 // encoding: [0x25,0x17,0x11,0xd5]
-// CHECK: msr trcrsctlr24, x25 // encoding: [0x39,0x18,0x11,0xd5]
-// CHECK: msr trcrsctlr25, x5 // encoding: [0x25,0x19,0x11,0xd5]
-// CHECK: msr trcrsctlr26, x4 // encoding: [0x24,0x1a,0x11,0xd5]
-// CHECK: msr trcrsctlr27, x20 // encoding: [0x34,0x1b,0x11,0xd5]
-// CHECK: msr trcrsctlr28, x5 // encoding: [0x25,0x1c,0x11,0xd5]
-// CHECK: msr trcrsctlr29, x10 // encoding: [0x2a,0x1d,0x11,0xd5]
-// CHECK: msr trcrsctlr30, x24 // encoding: [0x38,0x1e,0x11,0xd5]
-// CHECK: msr trcrsctlr31, x20 // encoding: [0x34,0x1f,0x11,0xd5]
-// CHECK: msr trcssccr0, x23 // encoding: [0x57,0x10,0x11,0xd5]
-// CHECK: msr trcssccr1, x27 // encoding: [0x5b,0x11,0x11,0xd5]
-// CHECK: msr trcssccr2, x27 // encoding: [0x5b,0x12,0x11,0xd5]
-// CHECK: msr trcssccr3, x6 // encoding: [0x46,0x13,0x11,0xd5]
-// CHECK: msr trcssccr4, x3 // encoding: [0x43,0x14,0x11,0xd5]
-// CHECK: msr trcssccr5, x12 // encoding: [0x4c,0x15,0x11,0xd5]
-// CHECK: msr trcssccr6, x7 // encoding: [0x47,0x16,0x11,0xd5]
-// CHECK: msr trcssccr7, x6 // encoding: [0x46,0x17,0x11,0xd5]
-// CHECK: msr trcsscsr0, x20 // encoding: [0x54,0x18,0x11,0xd5]
-// CHECK: msr trcsscsr1, x17 // encoding: [0x51,0x19,0x11,0xd5]
-// CHECK: msr trcsscsr2, x11 // encoding: [0x4b,0x1a,0x11,0xd5]
-// CHECK: msr trcsscsr3, x4 // encoding: [0x44,0x1b,0x11,0xd5]
-// CHECK: msr trcsscsr4, x14 // encoding: [0x4e,0x1c,0x11,0xd5]
-// CHECK: msr trcsscsr5, x22 // encoding: [0x56,0x1d,0x11,0xd5]
-// CHECK: msr trcsscsr6, x3 // encoding: [0x43,0x1e,0x11,0xd5]
-// CHECK: msr trcsscsr7, x11 // encoding: [0x4b,0x1f,0x11,0xd5]
-// CHECK: msr trcsspcicr0, x2 // encoding: [0x62,0x10,0x11,0xd5]
-// CHECK: msr trcsspcicr1, x3 // encoding: [0x63,0x11,0x11,0xd5]
-// CHECK: msr trcsspcicr2, x5 // encoding: [0x65,0x12,0x11,0xd5]
-// CHECK: msr trcsspcicr3, x7 // encoding: [0x67,0x13,0x11,0xd5]
-// CHECK: msr trcsspcicr4, x11 // encoding: [0x6b,0x14,0x11,0xd5]
-// CHECK: msr trcsspcicr5, x13 // encoding: [0x6d,0x15,0x11,0xd5]
-// CHECK: msr trcsspcicr6, x17 // encoding: [0x71,0x16,0x11,0xd5]
-// CHECK: msr trcsspcicr7, x23 // encoding: [0x77,0x17,0x11,0xd5]
-// CHECK: msr trcpdcr, x3 // encoding: [0x83,0x14,0x11,0xd5]
-// CHECK: msr trcacvr0, x6 // encoding: [0x06,0x20,0x11,0xd5]
-// CHECK: msr trcacvr1, x20 // encoding: [0x14,0x22,0x11,0xd5]
-// CHECK: msr trcacvr2, x25 // encoding: [0x19,0x24,0x11,0xd5]
-// CHECK: msr trcacvr3, x1 // encoding: [0x01,0x26,0x11,0xd5]
-// CHECK: msr trcacvr4, x28 // encoding: [0x1c,0x28,0x11,0xd5]
-// CHECK: msr trcacvr5, x15 // encoding: [0x0f,0x2a,0x11,0xd5]
-// CHECK: msr trcacvr6, x25 // encoding: [0x19,0x2c,0x11,0xd5]
-// CHECK: msr trcacvr7, x12 // encoding: [0x0c,0x2e,0x11,0xd5]
-// CHECK: msr trcacvr8, x5 // encoding: [0x25,0x20,0x11,0xd5]
-// CHECK: msr trcacvr9, x25 // encoding: [0x39,0x22,0x11,0xd5]
-// CHECK: msr trcacvr10, x13 // encoding: [0x2d,0x24,0x11,0xd5]
-// CHECK: msr trcacvr11, x10 // encoding: [0x2a,0x26,0x11,0xd5]
-// CHECK: msr trcacvr12, x19 // encoding: [0x33,0x28,0x11,0xd5]
-// CHECK: msr trcacvr13, x10 // encoding: [0x2a,0x2a,0x11,0xd5]
-// CHECK: msr trcacvr14, x19 // encoding: [0x33,0x2c,0x11,0xd5]
-// CHECK: msr trcacvr15, x2 // encoding: [0x22,0x2e,0x11,0xd5]
-// CHECK: msr trcacatr0, x15 // encoding: [0x4f,0x20,0x11,0xd5]
-// CHECK: msr trcacatr1, x13 // encoding: [0x4d,0x22,0x11,0xd5]
-// CHECK: msr trcacatr2, x8 // encoding: [0x48,0x24,0x11,0xd5]
-// CHECK: msr trcacatr3, x1 // encoding: [0x41,0x26,0x11,0xd5]
-// CHECK: msr trcacatr4, x11 // encoding: [0x4b,0x28,0x11,0xd5]
-// CHECK: msr trcacatr5, x8 // encoding: [0x48,0x2a,0x11,0xd5]
-// CHECK: msr trcacatr6, x24 // encoding: [0x58,0x2c,0x11,0xd5]
-// CHECK: msr trcacatr7, x6 // encoding: [0x46,0x2e,0x11,0xd5]
-// CHECK: msr trcacatr8, x23 // encoding: [0x77,0x20,0x11,0xd5]
-// CHECK: msr trcacatr9, x5 // encoding: [0x65,0x22,0x11,0xd5]
-// CHECK: msr trcacatr10, x11 // encoding: [0x6b,0x24,0x11,0xd5]
-// CHECK: msr trcacatr11, x11 // encoding: [0x6b,0x26,0x11,0xd5]
-// CHECK: msr trcacatr12, x3 // encoding: [0x63,0x28,0x11,0xd5]
-// CHECK: msr trcacatr13, x28 // encoding: [0x7c,0x2a,0x11,0xd5]
-// CHECK: msr trcacatr14, x25 // encoding: [0x79,0x2c,0x11,0xd5]
-// CHECK: msr trcacatr15, x4 // encoding: [0x64,0x2e,0x11,0xd5]
-// CHECK: msr trcdvcvr0, x6 // encoding: [0x86,0x20,0x11,0xd5]
-// CHECK: msr trcdvcvr1, x3 // encoding: [0x83,0x24,0x11,0xd5]
-// CHECK: msr trcdvcvr2, x5 // encoding: [0x85,0x28,0x11,0xd5]
-// CHECK: msr trcdvcvr3, x11 // encoding: [0x8b,0x2c,0x11,0xd5]
-// CHECK: msr trcdvcvr4, x9 // encoding: [0xa9,0x20,0x11,0xd5]
-// CHECK: msr trcdvcvr5, x14 // encoding: [0xae,0x24,0x11,0xd5]
-// CHECK: msr trcdvcvr6, x10 // encoding: [0xaa,0x28,0x11,0xd5]
-// CHECK: msr trcdvcvr7, x12 // encoding: [0xac,0x2c,0x11,0xd5]
-// CHECK: msr trcdvcmr0, x8 // encoding: [0xc8,0x20,0x11,0xd5]
-// CHECK: msr trcdvcmr1, x8 // encoding: [0xc8,0x24,0x11,0xd5]
-// CHECK: msr trcdvcmr2, x22 // encoding: [0xd6,0x28,0x11,0xd5]
-// CHECK: msr trcdvcmr3, x22 // encoding: [0xd6,0x2c,0x11,0xd5]
-// CHECK: msr trcdvcmr4, x5 // encoding: [0xe5,0x20,0x11,0xd5]
-// CHECK: msr trcdvcmr5, x16 // encoding: [0xf0,0x24,0x11,0xd5]
-// CHECK: msr trcdvcmr6, x27 // encoding: [0xfb,0x28,0x11,0xd5]
-// CHECK: msr trcdvcmr7, x21 // encoding: [0xf5,0x2c,0x11,0xd5]
-// CHECK: msr trccidcvr0, x8 // encoding: [0x08,0x30,0x11,0xd5]
-// CHECK: msr trccidcvr1, x6 // encoding: [0x06,0x32,0x11,0xd5]
-// CHECK: msr trccidcvr2, x9 // encoding: [0x09,0x34,0x11,0xd5]
-// CHECK: msr trccidcvr3, x8 // encoding: [0x08,0x36,0x11,0xd5]
-// CHECK: msr trccidcvr4, x3 // encoding: [0x03,0x38,0x11,0xd5]
-// CHECK: msr trccidcvr5, x21 // encoding: [0x15,0x3a,0x11,0xd5]
-// CHECK: msr trccidcvr6, x12 // encoding: [0x0c,0x3c,0x11,0xd5]
-// CHECK: msr trccidcvr7, x7 // encoding: [0x07,0x3e,0x11,0xd5]
-// CHECK: msr trcvmidcvr0, x4 // encoding: [0x24,0x30,0x11,0xd5]
-// CHECK: msr trcvmidcvr1, x3 // encoding: [0x23,0x32,0x11,0xd5]
-// CHECK: msr trcvmidcvr2, x9 // encoding: [0x29,0x34,0x11,0xd5]
-// CHECK: msr trcvmidcvr3, x17 // encoding: [0x31,0x36,0x11,0xd5]
-// CHECK: msr trcvmidcvr4, x14 // encoding: [0x2e,0x38,0x11,0xd5]
-// CHECK: msr trcvmidcvr5, x12 // encoding: [0x2c,0x3a,0x11,0xd5]
-// CHECK: msr trcvmidcvr6, x10 // encoding: [0x2a,0x3c,0x11,0xd5]
-// CHECK: msr trcvmidcvr7, x3 // encoding: [0x23,0x3e,0x11,0xd5]
-// CHECK: msr trccidcctlr0, x14 // encoding: [0x4e,0x30,0x11,0xd5]
-// CHECK: msr trccidcctlr1, x22 // encoding: [0x56,0x31,0x11,0xd5]
-// CHECK: msr trcvmidcctlr0, x8 // encoding: [0x48,0x32,0x11,0xd5]
-// CHECK: msr trcvmidcctlr1, x15 // encoding: [0x4f,0x33,0x11,0xd5]
-// CHECK: msr trcitctrl, x1 // encoding: [0x81,0x70,0x11,0xd5]
-// CHECK: msr trcclaimset, x7 // encoding: [0xc7,0x78,0x11,0xd5]
-// CHECK: msr trcclaimclr, x29 // encoding: [0xdd,0x79,0x11,0xd5]
+// CHECK: msr {{trcoslar|TRCOSLAR}}, x28 // encoding: [0x9c,0x10,0x11,0xd5]
+// CHECK: msr {{trclar|TRCLAR}}, x14 // encoding: [0xce,0x7c,0x11,0xd5]
+// CHECK: msr {{trcprgctlr|TRCPRGCTLR}}, x10 // encoding: [0x0a,0x01,0x11,0xd5]
+// CHECK: msr {{trcprocselr|TRCPROCSELR}}, x27 // encoding: [0x1b,0x02,0x11,0xd5]
+// CHECK: msr {{trcconfigr|TRCCONFIGR}}, x24 // encoding: [0x18,0x04,0x11,0xd5]
+// CHECK: msr {{trcauxctlr|TRCAUXCTLR}}, x8 // encoding: [0x08,0x06,0x11,0xd5]
+// CHECK: msr {{trceventctl0r|TRCEVENTCTL0R}}, x16 // encoding: [0x10,0x08,0x11,0xd5]
+// CHECK: msr {{trceventctl1r|TRCEVENTCTL1R}}, x27 // encoding: [0x1b,0x09,0x11,0xd5]
+// CHECK: msr {{trcstallctlr|TRCSTALLCTLR}}, x26 // encoding: [0x1a,0x0b,0x11,0xd5]
+// CHECK: msr {{trctsctlr|TRCTSCTLR}}, x0 // encoding: [0x00,0x0c,0x11,0xd5]
+// CHECK: msr {{trcsyncpr|TRCSYNCPR}}, x14 // encoding: [0x0e,0x0d,0x11,0xd5]
+// CHECK: msr {{trcccctlr|TRCCCCTLR}}, x8 // encoding: [0x08,0x0e,0x11,0xd5]
+// CHECK: msr {{trcbbctlr|TRCBBCTLR}}, x6 // encoding: [0x06,0x0f,0x11,0xd5]
+// CHECK: msr {{trctraceidr|TRCTRACEIDR}}, x23 // encoding: [0x37,0x00,0x11,0xd5]
+// CHECK: msr {{trcqctlr|TRCQCTLR}}, x5 // encoding: [0x25,0x01,0x11,0xd5]
+// CHECK: msr {{trcvictlr|TRCVICTLR}}, x0 // encoding: [0x40,0x00,0x11,0xd5]
+// CHECK: msr {{trcviiectlr|TRCVIIECTLR}}, x0 // encoding: [0x40,0x01,0x11,0xd5]
+// CHECK: msr {{trcvissctlr|TRCVISSCTLR}}, x1 // encoding: [0x41,0x02,0x11,0xd5]
+// CHECK: msr {{trcvipcssctlr|TRCVIPCSSCTLR}}, x0 // encoding: [0x40,0x03,0x11,0xd5]
+// CHECK: msr {{trcvdctlr|TRCVDCTLR}}, x7 // encoding: [0x47,0x08,0x11,0xd5]
+// CHECK: msr {{trcvdsacctlr|TRCVDSACCTLR}}, x18 // encoding: [0x52,0x09,0x11,0xd5]
+// CHECK: msr {{trcvdarcctlr|TRCVDARCCTLR}}, x24 // encoding: [0x58,0x0a,0x11,0xd5]
+// CHECK: msr {{trcseqevr0|TRCSEQEVR0}}, x28 // encoding: [0x9c,0x00,0x11,0xd5]
+// CHECK: msr {{trcseqevr1|TRCSEQEVR1}}, x21 // encoding: [0x95,0x01,0x11,0xd5]
+// CHECK: msr {{trcseqevr2|TRCSEQEVR2}}, x16 // encoding: [0x90,0x02,0x11,0xd5]
+// CHECK: msr {{trcseqrstevr|TRCSEQRSTEVR}}, x16 // encoding: [0x90,0x06,0x11,0xd5]
+// CHECK: msr {{trcseqstr|TRCSEQSTR}}, x25 // encoding: [0x99,0x07,0x11,0xd5]
+// CHECK: msr {{trcextinselr|TRCEXTINSELR}}, x29 // encoding: [0x9d,0x08,0x11,0xd5]
+// CHECK: msr {{trccntrldvr0|TRCCNTRLDVR0}}, x20 // encoding: [0xb4,0x00,0x11,0xd5]
+// CHECK: msr {{trccntrldvr1|TRCCNTRLDVR1}}, x20 // encoding: [0xb4,0x01,0x11,0xd5]
+// CHECK: msr {{trccntrldvr2|TRCCNTRLDVR2}}, x22 // encoding: [0xb6,0x02,0x11,0xd5]
+// CHECK: msr {{trccntrldvr3|TRCCNTRLDVR3}}, x12 // encoding: [0xac,0x03,0x11,0xd5]
+// CHECK: msr {{trccntctlr0|TRCCNTCTLR0}}, x20 // encoding: [0xb4,0x04,0x11,0xd5]
+// CHECK: msr {{trccntctlr1|TRCCNTCTLR1}}, x4 // encoding: [0xa4,0x05,0x11,0xd5]
+// CHECK: msr {{trccntctlr2|TRCCNTCTLR2}}, x8 // encoding: [0xa8,0x06,0x11,0xd5]
+// CHECK: msr {{trccntctlr3|TRCCNTCTLR3}}, x16 // encoding: [0xb0,0x07,0x11,0xd5]
+// CHECK: msr {{trccntvr0|TRCCNTVR0}}, x5 // encoding: [0xa5,0x08,0x11,0xd5]
+// CHECK: msr {{trccntvr1|TRCCNTVR1}}, x27 // encoding: [0xbb,0x09,0x11,0xd5]
+// CHECK: msr {{trccntvr2|TRCCNTVR2}}, x21 // encoding: [0xb5,0x0a,0x11,0xd5]
+// CHECK: msr {{trccntvr3|TRCCNTVR3}}, x8 // encoding: [0xa8,0x0b,0x11,0xd5]
+// CHECK: msr {{trcimspec0|TRCIMSPEC0}}, x6 // encoding: [0xe6,0x00,0x11,0xd5]
+// CHECK: msr {{trcimspec1|TRCIMSPEC1}}, x27 // encoding: [0xfb,0x01,0x11,0xd5]
+// CHECK: msr {{trcimspec2|TRCIMSPEC2}}, x23 // encoding: [0xf7,0x02,0x11,0xd5]
+// CHECK: msr {{trcimspec3|TRCIMSPEC3}}, x15 // encoding: [0xef,0x03,0x11,0xd5]
+// CHECK: msr {{trcimspec4|TRCIMSPEC4}}, x13 // encoding: [0xed,0x04,0x11,0xd5]
+// CHECK: msr {{trcimspec5|TRCIMSPEC5}}, x25 // encoding: [0xf9,0x05,0x11,0xd5]
+// CHECK: msr {{trcimspec6|TRCIMSPEC6}}, x19 // encoding: [0xf3,0x06,0x11,0xd5]
+// CHECK: msr {{trcimspec7|TRCIMSPEC7}}, x27 // encoding: [0xfb,0x07,0x11,0xd5]
+// CHECK: msr {{trcrsctlr2|TRCRSCTLR2}}, x4 // encoding: [0x04,0x12,0x11,0xd5]
+// CHECK: msr {{trcrsctlr3|TRCRSCTLR3}}, x0 // encoding: [0x00,0x13,0x11,0xd5]
+// CHECK: msr {{trcrsctlr4|TRCRSCTLR4}}, x21 // encoding: [0x15,0x14,0x11,0xd5]
+// CHECK: msr {{trcrsctlr5|TRCRSCTLR5}}, x8 // encoding: [0x08,0x15,0x11,0xd5]
+// CHECK: msr {{trcrsctlr6|TRCRSCTLR6}}, x20 // encoding: [0x14,0x16,0x11,0xd5]
+// CHECK: msr {{trcrsctlr7|TRCRSCTLR7}}, x11 // encoding: [0x0b,0x17,0x11,0xd5]
+// CHECK: msr {{trcrsctlr8|TRCRSCTLR8}}, x18 // encoding: [0x12,0x18,0x11,0xd5]
+// CHECK: msr {{trcrsctlr9|TRCRSCTLR9}}, x24 // encoding: [0x18,0x19,0x11,0xd5]
+// CHECK: msr {{trcrsctlr10|TRCRSCTLR10}}, x15 // encoding: [0x0f,0x1a,0x11,0xd5]
+// CHECK: msr {{trcrsctlr11|TRCRSCTLR11}}, x21 // encoding: [0x15,0x1b,0x11,0xd5]
+// CHECK: msr {{trcrsctlr12|TRCRSCTLR12}}, x4 // encoding: [0x04,0x1c,0x11,0xd5]
+// CHECK: msr {{trcrsctlr13|TRCRSCTLR13}}, x28 // encoding: [0x1c,0x1d,0x11,0xd5]
+// CHECK: msr {{trcrsctlr14|TRCRSCTLR14}}, x3 // encoding: [0x03,0x1e,0x11,0xd5]
+// CHECK: msr {{trcrsctlr15|TRCRSCTLR15}}, x20 // encoding: [0x14,0x1f,0x11,0xd5]
+// CHECK: msr {{trcrsctlr16|TRCRSCTLR16}}, x12 // encoding: [0x2c,0x10,0x11,0xd5]
+// CHECK: msr {{trcrsctlr17|TRCRSCTLR17}}, x17 // encoding: [0x31,0x11,0x11,0xd5]
+// CHECK: msr {{trcrsctlr18|TRCRSCTLR18}}, x10 // encoding: [0x2a,0x12,0x11,0xd5]
+// CHECK: msr {{trcrsctlr19|TRCRSCTLR19}}, x11 // encoding: [0x2b,0x13,0x11,0xd5]
+// CHECK: msr {{trcrsctlr20|TRCRSCTLR20}}, x3 // encoding: [0x23,0x14,0x11,0xd5]
+// CHECK: msr {{trcrsctlr21|TRCRSCTLR21}}, x18 // encoding: [0x32,0x15,0x11,0xd5]
+// CHECK: msr {{trcrsctlr22|TRCRSCTLR22}}, x26 // encoding: [0x3a,0x16,0x11,0xd5]
+// CHECK: msr {{trcrsctlr23|TRCRSCTLR23}}, x5 // encoding: [0x25,0x17,0x11,0xd5]
+// CHECK: msr {{trcrsctlr24|TRCRSCTLR24}}, x25 // encoding: [0x39,0x18,0x11,0xd5]
+// CHECK: msr {{trcrsctlr25|TRCRSCTLR25}}, x5 // encoding: [0x25,0x19,0x11,0xd5]
+// CHECK: msr {{trcrsctlr26|TRCRSCTLR26}}, x4 // encoding: [0x24,0x1a,0x11,0xd5]
+// CHECK: msr {{trcrsctlr27|TRCRSCTLR27}}, x20 // encoding: [0x34,0x1b,0x11,0xd5]
+// CHECK: msr {{trcrsctlr28|TRCRSCTLR28}}, x5 // encoding: [0x25,0x1c,0x11,0xd5]
+// CHECK: msr {{trcrsctlr29|TRCRSCTLR29}}, x10 // encoding: [0x2a,0x1d,0x11,0xd5]
+// CHECK: msr {{trcrsctlr30|TRCRSCTLR30}}, x24 // encoding: [0x38,0x1e,0x11,0xd5]
+// CHECK: msr {{trcrsctlr31|TRCRSCTLR31}}, x20 // encoding: [0x34,0x1f,0x11,0xd5]
+// CHECK: msr {{trcssccr0|TRCSSCCR0}}, x23 // encoding: [0x57,0x10,0x11,0xd5]
+// CHECK: msr {{trcssccr1|TRCSSCCR1}}, x27 // encoding: [0x5b,0x11,0x11,0xd5]
+// CHECK: msr {{trcssccr2|TRCSSCCR2}}, x27 // encoding: [0x5b,0x12,0x11,0xd5]
+// CHECK: msr {{trcssccr3|TRCSSCCR3}}, x6 // encoding: [0x46,0x13,0x11,0xd5]
+// CHECK: msr {{trcssccr4|TRCSSCCR4}}, x3 // encoding: [0x43,0x14,0x11,0xd5]
+// CHECK: msr {{trcssccr5|TRCSSCCR5}}, x12 // encoding: [0x4c,0x15,0x11,0xd5]
+// CHECK: msr {{trcssccr6|TRCSSCCR6}}, x7 // encoding: [0x47,0x16,0x11,0xd5]
+// CHECK: msr {{trcssccr7|TRCSSCCR7}}, x6 // encoding: [0x46,0x17,0x11,0xd5]
+// CHECK: msr {{trcsscsr0|TRCSSCSR0}}, x20 // encoding: [0x54,0x18,0x11,0xd5]
+// CHECK: msr {{trcsscsr1|TRCSSCSR1}}, x17 // encoding: [0x51,0x19,0x11,0xd5]
+// CHECK: msr {{trcsscsr2|TRCSSCSR2}}, x11 // encoding: [0x4b,0x1a,0x11,0xd5]
+// CHECK: msr {{trcsscsr3|TRCSSCSR3}}, x4 // encoding: [0x44,0x1b,0x11,0xd5]
+// CHECK: msr {{trcsscsr4|TRCSSCSR4}}, x14 // encoding: [0x4e,0x1c,0x11,0xd5]
+// CHECK: msr {{trcsscsr5|TRCSSCSR5}}, x22 // encoding: [0x56,0x1d,0x11,0xd5]
+// CHECK: msr {{trcsscsr6|TRCSSCSR6}}, x3 // encoding: [0x43,0x1e,0x11,0xd5]
+// CHECK: msr {{trcsscsr7|TRCSSCSR7}}, x11 // encoding: [0x4b,0x1f,0x11,0xd5]
+// CHECK: msr {{trcsspcicr0|TRCSSPCICR0}}, x2 // encoding: [0x62,0x10,0x11,0xd5]
+// CHECK: msr {{trcsspcicr1|TRCSSPCICR1}}, x3 // encoding: [0x63,0x11,0x11,0xd5]
+// CHECK: msr {{trcsspcicr2|TRCSSPCICR2}}, x5 // encoding: [0x65,0x12,0x11,0xd5]
+// CHECK: msr {{trcsspcicr3|TRCSSPCICR3}}, x7 // encoding: [0x67,0x13,0x11,0xd5]
+// CHECK: msr {{trcsspcicr4|TRCSSPCICR4}}, x11 // encoding: [0x6b,0x14,0x11,0xd5]
+// CHECK: msr {{trcsspcicr5|TRCSSPCICR5}}, x13 // encoding: [0x6d,0x15,0x11,0xd5]
+// CHECK: msr {{trcsspcicr6|TRCSSPCICR6}}, x17 // encoding: [0x71,0x16,0x11,0xd5]
+// CHECK: msr {{trcsspcicr7|TRCSSPCICR7}}, x23 // encoding: [0x77,0x17,0x11,0xd5]
+// CHECK: msr {{trcpdcr|TRCPDCR}}, x3 // encoding: [0x83,0x14,0x11,0xd5]
+// CHECK: msr {{trcacvr0|TRCACVR0}}, x6 // encoding: [0x06,0x20,0x11,0xd5]
+// CHECK: msr {{trcacvr1|TRCACVR1}}, x20 // encoding: [0x14,0x22,0x11,0xd5]
+// CHECK: msr {{trcacvr2|TRCACVR2}}, x25 // encoding: [0x19,0x24,0x11,0xd5]
+// CHECK: msr {{trcacvr3|TRCACVR3}}, x1 // encoding: [0x01,0x26,0x11,0xd5]
+// CHECK: msr {{trcacvr4|TRCACVR4}}, x28 // encoding: [0x1c,0x28,0x11,0xd5]
+// CHECK: msr {{trcacvr5|TRCACVR5}}, x15 // encoding: [0x0f,0x2a,0x11,0xd5]
+// CHECK: msr {{trcacvr6|TRCACVR6}}, x25 // encoding: [0x19,0x2c,0x11,0xd5]
+// CHECK: msr {{trcacvr7|TRCACVR7}}, x12 // encoding: [0x0c,0x2e,0x11,0xd5]
+// CHECK: msr {{trcacvr8|TRCACVR8}}, x5 // encoding: [0x25,0x20,0x11,0xd5]
+// CHECK: msr {{trcacvr9|TRCACVR9}}, x25 // encoding: [0x39,0x22,0x11,0xd5]
+// CHECK: msr {{trcacvr10|TRCACVR10}}, x13 // encoding: [0x2d,0x24,0x11,0xd5]
+// CHECK: msr {{trcacvr11|TRCACVR11}}, x10 // encoding: [0x2a,0x26,0x11,0xd5]
+// CHECK: msr {{trcacvr12|TRCACVR12}}, x19 // encoding: [0x33,0x28,0x11,0xd5]
+// CHECK: msr {{trcacvr13|TRCACVR13}}, x10 // encoding: [0x2a,0x2a,0x11,0xd5]
+// CHECK: msr {{trcacvr14|TRCACVR14}}, x19 // encoding: [0x33,0x2c,0x11,0xd5]
+// CHECK: msr {{trcacvr15|TRCACVR15}}, x2 // encoding: [0x22,0x2e,0x11,0xd5]
+// CHECK: msr {{trcacatr0|TRCACATR0}}, x15 // encoding: [0x4f,0x20,0x11,0xd5]
+// CHECK: msr {{trcacatr1|TRCACATR1}}, x13 // encoding: [0x4d,0x22,0x11,0xd5]
+// CHECK: msr {{trcacatr2|TRCACATR2}}, x8 // encoding: [0x48,0x24,0x11,0xd5]
+// CHECK: msr {{trcacatr3|TRCACATR3}}, x1 // encoding: [0x41,0x26,0x11,0xd5]
+// CHECK: msr {{trcacatr4|TRCACATR4}}, x11 // encoding: [0x4b,0x28,0x11,0xd5]
+// CHECK: msr {{trcacatr5|TRCACATR5}}, x8 // encoding: [0x48,0x2a,0x11,0xd5]
+// CHECK: msr {{trcacatr6|TRCACATR6}}, x24 // encoding: [0x58,0x2c,0x11,0xd5]
+// CHECK: msr {{trcacatr7|TRCACATR7}}, x6 // encoding: [0x46,0x2e,0x11,0xd5]
+// CHECK: msr {{trcacatr8|TRCACATR8}}, x23 // encoding: [0x77,0x20,0x11,0xd5]
+// CHECK: msr {{trcacatr9|TRCACATR9}}, x5 // encoding: [0x65,0x22,0x11,0xd5]
+// CHECK: msr {{trcacatr10|TRCACATR10}}, x11 // encoding: [0x6b,0x24,0x11,0xd5]
+// CHECK: msr {{trcacatr11|TRCACATR11}}, x11 // encoding: [0x6b,0x26,0x11,0xd5]
+// CHECK: msr {{trcacatr12|TRCACATR12}}, x3 // encoding: [0x63,0x28,0x11,0xd5]
+// CHECK: msr {{trcacatr13|TRCACATR13}}, x28 // encoding: [0x7c,0x2a,0x11,0xd5]
+// CHECK: msr {{trcacatr14|TRCACATR14}}, x25 // encoding: [0x79,0x2c,0x11,0xd5]
+// CHECK: msr {{trcacatr15|TRCACATR15}}, x4 // encoding: [0x64,0x2e,0x11,0xd5]
+// CHECK: msr {{trcdvcvr0|TRCDVCVR0}}, x6 // encoding: [0x86,0x20,0x11,0xd5]
+// CHECK: msr {{trcdvcvr1|TRCDVCVR1}}, x3 // encoding: [0x83,0x24,0x11,0xd5]
+// CHECK: msr {{trcdvcvr2|TRCDVCVR2}}, x5 // encoding: [0x85,0x28,0x11,0xd5]
+// CHECK: msr {{trcdvcvr3|TRCDVCVR3}}, x11 // encoding: [0x8b,0x2c,0x11,0xd5]
+// CHECK: msr {{trcdvcvr4|TRCDVCVR4}}, x9 // encoding: [0xa9,0x20,0x11,0xd5]
+// CHECK: msr {{trcdvcvr5|TRCDVCVR5}}, x14 // encoding: [0xae,0x24,0x11,0xd5]
+// CHECK: msr {{trcdvcvr6|TRCDVCVR6}}, x10 // encoding: [0xaa,0x28,0x11,0xd5]
+// CHECK: msr {{trcdvcvr7|TRCDVCVR7}}, x12 // encoding: [0xac,0x2c,0x11,0xd5]
+// CHECK: msr {{trcdvcmr0|TRCDVCMR0}}, x8 // encoding: [0xc8,0x20,0x11,0xd5]
+// CHECK: msr {{trcdvcmr1|TRCDVCMR1}}, x8 // encoding: [0xc8,0x24,0x11,0xd5]
+// CHECK: msr {{trcdvcmr2|TRCDVCMR2}}, x22 // encoding: [0xd6,0x28,0x11,0xd5]
+// CHECK: msr {{trcdvcmr3|TRCDVCMR3}}, x22 // encoding: [0xd6,0x2c,0x11,0xd5]
+// CHECK: msr {{trcdvcmr4|TRCDVCMR4}}, x5 // encoding: [0xe5,0x20,0x11,0xd5]
+// CHECK: msr {{trcdvcmr5|TRCDVCMR5}}, x16 // encoding: [0xf0,0x24,0x11,0xd5]
+// CHECK: msr {{trcdvcmr6|TRCDVCMR6}}, x27 // encoding: [0xfb,0x28,0x11,0xd5]
+// CHECK: msr {{trcdvcmr7|TRCDVCMR7}}, x21 // encoding: [0xf5,0x2c,0x11,0xd5]
+// CHECK: msr {{trccidcvr0|TRCCIDCVR0}}, x8 // encoding: [0x08,0x30,0x11,0xd5]
+// CHECK: msr {{trccidcvr1|TRCCIDCVR1}}, x6 // encoding: [0x06,0x32,0x11,0xd5]
+// CHECK: msr {{trccidcvr2|TRCCIDCVR2}}, x9 // encoding: [0x09,0x34,0x11,0xd5]
+// CHECK: msr {{trccidcvr3|TRCCIDCVR3}}, x8 // encoding: [0x08,0x36,0x11,0xd5]
+// CHECK: msr {{trccidcvr4|TRCCIDCVR4}}, x3 // encoding: [0x03,0x38,0x11,0xd5]
+// CHECK: msr {{trccidcvr5|TRCCIDCVR5}}, x21 // encoding: [0x15,0x3a,0x11,0xd5]
+// CHECK: msr {{trccidcvr6|TRCCIDCVR6}}, x12 // encoding: [0x0c,0x3c,0x11,0xd5]
+// CHECK: msr {{trccidcvr7|TRCCIDCVR7}}, x7 // encoding: [0x07,0x3e,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr0|TRCVMIDCVR0}}, x4 // encoding: [0x24,0x30,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr1|TRCVMIDCVR1}}, x3 // encoding: [0x23,0x32,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr2|TRCVMIDCVR2}}, x9 // encoding: [0x29,0x34,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr3|TRCVMIDCVR3}}, x17 // encoding: [0x31,0x36,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr4|TRCVMIDCVR4}}, x14 // encoding: [0x2e,0x38,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr5|TRCVMIDCVR5}}, x12 // encoding: [0x2c,0x3a,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr6|TRCVMIDCVR6}}, x10 // encoding: [0x2a,0x3c,0x11,0xd5]
+// CHECK: msr {{trcvmidcvr7|TRCVMIDCVR7}}, x3 // encoding: [0x23,0x3e,0x11,0xd5]
+// CHECK: msr {{trccidcctlr0|TRCCIDCCTLR0}}, x14 // encoding: [0x4e,0x30,0x11,0xd5]
+// CHECK: msr {{trccidcctlr1|TRCCIDCCTLR1}}, x22 // encoding: [0x56,0x31,0x11,0xd5]
+// CHECK: msr {{trcvmidcctlr0|TRCVMIDCCTLR0}}, x8 // encoding: [0x48,0x32,0x11,0xd5]
+// CHECK: msr {{trcvmidcctlr1|TRCVMIDCCTLR1}}, x15 // encoding: [0x4f,0x33,0x11,0xd5]
+// CHECK: msr {{trcitctrl|TRCITCTRL}}, x1 // encoding: [0x81,0x70,0x11,0xd5]
+// CHECK: msr {{trcclaimset|TRCCLAIMSET}}, x7 // encoding: [0xc7,0x78,0x11,0xd5]
+// CHECK: msr {{trcclaimclr|TRCCLAIMCLR}}, x29 // encoding: [0xdd,0x79,0x11,0xd5]
diff --git a/test/MC/ARM/Windows/mov32t-range.s b/test/MC/ARM/Windows/mov32t-range.s
new file mode 100644
index 0000000..fef8ff2
--- /dev/null
+++ b/test/MC/ARM/Windows/mov32t-range.s
@@ -0,0 +1,37 @@
+@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \
+@ RUN: | llvm-readobj -r - | FileCheck -check-prefix CHECK-RELOCATIONS %s
+
+@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \
+@ RUN: | llvm-objdump -d - | FileCheck -check-prefix CHECK-ENCODING %s
+
+ .syntax unified
+ .thumb
+ .text
+
+ .def truncation
+ .scl 3
+ .type 32
+ .endef
+ .align 2
+ .thumb_func
+truncation:
+ movw r0, :lower16:.Lerange
+ movt r0, :upper16:.Lerange
+ bx lr
+
+ .section .rdata,"rd"
+.Lbuffer:
+ .zero 65536
+.Lerange:
+ .asciz "-erange"
+
+@ CHECK-RELOCATIONS: Relocations [
+@ CHECK-RELOCATIONS: .text {
+@ CHECK-RELOCATIONS: 0x0 IMAGE_REL_ARM_MOV32T .rdata
+@ CHECK-RELOCATIONS-NOT: 0x4 IMAGE_REL_ARM_MOV32T .rdata
+@ CHECK-RELOCATIONS: }
+@ CHECK-RELOCATIONS: ]
+
+@ CHECK-ENCODING: 0: 40 f2 00 00
+@ CHECK-ENCODING-NEXT: 4: c0 f2 01 00
+
diff --git a/test/MC/ARM/arm-thumb-cpus-default.s b/test/MC/ARM/arm-thumb-cpus-default.s
index 636ee3c..d7a1849 100644
--- a/test/MC/ARM/arm-thumb-cpus-default.s
+++ b/test/MC/ARM/arm-thumb-cpus-default.s
@@ -1,9 +1,20 @@
-@ RUN: llvm-mc -show-encoding -arch=arm < %s | FileCheck %s --check-prefix=CHECK-ARM-ONLY
-@ RUN: llvm-mc -show-encoding -triple=armv4t < %s | FileCheck %s --check-prefix=CHECK-ARM-THUMB
-@ RUN: llvm-mc -show-encoding -arch=arm -mcpu=cortex-a15 < %s| FileCheck %s --check-prefix=CHECK-ARM-THUMB
-@ RUN: llvm-mc -show-encoding -arch=arm -mcpu=cortex-m3 < %s | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
-@ RUN: llvm-mc -show-encoding -triple=armv7m < %s | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
-@ RUN: llvm-mc -show-encoding -triple=armv6m < %s | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+@ RUN: llvm-mc -show-encoding -triple=arm-eabi < %s \
+@ RUN: | FileCheck %s --check-prefix=CHECK-ARM-ONLY
+
+@ RUN: llvm-mc -show-encoding -triple=armv4t-eabi < %s \
+@ RUN: | FileCheck %s --check-prefix=CHECK-ARM-THUMB
+
+@ RUN: llvm-mc -show-encoding -triple=arm-eabi -mcpu=cortex-a15 < %s \
+@ RUN: | FileCheck %s --check-prefix=CHECK-ARM-THUMB
+
+@ RUN: llvm-mc -show-encoding -triple=arm-eabi -mcpu=cortex-m3 < %s \
+@ RUN: | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+
+@ RUN: llvm-mc -show-encoding -triple=armv7m-eabi < %s \
+@ RUN: | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+
+@ RUN: llvm-mc -show-encoding -triple=armv6m-eabi < %s \
+@ RUN: | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
@ Make sure the architecture chosen by LLVM defaults to a compatible
@ ARM/Thumb mode.
diff --git a/test/MC/ARM/arm-thumb-cpus.s b/test/MC/ARM/arm-thumb-cpus.s
index 24be989..9005c7f 100644
--- a/test/MC/ARM/arm-thumb-cpus.s
+++ b/test/MC/ARM/arm-thumb-cpus.s
@@ -1,9 +1,20 @@
-@ RUN: not llvm-mc -show-encoding -arch=arm < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARM-ONLY
-@ RUN: llvm-mc -show-encoding -triple=armv4t < %s 2>&1| FileCheck %s --check-prefix=CHECK-ARM-THUMB
-@ RUN: llvm-mc -show-encoding -arch=arm -mcpu=cortex-a15 < %s 2>&1| FileCheck %s --check-prefix=CHECK-ARM-THUMB
-@ RUN: not llvm-mc -show-encoding -arch=arm -mcpu=cortex-m3 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
-@ RUN: not llvm-mc -show-encoding -triple=armv7m < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
-@ RUN: not llvm-mc -show-encoding -triple=armv6m < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+@ RUN: not llvm-mc -show-encoding -triple=arm-eabi < %s 2>&1 \
+@ RUN: | FileCheck %s --check-prefix=CHECK-ARM-ONLY
+
+@ RUN: llvm-mc -show-encoding -triple=armv4t < %s 2>&1 \
+@ RUN: | FileCheck %s --check-prefix=CHECK-ARM-THUMB
+
+@ RUN: llvm-mc -show-encoding -triple=arm-eabi -mcpu=cortex-a15 < %s 2>&1 \
+@ RUN: | FileCheck %s --check-prefix=CHECK-ARM-THUMB
+
+@ RUN: not llvm-mc -show-encoding -triple=arm-eabi -mcpu=cortex-m3 < %s 2>&1 \
+@ RUN: | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+
+@ RUN: not llvm-mc -show-encoding -triple=armv7m-eabi < %s 2>&1 \
+@ RUN: | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
+
+@ RUN: not llvm-mc -show-encoding -triple=armv6m-eabi < %s 2>&1 \
+@ RUN: | FileCheck %s --check-prefix=CHECK-THUMB-ONLY
@ Make sure correct diagnostics are given for CPUs without support for
@ one or other of the execution states.
diff --git a/test/MC/ARM/arm_fixups.s b/test/MC/ARM/arm_fixups.s
index bd6906b..1f56e12 100644
--- a/test/MC/ARM/arm_fixups.s
+++ b/test/MC/ARM/arm_fixups.s
@@ -26,9 +26,9 @@
@ CHECK-BE: movt r9, :upper16:_foo @ encoding: [0xe3,0b0100AAAA,0x90'A',A]
@ CHECK-BE: @ fixup A - offset: 0, value: _foo, kind: fixup_arm_movt_hi16
- mov r2, fred
+ mov r2, :lower16:fred
-@ CHECK: movw r2, fred @ encoding: [A,0x20'A',0b0000AAAA,0xe3]
+@ CHECK: movw r2, :lower16:fred @ encoding: [A,0x20'A',0b0000AAAA,0xe3]
@ CHECK: @ fixup A - offset: 0, value: fred, kind: fixup_arm_movw_lo16
-@ CHECK-BE: movw r2, fred @ encoding: [0xe3,0b0000AAAA,0x20'A',A]
+@ CHECK-BE: movw r2, :lower16:fred @ encoding: [0xe3,0b0000AAAA,0x20'A',A]
@ CHECK-BE: @ fixup A - offset: 0, value: fred, kind: fixup_arm_movw_lo16
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index a8c9cdc..05e0b2b 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -2805,6 +2805,9 @@ _func:
strd r0, r1, [r2, #-0]
strd r0, r1, [r2, #-0]!
strd r0, r1, [r2], #-0
+ strd r0, r1, [r2, #256]
+ strd r0, r1, [r2, #256]!
+ strd r0, r1, [r2], #256
@ CHECK: strd r3, r5, [r6, #24] @ encoding: [0xc6,0xe9,0x06,0x35]
@ CHECK: strd r3, r5, [r6, #24]! @ encoding: [0xe6,0xe9,0x06,0x35]
@@ -2815,6 +2818,9 @@ _func:
@ CHECK: strd r0, r1, [r2, #-0] @ encoding: [0x42,0xe9,0x00,0x01]
@ CHECK: strd r0, r1, [r2, #-0]! @ encoding: [0x62,0xe9,0x00,0x01]
@ CHECK: strd r0, r1, [r2], #-0 @ encoding: [0x62,0xe8,0x00,0x01]
+@ CHECK: strd r0, r1, [r2, #256] @ encoding: [0xc2,0xe9,0x40,0x01]
+@ CHECK: strd r0, r1, [r2, #256]! @ encoding: [0xe2,0xe9,0x40,0x01]
+@ CHECK: strd r0, r1, [r2], #256 @ encoding: [0xe2,0xe8,0x40,0x01]
@------------------------------------------------------------------------------
diff --git a/test/MC/ARM/big-endian-arm-fixup.s b/test/MC/ARM/big-endian-arm-fixup.s
new file mode 100644
index 0000000..5fb9cef
--- /dev/null
+++ b/test/MC/ARM/big-endian-arm-fixup.s
@@ -0,0 +1,107 @@
+// RUN: llvm-mc -triple=armeb-eabi -mattr v7,vfp2 -filetype=obj < %s | llvm-objdump -s - | FileCheck %s
+
+ .syntax unified
+ .text
+ .align 2
+ .code 32
+
+@ARM::fixup_arm_condbl
+.section s_condbl,"ax",%progbits
+// CHECK-LABEL: Contents of section s_condbl
+// CHECK: 0000 0b000002
+ bleq condbl_label+16
+condbl_label:
+
+@ARM::fixup_arm_uncondbl
+.section s_uncondbl,"ax",%progbits
+// CHECK-LABEL: Contents of section s_uncondbl
+// CHECK: 0000 eb000002
+ bl uncond_label+16
+uncond_label:
+
+@ARM::fixup_arm_blx
+.section s_blx,"ax",%progbits
+// CHECK-LABEL: Contents of section s_blx
+// CHECK: 0000 fa000002
+ blx blx_label+16
+blx_label:
+
+@ARM::fixup_arm_uncondbranch
+.section s_uncondbranch,"ax",%progbits
+// CHECK-LABEL: Contents of section s_uncondbranch
+// CHECK: 0000 ea000003
+ b uncondbranch_label+16
+uncondbranch_label:
+
+@ARM::fixup_arm_condbranch
+.section s_condbranch,"ax",%progbits
+// CHECK-LABEL: Contents of section s_condbranch
+// CHECK: 0000 0a000003
+ beq condbranch_label+16
+condbranch_label:
+
+@ARM::fixup_arm_pcrel_10
+.section s_arm_pcrel_10,"ax",%progbits
+// CHECK-LABEL: Contents of section s_arm_pcrel_10
+// CHECK: 0000 ed9f0b03
+ vldr d0, arm_pcrel_10_label+16
+arm_pcrel_10_label:
+
+@ARM::fixup_arm_ldst_pcrel_12
+.section s_arm_ldst_pcrel_12,"ax",%progbits
+// CHECK-LABEL: Contents of section s_arm_ldst_pcrel_12
+// CHECK: 0000 e59f000c
+ ldr r0, arm_ldst_pcrel_12_label+16
+arm_ldst_pcrel_12_label:
+
+@ARM::fixup_arm_adr_pcrel_12
+.section s_arm_adr_pcrel_12,"ax",%progbits
+// CHECK-LABEL: Contents of section s_arm_adr_pcrel_12
+// CHECK: 0000 e28f0010
+ adr r0, arm_adr_pcrel_12_label+20
+arm_adr_pcrel_12_label:
+
+@ARM::fixup_arm_adr_pcrel_10_unscaled
+.section s_arm_adr_pcrel_10_unscaled,"ax",%progbits
+// CHECK-LABEL: Contents of section s_arm_adr_pcrel_10_unscaled
+// CHECK: 0000 e1cf01d4
+ ldrd r0, r1, arm_adr_pcrel_10_unscaled_label+24
+arm_adr_pcrel_10_unscaled_label:
+
+@ARM::fixup_arm_movw_lo16
+.section s_movw,"ax",%progbits
+// CHECK-LABEL: Contents of section s_movw
+// CHECK: 0000 e3000008
+ movw r0, :lower16:(some_label+8)
+
+@ARM::fixup_arm_movt_hi16
+.section s_movt,"ax",%progbits
+// CHECK-LABEL: Contents of section s_movt
+// CHECK: 0000 e34f0ffc
+ movt r0, :upper16:GOT-(movt_label)
+movt_label:
+
+@FK_Data_1
+.section s_fk_data_1
+// CHECK-LABEL: Contents of section s_fk_data_1
+// CHECK: 0000 01
+fk_data1_l_label:
+.byte fk_data1_h_label-fk_data1_l_label
+fk_data1_h_label:
+
+@FK_Data_2
+.section s_fk_data_2
+// CHECK-LABEL: Contents of section s_fk_data_2
+// CHECK: 0000 0002
+fk_data2_l_label:
+.short fk_data2_h_label-fk_data2_l_label
+fk_data2_h_label:
+
+@FK_Data_4
+.section s_fk_data_4
+// CHECK-LABEL: Contents of section s_fk_data_4
+// CHECK: 0000 00000004
+fk_data4_l_label:
+.long fk_data4_h_label-fk_data4_l_label
+fk_data4_h_label:
+
diff --git a/test/MC/ARM/big-endian-thumb-fixup.s b/test/MC/ARM/big-endian-thumb-fixup.s
new file mode 100644
index 0000000..5023fca
--- /dev/null
+++ b/test/MC/ARM/big-endian-thumb-fixup.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple=armeb-eabi -mattr v7,vfp2 -filetype=obj < %s | llvm-objdump -s - | FileCheck %s
+
+ .syntax unified
+ .text
+ .align 2
+ .code 16
+
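+@ Thumb code is a stream of 16-bit units, so on a big-endian target each
+@ halfword should be emitted MSB first; the checks below verify that byte
+@ order once the fixups have been applied.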
+@ARM::fixup_arm_thumb_bl
+.section s_thumb_bl,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_bl
+// CHECK: 0000 f000f801
+ bl thumb_bl_label
+ nop
+thumb_bl_label:
+
+@ARM::fixup_arm_thumb_blx
+.section s_thumb_blx,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_blx
+// CHECK: 0000 f000e802
+	blx thumb_blx_label+8
+thumb_blx_label:
+
+@ARM::fixup_arm_thumb_br
+.section s_thumb_br,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_br
+// CHECK: 0000 e000bf00
+ b thumb_br_label
+ nop
+thumb_br_label:
+
+@ARM::fixup_arm_thumb_bcc
+.section s_thumb_bcc,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_bcc
+// CHECK: 0000 d000bf00
+ beq thumb_bcc_label
+ nop
+thumb_bcc_label:
+
+@ARM::fixup_arm_thumb_cb
+.section s_thumb_cb,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_cb
+// CHECK: 0000 b100bf00
+ cbz r0, thumb_cb_label
+ nop
+thumb_cb_label:
+
+@ARM::fixup_arm_thumb_cp
+.section s_thumb_cp,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_cp
+// CHECK: 0000 4801bf00
+ ldr r0, =thumb_cp_label
+ nop
+ nop
+thumb_cp_label:
+
+@ARM::fixup_arm_thumb_adr_pcrel_10
+.section s_thumb_adr_pcrel_10,"ax",%progbits
+// CHECK-LABEL: Contents of section s_thumb_adr_pcrel_10
+// CHECK: 0000 a000bf00
+ adr r0, thumb_adr_pcrel_10_label
+ nop
+thumb_adr_pcrel_10_label:
+
diff --git a/test/MC/ARM/big-endian-thumb2-fixup.s b/test/MC/ARM/big-endian-thumb2-fixup.s
new file mode 100644
index 0000000..4fd5276
--- /dev/null
+++ b/test/MC/ARM/big-endian-thumb2-fixup.s
@@ -0,0 +1,49 @@
+// RUN: llvm-mc -triple=thumbeb-eabi -mattr v7,vfp2 -filetype=obj < %s | llvm-objdump -s - | FileCheck %s
+
+ .syntax unified
+ .text
+ .align 2
+
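+@ The movw/movt fixups scatter the 16-bit immediate across both halfwords of
+@ the 32-bit Thumb-2 encoding, which makes them a useful smoke test for
+@ big-endian instruction emission.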
+@ARM::fixup_t2_movw_lo16
+.section s_movw,"ax",%progbits
+// CHECK-LABEL: Contents of section s_movw
+// CHECK: 0000 f2400008
+ movw r0, :lower16:(some_label+8)
+
+@ARM::fixup_t2_movt_hi16
+.section s_movt,"ax",%progbits
+// CHECK-LABEL: Contents of section s_movt
+// CHECK: 0000 f6cf70fc
+ movt r0, :upper16:GOT-(movt_label)
+movt_label:
+
+@ARM::fixup_t2_uncondbranch
+.section s_uncondbranch,"ax",%progbits
+// CHECK-LABEL: Contents of section s_uncondbranch
+// CHECK: 0000 f000b801 bf00
+ b.w uncond_label
+ nop
+uncond_label:
+
+@ARM::fixup_t2_condbranch
+.section s_condbranch,"ax",%progbits
+// CHECK-LABEL: Contents of section s_condbranch
+// CHECK: 0000 f0008001 bf00
+ beq.w cond_label
+ nop
+cond_label:
+
+@ARM::fixup_t2_ldst_pcrel_12
+.section s_ldst_pcrel_12,"ax",%progbits
+	ldr r0, ldst_pcrel_12_label
+	nop
+	nop
+ldst_pcrel_12_label:
+
+@ARM::fixup_t2_adr_pcrel_12
+.section s_adr_pcrel_12,"ax",%progbits
+ adr r0, adr_pcrel_12_label
+ nop
+ nop
+adr_pcrel_12_label:
+
diff --git a/test/MC/ARM/coff-debugging-secrel.ll b/test/MC/ARM/coff-debugging-secrel.ll
new file mode 100644
index 0000000..f37b19e
--- /dev/null
+++ b/test/MC/ARM/coff-debugging-secrel.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple thumbv7--windows-itanium -filetype obj -o - %s \
+; RUN: | llvm-readobj -r - | FileCheck %s -check-prefix CHECK-ITANIUM
+
+; RUN: llc -mtriple thumbv7--windows-msvc -filetype obj -o - %s \
+; RUN: | llvm-readobj -r - | FileCheck %s -check-prefix CHECK-MSVC
+
+; ModuleID = '/Users/compnerd/work/llvm/test/MC/ARM/reduced.c'
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7--windows-itanium"
+
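+; COFF debug info uses section-relative rather than absolute fixups, so both
+; the Itanium and MSVC flavours are checked for the expected
+; IMAGE_REL_ARM_SECREL entries against the debug sections.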
+define arm_aapcs_vfpcc void @function() {
+entry:
+ ret void, !dbg !0
+}
+
+!llvm.dbg.cu = !{!7}
+!llvm.module.flags = !{!9, !10}
+
+!0 = metadata !{i32 1, i32 0, metadata !1, null}
+!1 = metadata !{i32 786478, metadata !2, metadata !3, metadata !"function", metadata !"function", metadata !"", i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @function, null, null, metadata !6, i32 1} ; [ DW_TAG_subprogram ], [line 1], [def], [function]
+!2 = metadata !{metadata !"/Users/compnerd/work/llvm/test/MC/ARM/reduced.c", metadata !"/Users/compnerd/work/llvm"}
+!3 = metadata !{i32 786473, metadata !2} ; [ DW_TAG_file_type] [/Users/compnerd/work/llvm/test/MC/ARM/reduced.c]
+!4 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ], [line 0, size 0, align 0, offset 0] [from ]
+!5 = metadata !{null}
+!6 = metadata !{}
+!7 = metadata !{i32 786449, metadata !2, i32 12, metadata !"clang version 3.5.0", i1 false, metadata !"", i32 0, metadata !6, metadata !6, metadata !8, metadata !6, metadata !6, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/Users/compnerd/work/llvm/test/MC/ARM/reduced.c] [DW_LANG_C99]
+!8 = metadata !{metadata !1}
+!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+
+; CHECK-ITANIUM: Relocations [
+; CHECK-ITANIUM: Section {{.*}} .debug_info {
+; CHECK-ITANIUM: 0x6 IMAGE_REL_ARM_SECREL .debug_abbrev
+; CHECK-ITANIUM: 0xC IMAGE_REL_ARM_SECREL .debug_str
+; CHECK-ITANIUM: 0x12 IMAGE_REL_ARM_SECREL .debug_str
+; CHECK-ITANIUM: 0x16 IMAGE_REL_ARM_SECREL .debug_line
+; CHECK-ITANIUM: }
+; CHECK-ITANIUM: Section {{.*}}.debug_pubnames {
+; CHECK-ITANIUM: 0x6 IMAGE_REL_ARM_SECREL .debug_info
+; CHECK-ITANIUM: }
+; CHECK-ITANIUM: ]
+
+; CHECK-MSVC: Relocations [
+; CHECK-MSVC: Section {{.*}} .debug$S {
+; CHECK-MSVC: 0xC IMAGE_REL_ARM_SECREL function
+; CHECK-MSVC: 0x10 IMAGE_REL_ARM_SECTION function
+; CHECK-MSVC: }
+; CHECK-MSVC: ]
+
diff --git a/test/MC/ARM/coff-file.s b/test/MC/ARM/coff-file.s
new file mode 100644
index 0000000..f0dd29a
--- /dev/null
+++ b/test/MC/ARM/coff-file.s
@@ -0,0 +1,47 @@
+// RUN: llvm-mc -triple thumbv7-windows -filetype obj %s -o - | llvm-objdump -t - \
+// RUN: | FileCheck %s
+
+// RUN: llvm-mc -triple thumbv7-windows -filetype obj %s -o - \
+// RUN: | llvm-readobj -symbols | FileCheck %s -check-prefix CHECK-SCN
+
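+// A COFF auxiliary file record holds 18 bytes of file name, so a name of up
+// to 18 characters fits in one aux entry and longer names spill into
+// additional entries; the (nx N) counts below check exactly that.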
+ .file "null-padded.asm"
+// CHECK: (nx 1) {{0x[0-9]+}} .file
+// CHECK-NEXT: AUX null-padded.asm{{$}}
+
+ .file "eighteen-chars.asm"
+
+// CHECK: (nx 1) {{0x[0-9]+}} .file
+// CHECK-NEXT: AUX eighteen-chars.asm{{$}}
+
+ .file "multiple-auxiliary-entries.asm"
+
+// CHECK: (nx 2) {{0x[0-9]+}} .file
+// CHECK-NEXT: AUX multiple-auxiliary-entries.asm{{$}}
+
+// CHECK-SCN: Symbols [
+// CHECK-SCN: Symbol {
+// CHECK-SCN: Name: .file
+// CHECK-SCN: Section: (65534)
+// CHECK-SCN: StorageClass: File
+// CHECK-SCN: AuxFileRecord {
+// CHECK-SCN: FileName: null-padded.asm
+// CHECK-SCN: }
+// CHECK-SCN: }
+// CHECK-SCN: Symbol {
+// CHECK-SCN: Name: .file
+// CHECK-SCN: Section: (65534)
+// CHECK-SCN: StorageClass: File
+// CHECK-SCN: AuxFileRecord {
+// CHECK-SCN: FileName: eighteen-chars.asm
+// CHECK-SCN: }
+// CHECK-SCN: }
+// CHECK-SCN: Symbol {
+// CHECK-SCN: Name: .file
+// CHECK-SCN: Section: (65534)
+// CHECK-SCN: StorageClass: File
+// CHECK-SCN: AuxFileRecord {
+// CHECK-SCN: FileName: multiple-auxiliary-entries.asm
+// CHECK-SCN: }
+// CHECK-SCN: }
+// CHECK-SCN: ]
+
diff --git a/test/MC/ARM/coff-function-type-info.ll b/test/MC/ARM/coff-function-type-info.ll
new file mode 100644
index 0000000..a9f7c18
--- /dev/null
+++ b/test/MC/ARM/coff-function-type-info.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-ASM
+
+; RUN: llc -mtriple thumbv7-windows-itanium -filetype obj -o - %s \
+; RUN: | llvm-readobj -t | FileCheck %s -check-prefix CHECK-OBJECT
+
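+; Both functions should be emitted with COFF complex type "function"
+; (.type 32); external linkage maps to storage class 2 and internal linkage
+; to storage class 3, as the assembly and object checks below verify.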
+define arm_aapcs_vfpcc void @external() {
+entry:
+ ret void
+}
+
+; CHECK-ASM: .def external
+; CHECK-ASM: .scl 2
+; CHECK-ASM: .type 32
+; CHECK-ASM: .endef
+; CHECK-ASM: .globl external
+
+define internal arm_aapcs_vfpcc void @internal() {
+entry:
+ ret void
+}
+
+; CHECK-ASM: .def internal
+; CHECK-ASM: .scl 3
+; CHECK-ASM: .type 32
+; CHECK-ASM: .endef
+; CHECK-ASM-NOT: .globl internal
+
+; CHECK-OBJECT: Symbol {
+; CHECK-OBJECT: Name: external
+; CHECK-OBJECT: Section: .text
+; CHECK-OBJECT: BaseType: Null
+; CHECK-OBJECT: ComplexType: Function
+; CHECK-OBJECT: StorageClass: External
+; CHECK-OBJECT: AuxSymbolCount: 0
+; CHECK-OBJECT: }
+; CHECK-OBJECT: Symbol {
+; CHECK-OBJECT: Name: internal
+; CHECK-OBJECT: Section: .text
+; CHECK-OBJECT: BaseType: Null
+; CHECK-OBJECT: ComplexType: Function
+; CHECK-OBJECT: StorageClass: Static
+; CHECK-OBJECT: AuxSymbolCount: 0
+; CHECK-OBJECT: }
+
diff --git a/test/MC/ARM/coff-relocations.s b/test/MC/ARM/coff-relocations.s
new file mode 100644
index 0000000..6ebae70
--- /dev/null
+++ b/test/MC/ARM/coff-relocations.s
@@ -0,0 +1,101 @@
+@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \
+@ RUN: | llvm-readobj -r - | FileCheck %s -check-prefix CHECK-RELOCATION
+
+@ RUN: llvm-mc -triple thumbv7-windows-itanium -filetype obj -o - %s \
+@ RUN: | llvm-objdump -d - | FileCheck %s -check-prefix CHECK-ENCODING
+
+ .syntax unified
+ .text
+ .thumb
+
+ .global target
+
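+@ Each fragment below references the external symbol target in a different
+@ way; every access should produce the matching IMAGE_REL_ARM_* entry in the
+@ relocation list checked at the end of the file.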
+ .thumb_func
+branch24t:
+ b target
+
+@ CHECK-ENCODING-LABEL: branch24t
+@ CHECK-ENCODING-NEXT: b.w #0
+
+ .thumb_func
+branch20t:
+ bcc target
+
+@ CHECK-ENCODING-LABEL: branch20t
+@ CHECK-ENCODING-NEXT: blo.w #0
+
+ .thumb_func
+blx23t:
+ bl target
+
+@ CHECK-ENCODING-LABEL: blx23t
+@ CHECK-ENCODING-NEXT: bl #0
+
+ .thumb_func
+mov32t:
+ movw r0, :lower16:target
+ movt r0, :upper16:target
+ blx r0
+
+@ CHECK-ENCODING-LABEL: mov32t
+@ CHECK-ENCODING-NEXT: movw r0, #0
+@ CHECK-ENCODING-NEXT: movt r0, #0
+@ CHECK-ENCODING-NEXT: blx r0
+
+ .thumb_func
+addr32:
+ ldr r0, .Laddr32
+ bx r0
+ trap
+.Laddr32:
+ .long target
+
+@ CHECK-ENCODING-LABEL: addr32
+@ CHECK-ENCODING-NEXT: ldr r0, [pc, #4]
+@ CHECK-ENCODING-NEXT: bx r0
+@ CHECK-ENCODING-NEXT: trap
+@ CHECK-ENCODING-NEXT: movs r0, r0
+@ CHECK-ENCODING-NEXT: movs r0, r0
+
+ .thumb_func
+addr32nb:
+ ldr r0, .Laddr32nb
+ bx r0
+ trap
+.Laddr32nb:
+ .long target(imgrel)
+
+@ CHECK-ENCODING-LABEL: addr32nb
+@ CHECK-ENCODING-NEXT: ldr.w r0, [pc, #4]
+@ CHECK-ENCODING-NEXT: bx r0
+@ CHECK-ENCODING-NEXT: trap
+@ CHECK-ENCODING-NEXT: movs r0, r0
+@ CHECK-ENCODING-NEXT: movs r0, r0
+
+ .thumb_func
+secrel:
+ ldr r0, .Lsecrel
+ bx r0
+ trap
+.Lsecrel:
+ .long target(secrel32)
+
+@ CHECK-ENCODING-LABEL: secrel
+@ CHECK-ENCODING-NEXT: ldr.w r0, [pc, #4]
+@ CHECK-ENCODING-NEXT: bx r0
+@ CHECK-ENCODING-NEXT: trap
+@ CHECK-ENCODING-NEXT: movs r0, r0
+@ CHECK-ENCODING-NEXT: movs r0, r0
+
+@ CHECK-RELOCATION: Relocations [
+@ CHECK-RELOCATION: Section (1) .text {
+@ CHECK-RELOCATION: 0x0 IMAGE_REL_ARM_BRANCH24T
+@ CHECK-RELOCATION: 0x4 IMAGE_REL_ARM_BRANCH20T
+@ CHECK-RELOCATION: 0x8 IMAGE_REL_ARM_BLX23T
+@ CHECK-RELOCATION: 0xC IMAGE_REL_ARM_MOV32T
+@ CHECK-RELOCATION: 0x1C IMAGE_REL_ARM_ADDR32
+@ CHECK-RELOCATION: 0x28 IMAGE_REL_ARM_ADDR32NB
+@ CHECK-RELOCATION: 0x34 IMAGE_REL_ARM_SECREL
+@ CHECK-RELOCATION: }
+@ CHECK-RELOCATION: ]
+
diff --git a/test/MC/ARM/complex-operands.s b/test/MC/ARM/complex-operands.s
index 2a721c4..72f8f88 100644
--- a/test/MC/ARM/complex-operands.s
+++ b/test/MC/ARM/complex-operands.s
@@ -21,20 +21,20 @@ return:
.global arm_function
.type arm_function,%function
arm_function:
- mov r0, #(.L_table_end - .L_table_begin) >> 2
+ mov r0, #:lower16:((.L_table_end - .L_table_begin) >> 2)
blx return
@ CHECK-LABEL: arm_function
-@ CHECK: movw r0, #(.L_table_end-.L_table_begin)>>2
+@ CHECK: movw r0, :lower16:((.L_table_end-.L_table_begin)>>2)
@ CHECK: blx return
.global thumb_function
.type thumb_function,%function
thumb_function:
- mov r0, #(.L_table_end - .L_table_begin) >> 2
+ mov r0, #:lower16:((.L_table_end - .L_table_begin) >> 2)
blx return
@ CHECK-LABEL: thumb_function
-@ CHECK: movw r0, #(.L_table_end-.L_table_begin)>>2
+@ CHECK: movw r0, :lower16:((.L_table_end-.L_table_begin)>>2)
@ CHECK: blx return
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index 3c26f6d..62d7dae 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -465,3 +465,11 @@
ldm sp!, {r0}^
@ CHECK-ERRORS: error: system STM cannot have writeback register
@ CHECK-ERRORS: error: writeback register only allowed on system LDM if PC in register-list
+
+foo2:
+ mov r0, foo2
+ movw r0, foo2
+@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
+@ CHECK-ERRORS: ^
diff --git a/test/MC/ARM/dwarf-cfi-initial-state.s b/test/MC/ARM/dwarf-cfi-initial-state.s
index 2d638e9..0d1c08a 100644
--- a/test/MC/ARM/dwarf-cfi-initial-state.s
+++ b/test/MC/ARM/dwarf-cfi-initial-state.s
@@ -1,6 +1,7 @@
# RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o - \
# RUN: | llvm-dwarfdump - | FileCheck %s
+_proc:
.cfi_sections .debug_frame
.cfi_startproc
bx lr
diff --git a/test/MC/ARM/eh-directive-save-diagnoatics.s b/test/MC/ARM/eh-directive-save-diagnostics.s
index 0e6d740..0e6d740 100644
--- a/test/MC/ARM/eh-directive-save-diagnoatics.s
+++ b/test/MC/ARM/eh-directive-save-diagnostics.s
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
index 6147020..ea7d507 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.s
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -5,7 +5,6 @@
.syntax unified
.text
- .globl f
.align 2
.type f,%function
.code 16
@@ -16,9 +15,21 @@ f:
bl g
pop {r7, pc}
+ .section .data.rel.local,"aw",%progbits
+ptr:
+ .long f
+
+
@@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
@CHECK: Relocations [
@CHECK-NEXT: Section (2) .rel.text {
@CHECK-NEXT: 0x4 R_ARM_THM_CALL g 0x0
@CHECK-NEXT: }
+
+
+@@ make sure the relocation is against f itself; that is one way to make sure
+@@ it includes the thumb bit.
+@CHECK-NEXT: Section (6) .rel.data.rel.local {
+@CHECK-NEXT: 0x0 R_ARM_ABS32 f 0x0
+@CHECK-NEXT: }
@CHECK-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s
index 0ea1182..af061b5 100644
--- a/test/MC/ARM/elf-thumbfunc.s
+++ b/test/MC/ARM/elf-thumbfunc.s
@@ -11,7 +11,17 @@
foo:
bx lr
-@@ make sure foo is thumb function: bit 0 = 1 (st_value)
+ .global bar
+bar = foo
+
+@@ make sure foo and bar are thumb functions: bit 0 = 1 (st_value)
+@CHECK: Symbol {
+@CHECK: Name: bar
+@CHECK-NEXT: Value: 0x1
+@CHECK-NEXT: Size: 0
+@CHECK-NEXT: Binding: Global
+@CHECK-NEXT: Type: Function
+
@CHECK: Symbol {
@CHECK: Name: foo
@CHECK-NEXT: Value: 0x1
diff --git a/test/MC/ARM/ldrd-strd-gnu-arm-bad-imm.s b/test/MC/ARM/ldrd-strd-gnu-arm-bad-imm.s
new file mode 100644
index 0000000..fbe459c
--- /dev/null
+++ b/test/MC/ARM/ldrd-strd-gnu-arm-bad-imm.s
@@ -0,0 +1,9 @@
+@ RUN: not llvm-mc -triple=armv7-linux-gnueabi %s 2>&1 | FileCheck %s
+.text
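+@ ARM-mode ldrd/strd encode only an 8-bit immediate offset (up to #255), so
+@ an offset of #512 is representable only in the Thumb-2 encoding, which is
+@ what the diagnostic should point at.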
+@ CHECK: error: instruction requires: thumb2
+@ CHECK: ldrd r0, [r0, #512]
+ ldrd r0, [r0, #512]
+
+@ CHECK: error: instruction requires: thumb2
+@ CHECK: strd r0, [r0, #512]
+ strd r0, [r0, #512]
diff --git a/test/MC/ARM/ldrd-strd-gnu-arm.s b/test/MC/ARM/ldrd-strd-gnu-arm.s
new file mode 100644
index 0000000..57d21c7
--- /dev/null
+++ b/test/MC/ARM/ldrd-strd-gnu-arm.s
@@ -0,0 +1,20 @@
+@ PR18921
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -show-encoding < %s | FileCheck %s
+.text
+
+@ CHECK-NOT: .code 16
+
+
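+@ The GNU alias omits the second register: ldrd/strd Rt, [Rn] is accepted
+@ with R(t+1) supplied as the implicit second transfer register, as the
+@ expanded forms below show.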
+@ CHECK: ldrd r0, r1, [r10, #32]! @ encoding: [0xd0,0x02,0xea,0xe1]
+@ CHECK: ldrd r0, r1, [r10], #32 @ encoding: [0xd0,0x02,0xca,0xe0]
+@ CHECK: ldrd r0, r1, [r10, #32] @ encoding: [0xd0,0x02,0xca,0xe1]
+ ldrd r0, [r10, #32]!
+ ldrd r0, [r10], #32
+ ldrd r0, [r10, #32]
+
+@ CHECK: strd r0, r1, [r10, #32]! @ encoding: [0xf0,0x02,0xea,0xe1]
+@ CHECK: strd r0, r1, [r10], #32 @ encoding: [0xf0,0x02,0xca,0xe0]
+@ CHECK: strd r0, r1, [r10, #32] @ encoding: [0xf0,0x02,0xca,0xe1]
+ strd r0, [r10, #32]!
+ strd r0, [r10], #32
+ strd r0, [r10, #32]
diff --git a/test/MC/ARM/ldrd-strd-gnu-thumb-bad-regs.s b/test/MC/ARM/ldrd-strd-gnu-thumb-bad-regs.s
new file mode 100644
index 0000000..9d81a27
--- /dev/null
+++ b/test/MC/ARM/ldrd-strd-gnu-thumb-bad-regs.s
@@ -0,0 +1,10 @@
+@ RUN: not llvm-mc -triple=armv7-linux-gnueabi %s 2>&1 | FileCheck %s
+.text
+.thumb
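+@ With the single-register GNU alias, r12 implies r13 (sp) as the second
+@ transfer register, which Thumb-2 ldrd/strd do not accept, hence the
+@ diagnostics.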
+@ CHECK: error: invalid operand for instruction
+@ CHECK: ldrd r12, [r0, #512]
+ ldrd r12, [r0, #512]
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: strd r12, [r0, #512]
+ strd r12, [r0, #512]
diff --git a/test/MC/ARM/ldrd-strd-gnu-thumb.s b/test/MC/ARM/ldrd-strd-gnu-thumb.s
new file mode 100644
index 0000000..67d2aa7
--- /dev/null
+++ b/test/MC/ARM/ldrd-strd-gnu-thumb.s
@@ -0,0 +1,20 @@
+@ PR18921
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -show-encoding < %s | FileCheck %s
+.text
+.thumb
+
+@ CHECK: .code 16
+
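+@ Same GNU alias as the ARM variant, but the Thumb-2 encoding scales an
+@ 8-bit immediate by 4 (up to #1020), so the #512 offsets are encodable here.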
+@ CHECK: ldrd r0, r1, [r10, #512]! @ encoding: [0xfa,0xe9,0x80,0x01]
+@ CHECK: ldrd r0, r1, [r10], #512 @ encoding: [0xfa,0xe8,0x80,0x01]
+@ CHECK: ldrd r0, r1, [r10, #512] @ encoding: [0xda,0xe9,0x80,0x01]
+ ldrd r0, [r10, #512]!
+ ldrd r0, [r10], #512
+ ldrd r0, [r10, #512]
+
+@ CHECK: strd r0, r1, [r10, #512]! @ encoding: [0xea,0xe9,0x80,0x01]
+@ CHECK: strd r0, r1, [r10], #512 @ encoding: [0xea,0xe8,0x80,0x01]
+@ CHECK: strd r0, r1, [r10, #512] @ encoding: [0xca,0xe9,0x80,0x01]
+ strd r0, [r10, #512]!
+ strd r0, [r10], #512
+ strd r0, [r10, #512]
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 3fcbe3e..b96784e 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -367,7 +367,7 @@
@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2]! @ encoding: [0x4d,0x0e,0xe2,0xf4]
@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3]! @ encoding: [0x8d,0x0e,0xe3,0xf4]
@ CHECK: vld3.8 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x2d,0x1e,0xe7,0xf4]
-@ CHECK: vld3.16 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d19[], d21[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4]
@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8]! @ encoding: [0xad,0x0e,0xe8,0xf4]
@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r8 @ encoding: [0x08,0x0e,0xe1,0xf4]
@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2], r7 @ encoding: [0x47,0x0e,0xe2,0xf4]
diff --git a/test/MC/ARM/neon-vld-vst-align.s b/test/MC/ARM/neon-vld-vst-align.s
new file mode 100644
index 0000000..c3628ce
--- /dev/null
+++ b/test/MC/ARM/neon-vld-vst-align.s
@@ -0,0 +1,8354 @@
+@ RUN: not llvm-mc -triple=thumbv7-apple-darwin -show-encoding < %s > %t 2> %t.err
+@ RUN: FileCheck < %t %s
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t.err %s
+
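+@ For each vld1 form the alignment suffix must be one of the alignments the
+@ encoding can express for that register list or lane; every other value
+@ should draw an "alignment must be ..." diagnostic, as checked below.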
+ vld1.8 {d0}, [r4]
+ vld1.8 {d0}, [r4:16]
+ vld1.8 {d0}, [r4:32]
+ vld1.8 {d0}, [r4:64]
+ vld1.8 {d0}, [r4:128]
+ vld1.8 {d0}, [r4:256]
+
+@ CHECK: vld1.8 {d0}, [r4] @ encoding: [0x24,0xf9,0x0f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0}, [r4]!
+ vld1.8 {d0}, [r4:16]!
+ vld1.8 {d0}, [r4:32]!
+ vld1.8 {d0}, [r4:64]!
+ vld1.8 {d0}, [r4:128]!
+ vld1.8 {d0}, [r4:256]!
+
+@ CHECK: vld1.8 {d0}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0}, [r4], r6
+ vld1.8 {d0}, [r4:16], r6
+ vld1.8 {d0}, [r4:32], r6
+ vld1.8 {d0}, [r4:64], r6
+ vld1.8 {d0}, [r4:128], r6
+ vld1.8 {d0}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1}, [r4]
+ vld1.8 {d0, d1}, [r4:16]
+ vld1.8 {d0, d1}, [r4:32]
+ vld1.8 {d0, d1}, [r4:64]
+ vld1.8 {d0, d1}, [r4:128]
+ vld1.8 {d0, d1}, [r4:256]
+
+@ CHECK: vld1.8 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0x0f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x0a]
+@ CHECK: vld1.8 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1}, [r4]!
+ vld1.8 {d0, d1}, [r4:16]!
+ vld1.8 {d0, d1}, [r4:32]!
+ vld1.8 {d0, d1}, [r4:64]!
+ vld1.8 {d0, d1}, [r4:128]!
+ vld1.8 {d0, d1}, [r4:256]!
+
+@ CHECK: vld1.8 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x0a]
+@ CHECK: vld1.8 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1}, [r4], r6
+ vld1.8 {d0, d1}, [r4:16], r6
+ vld1.8 {d0, d1}, [r4:32], r6
+ vld1.8 {d0, d1}, [r4:64], r6
+ vld1.8 {d0, d1}, [r4:128], r6
+ vld1.8 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x0a]
+@ CHECK: vld1.8 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1, d2}, [r4]
+ vld1.8 {d0, d1, d2}, [r4:16]
+ vld1.8 {d0, d1, d2}, [r4:32]
+ vld1.8 {d0, d1, d2}, [r4:64]
+ vld1.8 {d0, d1, d2}, [r4:128]
+ vld1.8 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld1.8 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0x0f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1, d2}, [r4]!
+ vld1.8 {d0, d1, d2}, [r4:16]!
+ vld1.8 {d0, d1, d2}, [r4:32]!
+ vld1.8 {d0, d1, d2}, [r4:64]!
+ vld1.8 {d0, d1, d2}, [r4:128]!
+ vld1.8 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld1.8 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1, d2}, [r4], r6
+ vld1.8 {d0, d1, d2}, [r4:16], r6
+ vld1.8 {d0, d1, d2}, [r4:32], r6
+ vld1.8 {d0, d1, d2}, [r4:64], r6
+ vld1.8 {d0, d1, d2}, [r4:128], r6
+ vld1.8 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0, d1, d2, d3}, [r4]
+ vld1.8 {d0, d1, d2, d3}, [r4:16]
+ vld1.8 {d0, d1, d2, d3}, [r4:32]
+ vld1.8 {d0, d1, d2, d3}, [r4:64]
+ vld1.8 {d0, d1, d2, d3}, [r4:128]
+ vld1.8 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x0f,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x02]
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x02]
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0x3f,0x02]
+
+ vld1.8 {d0, d1, d2, d3}, [r4]!
+ vld1.8 {d0, d1, d2, d3}, [r4:16]!
+ vld1.8 {d0, d1, d2, d3}, [r4:32]!
+ vld1.8 {d0, d1, d2, d3}, [r4:64]!
+ vld1.8 {d0, d1, d2, d3}, [r4:128]!
+ vld1.8 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x02]
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x02]
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0x3d,0x02]
+
+ vld1.8 {d0, d1, d2, d3}, [r4], r6
+ vld1.8 {d0, d1, d2, d3}, [r4:16], r6
+ vld1.8 {d0, d1, d2, d3}, [r4:32], r6
+ vld1.8 {d0, d1, d2, d3}, [r4:64], r6
+ vld1.8 {d0, d1, d2, d3}, [r4:128], r6
+ vld1.8 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.8 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x02]
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x02]
+@ CHECK: vld1.8 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0x36,0x02]
+
+ vld1.8 {d0[2]}, [r4]
+ vld1.8 {d0[2]}, [r4:16]
+ vld1.8 {d0[2]}, [r4:32]
+ vld1.8 {d0[2]}, [r4:64]
+ vld1.8 {d0[2]}, [r4:128]
+ vld1.8 {d0[2]}, [r4:256]
+
+@ CHECK: vld1.8 {d0[2]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x00]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[2]}, [r4]!
+ vld1.8 {d0[2]}, [r4:16]!
+ vld1.8 {d0[2]}, [r4:32]!
+ vld1.8 {d0[2]}, [r4:64]!
+ vld1.8 {d0[2]}, [r4:128]!
+ vld1.8 {d0[2]}, [r4:256]!
+
+@ CHECK: vld1.8 {d0[2]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x00]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[2]}, [r4], r6
+ vld1.8 {d0[2]}, [r4:16], r6
+ vld1.8 {d0[2]}, [r4:32], r6
+ vld1.8 {d0[2]}, [r4:64], r6
+ vld1.8 {d0[2]}, [r4:128], r6
+ vld1.8 {d0[2]}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0[2]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x00]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[]}, [r4]
+ vld1.8 {d0[]}, [r4:16]
+ vld1.8 {d0[]}, [r4:32]
+ vld1.8 {d0[]}, [r4:64]
+ vld1.8 {d0[]}, [r4:128]
+ vld1.8 {d0[]}, [r4:256]
+
+@ CHECK: vld1.8 {d0[]}, [r4] @ encoding: [0xa4,0xf9,0x0f,0x0c]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[]}, [r4]!
+ vld1.8 {d0[]}, [r4:16]!
+ vld1.8 {d0[]}, [r4:32]!
+ vld1.8 {d0[]}, [r4:64]!
+ vld1.8 {d0[]}, [r4:128]!
+ vld1.8 {d0[]}, [r4:256]!
+
+@ CHECK: vld1.8 {d0[]}, [r4]! @ encoding: [0xa4,0xf9,0x0d,0x0c]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[]}, [r4], r6
+ vld1.8 {d0[]}, [r4:16], r6
+ vld1.8 {d0[]}, [r4:32], r6
+ vld1.8 {d0[]}, [r4:64], r6
+ vld1.8 {d0[]}, [r4:128], r6
+ vld1.8 {d0[]}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x06,0x0c]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[], d1[]}, [r4]
+ vld1.8 {d0[], d1[]}, [r4:16]
+ vld1.8 {d0[], d1[]}, [r4:32]
+ vld1.8 {d0[], d1[]}, [r4:64]
+ vld1.8 {d0[], d1[]}, [r4:128]
+ vld1.8 {d0[], d1[]}, [r4:256]
+
+@ CHECK: vld1.8 {d0[], d1[]}, [r4] @ encoding: [0xa4,0xf9,0x2f,0x0c]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[], d1[]}, [r4]!
+ vld1.8 {d0[], d1[]}, [r4:16]!
+ vld1.8 {d0[], d1[]}, [r4:32]!
+ vld1.8 {d0[], d1[]}, [r4:64]!
+ vld1.8 {d0[], d1[]}, [r4:128]!
+ vld1.8 {d0[], d1[]}, [r4:256]!
+
+@ CHECK: vld1.8 {d0[], d1[]}, [r4]! @ encoding: [0xa4,0xf9,0x2d,0x0c]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.8 {d0[], d1[]}, [r4], r6
+ vld1.8 {d0[], d1[]}, [r4:16], r6
+ vld1.8 {d0[], d1[]}, [r4:32], r6
+ vld1.8 {d0[], d1[]}, [r4:64], r6
+ vld1.8 {d0[], d1[]}, [r4:128], r6
+ vld1.8 {d0[], d1[]}, [r4:256], r6
+
+@ CHECK: vld1.8 {d0[], d1[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x26,0x0c]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld1.8 {d0[], d1[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0}, [r4]
+ vld1.16 {d0}, [r4:16]
+ vld1.16 {d0}, [r4:32]
+ vld1.16 {d0}, [r4:64]
+ vld1.16 {d0}, [r4:128]
+ vld1.16 {d0}, [r4:256]
+
+@ CHECK: vld1.16 {d0}, [r4] @ encoding: [0x24,0xf9,0x4f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0}, [r4]!
+ vld1.16 {d0}, [r4:16]!
+ vld1.16 {d0}, [r4:32]!
+ vld1.16 {d0}, [r4:64]!
+ vld1.16 {d0}, [r4:128]!
+ vld1.16 {d0}, [r4:256]!
+
+@ CHECK: vld1.16 {d0}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0}, [r4], r6
+ vld1.16 {d0}, [r4:16], r6
+ vld1.16 {d0}, [r4:32], r6
+ vld1.16 {d0}, [r4:64], r6
+ vld1.16 {d0}, [r4:128], r6
+ vld1.16 {d0}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1}, [r4]
+ vld1.16 {d0, d1}, [r4:16]
+ vld1.16 {d0, d1}, [r4:32]
+ vld1.16 {d0, d1}, [r4:64]
+ vld1.16 {d0, d1}, [r4:128]
+ vld1.16 {d0, d1}, [r4:256]
+
+@ CHECK: vld1.16 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0x4f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x0a]
+@ CHECK: vld1.16 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1}, [r4]!
+ vld1.16 {d0, d1}, [r4:16]!
+ vld1.16 {d0, d1}, [r4:32]!
+ vld1.16 {d0, d1}, [r4:64]!
+ vld1.16 {d0, d1}, [r4:128]!
+ vld1.16 {d0, d1}, [r4:256]!
+
+@ CHECK: vld1.16 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x0a]
+@ CHECK: vld1.16 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1}, [r4], r6
+ vld1.16 {d0, d1}, [r4:16], r6
+ vld1.16 {d0, d1}, [r4:32], r6
+ vld1.16 {d0, d1}, [r4:64], r6
+ vld1.16 {d0, d1}, [r4:128], r6
+ vld1.16 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x0a]
+@ CHECK: vld1.16 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1, d2}, [r4]
+ vld1.16 {d0, d1, d2}, [r4:16]
+ vld1.16 {d0, d1, d2}, [r4:32]
+ vld1.16 {d0, d1, d2}, [r4:64]
+ vld1.16 {d0, d1, d2}, [r4:128]
+ vld1.16 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld1.16 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0x4f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1, d2}, [r4]!
+ vld1.16 {d0, d1, d2}, [r4:16]!
+ vld1.16 {d0, d1, d2}, [r4:32]!
+ vld1.16 {d0, d1, d2}, [r4:64]!
+ vld1.16 {d0, d1, d2}, [r4:128]!
+ vld1.16 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld1.16 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1, d2}, [r4], r6
+ vld1.16 {d0, d1, d2}, [r4:16], r6
+ vld1.16 {d0, d1, d2}, [r4:32], r6
+ vld1.16 {d0, d1, d2}, [r4:64], r6
+ vld1.16 {d0, d1, d2}, [r4:128], r6
+ vld1.16 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0, d1, d2, d3}, [r4]
+ vld1.16 {d0, d1, d2, d3}, [r4:16]
+ vld1.16 {d0, d1, d2, d3}, [r4:32]
+ vld1.16 {d0, d1, d2, d3}, [r4:64]
+ vld1.16 {d0, d1, d2, d3}, [r4:128]
+ vld1.16 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x4f,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x02]
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x02]
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0x7f,0x02]
+
+ vld1.16 {d0, d1, d2, d3}, [r4]!
+ vld1.16 {d0, d1, d2, d3}, [r4:16]!
+ vld1.16 {d0, d1, d2, d3}, [r4:32]!
+ vld1.16 {d0, d1, d2, d3}, [r4:64]!
+ vld1.16 {d0, d1, d2, d3}, [r4:128]!
+ vld1.16 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x02]
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x02]
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0x7d,0x02]
+
+ vld1.16 {d0, d1, d2, d3}, [r4], r6
+ vld1.16 {d0, d1, d2, d3}, [r4:16], r6
+ vld1.16 {d0, d1, d2, d3}, [r4:32], r6
+ vld1.16 {d0, d1, d2, d3}, [r4:64], r6
+ vld1.16 {d0, d1, d2, d3}, [r4:128], r6
+ vld1.16 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.16 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x02]
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x02]
+@ CHECK: vld1.16 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0x76,0x02]
+
+ vld1.16 {d0[2]}, [r4]
+ vld1.16 {d0[2]}, [r4:16]
+ vld1.16 {d0[2]}, [r4:32]
+ vld1.16 {d0[2]}, [r4:64]
+ vld1.16 {d0[2]}, [r4:128]
+ vld1.16 {d0[2]}, [r4:256]
+
+@ CHECK: vld1.16 {d0[2]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x04]
+@ CHECK: vld1.16 {d0[2]}, [r4:16] @ encoding: [0xa4,0xf9,0x9f,0x04]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[2]}, [r4]!
+ vld1.16 {d0[2]}, [r4:16]!
+ vld1.16 {d0[2]}, [r4:32]!
+ vld1.16 {d0[2]}, [r4:64]!
+ vld1.16 {d0[2]}, [r4:128]!
+ vld1.16 {d0[2]}, [r4:256]!
+
+@ CHECK: vld1.16 {d0[2]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x04]
+@ CHECK: vld1.16 {d0[2]}, [r4:16]! @ encoding: [0xa4,0xf9,0x9d,0x04]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[2]}, [r4], r6
+ vld1.16 {d0[2]}, [r4:16], r6
+ vld1.16 {d0[2]}, [r4:32], r6
+ vld1.16 {d0[2]}, [r4:64], r6
+ vld1.16 {d0[2]}, [r4:128], r6
+ vld1.16 {d0[2]}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0[2]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x04]
+@ CHECK: vld1.16 {d0[2]}, [r4:16], r6 @ encoding: [0xa4,0xf9,0x96,0x04]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[]}, [r4]
+ vld1.16 {d0[]}, [r4:16]
+ vld1.16 {d0[]}, [r4:32]
+ vld1.16 {d0[]}, [r4:64]
+ vld1.16 {d0[]}, [r4:128]
+ vld1.16 {d0[]}, [r4:256]
+
+@ CHECK: vld1.16 {d0[]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x0c]
+@ CHECK: vld1.16 {d0[]}, [r4:16] @ encoding: [0xa4,0xf9,0x5f,0x0c]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[]}, [r4]!
+ vld1.16 {d0[]}, [r4:16]!
+ vld1.16 {d0[]}, [r4:32]!
+ vld1.16 {d0[]}, [r4:64]!
+ vld1.16 {d0[]}, [r4:128]!
+ vld1.16 {d0[]}, [r4:256]!
+
+@ CHECK: vld1.16 {d0[]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x0c]
+@ CHECK: vld1.16 {d0[]}, [r4:16]! @ encoding: [0xa4,0xf9,0x5d,0x0c]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[]}, [r4], r6
+ vld1.16 {d0[]}, [r4:16], r6
+ vld1.16 {d0[]}, [r4:32], r6
+ vld1.16 {d0[]}, [r4:64], r6
+ vld1.16 {d0[]}, [r4:128], r6
+ vld1.16 {d0[]}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x0c]
+@ CHECK: vld1.16 {d0[]}, [r4:16], r6 @ encoding: [0xa4,0xf9,0x56,0x0c]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[], d1[]}, [r4]
+ vld1.16 {d0[], d1[]}, [r4:16]
+ vld1.16 {d0[], d1[]}, [r4:32]
+ vld1.16 {d0[], d1[]}, [r4:64]
+ vld1.16 {d0[], d1[]}, [r4:128]
+ vld1.16 {d0[], d1[]}, [r4:256]
+
+@ CHECK: vld1.16 {d0[], d1[]}, [r4] @ encoding: [0xa4,0xf9,0x6f,0x0c]
+@ CHECK: vld1.16 {d0[], d1[]}, [r4:16] @ encoding: [0xa4,0xf9,0x7f,0x0c]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[], d1[]}, [r4]!
+ vld1.16 {d0[], d1[]}, [r4:16]!
+ vld1.16 {d0[], d1[]}, [r4:32]!
+ vld1.16 {d0[], d1[]}, [r4:64]!
+ vld1.16 {d0[], d1[]}, [r4:128]!
+ vld1.16 {d0[], d1[]}, [r4:256]!
+
+@ CHECK: vld1.16 {d0[], d1[]}, [r4]! @ encoding: [0xa4,0xf9,0x6d,0x0c]
+@ CHECK: vld1.16 {d0[], d1[]}, [r4:16]! @ encoding: [0xa4,0xf9,0x7d,0x0c]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.16 {d0[], d1[]}, [r4], r6
+ vld1.16 {d0[], d1[]}, [r4:16], r6
+ vld1.16 {d0[], d1[]}, [r4:32], r6
+ vld1.16 {d0[], d1[]}, [r4:64], r6
+ vld1.16 {d0[], d1[]}, [r4:128], r6
+ vld1.16 {d0[], d1[]}, [r4:256], r6
+
+@ CHECK: vld1.16 {d0[], d1[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x66,0x0c]
+@ CHECK: vld1.16 {d0[], d1[]}, [r4:16], r6 @ encoding: [0xa4,0xf9,0x76,0x0c]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld1.16 {d0[], d1[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0}, [r4]
+ vld1.32 {d0}, [r4:16]
+ vld1.32 {d0}, [r4:32]
+ vld1.32 {d0}, [r4:64]
+ vld1.32 {d0}, [r4:128]
+ vld1.32 {d0}, [r4:256]
+
+@ CHECK: vld1.32 {d0}, [r4] @ encoding: [0x24,0xf9,0x8f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0}, [r4]!
+ vld1.32 {d0}, [r4:16]!
+ vld1.32 {d0}, [r4:32]!
+ vld1.32 {d0}, [r4:64]!
+ vld1.32 {d0}, [r4:128]!
+ vld1.32 {d0}, [r4:256]!
+
+@ CHECK: vld1.32 {d0}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0}, [r4], r6
+ vld1.32 {d0}, [r4:16], r6
+ vld1.32 {d0}, [r4:32], r6
+ vld1.32 {d0}, [r4:64], r6
+ vld1.32 {d0}, [r4:128], r6
+ vld1.32 {d0}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0, d1}, [r4]
+ vld1.32 {d0, d1}, [r4:16]
+ vld1.32 {d0, d1}, [r4:32]
+ vld1.32 {d0, d1}, [r4:64]
+ vld1.32 {d0, d1}, [r4:128]
+ vld1.32 {d0, d1}, [r4:256]
+
+@ CHECK: vld1.32 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0x8f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x0a]
+@ CHECK: vld1.32 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0, d1}, [r4]!
+ vld1.32 {d0, d1}, [r4:16]!
+ vld1.32 {d0, d1}, [r4:32]!
+ vld1.32 {d0, d1}, [r4:64]!
+ vld1.32 {d0, d1}, [r4:128]!
+ vld1.32 {d0, d1}, [r4:256]!
+
+@ CHECK: vld1.32 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x0a]
+@ CHECK: vld1.32 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0, d1}, [r4], r6
+ vld1.32 {d0, d1}, [r4:16], r6
+ vld1.32 {d0, d1}, [r4:32], r6
+ vld1.32 {d0, d1}, [r4:64], r6
+ vld1.32 {d0, d1}, [r4:128], r6
+ vld1.32 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x0a]
+@ CHECK: vld1.32 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
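+@ vld1 with a list of three D registers: alignment must be 64 or omitted.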
+ vld1.32 {d0, d1, d2}, [r4]
+ vld1.32 {d0, d1, d2}, [r4:16]
+ vld1.32 {d0, d1, d2}, [r4:32]
+ vld1.32 {d0, d1, d2}, [r4:64]
+ vld1.32 {d0, d1, d2}, [r4:128]
+ vld1.32 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld1.32 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0x8f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0, d1, d2}, [r4]!
+ vld1.32 {d0, d1, d2}, [r4:16]!
+ vld1.32 {d0, d1, d2}, [r4:32]!
+ vld1.32 {d0, d1, d2}, [r4:64]!
+ vld1.32 {d0, d1, d2}, [r4:128]!
+ vld1.32 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld1.32 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0, d1, d2}, [r4], r6
+ vld1.32 {d0, d1, d2}, [r4:16], r6
+ vld1.32 {d0, d1, d2}, [r4:32], r6
+ vld1.32 {d0, d1, d2}, [r4:64], r6
+ vld1.32 {d0, d1, d2}, [r4:128], r6
+ vld1.32 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
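+@ vld1 with a list of four D registers: alignment must be 64, 128, 256, or omitted.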
+ vld1.32 {d0, d1, d2, d3}, [r4]
+ vld1.32 {d0, d1, d2, d3}, [r4:16]
+ vld1.32 {d0, d1, d2, d3}, [r4:32]
+ vld1.32 {d0, d1, d2, d3}, [r4:64]
+ vld1.32 {d0, d1, d2, d3}, [r4:128]
+ vld1.32 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x8f,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x02]
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x02]
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0xbf,0x02]
+
+ vld1.32 {d0, d1, d2, d3}, [r4]!
+ vld1.32 {d0, d1, d2, d3}, [r4:16]!
+ vld1.32 {d0, d1, d2, d3}, [r4:32]!
+ vld1.32 {d0, d1, d2, d3}, [r4:64]!
+ vld1.32 {d0, d1, d2, d3}, [r4:128]!
+ vld1.32 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x02]
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x02]
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0xbd,0x02]
+
+ vld1.32 {d0, d1, d2, d3}, [r4], r6
+ vld1.32 {d0, d1, d2, d3}, [r4:16], r6
+ vld1.32 {d0, d1, d2, d3}, [r4:32], r6
+ vld1.32 {d0, d1, d2, d3}, [r4:64], r6
+ vld1.32 {d0, d1, d2, d3}, [r4:128], r6
+ vld1.32 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.32 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x02]
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x02]
+@ CHECK: vld1.32 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0xb6,0x02]
+
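+@ vld1.32 to a single lane: alignment must be 32 or omitted.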
+ vld1.32 {d0[1]}, [r4]
+ vld1.32 {d0[1]}, [r4:16]
+ vld1.32 {d0[1]}, [r4:32]
+ vld1.32 {d0[1]}, [r4:64]
+ vld1.32 {d0[1]}, [r4:128]
+ vld1.32 {d0[1]}, [r4:256]
+
+@ CHECK: vld1.32 {d0[1]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[1]}, [r4:32] @ encoding: [0xa4,0xf9,0xbf,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0[1]}, [r4]!
+ vld1.32 {d0[1]}, [r4:16]!
+ vld1.32 {d0[1]}, [r4:32]!
+ vld1.32 {d0[1]}, [r4:64]!
+ vld1.32 {d0[1]}, [r4:128]!
+ vld1.32 {d0[1]}, [r4:256]!
+
+@ CHECK: vld1.32 {d0[1]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[1]}, [r4:32]! @ encoding: [0xa4,0xf9,0xbd,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0[1]}, [r4], r6
+ vld1.32 {d0[1]}, [r4:16], r6
+ vld1.32 {d0[1]}, [r4:32], r6
+ vld1.32 {d0[1]}, [r4:64], r6
+ vld1.32 {d0[1]}, [r4:128], r6
+ vld1.32 {d0[1]}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[1]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0xb6,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
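+@ vld1.32 to all lanes of one D register: alignment must be 32 or omitted.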
+ vld1.32 {d0[]}, [r4]
+ vld1.32 {d0[]}, [r4:16]
+ vld1.32 {d0[]}, [r4:32]
+ vld1.32 {d0[]}, [r4:64]
+ vld1.32 {d0[]}, [r4:128]
+ vld1.32 {d0[]}, [r4:256]
+
+@ CHECK: vld1.32 {d0[]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[]}, [r4:32] @ encoding: [0xa4,0xf9,0x9f,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0[]}, [r4]!
+ vld1.32 {d0[]}, [r4:16]!
+ vld1.32 {d0[]}, [r4:32]!
+ vld1.32 {d0[]}, [r4:64]!
+ vld1.32 {d0[]}, [r4:128]!
+ vld1.32 {d0[]}, [r4:256]!
+
+@ CHECK: vld1.32 {d0[]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[]}, [r4:32]! @ encoding: [0xa4,0xf9,0x9d,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0[]}, [r4], r6
+ vld1.32 {d0[]}, [r4:16], r6
+ vld1.32 {d0[]}, [r4:32], r6
+ vld1.32 {d0[]}, [r4:64], r6
+ vld1.32 {d0[]}, [r4:128], r6
+ vld1.32 {d0[]}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x96,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
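+@ vld1.32 to all lanes of two D registers: alignment must still be 32 or omitted.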
+ vld1.32 {d0[], d1[]}, [r4]
+ vld1.32 {d0[], d1[]}, [r4:16]
+ vld1.32 {d0[], d1[]}, [r4:32]
+ vld1.32 {d0[], d1[]}, [r4:64]
+ vld1.32 {d0[], d1[]}, [r4:128]
+ vld1.32 {d0[], d1[]}, [r4:256]
+
+@ CHECK: vld1.32 {d0[], d1[]}, [r4] @ encoding: [0xa4,0xf9,0xaf,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[], d1[]}, [r4:32] @ encoding: [0xa4,0xf9,0xbf,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0[], d1[]}, [r4]!
+ vld1.32 {d0[], d1[]}, [r4:16]!
+ vld1.32 {d0[], d1[]}, [r4:32]!
+ vld1.32 {d0[], d1[]}, [r4:64]!
+ vld1.32 {d0[], d1[]}, [r4:128]!
+ vld1.32 {d0[], d1[]}, [r4:256]!
+
+@ CHECK: vld1.32 {d0[], d1[]}, [r4]! @ encoding: [0xa4,0xf9,0xad,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[], d1[]}, [r4:32]! @ encoding: [0xa4,0xf9,0xbd,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.32 {d0[], d1[]}, [r4], r6
+ vld1.32 {d0[], d1[]}, [r4:16], r6
+ vld1.32 {d0[], d1[]}, [r4:32], r6
+ vld1.32 {d0[], d1[]}, [r4:64], r6
+ vld1.32 {d0[], d1[]}, [r4:128], r6
+ vld1.32 {d0[], d1[]}, [r4:256], r6
+
+@ CHECK: vld1.32 {d0[], d1[]}, [r4], r6 @ encoding: [0xa4,0xf9,0xa6,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.32 {d0[], d1[]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0xb6,0x0c]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld1.32 {d0[], d1[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
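+@ vld1.64: the element size does not change the rules; the alignment limits
+@ depend only on the register-list length, exactly as for vld1.32 above.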
+ vld1.64 {d0}, [r4]
+ vld1.64 {d0}, [r4:16]
+ vld1.64 {d0}, [r4:32]
+ vld1.64 {d0}, [r4:64]
+ vld1.64 {d0}, [r4:128]
+ vld1.64 {d0}, [r4:256]
+
+@ CHECK: vld1.64 {d0}, [r4] @ encoding: [0x24,0xf9,0xcf,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0}, [r4:64] @ encoding: [0x24,0xf9,0xdf,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0}, [r4]!
+ vld1.64 {d0}, [r4:16]!
+ vld1.64 {d0}, [r4:32]!
+ vld1.64 {d0}, [r4:64]!
+ vld1.64 {d0}, [r4:128]!
+ vld1.64 {d0}, [r4:256]!
+
+@ CHECK: vld1.64 {d0}, [r4]! @ encoding: [0x24,0xf9,0xcd,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0}, [r4:64]! @ encoding: [0x24,0xf9,0xdd,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0}, [r4], r6
+ vld1.64 {d0}, [r4:16], r6
+ vld1.64 {d0}, [r4:32], r6
+ vld1.64 {d0}, [r4:64], r6
+ vld1.64 {d0}, [r4:128], r6
+ vld1.64 {d0}, [r4:256], r6
+
+@ CHECK: vld1.64 {d0}, [r4], r6 @ encoding: [0x24,0xf9,0xc6,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0}, [r4:64], r6 @ encoding: [0x24,0xf9,0xd6,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1}, [r4]
+ vld1.64 {d0, d1}, [r4:16]
+ vld1.64 {d0, d1}, [r4:32]
+ vld1.64 {d0, d1}, [r4:64]
+ vld1.64 {d0, d1}, [r4:128]
+ vld1.64 {d0, d1}, [r4:256]
+
+@ CHECK: vld1.64 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0xcf,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0xdf,0x0a]
+@ CHECK: vld1.64 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0xef,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1}, [r4]!
+ vld1.64 {d0, d1}, [r4:16]!
+ vld1.64 {d0, d1}, [r4:32]!
+ vld1.64 {d0, d1}, [r4:64]!
+ vld1.64 {d0, d1}, [r4:128]!
+ vld1.64 {d0, d1}, [r4:256]!
+
+@ CHECK: vld1.64 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0xcd,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0xdd,0x0a]
+@ CHECK: vld1.64 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0xed,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1}, [r4], r6
+ vld1.64 {d0, d1}, [r4:16], r6
+ vld1.64 {d0, d1}, [r4:32], r6
+ vld1.64 {d0, d1}, [r4:64], r6
+ vld1.64 {d0, d1}, [r4:128], r6
+ vld1.64 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld1.64 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0xc6,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0xd6,0x0a]
+@ CHECK: vld1.64 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0xe6,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1, d2}, [r4]
+ vld1.64 {d0, d1, d2}, [r4:16]
+ vld1.64 {d0, d1, d2}, [r4:32]
+ vld1.64 {d0, d1, d2}, [r4:64]
+ vld1.64 {d0, d1, d2}, [r4:128]
+ vld1.64 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld1.64 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0xcf,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0xdf,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1, d2}, [r4]!
+ vld1.64 {d0, d1, d2}, [r4:16]!
+ vld1.64 {d0, d1, d2}, [r4:32]!
+ vld1.64 {d0, d1, d2}, [r4:64]!
+ vld1.64 {d0, d1, d2}, [r4:128]!
+ vld1.64 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld1.64 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0xcd,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0xdd,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1, d2}, [r4], r6
+ vld1.64 {d0, d1, d2}, [r4:16], r6
+ vld1.64 {d0, d1, d2}, [r4:32], r6
+ vld1.64 {d0, d1, d2}, [r4:64], r6
+ vld1.64 {d0, d1, d2}, [r4:128], r6
+ vld1.64 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld1.64 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0xc6,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0xd6,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld1.64 {d0, d1, d2, d3}, [r4]
+ vld1.64 {d0, d1, d2, d3}, [r4:16]
+ vld1.64 {d0, d1, d2, d3}, [r4:32]
+ vld1.64 {d0, d1, d2, d3}, [r4:64]
+ vld1.64 {d0, d1, d2, d3}, [r4:128]
+ vld1.64 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0xcf,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0xdf,0x02]
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0xef,0x02]
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0xff,0x02]
+
+ vld1.64 {d0, d1, d2, d3}, [r4]!
+ vld1.64 {d0, d1, d2, d3}, [r4:16]!
+ vld1.64 {d0, d1, d2, d3}, [r4:32]!
+ vld1.64 {d0, d1, d2, d3}, [r4:64]!
+ vld1.64 {d0, d1, d2, d3}, [r4:128]!
+ vld1.64 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0xcd,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0xdd,0x02]
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0xed,0x02]
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0xfd,0x02]
+
+ vld1.64 {d0, d1, d2, d3}, [r4], r6
+ vld1.64 {d0, d1, d2, d3}, [r4:16], r6
+ vld1.64 {d0, d1, d2, d3}, [r4:32], r6
+ vld1.64 {d0, d1, d2, d3}, [r4:64], r6
+ vld1.64 {d0, d1, d2, d3}, [r4:128], r6
+ vld1.64 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0xc6,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld1.64 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0xd6,0x02]
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0xe6,0x02]
+@ CHECK: vld1.64 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0xf6,0x02]
+
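+@ vld2 with a pair of D registers (adjacent or even-odd spaced): alignment
+@ must be 64, 128, or omitted.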
+ vld2.8 {d0, d1}, [r4]
+ vld2.8 {d0, d1}, [r4:16]
+ vld2.8 {d0, d1}, [r4:32]
+ vld2.8 {d0, d1}, [r4:64]
+ vld2.8 {d0, d1}, [r4:128]
+ vld2.8 {d0, d1}, [r4:256]
+
+@ CHECK: vld2.8 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0x0f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x08]
+@ CHECK: vld2.8 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0, d1}, [r4]!
+ vld2.8 {d0, d1}, [r4:16]!
+ vld2.8 {d0, d1}, [r4:32]!
+ vld2.8 {d0, d1}, [r4:64]!
+ vld2.8 {d0, d1}, [r4:128]!
+ vld2.8 {d0, d1}, [r4:256]!
+
+@ CHECK: vld2.8 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x08]
+@ CHECK: vld2.8 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0, d1}, [r4], r6
+ vld2.8 {d0, d1}, [r4:16], r6
+ vld2.8 {d0, d1}, [r4:32], r6
+ vld2.8 {d0, d1}, [r4:64], r6
+ vld2.8 {d0, d1}, [r4:128], r6
+ vld2.8 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld2.8 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x08]
+@ CHECK: vld2.8 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0, d2}, [r4]
+ vld2.8 {d0, d2}, [r4:16]
+ vld2.8 {d0, d2}, [r4:32]
+ vld2.8 {d0, d2}, [r4:64]
+ vld2.8 {d0, d2}, [r4:128]
+ vld2.8 {d0, d2}, [r4:256]
+
+@ CHECK: vld2.8 {d0, d2}, [r4] @ encoding: [0x24,0xf9,0x0f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d2}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x09]
+@ CHECK: vld2.8 {d0, d2}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0, d2}, [r4]!
+ vld2.8 {d0, d2}, [r4:16]!
+ vld2.8 {d0, d2}, [r4:32]!
+ vld2.8 {d0, d2}, [r4:64]!
+ vld2.8 {d0, d2}, [r4:128]!
+ vld2.8 {d0, d2}, [r4:256]!
+
+@ CHECK: vld2.8 {d0, d2}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x09]
+@ CHECK: vld2.8 {d0, d2}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0, d2}, [r4], r6
+ vld2.8 {d0, d2}, [r4:16], r6
+ vld2.8 {d0, d2}, [r4:32], r6
+ vld2.8 {d0, d2}, [r4:64], r6
+ vld2.8 {d0, d2}, [r4:128], r6
+ vld2.8 {d0, d2}, [r4:256], r6
+
+@ CHECK: vld2.8 {d0, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x09]
+@ CHECK: vld2.8 {d0, d2}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
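+@ vld2 with four D registers: alignment must be 64, 128, 256, or omitted.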
+ vld2.8 {d0, d1, d2, d3}, [r4]
+ vld2.8 {d0, d1, d2, d3}, [r4:16]
+ vld2.8 {d0, d1, d2, d3}, [r4:32]
+ vld2.8 {d0, d1, d2, d3}, [r4:64]
+ vld2.8 {d0, d1, d2, d3}, [r4:128]
+ vld2.8 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x0f,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x03]
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x03]
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0x3f,0x03]
+
+ vld2.8 {d0, d1, d2, d3}, [r4]!
+ vld2.8 {d0, d1, d2, d3}, [r4:16]!
+ vld2.8 {d0, d1, d2, d3}, [r4:32]!
+ vld2.8 {d0, d1, d2, d3}, [r4:64]!
+ vld2.8 {d0, d1, d2, d3}, [r4:128]!
+ vld2.8 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x03]
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x03]
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0x3d,0x03]
+
+ vld2.8 {d0, d1, d2, d3}, [r4], r6
+ vld2.8 {d0, d1, d2, d3}, [r4:16], r6
+ vld2.8 {d0, d1, d2, d3}, [r4:32], r6
+ vld2.8 {d0, d1, d2, d3}, [r4:64], r6
+ vld2.8 {d0, d1, d2, d3}, [r4:128], r6
+ vld2.8 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.8 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x03]
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x03]
+@ CHECK: vld2.8 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0x36,0x03]
+
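+@ vld2.8 to a single lane transfers two 8-bit elements, so alignment must be
+@ 16 or omitted.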
+ vld2.8 {d0[2], d1[2]}, [r4]
+ vld2.8 {d0[2], d1[2]}, [r4:16]
+ vld2.8 {d0[2], d1[2]}, [r4:32]
+ vld2.8 {d0[2], d1[2]}, [r4:64]
+ vld2.8 {d0[2], d1[2]}, [r4:128]
+ vld2.8 {d0[2], d1[2]}, [r4:256]
+
+@ CHECK: vld2.8 {d0[2], d1[2]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x01]
+@ CHECK: vld2.8 {d0[2], d1[2]}, [r4:16] @ encoding: [0xa4,0xf9,0x5f,0x01]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[2], d1[2]}, [r4]!
+ vld2.8 {d0[2], d1[2]}, [r4:16]!
+ vld2.8 {d0[2], d1[2]}, [r4:32]!
+ vld2.8 {d0[2], d1[2]}, [r4:64]!
+ vld2.8 {d0[2], d1[2]}, [r4:128]!
+ vld2.8 {d0[2], d1[2]}, [r4:256]!
+
+@ CHECK: vld2.8 {d0[2], d1[2]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x01]
+@ CHECK: vld2.8 {d0[2], d1[2]}, [r4:16]! @ encoding: [0xa4,0xf9,0x5d,0x01]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[2], d1[2]}, [r4], r6
+ vld2.8 {d0[2], d1[2]}, [r4:16], r6
+ vld2.8 {d0[2], d1[2]}, [r4:32], r6
+ vld2.8 {d0[2], d1[2]}, [r4:64], r6
+ vld2.8 {d0[2], d1[2]}, [r4:128], r6
+ vld2.8 {d0[2], d1[2]}, [r4:256], r6
+
+@ CHECK: vld2.8 {d0[2], d1[2]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x01]
+@ CHECK: vld2.8 {d0[2], d1[2]}, [r4:16], r6 @ encoding: [0xa4,0xf9,0x56,0x01]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[2], d1[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
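+@ vld2.8 to all lanes: likewise, alignment must be 16 or omitted.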
+ vld2.8 {d0[], d1[]}, [r4]
+ vld2.8 {d0[], d1[]}, [r4:16]
+ vld2.8 {d0[], d1[]}, [r4:32]
+ vld2.8 {d0[], d1[]}, [r4:64]
+ vld2.8 {d0[], d1[]}, [r4:128]
+ vld2.8 {d0[], d1[]}, [r4:256]
+
+@ CHECK: vld2.8 {d0[], d1[]}, [r4] @ encoding: [0xa4,0xf9,0x0f,0x0d]
+@ CHECK: vld2.8 {d0[], d1[]}, [r4:16] @ encoding: [0xa4,0xf9,0x1f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[], d1[]}, [r4]!
+ vld2.8 {d0[], d1[]}, [r4:16]!
+ vld2.8 {d0[], d1[]}, [r4:32]!
+ vld2.8 {d0[], d1[]}, [r4:64]!
+ vld2.8 {d0[], d1[]}, [r4:128]!
+ vld2.8 {d0[], d1[]}, [r4:256]!
+
+@ CHECK: vld2.8 {d0[], d1[]}, [r4]! @ encoding: [0xa4,0xf9,0x0d,0x0d]
+@ CHECK: vld2.8 {d0[], d1[]}, [r4:16]! @ encoding: [0xa4,0xf9,0x1d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[], d1[]}, [r4], r6
+ vld2.8 {d0[], d1[]}, [r4:16], r6
+ vld2.8 {d0[], d1[]}, [r4:32], r6
+ vld2.8 {d0[], d1[]}, [r4:64], r6
+ vld2.8 {d0[], d1[]}, [r4:128], r6
+ vld2.8 {d0[], d1[]}, [r4:256], r6
+
+@ CHECK: vld2.8 {d0[], d1[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x06,0x0d]
+@ CHECK: vld2.8 {d0[], d1[]}, [r4:16], r6 @ encoding: [0xa4,0xf9,0x16,0x0d]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d1[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[], d2[]}, [r4]
+ vld2.8 {d0[], d2[]}, [r4:16]
+ vld2.8 {d0[], d2[]}, [r4:32]
+ vld2.8 {d0[], d2[]}, [r4:64]
+ vld2.8 {d0[], d2[]}, [r4:128]
+ vld2.8 {d0[], d2[]}, [r4:256]
+
+@ CHECK: vld2.8 {d0[], d2[]}, [r4] @ encoding: [0xa4,0xf9,0x2f,0x0d]
+@ CHECK: vld2.8 {d0[], d2[]}, [r4:16] @ encoding: [0xa4,0xf9,0x3f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[], d2[]}, [r4]!
+ vld2.8 {d0[], d2[]}, [r4:16]!
+ vld2.8 {d0[], d2[]}, [r4:32]!
+ vld2.8 {d0[], d2[]}, [r4:64]!
+ vld2.8 {d0[], d2[]}, [r4:128]!
+ vld2.8 {d0[], d2[]}, [r4:256]!
+
+@ CHECK: vld2.8 {d0[], d2[]}, [r4]! @ encoding: [0xa4,0xf9,0x2d,0x0d]
+@ CHECK: vld2.8 {d0[], d2[]}, [r4:16]! @ encoding: [0xa4,0xf9,0x3d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.8 {d0[], d2[]}, [r4], r6
+ vld2.8 {d0[], d2[]}, [r4:16], r6
+ vld2.8 {d0[], d2[]}, [r4:32], r6
+ vld2.8 {d0[], d2[]}, [r4:64], r6
+ vld2.8 {d0[], d2[]}, [r4:128], r6
+ vld2.8 {d0[], d2[]}, [r4:256], r6
+
+@ CHECK: vld2.8 {d0[], d2[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x26,0x0d]
+@ CHECK: vld2.8 {d0[], d2[]}, [r4:16], r6 @ encoding: [0xa4,0xf9,0x36,0x0d]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vld2.8 {d0[], d2[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
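+@ vld2.16 full-register loads keep the same list-based rules: 64 or 128 for
+@ two registers, and 64, 128, or 256 for four.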
+ vld2.16 {d0, d1}, [r4]
+ vld2.16 {d0, d1}, [r4:16]
+ vld2.16 {d0, d1}, [r4:32]
+ vld2.16 {d0, d1}, [r4:64]
+ vld2.16 {d0, d1}, [r4:128]
+ vld2.16 {d0, d1}, [r4:256]
+
+@ CHECK: vld2.16 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0x4f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x08]
+@ CHECK: vld2.16 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0, d1}, [r4]!
+ vld2.16 {d0, d1}, [r4:16]!
+ vld2.16 {d0, d1}, [r4:32]!
+ vld2.16 {d0, d1}, [r4:64]!
+ vld2.16 {d0, d1}, [r4:128]!
+ vld2.16 {d0, d1}, [r4:256]!
+
+@ CHECK: vld2.16 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x08]
+@ CHECK: vld2.16 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0, d1}, [r4], r6
+ vld2.16 {d0, d1}, [r4:16], r6
+ vld2.16 {d0, d1}, [r4:32], r6
+ vld2.16 {d0, d1}, [r4:64], r6
+ vld2.16 {d0, d1}, [r4:128], r6
+ vld2.16 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x08]
+@ CHECK: vld2.16 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0, d2}, [r4]
+ vld2.16 {d0, d2}, [r4:16]
+ vld2.16 {d0, d2}, [r4:32]
+ vld2.16 {d0, d2}, [r4:64]
+ vld2.16 {d0, d2}, [r4:128]
+ vld2.16 {d0, d2}, [r4:256]
+
+@ CHECK: vld2.16 {d0, d2}, [r4] @ encoding: [0x24,0xf9,0x4f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d2}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x09]
+@ CHECK: vld2.16 {d0, d2}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0, d2}, [r4]!
+ vld2.16 {d0, d2}, [r4:16]!
+ vld2.16 {d0, d2}, [r4:32]!
+ vld2.16 {d0, d2}, [r4:64]!
+ vld2.16 {d0, d2}, [r4:128]!
+ vld2.16 {d0, d2}, [r4:256]!
+
+@ CHECK: vld2.16 {d0, d2}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x09]
+@ CHECK: vld2.16 {d0, d2}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0, d2}, [r4], r6
+ vld2.16 {d0, d2}, [r4:16], r6
+ vld2.16 {d0, d2}, [r4:32], r6
+ vld2.16 {d0, d2}, [r4:64], r6
+ vld2.16 {d0, d2}, [r4:128], r6
+ vld2.16 {d0, d2}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x09]
+@ CHECK: vld2.16 {d0, d2}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0, d1, d2, d3}, [r4]
+ vld2.16 {d0, d1, d2, d3}, [r4:16]
+ vld2.16 {d0, d1, d2, d3}, [r4:32]
+ vld2.16 {d0, d1, d2, d3}, [r4:64]
+ vld2.16 {d0, d1, d2, d3}, [r4:128]
+ vld2.16 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x4f,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x03]
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x03]
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0x7f,0x03]
+
+ vld2.16 {d0, d1, d2, d3}, [r4]!
+ vld2.16 {d0, d1, d2, d3}, [r4:16]!
+ vld2.16 {d0, d1, d2, d3}, [r4:32]!
+ vld2.16 {d0, d1, d2, d3}, [r4:64]!
+ vld2.16 {d0, d1, d2, d3}, [r4:128]!
+ vld2.16 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x03]
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x03]
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0x7d,0x03]
+
+ vld2.16 {d0, d1, d2, d3}, [r4], r6
+ vld2.16 {d0, d1, d2, d3}, [r4:16], r6
+ vld2.16 {d0, d1, d2, d3}, [r4:32], r6
+ vld2.16 {d0, d1, d2, d3}, [r4:64], r6
+ vld2.16 {d0, d1, d2, d3}, [r4:128], r6
+ vld2.16 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.16 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x03]
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x03]
+@ CHECK: vld2.16 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0x76,0x03]
+
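+@ vld2.16 lane and all-lanes loads transfer two 16-bit elements, so alignment
+@ must be 32 or omitted.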
+ vld2.16 {d0[2], d1[2]}, [r4]
+ vld2.16 {d0[2], d1[2]}, [r4:16]
+ vld2.16 {d0[2], d1[2]}, [r4:32]
+ vld2.16 {d0[2], d1[2]}, [r4:64]
+ vld2.16 {d0[2], d1[2]}, [r4:128]
+ vld2.16 {d0[2], d1[2]}, [r4:256]
+
+@ CHECK: vld2.16 {d0[2], d1[2]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[2], d1[2]}, [r4:32] @ encoding: [0xa4,0xf9,0x9f,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[2], d1[2]}, [r4]!
+ vld2.16 {d0[2], d1[2]}, [r4:16]!
+ vld2.16 {d0[2], d1[2]}, [r4:32]!
+ vld2.16 {d0[2], d1[2]}, [r4:64]!
+ vld2.16 {d0[2], d1[2]}, [r4:128]!
+ vld2.16 {d0[2], d1[2]}, [r4:256]!
+
+@ CHECK: vld2.16 {d0[2], d1[2]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[2], d1[2]}, [r4:32]! @ encoding: [0xa4,0xf9,0x9d,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[2], d1[2]}, [r4], r6
+ vld2.16 {d0[2], d1[2]}, [r4:16], r6
+ vld2.16 {d0[2], d1[2]}, [r4:32], r6
+ vld2.16 {d0[2], d1[2]}, [r4:64], r6
+ vld2.16 {d0[2], d1[2]}, [r4:128], r6
+ vld2.16 {d0[2], d1[2]}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0[2], d1[2]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[2], d1[2]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x96,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d1[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[2], d2[2]}, [r4]
+ vld2.16 {d0[2], d2[2]}, [r4:16]
+ vld2.16 {d0[2], d2[2]}, [r4:32]
+ vld2.16 {d0[2], d2[2]}, [r4:64]
+ vld2.16 {d0[2], d2[2]}, [r4:128]
+ vld2.16 {d0[2], d2[2]}, [r4:256]
+
+@ CHECK: vld2.16 {d0[2], d2[2]}, [r4] @ encoding: [0xa4,0xf9,0xaf,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[2], d2[2]}, [r4:32] @ encoding: [0xa4,0xf9,0xbf,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[2], d2[2]}, [r4]!
+ vld2.16 {d0[2], d2[2]}, [r4:16]!
+ vld2.16 {d0[2], d2[2]}, [r4:32]!
+ vld2.16 {d0[2], d2[2]}, [r4:64]!
+ vld2.16 {d0[2], d2[2]}, [r4:128]!
+ vld2.16 {d0[2], d2[2]}, [r4:256]!
+
+@ CHECK: vld2.16 {d0[2], d2[2]}, [r4]! @ encoding: [0xa4,0xf9,0xad,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[2], d2[2]}, [r4:32]! @ encoding: [0xa4,0xf9,0xbd,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[2], d2[2]}, [r4], r6
+ vld2.16 {d0[2], d2[2]}, [r4:16], r6
+ vld2.16 {d0[2], d2[2]}, [r4:32], r6
+ vld2.16 {d0[2], d2[2]}, [r4:64], r6
+ vld2.16 {d0[2], d2[2]}, [r4:128], r6
+ vld2.16 {d0[2], d2[2]}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0[2], d2[2]}, [r4], r6 @ encoding: [0xa4,0xf9,0xa6,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[2], d2[2]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0xb6,0x05]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[2], d2[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[], d1[]}, [r4]
+ vld2.16 {d0[], d1[]}, [r4:16]
+ vld2.16 {d0[], d1[]}, [r4:32]
+ vld2.16 {d0[], d1[]}, [r4:64]
+ vld2.16 {d0[], d1[]}, [r4:128]
+ vld2.16 {d0[], d1[]}, [r4:256]
+
+@ CHECK: vld2.16 {d0[], d1[]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[], d1[]}, [r4:32] @ encoding: [0xa4,0xf9,0x5f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[], d1[]}, [r4]!
+ vld2.16 {d0[], d1[]}, [r4:16]!
+ vld2.16 {d0[], d1[]}, [r4:32]!
+ vld2.16 {d0[], d1[]}, [r4:64]!
+ vld2.16 {d0[], d1[]}, [r4:128]!
+ vld2.16 {d0[], d1[]}, [r4:256]!
+
+@ CHECK: vld2.16 {d0[], d1[]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[], d1[]}, [r4:32]! @ encoding: [0xa4,0xf9,0x5d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[], d1[]}, [r4], r6
+ vld2.16 {d0[], d1[]}, [r4:16], r6
+ vld2.16 {d0[], d1[]}, [r4:32], r6
+ vld2.16 {d0[], d1[]}, [r4:64], r6
+ vld2.16 {d0[], d1[]}, [r4:128], r6
+ vld2.16 {d0[], d1[]}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0[], d1[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[], d1[]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x56,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d1[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[], d2[]}, [r4]
+ vld2.16 {d0[], d2[]}, [r4:16]
+ vld2.16 {d0[], d2[]}, [r4:32]
+ vld2.16 {d0[], d2[]}, [r4:64]
+ vld2.16 {d0[], d2[]}, [r4:128]
+ vld2.16 {d0[], d2[]}, [r4:256]
+
+@ CHECK: vld2.16 {d0[], d2[]}, [r4] @ encoding: [0xa4,0xf9,0x6f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[], d2[]}, [r4:32] @ encoding: [0xa4,0xf9,0x7f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[], d2[]}, [r4]!
+ vld2.16 {d0[], d2[]}, [r4:16]!
+ vld2.16 {d0[], d2[]}, [r4:32]!
+ vld2.16 {d0[], d2[]}, [r4:64]!
+ vld2.16 {d0[], d2[]}, [r4:128]!
+ vld2.16 {d0[], d2[]}, [r4:256]!
+
+@ CHECK: vld2.16 {d0[], d2[]}, [r4]! @ encoding: [0xa4,0xf9,0x6d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[], d2[]}, [r4:32]! @ encoding: [0xa4,0xf9,0x7d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.16 {d0[], d2[]}, [r4], r6
+ vld2.16 {d0[], d2[]}, [r4:16], r6
+ vld2.16 {d0[], d2[]}, [r4:32], r6
+ vld2.16 {d0[], d2[]}, [r4:64], r6
+ vld2.16 {d0[], d2[]}, [r4:128], r6
+ vld2.16 {d0[], d2[]}, [r4:256], r6
+
+@ CHECK: vld2.16 {d0[], d2[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x66,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.16 {d0[], d2[]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x76,0x0d]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld2.16 {d0[], d2[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
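+@ vld2.32 with full registers: alignment must be 64, 128, or omitted.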
+ vld2.32 {d0, d1}, [r4]
+ vld2.32 {d0, d1}, [r4:16]
+ vld2.32 {d0, d1}, [r4:32]
+ vld2.32 {d0, d1}, [r4:64]
+ vld2.32 {d0, d1}, [r4:128]
+ vld2.32 {d0, d1}, [r4:256]
+
+@ CHECK: vld2.32 {d0, d1}, [r4] @ encoding: [0x24,0xf9,0x8f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d1}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x08]
+@ CHECK: vld2.32 {d0, d1}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0, d1}, [r4]!
+ vld2.32 {d0, d1}, [r4:16]!
+ vld2.32 {d0, d1}, [r4:32]!
+ vld2.32 {d0, d1}, [r4:64]!
+ vld2.32 {d0, d1}, [r4:128]!
+ vld2.32 {d0, d1}, [r4:256]!
+
+@ CHECK: vld2.32 {d0, d1}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d1}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x08]
+@ CHECK: vld2.32 {d0, d1}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0, d1}, [r4], r6
+ vld2.32 {d0, d1}, [r4:16], r6
+ vld2.32 {d0, d1}, [r4:32], r6
+ vld2.32 {d0, d1}, [r4:64], r6
+ vld2.32 {d0, d1}, [r4:128], r6
+ vld2.32 {d0, d1}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0, d1}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d1}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x08]
+@ CHECK: vld2.32 {d0, d1}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0, d2}, [r4]
+ vld2.32 {d0, d2}, [r4:16]
+ vld2.32 {d0, d2}, [r4:32]
+ vld2.32 {d0, d2}, [r4:64]
+ vld2.32 {d0, d2}, [r4:128]
+ vld2.32 {d0, d2}, [r4:256]
+
+@ CHECK: vld2.32 {d0, d2}, [r4] @ encoding: [0x24,0xf9,0x8f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d2}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x09]
+@ CHECK: vld2.32 {d0, d2}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0, d2}, [r4]!
+ vld2.32 {d0, d2}, [r4:16]!
+ vld2.32 {d0, d2}, [r4:32]!
+ vld2.32 {d0, d2}, [r4:64]!
+ vld2.32 {d0, d2}, [r4:128]!
+ vld2.32 {d0, d2}, [r4:256]!
+
+@ CHECK: vld2.32 {d0, d2}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x09]
+@ CHECK: vld2.32 {d0, d2}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0, d2}, [r4], r6
+ vld2.32 {d0, d2}, [r4:16], r6
+ vld2.32 {d0, d2}, [r4:32], r6
+ vld2.32 {d0, d2}, [r4:64], r6
+ vld2.32 {d0, d2}, [r4:128], r6
+ vld2.32 {d0, d2}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x09]
+@ CHECK: vld2.32 {d0, d2}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
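+@ Four-register vld2.32 additionally accepts :256 alignment.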
+ vld2.32 {d0, d1, d2, d3}, [r4]
+ vld2.32 {d0, d1, d2, d3}, [r4:16]
+ vld2.32 {d0, d1, d2, d3}, [r4:32]
+ vld2.32 {d0, d1, d2, d3}, [r4:64]
+ vld2.32 {d0, d1, d2, d3}, [r4:128]
+ vld2.32 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x8f,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x03]
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x03]
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0xbf,0x03]
+
+ vld2.32 {d0, d1, d2, d3}, [r4]!
+ vld2.32 {d0, d1, d2, d3}, [r4:16]!
+ vld2.32 {d0, d1, d2, d3}, [r4:32]!
+ vld2.32 {d0, d1, d2, d3}, [r4:64]!
+ vld2.32 {d0, d1, d2, d3}, [r4:128]!
+ vld2.32 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x03]
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x03]
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0xbd,0x03]
+
+ vld2.32 {d0, d1, d2, d3}, [r4], r6
+ vld2.32 {d0, d1, d2, d3}, [r4:16], r6
+ vld2.32 {d0, d1, d2, d3}, [r4:32], r6
+ vld2.32 {d0, d1, d2, d3}, [r4:64], r6
+ vld2.32 {d0, d1, d2, d3}, [r4:128], r6
+ vld2.32 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld2.32 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x03]
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x03]
+@ CHECK: vld2.32 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0xb6,0x03]
+
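+@ Single-lane vld2.32 accepts only :64 alignment.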
+ vld2.32 {d0[1], d1[1]}, [r4]
+ vld2.32 {d0[1], d1[1]}, [r4:16]
+ vld2.32 {d0[1], d1[1]}, [r4:32]
+ vld2.32 {d0[1], d1[1]}, [r4:64]
+ vld2.32 {d0[1], d1[1]}, [r4:128]
+ vld2.32 {d0[1], d1[1]}, [r4:256]
+
+@ CHECK: vld2.32 {d0[1], d1[1]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[1], d1[1]}, [r4:64] @ encoding: [0xa4,0xf9,0x9f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[1], d1[1]}, [r4]!
+ vld2.32 {d0[1], d1[1]}, [r4:16]!
+ vld2.32 {d0[1], d1[1]}, [r4:32]!
+ vld2.32 {d0[1], d1[1]}, [r4:64]!
+ vld2.32 {d0[1], d1[1]}, [r4:128]!
+ vld2.32 {d0[1], d1[1]}, [r4:256]!
+
+@ CHECK: vld2.32 {d0[1], d1[1]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[1], d1[1]}, [r4:64]! @ encoding: [0xa4,0xf9,0x9d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[1], d1[1]}, [r4], r6
+ vld2.32 {d0[1], d1[1]}, [r4:16], r6
+ vld2.32 {d0[1], d1[1]}, [r4:32], r6
+ vld2.32 {d0[1], d1[1]}, [r4:64], r6
+ vld2.32 {d0[1], d1[1]}, [r4:128], r6
+ vld2.32 {d0[1], d1[1]}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0[1], d1[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[1], d1[1]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x96,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d1[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[1], d2[1]}, [r4]
+ vld2.32 {d0[1], d2[1]}, [r4:16]
+ vld2.32 {d0[1], d2[1]}, [r4:32]
+ vld2.32 {d0[1], d2[1]}, [r4:64]
+ vld2.32 {d0[1], d2[1]}, [r4:128]
+ vld2.32 {d0[1], d2[1]}, [r4:256]
+
+@ CHECK: vld2.32 {d0[1], d2[1]}, [r4] @ encoding: [0xa4,0xf9,0xcf,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[1], d2[1]}, [r4:64] @ encoding: [0xa4,0xf9,0xdf,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[1], d2[1]}, [r4]!
+ vld2.32 {d0[1], d2[1]}, [r4:16]!
+ vld2.32 {d0[1], d2[1]}, [r4:32]!
+ vld2.32 {d0[1], d2[1]}, [r4:64]!
+ vld2.32 {d0[1], d2[1]}, [r4:128]!
+ vld2.32 {d0[1], d2[1]}, [r4:256]!
+
+@ CHECK: vld2.32 {d0[1], d2[1]}, [r4]! @ encoding: [0xa4,0xf9,0xcd,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[1], d2[1]}, [r4:64]! @ encoding: [0xa4,0xf9,0xdd,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[1], d2[1]}, [r4], r6
+ vld2.32 {d0[1], d2[1]}, [r4:16], r6
+ vld2.32 {d0[1], d2[1]}, [r4:32], r6
+ vld2.32 {d0[1], d2[1]}, [r4:64], r6
+ vld2.32 {d0[1], d2[1]}, [r4:128], r6
+ vld2.32 {d0[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0[1], d2[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0xc6,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[1], d2[1]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0xd6,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
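+@ All-lanes vld2.32 likewise accepts only :64 alignment.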
+ vld2.32 {d0[], d1[]}, [r4]
+ vld2.32 {d0[], d1[]}, [r4:16]
+ vld2.32 {d0[], d1[]}, [r4:32]
+ vld2.32 {d0[], d1[]}, [r4:64]
+ vld2.32 {d0[], d1[]}, [r4:128]
+ vld2.32 {d0[], d1[]}, [r4:256]
+
+@ CHECK: vld2.32 {d0[], d1[]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[], d1[]}, [r4:64] @ encoding: [0xa4,0xf9,0x9f,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[], d1[]}, [r4]!
+ vld2.32 {d0[], d1[]}, [r4:16]!
+ vld2.32 {d0[], d1[]}, [r4:32]!
+ vld2.32 {d0[], d1[]}, [r4:64]!
+ vld2.32 {d0[], d1[]}, [r4:128]!
+ vld2.32 {d0[], d1[]}, [r4:256]!
+
+@ CHECK: vld2.32 {d0[], d1[]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[], d1[]}, [r4:64]! @ encoding: [0xa4,0xf9,0x9d,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[], d1[]}, [r4], r6
+ vld2.32 {d0[], d1[]}, [r4:16], r6
+ vld2.32 {d0[], d1[]}, [r4:32], r6
+ vld2.32 {d0[], d1[]}, [r4:64], r6
+ vld2.32 {d0[], d1[]}, [r4:128], r6
+ vld2.32 {d0[], d1[]}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0[], d1[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[], d1[]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x96,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d1[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[], d2[]}, [r4]
+ vld2.32 {d0[], d2[]}, [r4:16]
+ vld2.32 {d0[], d2[]}, [r4:32]
+ vld2.32 {d0[], d2[]}, [r4:64]
+ vld2.32 {d0[], d2[]}, [r4:128]
+ vld2.32 {d0[], d2[]}, [r4:256]
+
+@ CHECK: vld2.32 {d0[], d2[]}, [r4] @ encoding: [0xa4,0xf9,0xaf,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[], d2[]}, [r4:64] @ encoding: [0xa4,0xf9,0xbf,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[], d2[]}, [r4]!
+ vld2.32 {d0[], d2[]}, [r4:16]!
+ vld2.32 {d0[], d2[]}, [r4:32]!
+ vld2.32 {d0[], d2[]}, [r4:64]!
+ vld2.32 {d0[], d2[]}, [r4:128]!
+ vld2.32 {d0[], d2[]}, [r4:256]!
+
+@ CHECK: vld2.32 {d0[], d2[]}, [r4]! @ encoding: [0xa4,0xf9,0xad,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[], d2[]}, [r4:64]! @ encoding: [0xa4,0xf9,0xbd,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld2.32 {d0[], d2[]}, [r4], r6
+ vld2.32 {d0[], d2[]}, [r4:16], r6
+ vld2.32 {d0[], d2[]}, [r4:32], r6
+ vld2.32 {d0[], d2[]}, [r4:64], r6
+ vld2.32 {d0[], d2[]}, [r4:128], r6
+ vld2.32 {d0[], d2[]}, [r4:256], r6
+
+@ CHECK: vld2.32 {d0[], d2[]}, [r4], r6 @ encoding: [0xa4,0xf9,0xa6,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld2.32 {d0[], d2[]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0xb6,0x0d]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld2.32 {d0[], d2[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
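+@ The vld3 multiple-structure forms below accept only :64 alignment, while
+@ the single-lane and all-lanes vld3 forms reject any alignment qualifier.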
+ vld3.8 {d0, d1, d2}, [r4]
+ vld3.8 {d0, d1, d2}, [r4:16]
+ vld3.8 {d0, d1, d2}, [r4:32]
+ vld3.8 {d0, d1, d2}, [r4:64]
+ vld3.8 {d0, d1, d2}, [r4:128]
+ vld3.8 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld3.8 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0x0f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.8 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0, d1, d2}, [r4]!
+ vld3.8 {d0, d1, d2}, [r4:16]!
+ vld3.8 {d0, d1, d2}, [r4:32]!
+ vld3.8 {d0, d1, d2}, [r4:64]!
+ vld3.8 {d0, d1, d2}, [r4:128]!
+ vld3.8 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld3.8 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.8 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0, d1, d2}, [r4], r6
+ vld3.8 {d0, d1, d2}, [r4:16], r6
+ vld3.8 {d0, d1, d2}, [r4:32], r6
+ vld3.8 {d0, d1, d2}, [r4:64], r6
+ vld3.8 {d0, d1, d2}, [r4:128], r6
+ vld3.8 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld3.8 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.8 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0, d2, d4}, [r4]
+ vld3.8 {d0, d2, d4}, [r4:16]
+ vld3.8 {d0, d2, d4}, [r4:32]
+ vld3.8 {d0, d2, d4}, [r4:64]
+ vld3.8 {d0, d2, d4}, [r4:128]
+ vld3.8 {d0, d2, d4}, [r4:256]
+
+@ CHECK: vld3.8 {d0, d2, d4}, [r4] @ encoding: [0x24,0xf9,0x0f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.8 {d0, d2, d4}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0, d2, d4}, [r4]!
+ vld3.8 {d0, d2, d4}, [r4:16]!
+ vld3.8 {d0, d2, d4}, [r4:32]!
+ vld3.8 {d0, d2, d4}, [r4:64]!
+ vld3.8 {d0, d2, d4}, [r4:128]!
+ vld3.8 {d0, d2, d4}, [r4:256]!
+
+@ CHECK: vld3.8 {d0, d2, d4}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.8 {d0, d2, d4}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0, d2, d4}, [r4], r6
+ vld3.8 {d0, d2, d4}, [r4:16], r6
+ vld3.8 {d0, d2, d4}, [r4:32], r6
+ vld3.8 {d0, d2, d4}, [r4:64], r6
+ vld3.8 {d0, d2, d4}, [r4:128], r6
+ vld3.8 {d0, d2, d4}, [r4:256], r6
+
+@ CHECK: vld3.8 {d0, d2, d4}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.8 {d0, d2, d4}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.8 {d0, d2, d4}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4]
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:16]
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:32]
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:64]
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:128]
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:256]
+
+@ CHECK: vld3.8 {d0[1], d1[1], d2[1]}, [r4] @ encoding: [0xa4,0xf9,0x2f,0x02]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4]!
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:16]!
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:32]!
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:64]!
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:128]!
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:256]!
+
+@ CHECK: vld3.8 {d0[1], d1[1], d2[1]}, [r4]! @ encoding: [0xa4,0xf9,0x2d,0x02]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4], r6
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:16], r6
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:32], r6
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:64], r6
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:128], r6
+ vld3.8 {d0[1], d1[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vld3.8 {d0[1], d1[1], d2[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x26,0x02]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[1], d1[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[], d1[], d2[]}, [r4]
+ vld3.8 {d0[], d1[], d2[]}, [r4:16]
+ vld3.8 {d0[], d1[], d2[]}, [r4:32]
+ vld3.8 {d0[], d1[], d2[]}, [r4:64]
+ vld3.8 {d0[], d1[], d2[]}, [r4:128]
+ vld3.8 {d0[], d1[], d2[]}, [r4:256]
+
+@ CHECK: vld3.8 {d0[], d1[], d2[]}, [r4] @ encoding: [0xa4,0xf9,0x0f,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[], d1[], d2[]}, [r4]!
+ vld3.8 {d0[], d1[], d2[]}, [r4:16]!
+ vld3.8 {d0[], d1[], d2[]}, [r4:32]!
+ vld3.8 {d0[], d1[], d2[]}, [r4:64]!
+ vld3.8 {d0[], d1[], d2[]}, [r4:128]!
+ vld3.8 {d0[], d1[], d2[]}, [r4:256]!
+
+@ CHECK: vld3.8 {d0[], d1[], d2[]}, [r4]! @ encoding: [0xa4,0xf9,0x0d,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[], d1[], d2[]}, [r4], r6
+ vld3.8 {d0[], d1[], d2[]}, [r4:16], r6
+ vld3.8 {d0[], d1[], d2[]}, [r4:32], r6
+ vld3.8 {d0[], d1[], d2[]}, [r4:64], r6
+ vld3.8 {d0[], d1[], d2[]}, [r4:128], r6
+ vld3.8 {d0[], d1[], d2[]}, [r4:256], r6
+
+@ CHECK: vld3.8 {d0[], d1[], d2[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x06,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d1[], d2[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[], d2[], d4[]}, [r4]
+ vld3.8 {d0[], d2[], d4[]}, [r4:16]
+ vld3.8 {d0[], d2[], d4[]}, [r4:32]
+ vld3.8 {d0[], d2[], d4[]}, [r4:64]
+ vld3.8 {d0[], d2[], d4[]}, [r4:128]
+ vld3.8 {d0[], d2[], d4[]}, [r4:256]
+
+@ CHECK: vld3.8 {d0[], d2[], d4[]}, [r4] @ encoding: [0xa4,0xf9,0x2f,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[], d2[], d4[]}, [r4]!
+ vld3.8 {d0[], d2[], d4[]}, [r4:16]!
+ vld3.8 {d0[], d2[], d4[]}, [r4:32]!
+ vld3.8 {d0[], d2[], d4[]}, [r4:64]!
+ vld3.8 {d0[], d2[], d4[]}, [r4:128]!
+ vld3.8 {d0[], d2[], d4[]}, [r4:256]!
+
+@ CHECK: vld3.8 {d0[], d2[], d4[]}, [r4]! @ encoding: [0xa4,0xf9,0x2d,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.8 {d0[], d2[], d4[]}, [r4], r6
+ vld3.8 {d0[], d2[], d4[]}, [r4:16], r6
+ vld3.8 {d0[], d2[], d4[]}, [r4:32], r6
+ vld3.8 {d0[], d2[], d4[]}, [r4:64], r6
+ vld3.8 {d0[], d2[], d4[]}, [r4:128], r6
+ vld3.8 {d0[], d2[], d4[]}, [r4:256], r6
+
+@ CHECK: vld3.8 {d0[], d2[], d4[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x26,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.8 {d0[], d2[], d4[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0, d1, d2}, [r4]
+ vld3.16 {d0, d1, d2}, [r4:16]
+ vld3.16 {d0, d1, d2}, [r4:32]
+ vld3.16 {d0, d1, d2}, [r4:64]
+ vld3.16 {d0, d1, d2}, [r4:128]
+ vld3.16 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld3.16 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0x4f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.16 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0, d1, d2}, [r4]!
+ vld3.16 {d0, d1, d2}, [r4:16]!
+ vld3.16 {d0, d1, d2}, [r4:32]!
+ vld3.16 {d0, d1, d2}, [r4:64]!
+ vld3.16 {d0, d1, d2}, [r4:128]!
+ vld3.16 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld3.16 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.16 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0, d1, d2}, [r4], r6
+ vld3.16 {d0, d1, d2}, [r4:16], r6
+ vld3.16 {d0, d1, d2}, [r4:32], r6
+ vld3.16 {d0, d1, d2}, [r4:64], r6
+ vld3.16 {d0, d1, d2}, [r4:128], r6
+ vld3.16 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld3.16 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.16 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0, d2, d4}, [r4]
+ vld3.16 {d0, d2, d4}, [r4:16]
+ vld3.16 {d0, d2, d4}, [r4:32]
+ vld3.16 {d0, d2, d4}, [r4:64]
+ vld3.16 {d0, d2, d4}, [r4:128]
+ vld3.16 {d0, d2, d4}, [r4:256]
+
+@ CHECK: vld3.16 {d0, d2, d4}, [r4] @ encoding: [0x24,0xf9,0x4f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.16 {d0, d2, d4}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0, d2, d4}, [r4]!
+ vld3.16 {d0, d2, d4}, [r4:16]!
+ vld3.16 {d0, d2, d4}, [r4:32]!
+ vld3.16 {d0, d2, d4}, [r4:64]!
+ vld3.16 {d0, d2, d4}, [r4:128]!
+ vld3.16 {d0, d2, d4}, [r4:256]!
+
+@ CHECK: vld3.16 {d0, d2, d4}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.16 {d0, d2, d4}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0, d2, d4}, [r4], r6
+ vld3.16 {d0, d2, d4}, [r4:16], r6
+ vld3.16 {d0, d2, d4}, [r4:32], r6
+ vld3.16 {d0, d2, d4}, [r4:64], r6
+ vld3.16 {d0, d2, d4}, [r4:128], r6
+ vld3.16 {d0, d2, d4}, [r4:256], r6
+
+@ CHECK: vld3.16 {d0, d2, d4}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.16 {d0, d2, d4}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.16 {d0, d2, d4}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4]
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:16]
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:32]
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:64]
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:128]
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:256]
+
+@ CHECK: vld3.16 {d0[1], d1[1], d2[1]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4]!
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:16]!
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:32]!
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:64]!
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:128]!
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:256]!
+
+@ CHECK: vld3.16 {d0[1], d1[1], d2[1]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4], r6
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:16], r6
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:32], r6
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:64], r6
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:128], r6
+ vld3.16 {d0[1], d1[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vld3.16 {d0[1], d1[1], d2[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d1[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4]
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:16]
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:32]
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:64]
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:128]
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:256]
+
+@ CHECK: vld3.16 {d0[1], d2[1], d4[1]}, [r4] @ encoding: [0xa4,0xf9,0x6f,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4]!
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:16]!
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:32]!
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:64]!
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:128]!
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:256]!
+
+@ CHECK: vld3.16 {d0[1], d2[1], d4[1]}, [r4]! @ encoding: [0xa4,0xf9,0x6d,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4], r6
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:16], r6
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:32], r6
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:64], r6
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:128], r6
+ vld3.16 {d0[1], d2[1], d4[1]}, [r4:256], r6
+
+@ CHECK: vld3.16 {d0[1], d2[1], d4[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x66,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[1], d2[1], d4[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[], d1[], d2[]}, [r4]
+ vld3.16 {d0[], d1[], d2[]}, [r4:16]
+ vld3.16 {d0[], d1[], d2[]}, [r4:32]
+ vld3.16 {d0[], d1[], d2[]}, [r4:64]
+ vld3.16 {d0[], d1[], d2[]}, [r4:128]
+ vld3.16 {d0[], d1[], d2[]}, [r4:256]
+
+@ CHECK: vld3.16 {d0[], d1[], d2[]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[], d1[], d2[]}, [r4]!
+ vld3.16 {d0[], d1[], d2[]}, [r4:16]!
+ vld3.16 {d0[], d1[], d2[]}, [r4:32]!
+ vld3.16 {d0[], d1[], d2[]}, [r4:64]!
+ vld3.16 {d0[], d1[], d2[]}, [r4:128]!
+ vld3.16 {d0[], d1[], d2[]}, [r4:256]!
+
+@ CHECK: vld3.16 {d0[], d1[], d2[]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[], d1[], d2[]}, [r4], r6
+ vld3.16 {d0[], d1[], d2[]}, [r4:16], r6
+ vld3.16 {d0[], d1[], d2[]}, [r4:32], r6
+ vld3.16 {d0[], d1[], d2[]}, [r4:64], r6
+ vld3.16 {d0[], d1[], d2[]}, [r4:128], r6
+ vld3.16 {d0[], d1[], d2[]}, [r4:256], r6
+
+@ CHECK: vld3.16 {d0[], d1[], d2[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d1[], d2[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[], d2[], d4[]}, [r4]
+ vld3.16 {d0[], d2[], d4[]}, [r4:16]
+ vld3.16 {d0[], d2[], d4[]}, [r4:32]
+ vld3.16 {d0[], d2[], d4[]}, [r4:64]
+ vld3.16 {d0[], d2[], d4[]}, [r4:128]
+ vld3.16 {d0[], d2[], d4[]}, [r4:256]
+
+@ CHECK: vld3.16 {d0[], d2[], d4[]}, [r4] @ encoding: [0xa4,0xf9,0x6f,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[], d2[], d4[]}, [r4]!
+ vld3.16 {d0[], d2[], d4[]}, [r4:16]!
+ vld3.16 {d0[], d2[], d4[]}, [r4:32]!
+ vld3.16 {d0[], d2[], d4[]}, [r4:64]!
+ vld3.16 {d0[], d2[], d4[]}, [r4:128]!
+ vld3.16 {d0[], d2[], d4[]}, [r4:256]!
+
+@ CHECK: vld3.16 {d0[], d2[], d4[]}, [r4]! @ encoding: [0xa4,0xf9,0x6d,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.16 {d0[], d2[], d4[]}, [r4], r6
+ vld3.16 {d0[], d2[], d4[]}, [r4:16], r6
+ vld3.16 {d0[], d2[], d4[]}, [r4:32], r6
+ vld3.16 {d0[], d2[], d4[]}, [r4:64], r6
+ vld3.16 {d0[], d2[], d4[]}, [r4:128], r6
+ vld3.16 {d0[], d2[], d4[]}, [r4:256], r6
+
+@ CHECK: vld3.16 {d0[], d2[], d4[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x66,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.16 {d0[], d2[], d4[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0, d1, d2}, [r4]
+ vld3.32 {d0, d1, d2}, [r4:16]
+ vld3.32 {d0, d1, d2}, [r4:32]
+ vld3.32 {d0, d1, d2}, [r4:64]
+ vld3.32 {d0, d1, d2}, [r4:128]
+ vld3.32 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vld3.32 {d0, d1, d2}, [r4] @ encoding: [0x24,0xf9,0x8f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.32 {d0, d1, d2}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0, d1, d2}, [r4]!
+ vld3.32 {d0, d1, d2}, [r4:16]!
+ vld3.32 {d0, d1, d2}, [r4:32]!
+ vld3.32 {d0, d1, d2}, [r4:64]!
+ vld3.32 {d0, d1, d2}, [r4:128]!
+ vld3.32 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vld3.32 {d0, d1, d2}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.32 {d0, d1, d2}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0, d1, d2}, [r4], r6
+ vld3.32 {d0, d1, d2}, [r4:16], r6
+ vld3.32 {d0, d1, d2}, [r4:32], r6
+ vld3.32 {d0, d1, d2}, [r4:64], r6
+ vld3.32 {d0, d1, d2}, [r4:128], r6
+ vld3.32 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vld3.32 {d0, d1, d2}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.32 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0, d2, d4}, [r4]
+ vld3.32 {d0, d2, d4}, [r4:16]
+ vld3.32 {d0, d2, d4}, [r4:32]
+ vld3.32 {d0, d2, d4}, [r4:64]
+ vld3.32 {d0, d2, d4}, [r4:128]
+ vld3.32 {d0, d2, d4}, [r4:256]
+
+@ CHECK: vld3.32 {d0, d2, d4}, [r4] @ encoding: [0x24,0xf9,0x8f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.32 {d0, d2, d4}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0, d2, d4}, [r4]!
+ vld3.32 {d0, d2, d4}, [r4:16]!
+ vld3.32 {d0, d2, d4}, [r4:32]!
+ vld3.32 {d0, d2, d4}, [r4:64]!
+ vld3.32 {d0, d2, d4}, [r4:128]!
+ vld3.32 {d0, d2, d4}, [r4:256]!
+
+@ CHECK: vld3.32 {d0, d2, d4}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.32 {d0, d2, d4}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0, d2, d4}, [r4], r6
+ vld3.32 {d0, d2, d4}, [r4:16], r6
+ vld3.32 {d0, d2, d4}, [r4:32], r6
+ vld3.32 {d0, d2, d4}, [r4:64], r6
+ vld3.32 {d0, d2, d4}, [r4:128], r6
+ vld3.32 {d0, d2, d4}, [r4:256], r6
+
+@ CHECK: vld3.32 {d0, d2, d4}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld3.32 {d0, d2, d4}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld3.32 {d0, d2, d4}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4]
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:16]
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:32]
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:64]
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:128]
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:256]
+
+@ CHECK: vld3.32 {d0[1], d1[1], d2[1]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4]!
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:16]!
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:32]!
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:64]!
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:128]!
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:256]!
+
+@ CHECK: vld3.32 {d0[1], d1[1], d2[1]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4], r6
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:16], r6
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:32], r6
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:64], r6
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:128], r6
+ vld3.32 {d0[1], d1[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vld3.32 {d0[1], d1[1], d2[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d1[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4]
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:16]
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:32]
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:64]
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:128]
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:256]
+
+@ CHECK: vld3.32 {d0[1], d2[1], d4[1]}, [r4] @ encoding: [0xa4,0xf9,0xcf,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4]!
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:16]!
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:32]!
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:64]!
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:128]!
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:256]!
+
+@ CHECK: vld3.32 {d0[1], d2[1], d4[1]}, [r4]! @ encoding: [0xa4,0xf9,0xcd,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4], r6
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:16], r6
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:32], r6
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:64], r6
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:128], r6
+ vld3.32 {d0[1], d2[1], d4[1]}, [r4:256], r6
+
+@ CHECK: vld3.32 {d0[1], d2[1], d4[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0xc6,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[1], d2[1], d4[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[], d1[], d2[]}, [r4]
+ vld3.32 {d0[], d1[], d2[]}, [r4:16]
+ vld3.32 {d0[], d1[], d2[]}, [r4:32]
+ vld3.32 {d0[], d1[], d2[]}, [r4:64]
+ vld3.32 {d0[], d1[], d2[]}, [r4:128]
+ vld3.32 {d0[], d1[], d2[]}, [r4:256]
+
+@ CHECK: vld3.32 {d0[], d1[], d2[]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[], d1[], d2[]}, [r4]!
+ vld3.32 {d0[], d1[], d2[]}, [r4:16]!
+ vld3.32 {d0[], d1[], d2[]}, [r4:32]!
+ vld3.32 {d0[], d1[], d2[]}, [r4:64]!
+ vld3.32 {d0[], d1[], d2[]}, [r4:128]!
+ vld3.32 {d0[], d1[], d2[]}, [r4:256]!
+
+@ CHECK: vld3.32 {d0[], d1[], d2[]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[], d1[], d2[]}, [r4], r6
+ vld3.32 {d0[], d1[], d2[]}, [r4:16], r6
+ vld3.32 {d0[], d1[], d2[]}, [r4:32], r6
+ vld3.32 {d0[], d1[], d2[]}, [r4:64], r6
+ vld3.32 {d0[], d1[], d2[]}, [r4:128], r6
+ vld3.32 {d0[], d1[], d2[]}, [r4:256], r6
+
+@ CHECK: vld3.32 {d0[], d1[], d2[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d1[], d2[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[], d2[], d4[]}, [r4]
+ vld3.32 {d0[], d2[], d4[]}, [r4:16]
+ vld3.32 {d0[], d2[], d4[]}, [r4:32]
+ vld3.32 {d0[], d2[], d4[]}, [r4:64]
+ vld3.32 {d0[], d2[], d4[]}, [r4:128]
+ vld3.32 {d0[], d2[], d4[]}, [r4:256]
+
+@ CHECK: vld3.32 {d0[], d2[], d4[]}, [r4] @ encoding: [0xa4,0xf9,0xaf,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[], d2[], d4[]}, [r4]!
+ vld3.32 {d0[], d2[], d4[]}, [r4:16]!
+ vld3.32 {d0[], d2[], d4[]}, [r4:32]!
+ vld3.32 {d0[], d2[], d4[]}, [r4:64]!
+ vld3.32 {d0[], d2[], d4[]}, [r4:128]!
+ vld3.32 {d0[], d2[], d4[]}, [r4:256]!
+
+@ CHECK: vld3.32 {d0[], d2[], d4[]}, [r4]! @ encoding: [0xa4,0xf9,0xad,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld3.32 {d0[], d2[], d4[]}, [r4], r6
+ vld3.32 {d0[], d2[], d4[]}, [r4:16], r6
+ vld3.32 {d0[], d2[], d4[]}, [r4:32], r6
+ vld3.32 {d0[], d2[], d4[]}, [r4:64], r6
+ vld3.32 {d0[], d2[], d4[]}, [r4:128], r6
+ vld3.32 {d0[], d2[], d4[]}, [r4:256], r6
+
+@ CHECK: vld3.32 {d0[], d2[], d4[]}, [r4], r6 @ encoding: [0xa4,0xf9,0xa6,0x0e]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vld3.32 {d0[], d2[], d4[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0, d1, d2, d3}, [r4]
+ vld4.8 {d0, d1, d2, d3}, [r4:16]
+ vld4.8 {d0, d1, d2, d3}, [r4:32]
+ vld4.8 {d0, d1, d2, d3}, [r4:64]
+ vld4.8 {d0, d1, d2, d3}, [r4:128]
+ vld4.8 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x0f,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x00]
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x00]
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0x3f,0x00]
+
+ vld4.8 {d0, d1, d2, d3}, [r4]!
+ vld4.8 {d0, d1, d2, d3}, [r4:16]!
+ vld4.8 {d0, d1, d2, d3}, [r4:32]!
+ vld4.8 {d0, d1, d2, d3}, [r4:64]!
+ vld4.8 {d0, d1, d2, d3}, [r4:128]!
+ vld4.8 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x00]
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x00]
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0x3d,0x00]
+
+ vld4.8 {d0, d1, d2, d3}, [r4], r6
+ vld4.8 {d0, d1, d2, d3}, [r4:16], r6
+ vld4.8 {d0, d1, d2, d3}, [r4:32], r6
+ vld4.8 {d0, d1, d2, d3}, [r4:64], r6
+ vld4.8 {d0, d1, d2, d3}, [r4:128], r6
+ vld4.8 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x00]
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x00]
+@ CHECK: vld4.8 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0x36,0x00]
+
+ vld4.8 {d0, d2, d4, d6}, [r4]
+ vld4.8 {d0, d2, d4, d6}, [r4:16]
+ vld4.8 {d0, d2, d4, d6}, [r4:32]
+ vld4.8 {d0, d2, d4, d6}, [r4:64]
+ vld4.8 {d0, d2, d4, d6}, [r4:128]
+ vld4.8 {d0, d2, d4, d6}, [r4:256]
+
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4] @ encoding: [0x24,0xf9,0x0f,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d2, d4, d6}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d2, d4, d6}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:64] @ encoding: [0x24,0xf9,0x1f,0x01]
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:128] @ encoding: [0x24,0xf9,0x2f,0x01]
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:256] @ encoding: [0x24,0xf9,0x3f,0x01]
+
+ vld4.8 {d0, d2, d4, d6}, [r4]!
+ vld4.8 {d0, d2, d4, d6}, [r4:16]!
+ vld4.8 {d0, d2, d4, d6}, [r4:32]!
+ vld4.8 {d0, d2, d4, d6}, [r4:64]!
+ vld4.8 {d0, d2, d4, d6}, [r4:128]!
+ vld4.8 {d0, d2, d4, d6}, [r4:256]!
+
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4]! @ encoding: [0x24,0xf9,0x0d,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d2, d4, d6}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d2, d4, d6}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:64]! @ encoding: [0x24,0xf9,0x1d,0x01]
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:128]! @ encoding: [0x24,0xf9,0x2d,0x01]
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:256]! @ encoding: [0x24,0xf9,0x3d,0x01]
+
+ vld4.8 {d0, d2, d4, d6}, [r4], r6
+ vld4.8 {d0, d2, d4, d6}, [r4:16], r6
+ vld4.8 {d0, d2, d4, d6}, [r4:32], r6
+ vld4.8 {d0, d2, d4, d6}, [r4:64], r6
+ vld4.8 {d0, d2, d4, d6}, [r4:128], r6
+ vld4.8 {d0, d2, d4, d6}, [r4:256], r6
+
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4], r6 @ encoding: [0x24,0xf9,0x06,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d2, d4, d6}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.8 {d0, d2, d4, d6}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:64], r6 @ encoding: [0x24,0xf9,0x16,0x01]
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:128], r6 @ encoding: [0x24,0xf9,0x26,0x01]
+@ CHECK: vld4.8 {d0, d2, d4, d6}, [r4:256], r6 @ encoding: [0x24,0xf9,0x36,0x01]
+
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4]
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+
+@ CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4] @ encoding: [0xa4,0xf9,0x2f,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32] @ encoding: [0xa4,0xf9,0x3f,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4]!
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+
+@ CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4]! @ encoding: [0xa4,0xf9,0x2d,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]! @ encoding: [0xa4,0xf9,0x3d,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+ vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+
+@ CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x26,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x36,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4]
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:16]
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32]
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:64]
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:128]
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:256]
+
+@ CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4] @ encoding: [0xa4,0xf9,0x0f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32] @ encoding: [0xa4,0xf9,0x1f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4]!
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:16]!
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32]!
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:64]!
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:128]!
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:256]!
+
+@ CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4]! @ encoding: [0xa4,0xf9,0x0d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32]! @ encoding: [0xa4,0xf9,0x1d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4], r6
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:16], r6
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32], r6
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:64], r6
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:128], r6
+ vld4.8 {d0[], d1[], d2[], d3[]}, [r4:256], r6
+
+@ CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x06,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x16,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4]
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:16]
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32]
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:64]
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:128]
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:256]
+
+@ CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4] @ encoding: [0xa4,0xf9,0x2f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32] @ encoding: [0xa4,0xf9,0x3f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4]!
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:16]!
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32]!
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:64]!
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:128]!
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:256]!
+
+@ CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4]! @ encoding: [0xa4,0xf9,0x2d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32]! @ encoding: [0xa4,0xf9,0x3d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4], r6
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:16], r6
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32], r6
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:64], r6
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:128], r6
+ vld4.8 {d0[], d2[], d4[], d6[]}, [r4:256], r6
+
+@ CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x26,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32], r6 @ encoding: [0xa4,0xf9,0x36,0x0f]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0, d1, d2, d3}, [r4]
+ vld4.16 {d0, d1, d2, d3}, [r4:16]
+ vld4.16 {d0, d1, d2, d3}, [r4:32]
+ vld4.16 {d0, d1, d2, d3}, [r4:64]
+ vld4.16 {d0, d1, d2, d3}, [r4:128]
+ vld4.16 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x4f,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x00]
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x00]
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0x7f,0x00]
+
+ vld4.16 {d0, d1, d2, d3}, [r4]!
+ vld4.16 {d0, d1, d2, d3}, [r4:16]!
+ vld4.16 {d0, d1, d2, d3}, [r4:32]!
+ vld4.16 {d0, d1, d2, d3}, [r4:64]!
+ vld4.16 {d0, d1, d2, d3}, [r4:128]!
+ vld4.16 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x00]
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x00]
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0x7d,0x00]
+
+ vld4.16 {d0, d1, d2, d3}, [r4], r6
+ vld4.16 {d0, d1, d2, d3}, [r4:16], r6
+ vld4.16 {d0, d1, d2, d3}, [r4:32], r6
+ vld4.16 {d0, d1, d2, d3}, [r4:64], r6
+ vld4.16 {d0, d1, d2, d3}, [r4:128], r6
+ vld4.16 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x00]
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x00]
+@ CHECK: vld4.16 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0x76,0x00]
+
+ vld4.16 {d0, d2, d4, d6}, [r4]
+ vld4.16 {d0, d2, d4, d6}, [r4:16]
+ vld4.16 {d0, d2, d4, d6}, [r4:32]
+ vld4.16 {d0, d2, d4, d6}, [r4:64]
+ vld4.16 {d0, d2, d4, d6}, [r4:128]
+ vld4.16 {d0, d2, d4, d6}, [r4:256]
+
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4] @ encoding: [0x24,0xf9,0x4f,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d2, d4, d6}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d2, d4, d6}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:64] @ encoding: [0x24,0xf9,0x5f,0x01]
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:128] @ encoding: [0x24,0xf9,0x6f,0x01]
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:256] @ encoding: [0x24,0xf9,0x7f,0x01]
+
+ vld4.16 {d0, d2, d4, d6}, [r4]!
+ vld4.16 {d0, d2, d4, d6}, [r4:16]!
+ vld4.16 {d0, d2, d4, d6}, [r4:32]!
+ vld4.16 {d0, d2, d4, d6}, [r4:64]!
+ vld4.16 {d0, d2, d4, d6}, [r4:128]!
+ vld4.16 {d0, d2, d4, d6}, [r4:256]!
+
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4]! @ encoding: [0x24,0xf9,0x4d,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d2, d4, d6}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d2, d4, d6}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:64]! @ encoding: [0x24,0xf9,0x5d,0x01]
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:128]! @ encoding: [0x24,0xf9,0x6d,0x01]
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:256]! @ encoding: [0x24,0xf9,0x7d,0x01]
+
+ vld4.16 {d0, d2, d4, d6}, [r4], r6
+ vld4.16 {d0, d2, d4, d6}, [r4:16], r6
+ vld4.16 {d0, d2, d4, d6}, [r4:32], r6
+ vld4.16 {d0, d2, d4, d6}, [r4:64], r6
+ vld4.16 {d0, d2, d4, d6}, [r4:128], r6
+ vld4.16 {d0, d2, d4, d6}, [r4:256], r6
+
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4], r6 @ encoding: [0x24,0xf9,0x46,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d2, d4, d6}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.16 {d0, d2, d4, d6}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:64], r6 @ encoding: [0x24,0xf9,0x56,0x01]
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:128], r6 @ encoding: [0x24,0xf9,0x66,0x01]
+@ CHECK: vld4.16 {d0, d2, d4, d6}, [r4:256], r6 @ encoding: [0x24,0xf9,0x76,0x01]
+
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4]
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+
+@ CHECK: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64] @ encoding: [0xa4,0xf9,0x5f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4]!
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+
+@ CHECK: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]! @ encoding: [0xa4,0xf9,0x5d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+
+@ CHECK: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x56,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4]
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+
+@ CHECK: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4] @ encoding: [0xa4,0xf9,0x6f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64] @ encoding: [0xa4,0xf9,0x7f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4]!
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]!
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]!
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+
+@ CHECK: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4]! @ encoding: [0xa4,0xf9,0x6d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]! @ encoding: [0xa4,0xf9,0x7d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+
+@ CHECK: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x66,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x76,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4]
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:16]
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:32]
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64]
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:128]
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:256]
+
+@ CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4] @ encoding: [0xa4,0xf9,0x4f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64] @ encoding: [0xa4,0xf9,0x5f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4]!
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:16]!
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:32]!
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64]!
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:128]!
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:256]!
+
+@ CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4]! @ encoding: [0xa4,0xf9,0x4d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64]! @ encoding: [0xa4,0xf9,0x5d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4], r6
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:16], r6
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:32], r6
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64], r6
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:128], r6
+ vld4.16 {d0[], d1[], d2[], d3[]}, [r4:256], r6
+
+@ CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x46,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x56,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4]
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:16]
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:32]
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64]
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:128]
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:256]
+
+@ CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4] @ encoding: [0xa4,0xf9,0x6f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64] @ encoding: [0xa4,0xf9,0x7f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4]!
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:16]!
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:32]!
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64]!
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:128]!
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:256]!
+
+@ CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4]! @ encoding: [0xa4,0xf9,0x6d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64]! @ encoding: [0xa4,0xf9,0x7d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4], r6
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:16], r6
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:32], r6
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64], r6
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:128], r6
+ vld4.16 {d0[], d2[], d4[], d6[]}, [r4:256], r6
+
+@ CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x66,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x76,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0, d1, d2, d3}, [r4]
+ vld4.32 {d0, d1, d2, d3}, [r4:16]
+ vld4.32 {d0, d1, d2, d3}, [r4:32]
+ vld4.32 {d0, d1, d2, d3}, [r4:64]
+ vld4.32 {d0, d1, d2, d3}, [r4:128]
+ vld4.32 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4] @ encoding: [0x24,0xf9,0x8f,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x00]
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x00]
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x24,0xf9,0xbf,0x00]
+
+ vld4.32 {d0, d1, d2, d3}, [r4]!
+ vld4.32 {d0, d1, d2, d3}, [r4:16]!
+ vld4.32 {d0, d1, d2, d3}, [r4:32]!
+ vld4.32 {d0, d1, d2, d3}, [r4:64]!
+ vld4.32 {d0, d1, d2, d3}, [r4:128]!
+ vld4.32 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x00]
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x00]
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x24,0xf9,0xbd,0x00]
+
+ vld4.32 {d0, d1, d2, d3}, [r4], r6
+ vld4.32 {d0, d1, d2, d3}, [r4:16], r6
+ vld4.32 {d0, d1, d2, d3}, [r4:32], r6
+ vld4.32 {d0, d1, d2, d3}, [r4:64], r6
+ vld4.32 {d0, d1, d2, d3}, [r4:128], r6
+ vld4.32 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x00]
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x00]
+@ CHECK: vld4.32 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x24,0xf9,0xb6,0x00]
+
+ vld4.32 {d0, d2, d4, d6}, [r4]
+ vld4.32 {d0, d2, d4, d6}, [r4:16]
+ vld4.32 {d0, d2, d4, d6}, [r4:32]
+ vld4.32 {d0, d2, d4, d6}, [r4:64]
+ vld4.32 {d0, d2, d4, d6}, [r4:128]
+ vld4.32 {d0, d2, d4, d6}, [r4:256]
+
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4] @ encoding: [0x24,0xf9,0x8f,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d2, d4, d6}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d2, d4, d6}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:64] @ encoding: [0x24,0xf9,0x9f,0x01]
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:128] @ encoding: [0x24,0xf9,0xaf,0x01]
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:256] @ encoding: [0x24,0xf9,0xbf,0x01]
+
+ vld4.32 {d0, d2, d4, d6}, [r4]!
+ vld4.32 {d0, d2, d4, d6}, [r4:16]!
+ vld4.32 {d0, d2, d4, d6}, [r4:32]!
+ vld4.32 {d0, d2, d4, d6}, [r4:64]!
+ vld4.32 {d0, d2, d4, d6}, [r4:128]!
+ vld4.32 {d0, d2, d4, d6}, [r4:256]!
+
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4]! @ encoding: [0x24,0xf9,0x8d,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d2, d4, d6}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d2, d4, d6}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:64]! @ encoding: [0x24,0xf9,0x9d,0x01]
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:128]! @ encoding: [0x24,0xf9,0xad,0x01]
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:256]! @ encoding: [0x24,0xf9,0xbd,0x01]
+
+ vld4.32 {d0, d2, d4, d6}, [r4], r6
+ vld4.32 {d0, d2, d4, d6}, [r4:16], r6
+ vld4.32 {d0, d2, d4, d6}, [r4:32], r6
+ vld4.32 {d0, d2, d4, d6}, [r4:64], r6
+ vld4.32 {d0, d2, d4, d6}, [r4:128], r6
+ vld4.32 {d0, d2, d4, d6}, [r4:256], r6
+
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4], r6 @ encoding: [0x24,0xf9,0x86,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d2, d4, d6}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vld4.32 {d0, d2, d4, d6}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:64], r6 @ encoding: [0x24,0xf9,0x96,0x01]
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:128], r6 @ encoding: [0x24,0xf9,0xa6,0x01]
+@ CHECK: vld4.32 {d0, d2, d4, d6}, [r4:256], r6 @ encoding: [0x24,0xf9,0xb6,0x01]
+
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4]
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64] @ encoding: [0xa4,0xf9,0x9f,0x0b]
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128] @ encoding: [0xa4,0xf9,0xaf,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4]!
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]! @ encoding: [0xa4,0xf9,0x9d,0x0b]
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]! @ encoding: [0xa4,0xf9,0xad,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+ vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x96,0x0b]
+@ CHECK: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6 @ encoding: [0xa4,0xf9,0xa6,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4]
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4] @ encoding: [0xa4,0xf9,0xcf,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64] @ encoding: [0xa4,0xf9,0xdf,0x0b]
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128] @ encoding: [0xa4,0xf9,0xef,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4]!
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]!
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]!
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4]! @ encoding: [0xa4,0xf9,0xcd,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]! @ encoding: [0xa4,0xf9,0xdd,0x0b]
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]! @ encoding: [0xa4,0xf9,0xed,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6
+ vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6 @ encoding: [0xa4,0xf9,0xc6,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0xd6,0x0b]
+@ CHECK: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6 @ encoding: [0xa4,0xf9,0xe6,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4]
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:16]
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:32]
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:64]
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128]
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:256]
+
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4] @ encoding: [0xa4,0xf9,0x8f,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:64] @ encoding: [0xa4,0xf9,0x9f,0x0f]
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128] @ encoding: [0xa4,0xf9,0xdf,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4]!
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:16]!
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:32]!
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:64]!
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128]!
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:256]!
+
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4]! @ encoding: [0xa4,0xf9,0x8d,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:64]! @ encoding: [0xa4,0xf9,0x9d,0x0f]
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128]! @ encoding: [0xa4,0xf9,0xdd,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4], r6
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:16], r6
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:32], r6
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:64], r6
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128], r6
+ vld4.32 {d0[], d1[], d2[], d3[]}, [r4:256], r6
+
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4], r6 @ encoding: [0xa4,0xf9,0x86,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0x96,0x0f]
+@ CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128], r6 @ encoding: [0xa4,0xf9,0xd6,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4]
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:16]
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:32]
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64]
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128]
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:256]
+
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4] @ encoding: [0xa4,0xf9,0xaf,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64] @ encoding: [0xa4,0xf9,0xbf,0x0f]
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128] @ encoding: [0xa4,0xf9,0xff,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4]!
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:16]!
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:32]!
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64]!
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128]!
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:256]!
+
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4]! @ encoding: [0xa4,0xf9,0xad,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64]! @ encoding: [0xa4,0xf9,0xbd,0x0f]
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128]! @ encoding: [0xa4,0xf9,0xfd,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4], r6
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:16], r6
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:32], r6
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64], r6
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128], r6
+ vld4.32 {d0[], d2[], d4[], d6[]}, [r4:256], r6
+
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4], r6 @ encoding: [0xa4,0xf9,0xa6,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64], r6 @ encoding: [0xa4,0xf9,0xb6,0x0f]
+@ CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128], r6 @ encoding: [0xa4,0xf9,0xf6,0x0f]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0}, [r4]
+ vst1.8 {d0}, [r4:16]
+ vst1.8 {d0}, [r4:32]
+ vst1.8 {d0}, [r4:64]
+ vst1.8 {d0}, [r4:128]
+ vst1.8 {d0}, [r4:256]
+
+@ CHECK: vst1.8 {d0}, [r4] @ encoding: [0x04,0xf9,0x0f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0}, [r4]!
+ vst1.8 {d0}, [r4:16]!
+ vst1.8 {d0}, [r4:32]!
+ vst1.8 {d0}, [r4:64]!
+ vst1.8 {d0}, [r4:128]!
+ vst1.8 {d0}, [r4:256]!
+
+@ CHECK: vst1.8 {d0}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0}, [r4], r6
+ vst1.8 {d0}, [r4:16], r6
+ vst1.8 {d0}, [r4:32], r6
+ vst1.8 {d0}, [r4:64], r6
+ vst1.8 {d0}, [r4:128], r6
+ vst1.8 {d0}, [r4:256], r6
+
+@ CHECK: vst1.8 {d0}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0, d1}, [r4]
+ vst1.8 {d0, d1}, [r4:16]
+ vst1.8 {d0, d1}, [r4:32]
+ vst1.8 {d0, d1}, [r4:64]
+ vst1.8 {d0, d1}, [r4:128]
+ vst1.8 {d0, d1}, [r4:256]
+
+@ CHECK: vst1.8 {d0, d1}, [r4] @ encoding: [0x04,0xf9,0x0f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x0a]
+@ CHECK: vst1.8 {d0, d1}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0, d1}, [r4]!
+ vst1.8 {d0, d1}, [r4:16]!
+ vst1.8 {d0, d1}, [r4:32]!
+ vst1.8 {d0, d1}, [r4:64]!
+ vst1.8 {d0, d1}, [r4:128]!
+ vst1.8 {d0, d1}, [r4:256]!
+
+@ CHECK: vst1.8 {d0, d1}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x0a]
+@ CHECK: vst1.8 {d0, d1}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0, d1}, [r4], r6
+ vst1.8 {d0, d1}, [r4:16], r6
+ vst1.8 {d0, d1}, [r4:32], r6
+ vst1.8 {d0, d1}, [r4:64], r6
+ vst1.8 {d0, d1}, [r4:128], r6
+ vst1.8 {d0, d1}, [r4:256], r6
+
+@ CHECK: vst1.8 {d0, d1}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x0a]
+@ CHECK: vst1.8 {d0, d1}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
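+@ vst1 with a three-register list: only a :64 alignment (or none) is accepted.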
+ vst1.8 {d0, d1, d2}, [r4]
+ vst1.8 {d0, d1, d2}, [r4:16]
+ vst1.8 {d0, d1, d2}, [r4:32]
+ vst1.8 {d0, d1, d2}, [r4:64]
+ vst1.8 {d0, d1, d2}, [r4:128]
+ vst1.8 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst1.8 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0x0f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0, d1, d2}, [r4]!
+ vst1.8 {d0, d1, d2}, [r4:16]!
+ vst1.8 {d0, d1, d2}, [r4:32]!
+ vst1.8 {d0, d1, d2}, [r4:64]!
+ vst1.8 {d0, d1, d2}, [r4:128]!
+ vst1.8 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst1.8 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0, d1, d2}, [r4], r6
+ vst1.8 {d0, d1, d2}, [r4:16], r6
+ vst1.8 {d0, d1, d2}, [r4:32], r6
+ vst1.8 {d0, d1, d2}, [r4:64], r6
+ vst1.8 {d0, d1, d2}, [r4:128], r6
+ vst1.8 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst1.8 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
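+@ vst1 with a four-register list: :64, :128 and :256 alignments are accepted.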
+ vst1.8 {d0, d1, d2, d3}, [r4]
+ vst1.8 {d0, d1, d2, d3}, [r4:16]
+ vst1.8 {d0, d1, d2, d3}, [r4:32]
+ vst1.8 {d0, d1, d2, d3}, [r4:64]
+ vst1.8 {d0, d1, d2, d3}, [r4:128]
+ vst1.8 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x0f,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x02]
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x02]
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0x3f,0x02]
+
+ vst1.8 {d0, d1, d2, d3}, [r4]!
+ vst1.8 {d0, d1, d2, d3}, [r4:16]!
+ vst1.8 {d0, d1, d2, d3}, [r4:32]!
+ vst1.8 {d0, d1, d2, d3}, [r4:64]!
+ vst1.8 {d0, d1, d2, d3}, [r4:128]!
+ vst1.8 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x02]
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x02]
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0x3d,0x02]
+
+ vst1.8 {d0, d1, d2, d3}, [r4], r6
+ vst1.8 {d0, d1, d2, d3}, [r4:16], r6
+ vst1.8 {d0, d1, d2, d3}, [r4:32], r6
+ vst1.8 {d0, d1, d2, d3}, [r4:64], r6
+ vst1.8 {d0, d1, d2, d3}, [r4:128], r6
+ vst1.8 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.8 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x02]
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x02]
+@ CHECK: vst1.8 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0x36,0x02]
+
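+@ Single-lane vst1 accepts only an alignment equal to the element size, so
+@ no alignment at all is valid for 8-bit elements.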
+ vst1.8 {d0[2]}, [r4]
+ vst1.8 {d0[2]}, [r4:16]
+ vst1.8 {d0[2]}, [r4:32]
+ vst1.8 {d0[2]}, [r4:64]
+ vst1.8 {d0[2]}, [r4:128]
+ vst1.8 {d0[2]}, [r4:256]
+
+@ CHECK: vst1.8 {d0[2]}, [r4] @ encoding: [0x84,0xf9,0x4f,0x00]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0[2]}, [r4]!
+ vst1.8 {d0[2]}, [r4:16]!
+ vst1.8 {d0[2]}, [r4:32]!
+ vst1.8 {d0[2]}, [r4:64]!
+ vst1.8 {d0[2]}, [r4:128]!
+ vst1.8 {d0[2]}, [r4:256]!
+
+@ CHECK: vst1.8 {d0[2]}, [r4]! @ encoding: [0x84,0xf9,0x4d,0x00]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.8 {d0[2]}, [r4], r6
+ vst1.8 {d0[2]}, [r4:16], r6
+ vst1.8 {d0[2]}, [r4:32], r6
+ vst1.8 {d0[2]}, [r4:64], r6
+ vst1.8 {d0[2]}, [r4:128], r6
+ vst1.8 {d0[2]}, [r4:256], r6
+
+@ CHECK: vst1.8 {d0[2]}, [r4], r6 @ encoding: [0x84,0xf9,0x46,0x00]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst1.8 {d0[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
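+@ The same register-list alignment rules repeat for each element size below.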
+ vst1.16 {d0}, [r4]
+ vst1.16 {d0}, [r4:16]
+ vst1.16 {d0}, [r4:32]
+ vst1.16 {d0}, [r4:64]
+ vst1.16 {d0}, [r4:128]
+ vst1.16 {d0}, [r4:256]
+
+@ CHECK: vst1.16 {d0}, [r4] @ encoding: [0x04,0xf9,0x4f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0}, [r4]!
+ vst1.16 {d0}, [r4:16]!
+ vst1.16 {d0}, [r4:32]!
+ vst1.16 {d0}, [r4:64]!
+ vst1.16 {d0}, [r4:128]!
+ vst1.16 {d0}, [r4:256]!
+
+@ CHECK: vst1.16 {d0}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0}, [r4], r6
+ vst1.16 {d0}, [r4:16], r6
+ vst1.16 {d0}, [r4:32], r6
+ vst1.16 {d0}, [r4:64], r6
+ vst1.16 {d0}, [r4:128], r6
+ vst1.16 {d0}, [r4:256], r6
+
+@ CHECK: vst1.16 {d0}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1}, [r4]
+ vst1.16 {d0, d1}, [r4:16]
+ vst1.16 {d0, d1}, [r4:32]
+ vst1.16 {d0, d1}, [r4:64]
+ vst1.16 {d0, d1}, [r4:128]
+ vst1.16 {d0, d1}, [r4:256]
+
+@ CHECK: vst1.16 {d0, d1}, [r4] @ encoding: [0x04,0xf9,0x4f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x0a]
+@ CHECK: vst1.16 {d0, d1}, [r4:128] @ encoding: [0x04,0xf9,0x6f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1}, [r4]!
+ vst1.16 {d0, d1}, [r4:16]!
+ vst1.16 {d0, d1}, [r4:32]!
+ vst1.16 {d0, d1}, [r4:64]!
+ vst1.16 {d0, d1}, [r4:128]!
+ vst1.16 {d0, d1}, [r4:256]!
+
+@ CHECK: vst1.16 {d0, d1}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x0a]
+@ CHECK: vst1.16 {d0, d1}, [r4:128]! @ encoding: [0x04,0xf9,0x6d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1}, [r4], r6
+ vst1.16 {d0, d1}, [r4:16], r6
+ vst1.16 {d0, d1}, [r4:32], r6
+ vst1.16 {d0, d1}, [r4:64], r6
+ vst1.16 {d0, d1}, [r4:128], r6
+ vst1.16 {d0, d1}, [r4:256], r6
+
+@ CHECK: vst1.16 {d0, d1}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x0a]
+@ CHECK: vst1.16 {d0, d1}, [r4:128], r6 @ encoding: [0x04,0xf9,0x66,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1, d2}, [r4]
+ vst1.16 {d0, d1, d2}, [r4:16]
+ vst1.16 {d0, d1, d2}, [r4:32]
+ vst1.16 {d0, d1, d2}, [r4:64]
+ vst1.16 {d0, d1, d2}, [r4:128]
+ vst1.16 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst1.16 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0x4f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1, d2}, [r4]!
+ vst1.16 {d0, d1, d2}, [r4:16]!
+ vst1.16 {d0, d1, d2}, [r4:32]!
+ vst1.16 {d0, d1, d2}, [r4:64]!
+ vst1.16 {d0, d1, d2}, [r4:128]!
+ vst1.16 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst1.16 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1, d2}, [r4], r6
+ vst1.16 {d0, d1, d2}, [r4:16], r6
+ vst1.16 {d0, d1, d2}, [r4:32], r6
+ vst1.16 {d0, d1, d2}, [r4:64], r6
+ vst1.16 {d0, d1, d2}, [r4:128], r6
+ vst1.16 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst1.16 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0, d1, d2, d3}, [r4]
+ vst1.16 {d0, d1, d2, d3}, [r4:16]
+ vst1.16 {d0, d1, d2, d3}, [r4:32]
+ vst1.16 {d0, d1, d2, d3}, [r4:64]
+ vst1.16 {d0, d1, d2, d3}, [r4:128]
+ vst1.16 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x4f,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x02]
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0x6f,0x02]
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0x7f,0x02]
+
+ vst1.16 {d0, d1, d2, d3}, [r4]!
+ vst1.16 {d0, d1, d2, d3}, [r4:16]!
+ vst1.16 {d0, d1, d2, d3}, [r4:32]!
+ vst1.16 {d0, d1, d2, d3}, [r4:64]!
+ vst1.16 {d0, d1, d2, d3}, [r4:128]!
+ vst1.16 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x02]
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0x6d,0x02]
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0x7d,0x02]
+
+ vst1.16 {d0, d1, d2, d3}, [r4], r6
+ vst1.16 {d0, d1, d2, d3}, [r4:16], r6
+ vst1.16 {d0, d1, d2, d3}, [r4:32], r6
+ vst1.16 {d0, d1, d2, d3}, [r4:64], r6
+ vst1.16 {d0, d1, d2, d3}, [r4:128], r6
+ vst1.16 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.16 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x02]
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0x66,0x02]
+@ CHECK: vst1.16 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0x76,0x02]
+
+ vst1.16 {d0[2]}, [r4]
+ vst1.16 {d0[2]}, [r4:16]
+ vst1.16 {d0[2]}, [r4:32]
+ vst1.16 {d0[2]}, [r4:64]
+ vst1.16 {d0[2]}, [r4:128]
+ vst1.16 {d0[2]}, [r4:256]
+
+@ CHECK: vst1.16 {d0[2]}, [r4] @ encoding: [0x84,0xf9,0x8f,0x04]
+@ CHECK: vst1.16 {d0[2]}, [r4:16] @ encoding: [0x84,0xf9,0x9f,0x04]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0[2]}, [r4]!
+ vst1.16 {d0[2]}, [r4:16]!
+ vst1.16 {d0[2]}, [r4:32]!
+ vst1.16 {d0[2]}, [r4:64]!
+ vst1.16 {d0[2]}, [r4:128]!
+ vst1.16 {d0[2]}, [r4:256]!
+
+@ CHECK: vst1.16 {d0[2]}, [r4]! @ encoding: [0x84,0xf9,0x8d,0x04]
+@ CHECK: vst1.16 {d0[2]}, [r4:16]! @ encoding: [0x84,0xf9,0x9d,0x04]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.16 {d0[2]}, [r4], r6
+ vst1.16 {d0[2]}, [r4:16], r6
+ vst1.16 {d0[2]}, [r4:32], r6
+ vst1.16 {d0[2]}, [r4:64], r6
+ vst1.16 {d0[2]}, [r4:128], r6
+ vst1.16 {d0[2]}, [r4:256], r6
+
+@ CHECK: vst1.16 {d0[2]}, [r4], r6 @ encoding: [0x84,0xf9,0x86,0x04]
+@ CHECK: vst1.16 {d0[2]}, [r4:16], r6 @ encoding: [0x84,0xf9,0x96,0x04]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst1.16 {d0[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0}, [r4]
+ vst1.32 {d0}, [r4:16]
+ vst1.32 {d0}, [r4:32]
+ vst1.32 {d0}, [r4:64]
+ vst1.32 {d0}, [r4:128]
+ vst1.32 {d0}, [r4:256]
+
+@ CHECK: vst1.32 {d0}, [r4] @ encoding: [0x04,0xf9,0x8f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0}, [r4]!
+ vst1.32 {d0}, [r4:16]!
+ vst1.32 {d0}, [r4:32]!
+ vst1.32 {d0}, [r4:64]!
+ vst1.32 {d0}, [r4:128]!
+ vst1.32 {d0}, [r4:256]!
+
+@ CHECK: vst1.32 {d0}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0}, [r4], r6
+ vst1.32 {d0}, [r4:16], r6
+ vst1.32 {d0}, [r4:32], r6
+ vst1.32 {d0}, [r4:64], r6
+ vst1.32 {d0}, [r4:128], r6
+ vst1.32 {d0}, [r4:256], r6
+
+@ CHECK: vst1.32 {d0}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1}, [r4]
+ vst1.32 {d0, d1}, [r4:16]
+ vst1.32 {d0, d1}, [r4:32]
+ vst1.32 {d0, d1}, [r4:64]
+ vst1.32 {d0, d1}, [r4:128]
+ vst1.32 {d0, d1}, [r4:256]
+
+@ CHECK: vst1.32 {d0, d1}, [r4] @ encoding: [0x04,0xf9,0x8f,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x0a]
+@ CHECK: vst1.32 {d0, d1}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1}, [r4]!
+ vst1.32 {d0, d1}, [r4:16]!
+ vst1.32 {d0, d1}, [r4:32]!
+ vst1.32 {d0, d1}, [r4:64]!
+ vst1.32 {d0, d1}, [r4:128]!
+ vst1.32 {d0, d1}, [r4:256]!
+
+@ CHECK: vst1.32 {d0, d1}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x0a]
+@ CHECK: vst1.32 {d0, d1}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1}, [r4], r6
+ vst1.32 {d0, d1}, [r4:16], r6
+ vst1.32 {d0, d1}, [r4:32], r6
+ vst1.32 {d0, d1}, [r4:64], r6
+ vst1.32 {d0, d1}, [r4:128], r6
+ vst1.32 {d0, d1}, [r4:256], r6
+
+@ CHECK: vst1.32 {d0, d1}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x0a]
+@ CHECK: vst1.32 {d0, d1}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1, d2}, [r4]
+ vst1.32 {d0, d1, d2}, [r4:16]
+ vst1.32 {d0, d1, d2}, [r4:32]
+ vst1.32 {d0, d1, d2}, [r4:64]
+ vst1.32 {d0, d1, d2}, [r4:128]
+ vst1.32 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst1.32 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0x8f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1, d2}, [r4]!
+ vst1.32 {d0, d1, d2}, [r4:16]!
+ vst1.32 {d0, d1, d2}, [r4:32]!
+ vst1.32 {d0, d1, d2}, [r4:64]!
+ vst1.32 {d0, d1, d2}, [r4:128]!
+ vst1.32 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst1.32 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1, d2}, [r4], r6
+ vst1.32 {d0, d1, d2}, [r4:16], r6
+ vst1.32 {d0, d1, d2}, [r4:32], r6
+ vst1.32 {d0, d1, d2}, [r4:64], r6
+ vst1.32 {d0, d1, d2}, [r4:128], r6
+ vst1.32 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst1.32 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0, d1, d2, d3}, [r4]
+ vst1.32 {d0, d1, d2, d3}, [r4:16]
+ vst1.32 {d0, d1, d2, d3}, [r4:32]
+ vst1.32 {d0, d1, d2, d3}, [r4:64]
+ vst1.32 {d0, d1, d2, d3}, [r4:128]
+ vst1.32 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x8f,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x02]
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x02]
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0xbf,0x02]
+
+ vst1.32 {d0, d1, d2, d3}, [r4]!
+ vst1.32 {d0, d1, d2, d3}, [r4:16]!
+ vst1.32 {d0, d1, d2, d3}, [r4:32]!
+ vst1.32 {d0, d1, d2, d3}, [r4:64]!
+ vst1.32 {d0, d1, d2, d3}, [r4:128]!
+ vst1.32 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x02]
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x02]
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0xbd,0x02]
+
+ vst1.32 {d0, d1, d2, d3}, [r4], r6
+ vst1.32 {d0, d1, d2, d3}, [r4:16], r6
+ vst1.32 {d0, d1, d2, d3}, [r4:32], r6
+ vst1.32 {d0, d1, d2, d3}, [r4:64], r6
+ vst1.32 {d0, d1, d2, d3}, [r4:128], r6
+ vst1.32 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.32 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x02]
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x02]
+@ CHECK: vst1.32 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0xb6,0x02]
+
+ vst1.32 {d0[1]}, [r4]
+ vst1.32 {d0[1]}, [r4:16]
+ vst1.32 {d0[1]}, [r4:32]
+ vst1.32 {d0[1]}, [r4:64]
+ vst1.32 {d0[1]}, [r4:128]
+ vst1.32 {d0[1]}, [r4:256]
+
+@ CHECK: vst1.32 {d0[1]}, [r4] @ encoding: [0x84,0xf9,0x8f,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0[1]}, [r4:32] @ encoding: [0x84,0xf9,0xbf,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0[1]}, [r4]!
+ vst1.32 {d0[1]}, [r4:16]!
+ vst1.32 {d0[1]}, [r4:32]!
+ vst1.32 {d0[1]}, [r4:64]!
+ vst1.32 {d0[1]}, [r4:128]!
+ vst1.32 {d0[1]}, [r4:256]!
+
+@ CHECK: vst1.32 {d0[1]}, [r4]! @ encoding: [0x84,0xf9,0x8d,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0[1]}, [r4:32]! @ encoding: [0x84,0xf9,0xbd,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.32 {d0[1]}, [r4], r6
+ vst1.32 {d0[1]}, [r4:16], r6
+ vst1.32 {d0[1]}, [r4:32], r6
+ vst1.32 {d0[1]}, [r4:64], r6
+ vst1.32 {d0[1]}, [r4:128], r6
+ vst1.32 {d0[1]}, [r4:256], r6
+
+@ CHECK: vst1.32 {d0[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x86,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.32 {d0[1]}, [r4:32], r6 @ encoding: [0x84,0xf9,0xb6,0x08]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst1.32 {d0[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0}, [r4]
+ vst1.64 {d0}, [r4:16]
+ vst1.64 {d0}, [r4:32]
+ vst1.64 {d0}, [r4:64]
+ vst1.64 {d0}, [r4:128]
+ vst1.64 {d0}, [r4:256]
+
+@ CHECK: vst1.64 {d0}, [r4] @ encoding: [0x04,0xf9,0xcf,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0}, [r4:64] @ encoding: [0x04,0xf9,0xdf,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0}, [r4]!
+ vst1.64 {d0}, [r4:16]!
+ vst1.64 {d0}, [r4:32]!
+ vst1.64 {d0}, [r4:64]!
+ vst1.64 {d0}, [r4:128]!
+ vst1.64 {d0}, [r4:256]!
+
+@ CHECK: vst1.64 {d0}, [r4]! @ encoding: [0x04,0xf9,0xcd,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0}, [r4:64]! @ encoding: [0x04,0xf9,0xdd,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0}, [r4], r6
+ vst1.64 {d0}, [r4:16], r6
+ vst1.64 {d0}, [r4:32], r6
+ vst1.64 {d0}, [r4:64], r6
+ vst1.64 {d0}, [r4:128], r6
+ vst1.64 {d0}, [r4:256], r6
+
+@ CHECK: vst1.64 {d0}, [r4], r6 @ encoding: [0x04,0xf9,0xc6,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0}, [r4:64], r6 @ encoding: [0x04,0xf9,0xd6,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1}, [r4]
+ vst1.64 {d0, d1}, [r4:16]
+ vst1.64 {d0, d1}, [r4:32]
+ vst1.64 {d0, d1}, [r4:64]
+ vst1.64 {d0, d1}, [r4:128]
+ vst1.64 {d0, d1}, [r4:256]
+
+@ CHECK: vst1.64 {d0, d1}, [r4] @ encoding: [0x04,0xf9,0xcf,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1}, [r4:64] @ encoding: [0x04,0xf9,0xdf,0x0a]
+@ CHECK: vst1.64 {d0, d1}, [r4:128] @ encoding: [0x04,0xf9,0xef,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1}, [r4]!
+ vst1.64 {d0, d1}, [r4:16]!
+ vst1.64 {d0, d1}, [r4:32]!
+ vst1.64 {d0, d1}, [r4:64]!
+ vst1.64 {d0, d1}, [r4:128]!
+ vst1.64 {d0, d1}, [r4:256]!
+
+@ CHECK: vst1.64 {d0, d1}, [r4]! @ encoding: [0x04,0xf9,0xcd,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1}, [r4:64]! @ encoding: [0x04,0xf9,0xdd,0x0a]
+@ CHECK: vst1.64 {d0, d1}, [r4:128]! @ encoding: [0x04,0xf9,0xed,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1}, [r4], r6
+ vst1.64 {d0, d1}, [r4:16], r6
+ vst1.64 {d0, d1}, [r4:32], r6
+ vst1.64 {d0, d1}, [r4:64], r6
+ vst1.64 {d0, d1}, [r4:128], r6
+ vst1.64 {d0, d1}, [r4:256], r6
+
+@ CHECK: vst1.64 {d0, d1}, [r4], r6 @ encoding: [0x04,0xf9,0xc6,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1}, [r4:64], r6 @ encoding: [0x04,0xf9,0xd6,0x0a]
+@ CHECK: vst1.64 {d0, d1}, [r4:128], r6 @ encoding: [0x04,0xf9,0xe6,0x0a]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1, d2}, [r4]
+ vst1.64 {d0, d1, d2}, [r4:16]
+ vst1.64 {d0, d1, d2}, [r4:32]
+ vst1.64 {d0, d1, d2}, [r4:64]
+ vst1.64 {d0, d1, d2}, [r4:128]
+ vst1.64 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst1.64 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0xcf,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0xdf,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1, d2}, [r4]!
+ vst1.64 {d0, d1, d2}, [r4:16]!
+ vst1.64 {d0, d1, d2}, [r4:32]!
+ vst1.64 {d0, d1, d2}, [r4:64]!
+ vst1.64 {d0, d1, d2}, [r4:128]!
+ vst1.64 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst1.64 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0xcd,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0xdd,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1, d2}, [r4], r6
+ vst1.64 {d0, d1, d2}, [r4:16], r6
+ vst1.64 {d0, d1, d2}, [r4:32], r6
+ vst1.64 {d0, d1, d2}, [r4:64], r6
+ vst1.64 {d0, d1, d2}, [r4:128], r6
+ vst1.64 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst1.64 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0xc6,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0xd6,0x06]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst1.64 {d0, d1, d2, d3}, [r4]
+ vst1.64 {d0, d1, d2, d3}, [r4:16]
+ vst1.64 {d0, d1, d2, d3}, [r4:32]
+ vst1.64 {d0, d1, d2, d3}, [r4:64]
+ vst1.64 {d0, d1, d2, d3}, [r4:128]
+ vst1.64 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0xcf,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0xdf,0x02]
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0xef,0x02]
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0xff,0x02]
+
+ vst1.64 {d0, d1, d2, d3}, [r4]!
+ vst1.64 {d0, d1, d2, d3}, [r4:16]!
+ vst1.64 {d0, d1, d2, d3}, [r4:32]!
+ vst1.64 {d0, d1, d2, d3}, [r4:64]!
+ vst1.64 {d0, d1, d2, d3}, [r4:128]!
+ vst1.64 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0xcd,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0xdd,0x02]
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0xed,0x02]
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0xfd,0x02]
+
+ vst1.64 {d0, d1, d2, d3}, [r4], r6
+ vst1.64 {d0, d1, d2, d3}, [r4:16], r6
+ vst1.64 {d0, d1, d2, d3}, [r4:32], r6
+ vst1.64 {d0, d1, d2, d3}, [r4:64], r6
+ vst1.64 {d0, d1, d2, d3}, [r4:128], r6
+ vst1.64 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0xc6,0x02]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst1.64 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0xd6,0x02]
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0xe6,0x02]
+@ CHECK: vst1.64 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0xf6,0x02]
+
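+@ vst2 with a two-register list (consecutive or even-spaced): :64 and :128
+@ alignments are accepted.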
+ vst2.8 {d0, d1}, [r4]
+ vst2.8 {d0, d1}, [r4:16]
+ vst2.8 {d0, d1}, [r4:32]
+ vst2.8 {d0, d1}, [r4:64]
+ vst2.8 {d0, d1}, [r4:128]
+ vst2.8 {d0, d1}, [r4:256]
+
+@ CHECK: vst2.8 {d0, d1}, [r4] @ encoding: [0x04,0xf9,0x0f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d1}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x08]
+@ CHECK: vst2.8 {d0, d1}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0, d1}, [r4]!
+ vst2.8 {d0, d1}, [r4:16]!
+ vst2.8 {d0, d1}, [r4:32]!
+ vst2.8 {d0, d1}, [r4:64]!
+ vst2.8 {d0, d1}, [r4:128]!
+ vst2.8 {d0, d1}, [r4:256]!
+
+@ CHECK: vst2.8 {d0, d1}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d1}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x08]
+@ CHECK: vst2.8 {d0, d1}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0, d1}, [r4], r6
+ vst2.8 {d0, d1}, [r4:16], r6
+ vst2.8 {d0, d1}, [r4:32], r6
+ vst2.8 {d0, d1}, [r4:64], r6
+ vst2.8 {d0, d1}, [r4:128], r6
+ vst2.8 {d0, d1}, [r4:256], r6
+
+@ CHECK: vst2.8 {d0, d1}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d1}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x08]
+@ CHECK: vst2.8 {d0, d1}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0, d2}, [r4]
+ vst2.8 {d0, d2}, [r4:16]
+ vst2.8 {d0, d2}, [r4:32]
+ vst2.8 {d0, d2}, [r4:64]
+ vst2.8 {d0, d2}, [r4:128]
+ vst2.8 {d0, d2}, [r4:256]
+
+@ CHECK: vst2.8 {d0, d2}, [r4] @ encoding: [0x04,0xf9,0x0f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d2}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x09]
+@ CHECK: vst2.8 {d0, d2}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0, d2}, [r4]!
+ vst2.8 {d0, d2}, [r4:16]!
+ vst2.8 {d0, d2}, [r4:32]!
+ vst2.8 {d0, d2}, [r4:64]!
+ vst2.8 {d0, d2}, [r4:128]!
+ vst2.8 {d0, d2}, [r4:256]!
+
+@ CHECK: vst2.8 {d0, d2}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x09]
+@ CHECK: vst2.8 {d0, d2}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0, d2}, [r4], r6
+ vst2.8 {d0, d2}, [r4:16], r6
+ vst2.8 {d0, d2}, [r4:32], r6
+ vst2.8 {d0, d2}, [r4:64], r6
+ vst2.8 {d0, d2}, [r4:128], r6
+ vst2.8 {d0, d2}, [r4:256], r6
+
+@ CHECK: vst2.8 {d0, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x09]
+@ CHECK: vst2.8 {d0, d2}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
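+@ vst2 with a four-register list: :64, :128 and :256 alignments are accepted.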
+ vst2.8 {d0, d1, d2, d3}, [r4]
+ vst2.8 {d0, d1, d2, d3}, [r4:16]
+ vst2.8 {d0, d1, d2, d3}, [r4:32]
+ vst2.8 {d0, d1, d2, d3}, [r4:64]
+ vst2.8 {d0, d1, d2, d3}, [r4:128]
+ vst2.8 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x0f,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x03]
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x03]
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0x3f,0x03]
+
+ vst2.8 {d0, d1, d2, d3}, [r4]!
+ vst2.8 {d0, d1, d2, d3}, [r4:16]!
+ vst2.8 {d0, d1, d2, d3}, [r4:32]!
+ vst2.8 {d0, d1, d2, d3}, [r4:64]!
+ vst2.8 {d0, d1, d2, d3}, [r4:128]!
+ vst2.8 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x03]
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x03]
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0x3d,0x03]
+
+ vst2.8 {d0, d1, d2, d3}, [r4], r6
+ vst2.8 {d0, d1, d2, d3}, [r4:16], r6
+ vst2.8 {d0, d1, d2, d3}, [r4:32], r6
+ vst2.8 {d0, d1, d2, d3}, [r4:64], r6
+ vst2.8 {d0, d1, d2, d3}, [r4:128], r6
+ vst2.8 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.8 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x03]
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x03]
+@ CHECK: vst2.8 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0x36,0x03]
+
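+@ Two-lane vst2 accepts an alignment of twice the element size
+@ (:16 for 8-bit elements, :64 for 32-bit elements).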
+ vst2.8 {d0[2], d1[2]}, [r4]
+ vst2.8 {d0[2], d1[2]}, [r4:16]
+ vst2.8 {d0[2], d1[2]}, [r4:32]
+ vst2.8 {d0[2], d1[2]}, [r4:64]
+ vst2.8 {d0[2], d1[2]}, [r4:128]
+ vst2.8 {d0[2], d1[2]}, [r4:256]
+
+@ CHECK: vst2.8 {d0[2], d1[2]}, [r4] @ encoding: [0x84,0xf9,0x4f,0x01]
+@ CHECK: vst2.8 {d0[2], d1[2]}, [r4:16] @ encoding: [0x84,0xf9,0x5f,0x01]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0[2], d1[2]}, [r4]!
+ vst2.8 {d0[2], d1[2]}, [r4:16]!
+ vst2.8 {d0[2], d1[2]}, [r4:32]!
+ vst2.8 {d0[2], d1[2]}, [r4:64]!
+ vst2.8 {d0[2], d1[2]}, [r4:128]!
+ vst2.8 {d0[2], d1[2]}, [r4:256]!
+
+@ CHECK: vst2.8 {d0[2], d1[2]}, [r4]! @ encoding: [0x84,0xf9,0x4d,0x01]
+@ CHECK: vst2.8 {d0[2], d1[2]}, [r4:16]! @ encoding: [0x84,0xf9,0x5d,0x01]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.8 {d0[2], d1[2]}, [r4], r6
+ vst2.8 {d0[2], d1[2]}, [r4:16], r6
+ vst2.8 {d0[2], d1[2]}, [r4:32], r6
+ vst2.8 {d0[2], d1[2]}, [r4:64], r6
+ vst2.8 {d0[2], d1[2]}, [r4:128], r6
+ vst2.8 {d0[2], d1[2]}, [r4:256], r6
+
+@ CHECK: vst2.8 {d0[2], d1[2]}, [r4], r6 @ encoding: [0x84,0xf9,0x46,0x01]
+@ CHECK: vst2.8 {d0[2], d1[2]}, [r4:16], r6 @ encoding: [0x84,0xf9,0x56,0x01]
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 16 or omitted
+@ CHECK-ERRORS: vst2.8 {d0[2], d1[2]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d1}, [r4]
+ vst2.32 {d0, d1}, [r4:16]
+ vst2.32 {d0, d1}, [r4:32]
+ vst2.32 {d0, d1}, [r4:64]
+ vst2.32 {d0, d1}, [r4:128]
+ vst2.32 {d0, d1}, [r4:256]
+
+@ CHECK: vst2.32 {d0, d1}, [r4] @ encoding: [0x04,0xf9,0x8f,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d1}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x08]
+@ CHECK: vst2.32 {d0, d1}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d1}, [r4]!
+ vst2.32 {d0, d1}, [r4:16]!
+ vst2.32 {d0, d1}, [r4:32]!
+ vst2.32 {d0, d1}, [r4:64]!
+ vst2.32 {d0, d1}, [r4:128]!
+ vst2.32 {d0, d1}, [r4:256]!
+
+@ CHECK: vst2.32 {d0, d1}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d1}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x08]
+@ CHECK: vst2.32 {d0, d1}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d1}, [r4], r6
+ vst2.32 {d0, d1}, [r4:16], r6
+ vst2.32 {d0, d1}, [r4:32], r6
+ vst2.32 {d0, d1}, [r4:64], r6
+ vst2.32 {d0, d1}, [r4:128], r6
+ vst2.32 {d0, d1}, [r4:256], r6
+
+@ CHECK: vst2.32 {d0, d1}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d1}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x08]
+@ CHECK: vst2.32 {d0, d1}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x08]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d2}, [r4]
+ vst2.32 {d0, d2}, [r4:16]
+ vst2.32 {d0, d2}, [r4:32]
+ vst2.32 {d0, d2}, [r4:64]
+ vst2.32 {d0, d2}, [r4:128]
+ vst2.32 {d0, d2}, [r4:256]
+
+@ CHECK: vst2.32 {d0, d2}, [r4] @ encoding: [0x04,0xf9,0x8f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d2}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x09]
+@ CHECK: vst2.32 {d0, d2}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d2}, [r4]!
+ vst2.32 {d0, d2}, [r4:16]!
+ vst2.32 {d0, d2}, [r4:32]!
+ vst2.32 {d0, d2}, [r4:64]!
+ vst2.32 {d0, d2}, [r4:128]!
+ vst2.32 {d0, d2}, [r4:256]!
+
+@ CHECK: vst2.32 {d0, d2}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x09]
+@ CHECK: vst2.32 {d0, d2}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d2}, [r4], r6
+ vst2.32 {d0, d2}, [r4:16], r6
+ vst2.32 {d0, d2}, [r4:32], r6
+ vst2.32 {d0, d2}, [r4:64], r6
+ vst2.32 {d0, d2}, [r4:128], r6
+ vst2.32 {d0, d2}, [r4:256], r6
+
+@ CHECK: vst2.32 {d0, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x09]
+@ CHECK: vst2.32 {d0, d2}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x09]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0, d1, d2, d3}, [r4]
+ vst2.32 {d0, d1, d2, d3}, [r4:16]
+ vst2.32 {d0, d1, d2, d3}, [r4:32]
+ vst2.32 {d0, d1, d2, d3}, [r4:64]
+ vst2.32 {d0, d1, d2, d3}, [r4:128]
+ vst2.32 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x8f,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x03]
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x03]
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0xbf,0x03]
+
+ vst2.32 {d0, d1, d2, d3}, [r4]!
+ vst2.32 {d0, d1, d2, d3}, [r4:16]!
+ vst2.32 {d0, d1, d2, d3}, [r4:32]!
+ vst2.32 {d0, d1, d2, d3}, [r4:64]!
+ vst2.32 {d0, d1, d2, d3}, [r4:128]!
+ vst2.32 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x03]
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x03]
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0xbd,0x03]
+
+ vst2.32 {d0, d1, d2, d3}, [r4], r6
+ vst2.32 {d0, d1, d2, d3}, [r4:16], r6
+ vst2.32 {d0, d1, d2, d3}, [r4:32], r6
+ vst2.32 {d0, d1, d2, d3}, [r4:64], r6
+ vst2.32 {d0, d1, d2, d3}, [r4:128], r6
+ vst2.32 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x03]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst2.32 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x03]
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x03]
+@ CHECK: vst2.32 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0xb6,0x03]
+
+ vst2.32 {d0[1], d1[1]}, [r4]
+ vst2.32 {d0[1], d1[1]}, [r4:16]
+ vst2.32 {d0[1], d1[1]}, [r4:32]
+ vst2.32 {d0[1], d1[1]}, [r4:64]
+ vst2.32 {d0[1], d1[1]}, [r4:128]
+ vst2.32 {d0[1], d1[1]}, [r4:256]
+
+@ CHECK: vst2.32 {d0[1], d1[1]}, [r4] @ encoding: [0x84,0xf9,0x8f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0[1], d1[1]}, [r4:64] @ encoding: [0x84,0xf9,0x9f,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0[1], d1[1]}, [r4]!
+ vst2.32 {d0[1], d1[1]}, [r4:16]!
+ vst2.32 {d0[1], d1[1]}, [r4:32]!
+ vst2.32 {d0[1], d1[1]}, [r4:64]!
+ vst2.32 {d0[1], d1[1]}, [r4:128]!
+ vst2.32 {d0[1], d1[1]}, [r4:256]!
+
+@ CHECK: vst2.32 {d0[1], d1[1]}, [r4]! @ encoding: [0x84,0xf9,0x8d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0[1], d1[1]}, [r4:64]! @ encoding: [0x84,0xf9,0x9d,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0[1], d1[1]}, [r4], r6
+ vst2.32 {d0[1], d1[1]}, [r4:16], r6
+ vst2.32 {d0[1], d1[1]}, [r4:32], r6
+ vst2.32 {d0[1], d1[1]}, [r4:64], r6
+ vst2.32 {d0[1], d1[1]}, [r4:128], r6
+ vst2.32 {d0[1], d1[1]}, [r4:256], r6
+
+@ CHECK: vst2.32 {d0[1], d1[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x86,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0[1], d1[1]}, [r4:64], r6 @ encoding: [0x84,0xf9,0x96,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d1[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0[1], d2[1]}, [r4]
+ vst2.32 {d0[1], d2[1]}, [r4:16]
+ vst2.32 {d0[1], d2[1]}, [r4:32]
+ vst2.32 {d0[1], d2[1]}, [r4:64]
+ vst2.32 {d0[1], d2[1]}, [r4:128]
+ vst2.32 {d0[1], d2[1]}, [r4:256]
+
+@ CHECK: vst2.32 {d0[1], d2[1]}, [r4] @ encoding: [0x84,0xf9,0xcf,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0[1], d2[1]}, [r4:64] @ encoding: [0x84,0xf9,0xdf,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0[1], d2[1]}, [r4]!
+ vst2.32 {d0[1], d2[1]}, [r4:16]!
+ vst2.32 {d0[1], d2[1]}, [r4:32]!
+ vst2.32 {d0[1], d2[1]}, [r4:64]!
+ vst2.32 {d0[1], d2[1]}, [r4:128]!
+ vst2.32 {d0[1], d2[1]}, [r4:256]!
+
+@ CHECK: vst2.32 {d0[1], d2[1]}, [r4]! @ encoding: [0x84,0xf9,0xcd,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0[1], d2[1]}, [r4:64]! @ encoding: [0x84,0xf9,0xdd,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst2.32 {d0[1], d2[1]}, [r4], r6
+ vst2.32 {d0[1], d2[1]}, [r4:16], r6
+ vst2.32 {d0[1], d2[1]}, [r4:32], r6
+ vst2.32 {d0[1], d2[1]}, [r4:64], r6
+ vst2.32 {d0[1], d2[1]}, [r4:128], r6
+ vst2.32 {d0[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vst2.32 {d0[1], d2[1]}, [r4], r6 @ encoding: [0x84,0xf9,0xc6,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst2.32 {d0[1], d2[1]}, [r4:64], r6 @ encoding: [0x84,0xf9,0xd6,0x09]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst2.32 {d0[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
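+@ vst3 accepts only :64 alignment on full d-register lists, and no
+@ alignment at all on single-lane lists (a 3-element transfer is never a
+@ power-of-two size).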
+ vst3.8 {d0, d1, d2}, [r4]
+ vst3.8 {d0, d1, d2}, [r4:16]
+ vst3.8 {d0, d1, d2}, [r4:32]
+ vst3.8 {d0, d1, d2}, [r4:64]
+ vst3.8 {d0, d1, d2}, [r4:128]
+ vst3.8 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst3.8 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0x0f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.8 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0, d1, d2}, [r4]!
+ vst3.8 {d0, d1, d2}, [r4:16]!
+ vst3.8 {d0, d1, d2}, [r4:32]!
+ vst3.8 {d0, d1, d2}, [r4:64]!
+ vst3.8 {d0, d1, d2}, [r4:128]!
+ vst3.8 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst3.8 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.8 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0, d1, d2}, [r4], r6
+ vst3.8 {d0, d1, d2}, [r4:16], r6
+ vst3.8 {d0, d1, d2}, [r4:32], r6
+ vst3.8 {d0, d1, d2}, [r4:64], r6
+ vst3.8 {d0, d1, d2}, [r4:128], r6
+ vst3.8 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst3.8 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.8 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0, d2, d4}, [r4]
+ vst3.8 {d0, d2, d4}, [r4:16]
+ vst3.8 {d0, d2, d4}, [r4:32]
+ vst3.8 {d0, d2, d4}, [r4:64]
+ vst3.8 {d0, d2, d4}, [r4:128]
+ vst3.8 {d0, d2, d4}, [r4:256]
+
+@ CHECK: vst3.8 {d0, d2, d4}, [r4] @ encoding: [0x04,0xf9,0x0f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.8 {d0, d2, d4}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0, d2, d4}, [r4]!
+ vst3.8 {d0, d2, d4}, [r4:16]!
+ vst3.8 {d0, d2, d4}, [r4:32]!
+ vst3.8 {d0, d2, d4}, [r4:64]!
+ vst3.8 {d0, d2, d4}, [r4:128]!
+ vst3.8 {d0, d2, d4}, [r4:256]!
+
+@ CHECK: vst3.8 {d0, d2, d4}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.8 {d0, d2, d4}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0, d2, d4}, [r4], r6
+ vst3.8 {d0, d2, d4}, [r4:16], r6
+ vst3.8 {d0, d2, d4}, [r4:32], r6
+ vst3.8 {d0, d2, d4}, [r4:64], r6
+ vst3.8 {d0, d2, d4}, [r4:128], r6
+ vst3.8 {d0, d2, d4}, [r4:256], r6
+
+@ CHECK: vst3.8 {d0, d2, d4}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.8 {d0, d2, d4}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.8 {d0, d2, d4}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4]
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:16]
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:32]
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:64]
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:128]
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:256]
+
+@ CHECK: vst3.8 {d0[1], d1[1], d2[1]}, [r4] @ encoding: [0x84,0xf9,0x2f,0x02]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4]!
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:16]!
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:32]!
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:64]!
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:128]!
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:256]!
+
+@ CHECK: vst3.8 {d0[1], d1[1], d2[1]}, [r4]! @ encoding: [0x84,0xf9,0x2d,0x02]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4], r6
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:16], r6
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:32], r6
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:64], r6
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:128], r6
+ vst3.8 {d0[1], d1[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vst3.8 {d0[1], d1[1], d2[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x26,0x02]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.8 {d0[1], d1[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0, d1, d2}, [r4]
+ vst3.16 {d0, d1, d2}, [r4:16]
+ vst3.16 {d0, d1, d2}, [r4:32]
+ vst3.16 {d0, d1, d2}, [r4:64]
+ vst3.16 {d0, d1, d2}, [r4:128]
+ vst3.16 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst3.16 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0x4f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.16 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0, d1, d2}, [r4]!
+ vst3.16 {d0, d1, d2}, [r4:16]!
+ vst3.16 {d0, d1, d2}, [r4:32]!
+ vst3.16 {d0, d1, d2}, [r4:64]!
+ vst3.16 {d0, d1, d2}, [r4:128]!
+ vst3.16 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst3.16 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.16 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0, d1, d2}, [r4], r6
+ vst3.16 {d0, d1, d2}, [r4:16], r6
+ vst3.16 {d0, d1, d2}, [r4:32], r6
+ vst3.16 {d0, d1, d2}, [r4:64], r6
+ vst3.16 {d0, d1, d2}, [r4:128], r6
+ vst3.16 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst3.16 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.16 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0, d2, d4}, [r4]
+ vst3.16 {d0, d2, d4}, [r4:16]
+ vst3.16 {d0, d2, d4}, [r4:32]
+ vst3.16 {d0, d2, d4}, [r4:64]
+ vst3.16 {d0, d2, d4}, [r4:128]
+ vst3.16 {d0, d2, d4}, [r4:256]
+
+@ CHECK: vst3.16 {d0, d2, d4}, [r4] @ encoding: [0x04,0xf9,0x4f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.16 {d0, d2, d4}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0, d2, d4}, [r4]!
+ vst3.16 {d0, d2, d4}, [r4:16]!
+ vst3.16 {d0, d2, d4}, [r4:32]!
+ vst3.16 {d0, d2, d4}, [r4:64]!
+ vst3.16 {d0, d2, d4}, [r4:128]!
+ vst3.16 {d0, d2, d4}, [r4:256]!
+
+@ CHECK: vst3.16 {d0, d2, d4}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.16 {d0, d2, d4}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0, d2, d4}, [r4], r6
+ vst3.16 {d0, d2, d4}, [r4:16], r6
+ vst3.16 {d0, d2, d4}, [r4:32], r6
+ vst3.16 {d0, d2, d4}, [r4:64], r6
+ vst3.16 {d0, d2, d4}, [r4:128], r6
+ vst3.16 {d0, d2, d4}, [r4:256], r6
+
+@ CHECK: vst3.16 {d0, d2, d4}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.16 {d0, d2, d4}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.16 {d0, d2, d4}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4]
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:16]
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:32]
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:64]
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:128]
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:256]
+
+@ CHECK: vst3.16 {d0[1], d1[1], d2[1]}, [r4] @ encoding: [0x84,0xf9,0x4f,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4]!
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:16]!
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:32]!
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:64]!
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:128]!
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:256]!
+
+@ CHECK: vst3.16 {d0[1], d1[1], d2[1]}, [r4]! @ encoding: [0x84,0xf9,0x4d,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4], r6
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:16], r6
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:32], r6
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:64], r6
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:128], r6
+ vst3.16 {d0[1], d1[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vst3.16 {d0[1], d1[1], d2[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x46,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d1[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4]
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:16]
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:32]
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:64]
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:128]
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:256]
+
+@ CHECK: vst3.16 {d0[1], d2[1], d4[1]}, [r4] @ encoding: [0x84,0xf9,0x6f,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4]!
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:16]!
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:32]!
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:64]!
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:128]!
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:256]!
+
+@ CHECK: vst3.16 {d0[1], d2[1], d4[1]}, [r4]! @ encoding: [0x84,0xf9,0x6d,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4], r6
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:16], r6
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:32], r6
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:64], r6
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:128], r6
+ vst3.16 {d0[1], d2[1], d4[1]}, [r4:256], r6
+
+@ CHECK: vst3.16 {d0[1], d2[1], d4[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x66,0x06]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.16 {d0[1], d2[1], d4[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0, d1, d2}, [r4]
+ vst3.32 {d0, d1, d2}, [r4:16]
+ vst3.32 {d0, d1, d2}, [r4:32]
+ vst3.32 {d0, d1, d2}, [r4:64]
+ vst3.32 {d0, d1, d2}, [r4:128]
+ vst3.32 {d0, d1, d2}, [r4:256]
+
+@ CHECK: vst3.32 {d0, d1, d2}, [r4] @ encoding: [0x04,0xf9,0x8f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.32 {d0, d1, d2}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0, d1, d2}, [r4]!
+ vst3.32 {d0, d1, d2}, [r4:16]!
+ vst3.32 {d0, d1, d2}, [r4:32]!
+ vst3.32 {d0, d1, d2}, [r4:64]!
+ vst3.32 {d0, d1, d2}, [r4:128]!
+ vst3.32 {d0, d1, d2}, [r4:256]!
+
+@ CHECK: vst3.32 {d0, d1, d2}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.32 {d0, d1, d2}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0, d1, d2}, [r4], r6
+ vst3.32 {d0, d1, d2}, [r4:16], r6
+ vst3.32 {d0, d1, d2}, [r4:32], r6
+ vst3.32 {d0, d1, d2}, [r4:64], r6
+ vst3.32 {d0, d1, d2}, [r4:128], r6
+ vst3.32 {d0, d1, d2}, [r4:256], r6
+
+@ CHECK: vst3.32 {d0, d1, d2}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.32 {d0, d1, d2}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x04]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d1, d2}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0, d2, d4}, [r4]
+ vst3.32 {d0, d2, d4}, [r4:16]
+ vst3.32 {d0, d2, d4}, [r4:32]
+ vst3.32 {d0, d2, d4}, [r4:64]
+ vst3.32 {d0, d2, d4}, [r4:128]
+ vst3.32 {d0, d2, d4}, [r4:256]
+
+@ CHECK: vst3.32 {d0, d2, d4}, [r4] @ encoding: [0x04,0xf9,0x8f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.32 {d0, d2, d4}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0, d2, d4}, [r4]!
+ vst3.32 {d0, d2, d4}, [r4:16]!
+ vst3.32 {d0, d2, d4}, [r4:32]!
+ vst3.32 {d0, d2, d4}, [r4:64]!
+ vst3.32 {d0, d2, d4}, [r4:128]!
+ vst3.32 {d0, d2, d4}, [r4:256]!
+
+@ CHECK: vst3.32 {d0, d2, d4}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.32 {d0, d2, d4}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0, d2, d4}, [r4], r6
+ vst3.32 {d0, d2, d4}, [r4:16], r6
+ vst3.32 {d0, d2, d4}, [r4:32], r6
+ vst3.32 {d0, d2, d4}, [r4:64], r6
+ vst3.32 {d0, d2, d4}, [r4:128], r6
+ vst3.32 {d0, d2, d4}, [r4:256], r6
+
+@ CHECK: vst3.32 {d0, d2, d4}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst3.32 {d0, d2, d4}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x05]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst3.32 {d0, d2, d4}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4]
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:16]
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:32]
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:64]
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:128]
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:256]
+
+@ CHECK: vst3.32 {d0[1], d1[1], d2[1]}, [r4] @ encoding: [0x84,0xf9,0x8f,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4]!
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:16]!
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:32]!
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:64]!
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:128]!
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:256]!
+
+@ CHECK: vst3.32 {d0[1], d1[1], d2[1]}, [r4]! @ encoding: [0x84,0xf9,0x8d,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4], r6
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:16], r6
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:32], r6
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:64], r6
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:128], r6
+ vst3.32 {d0[1], d1[1], d2[1]}, [r4:256], r6
+
+@ CHECK: vst3.32 {d0[1], d1[1], d2[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x86,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d1[1], d2[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4]
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:16]
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:32]
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:64]
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:128]
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:256]
+
+@ CHECK: vst3.32 {d0[1], d2[1], d4[1]}, [r4] @ encoding: [0x84,0xf9,0xcf,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4]!
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:16]!
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:32]!
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:64]!
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:128]!
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:256]!
+
+@ CHECK: vst3.32 {d0[1], d2[1], d4[1]}, [r4]! @ encoding: [0x84,0xf9,0xcd,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4], r6
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:16], r6
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:32], r6
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:64], r6
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:128], r6
+ vst3.32 {d0[1], d2[1], d4[1]}, [r4:256], r6
+
+@ CHECK: vst3.32 {d0[1], d2[1], d4[1]}, [r4], r6 @ encoding: [0x84,0xf9,0xc6,0x0a]
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be omitted
+@ CHECK-ERRORS: vst3.32 {d0[1], d2[1], d4[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
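+@ vst4 full-register lists accept :64, :128 and :256; the single-lane
+@ forms accept exactly one alignment, equal to the total transfer size
+@ (:32 for vst4.8, :64 for vst4.16).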
+ vst4.8 {d0, d1, d2, d3}, [r4]
+ vst4.8 {d0, d1, d2, d3}, [r4:16]
+ vst4.8 {d0, d1, d2, d3}, [r4:32]
+ vst4.8 {d0, d1, d2, d3}, [r4:64]
+ vst4.8 {d0, d1, d2, d3}, [r4:128]
+ vst4.8 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x0f,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x00]
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x00]
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0x3f,0x00]
+
+ vst4.8 {d0, d1, d2, d3}, [r4]!
+ vst4.8 {d0, d1, d2, d3}, [r4:16]!
+ vst4.8 {d0, d1, d2, d3}, [r4:32]!
+ vst4.8 {d0, d1, d2, d3}, [r4:64]!
+ vst4.8 {d0, d1, d2, d3}, [r4:128]!
+ vst4.8 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x00]
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x00]
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0x3d,0x00]
+
+ vst4.8 {d0, d1, d2, d3}, [r4], r6
+ vst4.8 {d0, d1, d2, d3}, [r4:16], r6
+ vst4.8 {d0, d1, d2, d3}, [r4:32], r6
+ vst4.8 {d0, d1, d2, d3}, [r4:64], r6
+ vst4.8 {d0, d1, d2, d3}, [r4:128], r6
+ vst4.8 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x00]
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x00]
+@ CHECK: vst4.8 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0x36,0x00]
+
+ vst4.8 {d0, d2, d4, d6}, [r4]
+ vst4.8 {d0, d2, d4, d6}, [r4:16]
+ vst4.8 {d0, d2, d4, d6}, [r4:32]
+ vst4.8 {d0, d2, d4, d6}, [r4:64]
+ vst4.8 {d0, d2, d4, d6}, [r4:128]
+ vst4.8 {d0, d2, d4, d6}, [r4:256]
+
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4] @ encoding: [0x04,0xf9,0x0f,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d2, d4, d6}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d2, d4, d6}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:64] @ encoding: [0x04,0xf9,0x1f,0x01]
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:128] @ encoding: [0x04,0xf9,0x2f,0x01]
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:256] @ encoding: [0x04,0xf9,0x3f,0x01]
+
+ vst4.8 {d0, d2, d4, d6}, [r4]!
+ vst4.8 {d0, d2, d4, d6}, [r4:16]!
+ vst4.8 {d0, d2, d4, d6}, [r4:32]!
+ vst4.8 {d0, d2, d4, d6}, [r4:64]!
+ vst4.8 {d0, d2, d4, d6}, [r4:128]!
+ vst4.8 {d0, d2, d4, d6}, [r4:256]!
+
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4]! @ encoding: [0x04,0xf9,0x0d,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d2, d4, d6}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d2, d4, d6}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:64]! @ encoding: [0x04,0xf9,0x1d,0x01]
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:128]! @ encoding: [0x04,0xf9,0x2d,0x01]
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:256]! @ encoding: [0x04,0xf9,0x3d,0x01]
+
+ vst4.8 {d0, d2, d4, d6}, [r4], r6
+ vst4.8 {d0, d2, d4, d6}, [r4:16], r6
+ vst4.8 {d0, d2, d4, d6}, [r4:32], r6
+ vst4.8 {d0, d2, d4, d6}, [r4:64], r6
+ vst4.8 {d0, d2, d4, d6}, [r4:128], r6
+ vst4.8 {d0, d2, d4, d6}, [r4:256], r6
+
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4], r6 @ encoding: [0x04,0xf9,0x06,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d2, d4, d6}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.8 {d0, d2, d4, d6}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:64], r6 @ encoding: [0x04,0xf9,0x16,0x01]
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:128], r6 @ encoding: [0x04,0xf9,0x26,0x01]
+@ CHECK: vst4.8 {d0, d2, d4, d6}, [r4:256], r6 @ encoding: [0x04,0xf9,0x36,0x01]
+
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4]
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+
+@ CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4] @ encoding: [0x84,0xf9,0x2f,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32] @ encoding: [0x84,0xf9,0x3f,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4]!
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+
+@ CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4]! @ encoding: [0x84,0xf9,0x2d,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]! @ encoding: [0x84,0xf9,0x3d,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+ vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+
+@ CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x26,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6 @ encoding: [0x84,0xf9,0x36,0x03]
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 32 or omitted
+@ CHECK-ERRORS: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst4.16 {d0, d1, d2, d3}, [r4]
+ vst4.16 {d0, d1, d2, d3}, [r4:16]
+ vst4.16 {d0, d1, d2, d3}, [r4:32]
+ vst4.16 {d0, d1, d2, d3}, [r4:64]
+ vst4.16 {d0, d1, d2, d3}, [r4:128]
+ vst4.16 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x4f,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x00]
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0x6f,0x00]
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0x7f,0x00]
+
+ vst4.16 {d0, d1, d2, d3}, [r4]!
+ vst4.16 {d0, d1, d2, d3}, [r4:16]!
+ vst4.16 {d0, d1, d2, d3}, [r4:32]!
+ vst4.16 {d0, d1, d2, d3}, [r4:64]!
+ vst4.16 {d0, d1, d2, d3}, [r4:128]!
+ vst4.16 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x00]
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0x6d,0x00]
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0x7d,0x00]
+
+ vst4.16 {d0, d1, d2, d3}, [r4], r6
+ vst4.16 {d0, d1, d2, d3}, [r4:16], r6
+ vst4.16 {d0, d1, d2, d3}, [r4:32], r6
+ vst4.16 {d0, d1, d2, d3}, [r4:64], r6
+ vst4.16 {d0, d1, d2, d3}, [r4:128], r6
+ vst4.16 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x00]
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0x66,0x00]
+@ CHECK: vst4.16 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0x76,0x00]
+
+ vst4.16 {d0, d2, d4, d6}, [r4]
+ vst4.16 {d0, d2, d4, d6}, [r4:16]
+ vst4.16 {d0, d2, d4, d6}, [r4:32]
+ vst4.16 {d0, d2, d4, d6}, [r4:64]
+ vst4.16 {d0, d2, d4, d6}, [r4:128]
+ vst4.16 {d0, d2, d4, d6}, [r4:256]
+
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4] @ encoding: [0x04,0xf9,0x4f,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d2, d4, d6}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d2, d4, d6}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:64] @ encoding: [0x04,0xf9,0x5f,0x01]
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:128] @ encoding: [0x04,0xf9,0x6f,0x01]
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:256] @ encoding: [0x04,0xf9,0x7f,0x01]
+
+ vst4.16 {d0, d2, d4, d6}, [r4]!
+ vst4.16 {d0, d2, d4, d6}, [r4:16]!
+ vst4.16 {d0, d2, d4, d6}, [r4:32]!
+ vst4.16 {d0, d2, d4, d6}, [r4:64]!
+ vst4.16 {d0, d2, d4, d6}, [r4:128]!
+ vst4.16 {d0, d2, d4, d6}, [r4:256]!
+
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4]! @ encoding: [0x04,0xf9,0x4d,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d2, d4, d6}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d2, d4, d6}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:64]! @ encoding: [0x04,0xf9,0x5d,0x01]
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:128]! @ encoding: [0x04,0xf9,0x6d,0x01]
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:256]! @ encoding: [0x04,0xf9,0x7d,0x01]
+
+ vst4.16 {d0, d2, d4, d6}, [r4], r6
+ vst4.16 {d0, d2, d4, d6}, [r4:16], r6
+ vst4.16 {d0, d2, d4, d6}, [r4:32], r6
+ vst4.16 {d0, d2, d4, d6}, [r4:64], r6
+ vst4.16 {d0, d2, d4, d6}, [r4:128], r6
+ vst4.16 {d0, d2, d4, d6}, [r4:256], r6
+
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4], r6 @ encoding: [0x04,0xf9,0x46,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d2, d4, d6}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.16 {d0, d2, d4, d6}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:64], r6 @ encoding: [0x04,0xf9,0x56,0x01]
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:128], r6 @ encoding: [0x04,0xf9,0x66,0x01]
+@ CHECK: vst4.16 {d0, d2, d4, d6}, [r4:256], r6 @ encoding: [0x04,0xf9,0x76,0x01]
+
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4]
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+
+@ CHECK: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4] @ encoding: [0x84,0xf9,0x4f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64] @ encoding: [0x84,0xf9,0x5f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4]!
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+
+@ CHECK: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4]! @ encoding: [0x84,0xf9,0x4d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]! @ encoding: [0x84,0xf9,0x5d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+
+@ CHECK: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x46,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6 @ encoding: [0x84,0xf9,0x56,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4]
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+
+@ CHECK: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4] @ encoding: [0x84,0xf9,0x6f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64] @ encoding: [0x84,0xf9,0x7f,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4]!
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]!
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]!
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+
+@ CHECK: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4]! @ encoding: [0x84,0xf9,0x6d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]! @ encoding: [0x84,0xf9,0x7d,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+
+@ CHECK: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x66,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6 @ encoding: [0x84,0xf9,0x76,0x07]
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64 or omitted
+@ CHECK-ERRORS: vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst4.32 {d0, d1, d2, d3}, [r4]
+ vst4.32 {d0, d1, d2, d3}, [r4:16]
+ vst4.32 {d0, d1, d2, d3}, [r4:32]
+ vst4.32 {d0, d1, d2, d3}, [r4:64]
+ vst4.32 {d0, d1, d2, d3}, [r4:128]
+ vst4.32 {d0, d1, d2, d3}, [r4:256]
+
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4] @ encoding: [0x04,0xf9,0x8f,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d1, d2, d3}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d1, d2, d3}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x00]
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x00]
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:256] @ encoding: [0x04,0xf9,0xbf,0x00]
+
+ vst4.32 {d0, d1, d2, d3}, [r4]!
+ vst4.32 {d0, d1, d2, d3}, [r4:16]!
+ vst4.32 {d0, d1, d2, d3}, [r4:32]!
+ vst4.32 {d0, d1, d2, d3}, [r4:64]!
+ vst4.32 {d0, d1, d2, d3}, [r4:128]!
+ vst4.32 {d0, d1, d2, d3}, [r4:256]!
+
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d1, d2, d3}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d1, d2, d3}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x00]
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x00]
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:256]! @ encoding: [0x04,0xf9,0xbd,0x00]
+
+ vst4.32 {d0, d1, d2, d3}, [r4], r6
+ vst4.32 {d0, d1, d2, d3}, [r4:16], r6
+ vst4.32 {d0, d1, d2, d3}, [r4:32], r6
+ vst4.32 {d0, d1, d2, d3}, [r4:64], r6
+ vst4.32 {d0, d1, d2, d3}, [r4:128], r6
+ vst4.32 {d0, d1, d2, d3}, [r4:256], r6
+
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x00]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d1, d2, d3}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d1, d2, d3}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x00]
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x00]
+@ CHECK: vst4.32 {d0, d1, d2, d3}, [r4:256], r6 @ encoding: [0x04,0xf9,0xb6,0x00]
+
+ vst4.32 {d0, d2, d4, d6}, [r4]
+ vst4.32 {d0, d2, d4, d6}, [r4:16]
+ vst4.32 {d0, d2, d4, d6}, [r4:32]
+ vst4.32 {d0, d2, d4, d6}, [r4:64]
+ vst4.32 {d0, d2, d4, d6}, [r4:128]
+ vst4.32 {d0, d2, d4, d6}, [r4:256]
+
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4] @ encoding: [0x04,0xf9,0x8f,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d2, d4, d6}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d2, d4, d6}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:64] @ encoding: [0x04,0xf9,0x9f,0x01]
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:128] @ encoding: [0x04,0xf9,0xaf,0x01]
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:256] @ encoding: [0x04,0xf9,0xbf,0x01]
+
+ vst4.32 {d0, d2, d4, d6}, [r4]!
+ vst4.32 {d0, d2, d4, d6}, [r4:16]!
+ vst4.32 {d0, d2, d4, d6}, [r4:32]!
+ vst4.32 {d0, d2, d4, d6}, [r4:64]!
+ vst4.32 {d0, d2, d4, d6}, [r4:128]!
+ vst4.32 {d0, d2, d4, d6}, [r4:256]!
+
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4]! @ encoding: [0x04,0xf9,0x8d,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d2, d4, d6}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d2, d4, d6}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:64]! @ encoding: [0x04,0xf9,0x9d,0x01]
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:128]! @ encoding: [0x04,0xf9,0xad,0x01]
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:256]! @ encoding: [0x04,0xf9,0xbd,0x01]
+
+ vst4.32 {d0, d2, d4, d6}, [r4], r6
+ vst4.32 {d0, d2, d4, d6}, [r4:16], r6
+ vst4.32 {d0, d2, d4, d6}, [r4:32], r6
+ vst4.32 {d0, d2, d4, d6}, [r4:64], r6
+ vst4.32 {d0, d2, d4, d6}, [r4:128], r6
+ vst4.32 {d0, d2, d4, d6}, [r4:256], r6
+
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4], r6 @ encoding: [0x04,0xf9,0x86,0x01]
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d2, d4, d6}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128, 256 or omitted
+@ CHECK-ERRORS: vst4.32 {d0, d2, d4, d6}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:64], r6 @ encoding: [0x04,0xf9,0x96,0x01]
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:128], r6 @ encoding: [0x04,0xf9,0xa6,0x01]
+@ CHECK: vst4.32 {d0, d2, d4, d6}, [r4:256], r6 @ encoding: [0x04,0xf9,0xb6,0x01]
+
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4]
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4] @ encoding: [0x84,0xf9,0x8f,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64] @ encoding: [0x84,0xf9,0x9f,0x0b]
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128] @ encoding: [0x84,0xf9,0xaf,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4]!
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]!
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]!
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4]! @ encoding: [0x84,0xf9,0x8d,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64]! @ encoding: [0x84,0xf9,0x9d,0x0b]
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128]! @ encoding: [0x84,0xf9,0xad,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6
+ vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4], r6 @ encoding: [0x84,0xf9,0x86,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:64], r6 @ encoding: [0x84,0xf9,0x96,0x0b]
+@ CHECK: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:128], r6 @ encoding: [0x84,0xf9,0xa6,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
+
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4]
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4] @ encoding: [0x84,0xf9,0xcf,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64] @ encoding: [0x84,0xf9,0xdf,0x0b]
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128] @ encoding: [0x84,0xf9,0xef,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]
+@ CHECK-ERRORS: ^
+
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4]!
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]!
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]!
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4]! @ encoding: [0x84,0xf9,0xcd,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16]!
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32]!
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64]! @ encoding: [0x84,0xf9,0xdd,0x0b]
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128]! @ encoding: [0x84,0xf9,0xed,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256]!
+@ CHECK-ERRORS: ^
+
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6
+ vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4], r6 @ encoding: [0x84,0xf9,0xc6,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:16], r6
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:32], r6
+@ CHECK-ERRORS: ^
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:64], r6 @ encoding: [0x84,0xf9,0xd6,0x0b]
+@ CHECK: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:128], r6 @ encoding: [0x84,0xf9,0xe6,0x0b]
+@ CHECK-ERRORS: error: alignment must be 64, 128 or omitted
+@ CHECK-ERRORS: vst4.32 {d0[1], d2[1], d4[1], d6[1]}, [r4:256], r6
+@ CHECK-ERRORS: ^
diff --git a/test/MC/ARM/pool.s b/test/MC/ARM/pool.s
index 926b4f1..782f67e 100644
--- a/test/MC/ARM/pool.s
+++ b/test/MC/ARM/pool.s
@@ -16,3 +16,4 @@ pool:
@ CHECK-LABEL: .Ltmp0:
@ CHECK: .long 3126770193
+
diff --git a/test/MC/ARM/symbol-variants.s b/test/MC/ARM/symbol-variants.s
index e1036a3..a10fe50 100644
--- a/test/MC/ARM/symbol-variants.s
+++ b/test/MC/ARM/symbol-variants.s
@@ -2,6 +2,7 @@
@ RUN: llvm-mc < %s -triple thumbv7-none-linux-gnueabi -filetype=obj | llvm-objdump -triple thumbv7-none-linux-gnueabi -r - | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB
@ CHECK-LABEL: RELOCATION RECORDS FOR [.rel.text]
+.Lsym:
@ empty
.word f00
@@ -83,3 +84,8 @@ bl f05(plt)
@ CHECK: 60 R_ARM_TLS_GOTDESC f24
@ CHECK: 64 R_ARM_TLS_GOTDESC f25
+@ got_prel
+.word f26(GOT_PREL) + (. - .Lsym)
+ ldr r3, =f27(GOT_PREL)
+@ CHECK: 68 R_ARM_GOT_PREL f26
+@ CHECK: 70 R_ARM_GOT_PREL f27
diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s
index 6ac2db0..b2b14bc 100644
--- a/test/MC/ARM/thumb2-diagnostics.s
+++ b/test/MC/ARM/thumb2-diagnostics.s
@@ -70,3 +70,21 @@
@ CHECK-ERRORS: error: branch target out of range
@ CHECK-ERRORS: error: branch target out of range
@ CHECK-ERRORS: error: branch target out of range
+
+foo2:
+ mov r0, foo2
+ movw r0, foo2
+ movt r0, foo2
+@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
+@ CHECK-ERRORS: ^
+
+ and sp, r1, #80008000
+ and pc, r1, #80008000
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+
+
diff --git a/test/MC/ARM/thumb2-strd.s b/test/MC/ARM/thumb2-strd.s
new file mode 100644
index 0000000..3f8025d
--- /dev/null
+++ b/test/MC/ARM/thumb2-strd.s
@@ -0,0 +1,10 @@
+@ RUN: not llvm-mc -triple=armv7-linux-gnueabi %s 2>&1 | FileCheck %s
+.text
+.thumb
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: error: invalid operand for instruction
+@ CHECK: error: invalid operand for instruction
+strd r12, SP, [r0, #256]
+strd r12, SP, [r0, #256]!
+strd r12, SP, [r0], #256
diff --git a/test/MC/ARM/thumb2be-b.w-encoding.s b/test/MC/ARM/thumb2be-b.w-encoding.s
new file mode 100644
index 0000000..2c3e31b
--- /dev/null
+++ b/test/MC/ARM/thumb2be-b.w-encoding.s
@@ -0,0 +1,9 @@
+@ RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-LE
+@ RUN: llvm-mc -triple=thumbebv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-BE
+
+b.w bar
+@ CHECK-LE: b.w bar @ encoding: [A,0xf0'A',A,0x90'A']
+@ CHECK-LE-NEXT: @ fixup A - offset: 0, value: bar, kind: fixup_t2_uncondbranch
+@ CHECK-BE: b.w bar @ encoding: [0xf0'A',A,0x90'A',A]
+@ CHECK-BE-NEXT: @ fixup A - offset: 0, value: bar, kind: fixup_t2_uncondbranch
+
diff --git a/test/MC/ARM/thumb2be-beq.w-encoding.s b/test/MC/ARM/thumb2be-beq.w-encoding.s
new file mode 100644
index 0000000..e39e541
--- /dev/null
+++ b/test/MC/ARM/thumb2be-beq.w-encoding.s
@@ -0,0 +1,9 @@
+@ RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-LE
+@ RUN: llvm-mc -triple=thumbebv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-BE
+
+beq.w bar
+@ CHECK-LE: beq.w bar @ encoding: [A,0xf0'A',A,0x80'A']
+@ CHECK-LE-NEXT: @ fixup A - offset: 0, value: bar, kind: fixup_t2_condbranch
+@ CHECK-BE: beq.w bar @ encoding: [0xf0'A',A,0x80'A',A]
+@ CHECK-BE-NEXT: @ fixup A - offset: 0, value: bar, kind: fixup_t2_condbranch
+
diff --git a/test/MC/ARM/thumb2be-movt-encoding.s b/test/MC/ARM/thumb2be-movt-encoding.s
new file mode 100644
index 0000000..cc6c04e
--- /dev/null
+++ b/test/MC/ARM/thumb2be-movt-encoding.s
@@ -0,0 +1,9 @@
+@ RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-LE
+@ RUN: llvm-mc -triple=thumbebv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-BE
+
+movt r9, :upper16:(_bar)
+@ CHECK-LE: movt r9, :upper16:_bar @ encoding: [0xc0'A',0xf2'A',0b0000AAAA,0x09]
+@ CHECK-LE-NEXT: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_movt_hi16
+@ CHECK-BE: movt r9, :upper16:_bar @ encoding: [0xf2,0b1100AAAA,0x09'A',A]
+@ CHECK-BE-NEXT: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_movt_hi16
+
diff --git a/test/MC/ARM/thumb2be-movw-encoding.s b/test/MC/ARM/thumb2be-movw-encoding.s
new file mode 100644
index 0000000..3bff457
--- /dev/null
+++ b/test/MC/ARM/thumb2be-movw-encoding.s
@@ -0,0 +1,9 @@
+@ RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-LE
+@ RUN: llvm-mc -triple=thumbebv7-none-linux-gnueabi -show-encoding < %s | FileCheck %s --check-prefix=CHECK-BE
+
+movw r9, :lower16:(_bar)
+@ CHECK-LE: movw r9, :lower16:_bar @ encoding: [0x40'A',0xf2'A',0b0000AAAA,0x09]
+@ CHECK-LE-NEXT: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_movw_lo16
+@ CHECK-BE: movw r9, :lower16:_bar @ encoding: [0xf2,0b0100AAAA,0x09'A',A]
+@ CHECK-BE-NEXT: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_movw_lo16
+
diff --git a/test/MC/ARM/thumb_set.s b/test/MC/ARM/thumb_set.s
index d0bc985..d2a0dc0 100644
--- a/test/MC/ARM/thumb_set.s
+++ b/test/MC/ARM/thumb_set.s
@@ -1,6 +1,9 @@
@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s | llvm-readobj -t \
@ RUN: | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype asm -o - %s \
+@ RUN: | FileCheck --check-prefix=ASM %s
+
.syntax unified
.arm
@@ -11,6 +14,11 @@ arm_func:
.thumb_set alias_arm_func, arm_func
+ alias_arm_func2 = alias_arm_func
+ alias_arm_func3 = alias_arm_func2
+
+@ ASM: .thumb_set alias_arm_func, arm_func
+
.thumb
.type thumb_func,%function
@@ -51,8 +59,6 @@ beta:
.thumb_set beta, alpha
- .thumb_set alias_undefined, undefined
-
@ CHECK: Symbol {
@ CHECK: Name: alias_arm_func
@ CHECK: Value: 0x1
@@ -60,6 +66,18 @@ beta:
@ CHECK: }
@ CHECK: Symbol {
+@ CHECK: Name: alias_arm_func2
+@ CHECK: Value: 0x1
+@ CHECK: Type: Function
+@ CHECK: }
+
+@ CHECK: Symbol {
+@ CHECK: Name: alias_arm_func3
+@ CHECK: Value: 0x1
+@ CHECK: Type: Function
+@ CHECK: }
+
+@ CHECK: Symbol {
@ CHECK: Name: alias_defined_data
@ CHECK: Value: 0x5
@ CHECK: Type: Function
@@ -89,6 +107,16 @@ beta:
@ CHECK: Type: Function
@ CHECK: }
+@ CHECK: Symbol {
+@ CHECK: Name: badblood
+@ CHECK-NEXT: Value: 0x0
+@ CHECK-NEXT: Size: 0
+@ CHECK-NEXT: Binding: Local
+@ CHECK-NEXT: Type: Object
+@ CHECK-NEXT: Other: 0
+@ CHECK-NEXT: Section: .data
+@ CHECK-NEXT: }
+
@ CHECK: Symbol {
@ CHECK: Name: bedazzle
@ CHECK: Value: 0x4
@@ -124,16 +152,3 @@ beta:
@ CHECK: Value: 0x5
@ CHECK: Type: Function
@ CHECK: }
-
-@ CHECK: Symbol {
-@ CHECK: Name: badblood
-@ CHECK: Value: 0x0
-@ CHECK: Type: Object
-@ CHECK: }
-
-@ CHECK: Symbol {
-@ CHECK: Name: undefined
-@ CHECK: Value: 0x0
-@ CHECK: Type: None
-@ CHECK: }
-
diff --git a/test/MC/ARM/udf-arm-diagnostics.s b/test/MC/ARM/udf-arm-diagnostics.s
new file mode 100644
index 0000000..9ec9bf2
--- /dev/null
+++ b/test/MC/ARM/udf-arm-diagnostics.s
@@ -0,0 +1,19 @@
+@ RUN: not llvm-mc -triple arm-eabi %s 2>&1 | FileCheck %s
+
+ .syntax unified
+ .text
+ .arm
+
+undefined:
+ udfpl
+
+@ CHECK: error: instruction 'udf' is not predicable, but condition code specified
+@ CHECK: udfpl
+@ CHECK: ^
+
+ udf #65536
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: udf #65536
+@ CHECK: ^
+
diff --git a/test/MC/ARM/udf-arm.s b/test/MC/ARM/udf-arm.s
new file mode 100644
index 0000000..a9d19ca
--- /dev/null
+++ b/test/MC/ARM/udf-arm.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple arm-eabi -show-encoding %s | FileCheck %s
+
+ .syntax unified
+ .text
+ .arm
+
+undefined:
+ udf #0
+
+@ CHECK: udf #0 @ encoding: [0xf0,0x00,0xf0,0xe7]
+
diff --git a/test/MC/ARM/udf-thumb-2-diagnostics.s b/test/MC/ARM/udf-thumb-2-diagnostics.s
new file mode 100644
index 0000000..f837560
--- /dev/null
+++ b/test/MC/ARM/udf-thumb-2-diagnostics.s
@@ -0,0 +1,25 @@
+@ RUN: not llvm-mc -triple thumbv7-eabi -mattr +thumb2 %s 2>&1 | FileCheck %s
+
+ .syntax unified
+ .text
+ .thumb
+
+undefined:
+ udfpl
+
+@ CHECK: error: instruction 'udf' is not predicable, but condition code specified
+@ CHECK: udfpl
+@ CHECK: ^
+
+ udf #256
+
+@ CHECK: error: instruction requires: arm-mode
+@ CHECK: udf #256
+@ CHECK: ^
+
+ udf.w #65536
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: udf.w #65536
+@ CHECK: ^
+
diff --git a/test/MC/ARM/udf-thumb-2.s b/test/MC/ARM/udf-thumb-2.s
new file mode 100644
index 0000000..beb6549
--- /dev/null
+++ b/test/MC/ARM/udf-thumb-2.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -triple thumbv7-eabi -mattr +thumb2 -show-encoding %s | FileCheck %s
+
+ .syntax unified
+ .text
+ .thumb
+
+undefined:
+ udf #0
+ udf.w #0
+
+@ CHECK: udf #0 @ encoding: [0x00,0xde]
+@ CHECK: udf.w #0 @ encoding: [0xf0,0xf7,0x00,0xa0]
+
diff --git a/test/MC/ARM/udf-thumb-diagnostics.s b/test/MC/ARM/udf-thumb-diagnostics.s
new file mode 100644
index 0000000..51388d0
--- /dev/null
+++ b/test/MC/ARM/udf-thumb-diagnostics.s
@@ -0,0 +1,19 @@
+@ RUN: not llvm-mc -triple thumbv6m-eabi %s 2>&1 | FileCheck %s
+
+ .syntax unified
+ .text
+ .thumb
+
+undefined:
+ udfpl
+
+@ CHECK: error: conditional execution not supported in Thumb1
+@ CHECK: udfpl
+@ CHECK: ^
+
+ udf #256
+
+@ CHECK: error: instruction requires: arm-mode
+@ CHECK: udf #256
+@ CHECK: ^
+
diff --git a/test/MC/ARM/udf-thumb.s b/test/MC/ARM/udf-thumb.s
new file mode 100644
index 0000000..10b3aff
--- /dev/null
+++ b/test/MC/ARM/udf-thumb.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple thumbv6m-eabi -show-encoding %s | FileCheck %s
+
+ .syntax unified
+ .text
+ .thumb
+
+undefined:
+ udf #0
+
+@ CHECK: udf #0 @ encoding: [0x00,0xde]
+
diff --git a/test/MC/ARM/vmov-vmvn-byte-replicate.s b/test/MC/ARM/vmov-vmvn-byte-replicate.s
new file mode 100644
index 0000000..5931160
--- /dev/null
+++ b/test/MC/ARM/vmov-vmvn-byte-replicate.s
@@ -0,0 +1,31 @@
+@ PR18921, "vmov" part.
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -show-encoding < %s | FileCheck %s
+.text
+
+@ CHECK: vmov.i8 d2, #0xff @ encoding: [0x1f,0x2e,0x87,0xf3]
+@ CHECK: vmov.i8 q2, #0xff @ encoding: [0x5f,0x4e,0x87,0xf3]
+@ CHECK: vmov.i8 d2, #0xab @ encoding: [0x1b,0x2e,0x82,0xf3]
+@ CHECK: vmov.i8 q2, #0xab @ encoding: [0x5b,0x4e,0x82,0xf3]
+@ CHECK: vmov.i8 q2, #0xab @ encoding: [0x5b,0x4e,0x82,0xf3]
+@ CHECK: vmov.i8 q2, #0xab @ encoding: [0x5b,0x4e,0x82,0xf3]
+
+@ CHECK: vmov.i8 d2, #0x0 @ encoding: [0x10,0x2e,0x80,0xf2]
+@ CHECK: vmov.i8 q2, #0x0 @ encoding: [0x50,0x4e,0x80,0xf2]
+@ CHECK: vmov.i8 d2, #0x54 @ encoding: [0x14,0x2e,0x85,0xf2]
+@ CHECK: vmov.i8 q2, #0x54 @ encoding: [0x54,0x4e,0x85,0xf2]
+@ CHECK: vmov.i8 d2, #0x54 @ encoding: [0x14,0x2e,0x85,0xf2]
+@ CHECK: vmov.i8 q2, #0x54 @ encoding: [0x54,0x4e,0x85,0xf2]
+
+ vmov.i32 d2, #0xffffffff
+ vmov.i32 q2, #0xffffffff
+ vmov.i32 d2, #0xabababab
+ vmov.i32 q2, #0xabababab
+ vmov.i16 q2, #0xabab
+ vmov.i16 q2, #0xabab
+
+ vmvn.i32 d2, #0xffffffff
+ vmvn.i32 q2, #0xffffffff
+ vmvn.i32 d2, #0xabababab
+ vmvn.i32 q2, #0xabababab
+ vmvn.i16 d2, #0xabab
+ vmvn.i16 q2, #0xabab
diff --git a/test/MC/ARM/vmov-vmvn-illegal-cases.s b/test/MC/ARM/vmov-vmvn-illegal-cases.s
new file mode 100644
index 0000000..4609b77
--- /dev/null
+++ b/test/MC/ARM/vmov-vmvn-illegal-cases.s
@@ -0,0 +1,30 @@
+@ RUN: not llvm-mc -triple=armv7-linux-gnueabi %s 2>&1 | FileCheck %s
+.text
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmov.i32 d2, #0xffffffab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmov.i32 q2, #0xffffffab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmov.i16 q2, #0xffab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmov.i16 q2, #0xffab
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmvn.i32 d2, #0xffffffab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmvn.i32 q2, #0xffffffab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmvn.i16 q2, #0xffab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vmvn.i16 q2, #0xffab
+
+ vmov.i32 d2, #0xffffffab
+ vmov.i32 q2, #0xffffffab
+ vmov.i16 q2, #0xffab
+ vmov.i16 q2, #0xffab
+
+ vmvn.i32 d2, #0xffffffab
+ vmvn.i32 q2, #0xffffffab
+ vmvn.i16 q2, #0xffab
+ vmvn.i16 q2, #0xffab
diff --git a/test/MC/ARM/vorr-vbic-illegal-cases.s b/test/MC/ARM/vorr-vbic-illegal-cases.s
new file mode 100644
index 0000000..16ab6b5
--- /dev/null
+++ b/test/MC/ARM/vorr-vbic-illegal-cases.s
@@ -0,0 +1,42 @@
+@ RUN: not llvm-mc -triple=armv7-linux-gnueabi %s 2>&1 | FileCheck %s
+.text
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vorr.i32 d2, #0xffffffff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vorr.i32 q2, #0xffffffff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vorr.i32 d2, #0xabababab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vorr.i32 q2, #0xabababab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vorr.i16 q2, #0xabab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vorr.i16 q2, #0xabab
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32 d2, #0xffffffff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32 q2, #0xffffffff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32 d2, #0xabababab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32 q2, #0xabababab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i16 d2, #0xabab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i16 q2, #0xabab
+
+ vorr.i32 d2, #0xffffffff
+ vorr.i32 q2, #0xffffffff
+ vorr.i32 d2, #0xabababab
+ vorr.i32 q2, #0xabababab
+ vorr.i16 q2, #0xabab
+ vorr.i16 q2, #0xabab
+
+ vbic.i32 d2, #0xffffffff
+ vbic.i32 q2, #0xffffffff
+ vbic.i32 d2, #0xabababab
+ vbic.i32 q2, #0xabababab
+ vbic.i16 d2, #0xabab
+ vbic.i16 q2, #0xabab
diff --git a/test/MC/ARM64/advsimd.s b/test/MC/ARM64/advsimd.s
deleted file mode 100644
index fce0832..0000000
--- a/test/MC/ARM64/advsimd.s
+++ /dev/null
@@ -1,1997 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 -show-encoding < %s | FileCheck %s
-
-foo:
-
- abs.8b v0, v0
- abs.16b v0, v0
- abs.4h v0, v0
- abs.8h v0, v0
- abs.2s v0, v0
- abs.4s v0, v0
-
-; CHECK: abs.8b v0, v0 ; encoding: [0x00,0xb8,0x20,0x0e]
-; CHECK: abs.16b v0, v0 ; encoding: [0x00,0xb8,0x20,0x4e]
-; CHECK: abs.4h v0, v0 ; encoding: [0x00,0xb8,0x60,0x0e]
-; CHECK: abs.8h v0, v0 ; encoding: [0x00,0xb8,0x60,0x4e]
-; CHECK: abs.2s v0, v0 ; encoding: [0x00,0xb8,0xa0,0x0e]
-; CHECK: abs.4s v0, v0 ; encoding: [0x00,0xb8,0xa0,0x4e]
-
- add.8b v0, v0, v0
- add.16b v0, v0, v0
- add.4h v0, v0, v0
- add.8h v0, v0, v0
- add.2s v0, v0, v0
- add.4s v0, v0, v0
- add.2d v0, v0, v0
-
-; CHECK: add.8b v0, v0, v0 ; encoding: [0x00,0x84,0x20,0x0e]
-; CHECK: add.16b v0, v0, v0 ; encoding: [0x00,0x84,0x20,0x4e]
-; CHECK: add.4h v0, v0, v0 ; encoding: [0x00,0x84,0x60,0x0e]
-; CHECK: add.8h v0, v0, v0 ; encoding: [0x00,0x84,0x60,0x4e]
-; CHECK: add.2s v0, v0, v0 ; encoding: [0x00,0x84,0xa0,0x0e]
-; CHECK: add.4s v0, v0, v0 ; encoding: [0x00,0x84,0xa0,0x4e]
-; CHECK: add.2d v0, v0, v0 ; encoding: [0x00,0x84,0xe0,0x4e]
-
- add d1, d2, d3
-
-; CHECK: add d1, d2, d3 ; encoding: [0x41,0x84,0xe3,0x5e]
-
- addhn.8b v0, v0, v0
- addhn2.16b v0, v0, v0
- addhn.4h v0, v0, v0
- addhn2.8h v0, v0, v0
- addhn.2s v0, v0, v0
- addhn2.4s v0, v0, v0
-
-; CHECK: addhn.8b v0, v0, v0 ; encoding: [0x00,0x40,0x20,0x0e]
-; CHECK: addhn2.16b v0, v0, v0 ; encoding: [0x00,0x40,0x20,0x4e]
-; CHECK: addhn.4h v0, v0, v0 ; encoding: [0x00,0x40,0x60,0x0e]
-; CHECK: addhn2.8h v0, v0, v0 ; encoding: [0x00,0x40,0x60,0x4e]
-; CHECK: addhn.2s v0, v0, v0 ; encoding: [0x00,0x40,0xa0,0x0e]
-; CHECK: addhn2.4s v0, v0, v0 ; encoding: [0x00,0x40,0xa0,0x4e]
-
- addp.8b v0, v0, v0
- addp.16b v0, v0, v0
- addp.4h v0, v0, v0
- addp.8h v0, v0, v0
- addp.2s v0, v0, v0
- addp.4s v0, v0, v0
- addp.2d v0, v0, v0
-
-; CHECK: addp.8b v0, v0, v0 ; encoding: [0x00,0xbc,0x20,0x0e]
-; CHECK: addp.16b v0, v0, v0 ; encoding: [0x00,0xbc,0x20,0x4e]
-; CHECK: addp.4h v0, v0, v0 ; encoding: [0x00,0xbc,0x60,0x0e]
-; CHECK: addp.8h v0, v0, v0 ; encoding: [0x00,0xbc,0x60,0x4e]
-; CHECK: addp.2s v0, v0, v0 ; encoding: [0x00,0xbc,0xa0,0x0e]
-; CHECK: addp.4s v0, v0, v0 ; encoding: [0x00,0xbc,0xa0,0x4e]
-; CHECK: addp.2d v0, v0, v0 ; encoding: [0x00,0xbc,0xe0,0x4e]
-
- addp.2d d0, v0
-
-; CHECK: addp.2d d0, v0 ; encoding: [0x00,0xb8,0xf1,0x5e]
-
- addv.8b b0, v0
- addv.16b b0, v0
- addv.4h h0, v0
- addv.8h h0, v0
- addv.4s s0, v0
-
-; CHECK: addv.8b b0, v0 ; encoding: [0x00,0xb8,0x31,0x0e]
-; CHECK: addv.16b b0, v0 ; encoding: [0x00,0xb8,0x31,0x4e]
-; CHECK: addv.4h h0, v0 ; encoding: [0x00,0xb8,0x71,0x0e]
-; CHECK: addv.8h h0, v0 ; encoding: [0x00,0xb8,0x71,0x4e]
-; CHECK: addv.4s s0, v0 ; encoding: [0x00,0xb8,0xb1,0x4e]
-
-
-; INS/DUP
- dup.2d v0, x3
- dup.4s v0, w3
- dup.2s v0, w3
- dup.8h v0, w3
- dup.4h v0, w3
- dup.16b v0, w3
- dup.8b v0, w3
-
- dup v1.2d, x3
- dup v2.4s, w4
- dup v3.2s, w5
- dup v4.8h, w6
- dup v5.4h, w7
- dup v6.16b, w8
- dup v7.8b, w9
-
-; CHECK: dup.2d v0, x3 ; encoding: [0x60,0x0c,0x08,0x4e]
-; CHECK: dup.4s v0, w3 ; encoding: [0x60,0x0c,0x04,0x4e]
-; CHECK: dup.2s v0, w3 ; encoding: [0x60,0x0c,0x04,0x0e]
-; CHECK: dup.8h v0, w3 ; encoding: [0x60,0x0c,0x02,0x4e]
-; CHECK: dup.4h v0, w3 ; encoding: [0x60,0x0c,0x02,0x0e]
-; CHECK: dup.16b v0, w3 ; encoding: [0x60,0x0c,0x01,0x4e]
-; CHECK: dup.8b v0, w3 ; encoding: [0x60,0x0c,0x01,0x0e]
-
-; CHECK: dup.2d v1, x3 ; encoding: [0x61,0x0c,0x08,0x4e]
-; CHECK: dup.4s v2, w4 ; encoding: [0x82,0x0c,0x04,0x4e]
-; CHECK: dup.2s v3, w5 ; encoding: [0xa3,0x0c,0x04,0x0e]
-; CHECK: dup.8h v4, w6 ; encoding: [0xc4,0x0c,0x02,0x4e]
-; CHECK: dup.4h v5, w7 ; encoding: [0xe5,0x0c,0x02,0x0e]
-; CHECK: dup.16b v6, w8 ; encoding: [0x06,0x0d,0x01,0x4e]
-; CHECK: dup.8b v7, w9 ; encoding: [0x27,0x0d,0x01,0x0e]
-
- dup.2d v0, v3[1]
- dup.2s v0, v3[1]
- dup.4s v0, v3[1]
- dup.4h v0, v3[1]
- dup.8h v0, v3[1]
- dup.8b v0, v3[1]
- dup.16b v0, v3[1]
-
- dup v7.2d, v9.d[1]
- dup v6.2s, v8.s[1]
- dup v5.4s, v7.s[2]
- dup v4.4h, v6.h[3]
- dup v3.8h, v5.h[4]
- dup v2.8b, v4.b[5]
- dup v1.16b, v3.b[6]
-
-; CHECK: dup.2d v0, v3[1] ; encoding: [0x60,0x04,0x18,0x4e]
-; CHECK: dup.2s v0, v3[1] ; encoding: [0x60,0x04,0x0c,0x0e]
-; CHECK: dup.4s v0, v3[1] ; encoding: [0x60,0x04,0x0c,0x4e]
-; CHECK: dup.4h v0, v3[1] ; encoding: [0x60,0x04,0x06,0x0e]
-; CHECK: dup.8h v0, v3[1] ; encoding: [0x60,0x04,0x06,0x4e]
-; CHECK: dup.8b v0, v3[1] ; encoding: [0x60,0x04,0x03,0x0e]
-; CHECK: dup.16b v0, v3[1] ; encoding: [0x60,0x04,0x03,0x4e]
-
-; CHECK: dup.2d v7, v9[1] ; encoding: [0x27,0x05,0x18,0x4e]
-; CHECK: dup.2s v6, v8[1] ; encoding: [0x06,0x05,0x0c,0x0e]
-; CHECK: dup.4s v5, v7[2] ; encoding: [0xe5,0x04,0x14,0x4e]
-; CHECK: dup.4h v4, v6[3] ; encoding: [0xc4,0x04,0x0e,0x0e]
-; CHECK: dup.8h v3, v5[4] ; encoding: [0xa3,0x04,0x12,0x4e]
-; CHECK: dup.8b v2, v4[5] ; encoding: [0x82,0x04,0x0b,0x0e]
-; CHECK: dup.16b v1, v3[6] ; encoding: [0x61,0x04,0x0d,0x4e]
-
- dup b3, v4[1]
- dup h3, v4[1]
- dup s3, v4[1]
- dup d3, v4[1]
- dup b3, v4.b[1]
- dup h3, v4.h[1]
- dup s3, v4.s[1]
- dup d3, v4.d[1]
-
- mov b3, v4[1]
- mov h3, v4[1]
- mov s3, v4[1]
- mov d3, v4[1]
- mov b3, v4.b[1]
- mov h3, v4.h[1]
- mov s3, v4.s[1]
- mov d3, v4.d[1]
-
-; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
-; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
-; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
-; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
-; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
-; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
-; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
-; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
-
-; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
-; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
-; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
-; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
-; CHECK: mov b3, v4[1] ; encoding: [0x83,0x04,0x03,0x5e]
-; CHECK: mov h3, v4[1] ; encoding: [0x83,0x04,0x06,0x5e]
-; CHECK: mov s3, v4[1] ; encoding: [0x83,0x04,0x0c,0x5e]
-; CHECK: mov d3, v4[1] ; encoding: [0x83,0x04,0x18,0x5e]
-
- smov.s x3, v2[2]
- smov x3, v2.s[2]
- umov.s w3, v2[2]
- umov w3, v2.s[2]
- umov.d x3, v2[1]
- umov x3, v2.d[1]
-
-; CHECK: smov.s x3, v2[2] ; encoding: [0x43,0x2c,0x14,0x4e]
-; CHECK: smov.s x3, v2[2] ; encoding: [0x43,0x2c,0x14,0x4e]
-; CHECK: umov.s w3, v2[2] ; encoding: [0x43,0x3c,0x14,0x0e]
-; CHECK: umov.s w3, v2[2] ; encoding: [0x43,0x3c,0x14,0x0e]
-; CHECK: umov.d x3, v2[1] ; encoding: [0x43,0x3c,0x18,0x4e]
-; CHECK: umov.d x3, v2[1] ; encoding: [0x43,0x3c,0x18,0x4e]
-
- ; MOV aliases for UMOV instructions above
-
- mov.s w2, v3[3]
- mov w5, v7.s[2]
- mov.d x11, v13[1]
- mov x17, v19.d[0]
-
-; CHECK: umov.s w2, v3[3] ; encoding: [0x62,0x3c,0x1c,0x0e]
-; CHECK: umov.s w5, v7[2] ; encoding: [0xe5,0x3c,0x14,0x0e]
-; CHECK: umov.d x11, v13[1] ; encoding: [0xab,0x3d,0x18,0x4e]
-; CHECK: umov.d x17, v19[0] ; encoding: [0x71,0x3e,0x08,0x4e]
-
- ins.d v2[1], x5
- ins.s v2[1], w5
- ins.h v2[1], w5
- ins.b v2[1], w5
-
- ins v2.d[1], x5
- ins v2.s[1], w5
- ins v2.h[1], w5
- ins v2.b[1], w5
-
-; CHECK: ins.d v2[1], x5 ; encoding: [0xa2,0x1c,0x18,0x4e]
-; CHECK: ins.s v2[1], w5 ; encoding: [0xa2,0x1c,0x0c,0x4e]
-; CHECK: ins.h v2[1], w5 ; encoding: [0xa2,0x1c,0x06,0x4e]
-; CHECK: ins.b v2[1], w5 ; encoding: [0xa2,0x1c,0x03,0x4e]
-
-; CHECK: ins.d v2[1], x5 ; encoding: [0xa2,0x1c,0x18,0x4e]
-; CHECK: ins.s v2[1], w5 ; encoding: [0xa2,0x1c,0x0c,0x4e]
-; CHECK: ins.h v2[1], w5 ; encoding: [0xa2,0x1c,0x06,0x4e]
-; CHECK: ins.b v2[1], w5 ; encoding: [0xa2,0x1c,0x03,0x4e]
-
- ins.d v2[1], v15[1]
- ins.s v2[1], v15[1]
- ins.h v2[1], v15[1]
- ins.b v2[1], v15[1]
-
- ins v2.d[1], v15.d[0]
- ins v2.s[3], v15.s[2]
- ins v2.h[7], v15.h[3]
- ins v2.b[10], v15.b[5]
-
-; CHECK: ins.d v2[1], v15[1] ; encoding: [0xe2,0x45,0x18,0x6e]
-; CHECK: ins.s v2[1], v15[1] ; encoding: [0xe2,0x25,0x0c,0x6e]
-; CHECK: ins.h v2[1], v15[1] ; encoding: [0xe2,0x15,0x06,0x6e]
-; CHECK: ins.b v2[1], v15[1] ; encoding: [0xe2,0x0d,0x03,0x6e]
-
-; CHECK: ins.d v2[1], v15[0] ; encoding: [0xe2,0x05,0x18,0x6e]
-; CHECK: ins.s v2[3], v15[2] ; encoding: [0xe2,0x45,0x1c,0x6e]
-; CHECK: ins.h v2[7], v15[3] ; encoding: [0xe2,0x35,0x1e,0x6e]
-; CHECK: ins.b v2[10], v15[5] ; encoding: [0xe2,0x2d,0x15,0x6e]
-
-; MOV aliases for the above INS instructions.
- mov.d v2[1], x5
- mov.s v3[1], w6
- mov.h v4[1], w7
- mov.b v5[1], w8
-
- mov v9.d[1], x2
- mov v8.s[1], w3
- mov v7.h[1], w4
- mov v6.b[1], w5
-
- mov.d v1[1], v10[1]
- mov.s v2[1], v11[1]
- mov.h v7[1], v12[1]
- mov.b v8[1], v15[1]
-
- mov v2.d[1], v15.d[0]
- mov v7.s[3], v16.s[2]
- mov v8.h[7], v17.h[3]
- mov v9.b[10], v18.b[5]
-
-; CHECK: ins.d v2[1], x5 ; encoding: [0xa2,0x1c,0x18,0x4e]
-; CHECK: ins.s v3[1], w6 ; encoding: [0xc3,0x1c,0x0c,0x4e]
-; CHECK: ins.h v4[1], w7 ; encoding: [0xe4,0x1c,0x06,0x4e]
-; CHECK: ins.b v5[1], w8 ; encoding: [0x05,0x1d,0x03,0x4e]
-; CHECK: ins.d v9[1], x2 ; encoding: [0x49,0x1c,0x18,0x4e]
-; CHECK: ins.s v8[1], w3 ; encoding: [0x68,0x1c,0x0c,0x4e]
-; CHECK: ins.h v7[1], w4 ; encoding: [0x87,0x1c,0x06,0x4e]
-; CHECK: ins.b v6[1], w5 ; encoding: [0xa6,0x1c,0x03,0x4e]
-; CHECK: ins.d v1[1], v10[1] ; encoding: [0x41,0x45,0x18,0x6e]
-; CHECK: ins.s v2[1], v11[1] ; encoding: [0x62,0x25,0x0c,0x6e]
-; CHECK: ins.h v7[1], v12[1] ; encoding: [0x87,0x15,0x06,0x6e]
-; CHECK: ins.b v8[1], v15[1] ; encoding: [0xe8,0x0d,0x03,0x6e]
-; CHECK: ins.d v2[1], v15[0] ; encoding: [0xe2,0x05,0x18,0x6e]
-; CHECK: ins.s v7[3], v16[2] ; encoding: [0x07,0x46,0x1c,0x6e]
-; CHECK: ins.h v8[7], v17[3] ; encoding: [0x28,0x36,0x1e,0x6e]
-; CHECK: ins.b v9[10], v18[5] ; encoding: [0x49,0x2e,0x15,0x6e]
-
-
- and.8b v0, v0, v0
- and.16b v0, v0, v0
-
-; CHECK: and.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x20,0x0e]
-; CHECK: and.16b v0, v0, v0 ; encoding: [0x00,0x1c,0x20,0x4e]
-
- bic.8b v0, v0, v0
-
-; CHECK: bic.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x60,0x0e]
-
- cmeq.8b v0, v0, v0
- cmge.8b v0, v0, v0
- cmgt.8b v0, v0, v0
- cmhi.8b v0, v0, v0
- cmhs.8b v0, v0, v0
- cmtst.8b v0, v0, v0
- fabd.2s v0, v0, v0
- facge.2s v0, v0, v0
- facgt.2s v0, v0, v0
- faddp.2s v0, v0, v0
- fadd.2s v0, v0, v0
- fcmeq.2s v0, v0, v0
- fcmge.2s v0, v0, v0
- fcmgt.2s v0, v0, v0
- fdiv.2s v0, v0, v0
- fmaxnmp.2s v0, v0, v0
- fmaxnm.2s v0, v0, v0
- fmaxp.2s v0, v0, v0
- fmax.2s v0, v0, v0
- fminnmp.2s v0, v0, v0
- fminnm.2s v0, v0, v0
- fminp.2s v0, v0, v0
- fmin.2s v0, v0, v0
- fmla.2s v0, v0, v0
- fmls.2s v0, v0, v0
- fmulx.2s v0, v0, v0
- fmul.2s v0, v0, v0
- fmulx d2, d3, d1
- fmulx s2, s3, s1
- frecps.2s v0, v0, v0
- frsqrts.2s v0, v0, v0
- fsub.2s v0, v0, v0
- mla.8b v0, v0, v0
- mls.8b v0, v0, v0
- mul.8b v0, v0, v0
- pmul.8b v0, v0, v0
- saba.8b v0, v0, v0
- sabd.8b v0, v0, v0
- shadd.8b v0, v0, v0
- shsub.8b v0, v0, v0
- smaxp.8b v0, v0, v0
- smax.8b v0, v0, v0
- sminp.8b v0, v0, v0
- smin.8b v0, v0, v0
- sqadd.8b v0, v0, v0
- sqdmulh.4h v0, v0, v0
- sqrdmulh.4h v0, v0, v0
- sqrshl.8b v0, v0, v0
- sqshl.8b v0, v0, v0
- sqsub.8b v0, v0, v0
- srhadd.8b v0, v0, v0
- srshl.8b v0, v0, v0
- sshl.8b v0, v0, v0
- sub.8b v0, v0, v0
- uaba.8b v0, v0, v0
- uabd.8b v0, v0, v0
- uhadd.8b v0, v0, v0
- uhsub.8b v0, v0, v0
- umaxp.8b v0, v0, v0
- umax.8b v0, v0, v0
- uminp.8b v0, v0, v0
- umin.8b v0, v0, v0
- uqadd.8b v0, v0, v0
- uqrshl.8b v0, v0, v0
- uqshl.8b v0, v0, v0
- uqsub.8b v0, v0, v0
- urhadd.8b v0, v0, v0
- urshl.8b v0, v0, v0
- ushl.8b v0, v0, v0
-
-; CHECK: cmeq.8b v0, v0, v0 ; encoding: [0x00,0x8c,0x20,0x2e]
-; CHECK: cmge.8b v0, v0, v0 ; encoding: [0x00,0x3c,0x20,0x0e]
-; CHECK: cmgt.8b v0, v0, v0 ; encoding: [0x00,0x34,0x20,0x0e]
-; CHECK: cmhi.8b v0, v0, v0 ; encoding: [0x00,0x34,0x20,0x2e]
-; CHECK: cmhs.8b v0, v0, v0 ; encoding: [0x00,0x3c,0x20,0x2e]
-; CHECK: cmtst.8b v0, v0, v0 ; encoding: [0x00,0x8c,0x20,0x0e]
-; CHECK: fabd.2s v0, v0, v0 ; encoding: [0x00,0xd4,0xa0,0x2e]
-; CHECK: facge.2s v0, v0, v0 ; encoding: [0x00,0xec,0x20,0x2e]
-; CHECK: facgt.2s v0, v0, v0 ; encoding: [0x00,0xec,0xa0,0x2e]
-; CHECK: faddp.2s v0, v0, v0 ; encoding: [0x00,0xd4,0x20,0x2e]
-; CHECK: fadd.2s v0, v0, v0 ; encoding: [0x00,0xd4,0x20,0x0e]
-; CHECK: fcmeq.2s v0, v0, v0 ; encoding: [0x00,0xe4,0x20,0x0e]
-; CHECK: fcmge.2s v0, v0, v0 ; encoding: [0x00,0xe4,0x20,0x2e]
-; CHECK: fcmgt.2s v0, v0, v0 ; encoding: [0x00,0xe4,0xa0,0x2e]
-; CHECK: fdiv.2s v0, v0, v0 ; encoding: [0x00,0xfc,0x20,0x2e]
-; CHECK: fmaxnmp.2s v0, v0, v0 ; encoding: [0x00,0xc4,0x20,0x2e]
-; CHECK: fmaxnm.2s v0, v0, v0 ; encoding: [0x00,0xc4,0x20,0x0e]
-; CHECK: fmaxp.2s v0, v0, v0 ; encoding: [0x00,0xf4,0x20,0x2e]
-; CHECK: fmax.2s v0, v0, v0 ; encoding: [0x00,0xf4,0x20,0x0e]
-; CHECK: fminnmp.2s v0, v0, v0 ; encoding: [0x00,0xc4,0xa0,0x2e]
-; CHECK: fminnm.2s v0, v0, v0 ; encoding: [0x00,0xc4,0xa0,0x0e]
-; CHECK: fminp.2s v0, v0, v0 ; encoding: [0x00,0xf4,0xa0,0x2e]
-; CHECK: fmin.2s v0, v0, v0 ; encoding: [0x00,0xf4,0xa0,0x0e]
-; CHECK: fmla.2s v0, v0, v0 ; encoding: [0x00,0xcc,0x20,0x0e]
-; CHECK: fmls.2s v0, v0, v0 ; encoding: [0x00,0xcc,0xa0,0x0e]
-; CHECK: fmulx.2s v0, v0, v0 ; encoding: [0x00,0xdc,0x20,0x0e]
-
-; CHECK: fmul.2s v0, v0, v0 ; encoding: [0x00,0xdc,0x20,0x2e]
-; CHECK: fmulx d2, d3, d1 ; encoding: [0x62,0xdc,0x61,0x5e]
-; CHECK: fmulx s2, s3, s1 ; encoding: [0x62,0xdc,0x21,0x5e]
-; CHECK: frecps.2s v0, v0, v0 ; encoding: [0x00,0xfc,0x20,0x0e]
-; CHECK: frsqrts.2s v0, v0, v0 ; encoding: [0x00,0xfc,0xa0,0x0e]
-; CHECK: fsub.2s v0, v0, v0 ; encoding: [0x00,0xd4,0xa0,0x0e]
-; CHECK: mla.8b v0, v0, v0 ; encoding: [0x00,0x94,0x20,0x0e]
-; CHECK: mls.8b v0, v0, v0 ; encoding: [0x00,0x94,0x20,0x2e]
-; CHECK: mul.8b v0, v0, v0 ; encoding: [0x00,0x9c,0x20,0x0e]
-; CHECK: pmul.8b v0, v0, v0 ; encoding: [0x00,0x9c,0x20,0x2e]
-; CHECK: saba.8b v0, v0, v0 ; encoding: [0x00,0x7c,0x20,0x0e]
-; CHECK: sabd.8b v0, v0, v0 ; encoding: [0x00,0x74,0x20,0x0e]
-; CHECK: shadd.8b v0, v0, v0 ; encoding: [0x00,0x04,0x20,0x0e]
-; CHECK: shsub.8b v0, v0, v0 ; encoding: [0x00,0x24,0x20,0x0e]
-; CHECK: smaxp.8b v0, v0, v0 ; encoding: [0x00,0xa4,0x20,0x0e]
-; CHECK: smax.8b v0, v0, v0 ; encoding: [0x00,0x64,0x20,0x0e]
-; CHECK: sminp.8b v0, v0, v0 ; encoding: [0x00,0xac,0x20,0x0e]
-; CHECK: smin.8b v0, v0, v0 ; encoding: [0x00,0x6c,0x20,0x0e]
-; CHECK: sqadd.8b v0, v0, v0 ; encoding: [0x00,0x0c,0x20,0x0e]
-; CHECK: sqdmulh.4h v0, v0, v0 ; encoding: [0x00,0xb4,0x60,0x0e]
-; CHECK: sqrdmulh.4h v0, v0, v0 ; encoding: [0x00,0xb4,0x60,0x2e]
-; CHECK: sqrshl.8b v0, v0, v0 ; encoding: [0x00,0x5c,0x20,0x0e]
-; CHECK: sqshl.8b v0, v0, v0 ; encoding: [0x00,0x4c,0x20,0x0e]
-; CHECK: sqsub.8b v0, v0, v0 ; encoding: [0x00,0x2c,0x20,0x0e]
-; CHECK: srhadd.8b v0, v0, v0 ; encoding: [0x00,0x14,0x20,0x0e]
-; CHECK: srshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x0e]
-; CHECK: sshl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x0e]
-; CHECK: sub.8b v0, v0, v0 ; encoding: [0x00,0x84,0x20,0x2e]
-; CHECK: uaba.8b v0, v0, v0 ; encoding: [0x00,0x7c,0x20,0x2e]
-; CHECK: uabd.8b v0, v0, v0 ; encoding: [0x00,0x74,0x20,0x2e]
-; CHECK: uhadd.8b v0, v0, v0 ; encoding: [0x00,0x04,0x20,0x2e]
-; CHECK: uhsub.8b v0, v0, v0 ; encoding: [0x00,0x24,0x20,0x2e]
-; CHECK: umaxp.8b v0, v0, v0 ; encoding: [0x00,0xa4,0x20,0x2e]
-; CHECK: umax.8b v0, v0, v0 ; encoding: [0x00,0x64,0x20,0x2e]
-; CHECK: uminp.8b v0, v0, v0 ; encoding: [0x00,0xac,0x20,0x2e]
-; CHECK: umin.8b v0, v0, v0 ; encoding: [0x00,0x6c,0x20,0x2e]
-; CHECK: uqadd.8b v0, v0, v0 ; encoding: [0x00,0x0c,0x20,0x2e]
-; CHECK: uqrshl.8b v0, v0, v0 ; encoding: [0x00,0x5c,0x20,0x2e]
-; CHECK: uqshl.8b v0, v0, v0 ; encoding: [0x00,0x4c,0x20,0x2e]
-; CHECK: uqsub.8b v0, v0, v0 ; encoding: [0x00,0x2c,0x20,0x2e]
-; CHECK: urhadd.8b v0, v0, v0 ; encoding: [0x00,0x14,0x20,0x2e]
-; CHECK: urshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x2e]
-; CHECK: ushl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x2e]
-
- bif.8b v0, v0, v0
- bit.8b v0, v0, v0
- bsl.8b v0, v0, v0
- eor.8b v0, v0, v0
- orn.8b v0, v0, v0
- orr.8b v0, v0, v0
-
-; CHECK: bif.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xe0,0x2e]
-; CHECK: bit.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xa0,0x2e]
-; CHECK: bsl.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x60,0x2e]
-; CHECK: eor.8b v0, v0, v0 ; encoding: [0x00,0x1c,0x20,0x2e]
-; CHECK: orn.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xe0,0x0e]
-; CHECK: orr.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xa0,0x0e]
-
- sadalp.4h v0, v0
- sadalp.8h v0, v0
- sadalp.2s v0, v0
- sadalp.4s v0, v0
- sadalp.1d v0, v0
- sadalp.2d v0, v0
-
-; CHECK: sadalp.4h v0, v0 ; encoding: [0x00,0x68,0x20,0x0e]
-; CHECK: sadalp.8h v0, v0 ; encoding: [0x00,0x68,0x20,0x4e]
-; CHECK: sadalp.2s v0, v0 ; encoding: [0x00,0x68,0x60,0x0e]
-; CHECK: sadalp.4s v0, v0 ; encoding: [0x00,0x68,0x60,0x4e]
-; CHECK: sadalp.1d v0, v0 ; encoding: [0x00,0x68,0xa0,0x0e]
-; CHECK: sadalp.2d v0, v0 ; encoding: [0x00,0x68,0xa0,0x4e]
-
- cls.8b v0, v0
- clz.8b v0, v0
- cnt.8b v0, v0
- fabs.2s v0, v0
- fneg.2s v0, v0
- frecpe.2s v0, v0
- frinta.2s v0, v0
- frintx.2s v0, v0
- frinti.2s v0, v0
- frintm.2s v0, v0
- frintn.2s v0, v0
- frintp.2s v0, v0
- frintz.2s v0, v0
- frsqrte.2s v0, v0
- fsqrt.2s v0, v0
- neg.8b v0, v0
- not.8b v0, v0
- rbit.8b v0, v0
- rev16.8b v0, v0
- rev32.8b v0, v0
- rev64.8b v0, v0
- sadalp.4h v0, v0
- saddlp.4h v0, v0
- scvtf.2s v0, v0
- sqabs.8b v0, v0
- sqneg.8b v0, v0
- sqxtn.8b v0, v0
- sqxtun.8b v0, v0
- suqadd.8b v0, v0
- uadalp.4h v0, v0
- uaddlp.4h v0, v0
- ucvtf.2s v0, v0
- uqxtn.8b v0, v0
- urecpe.2s v0, v0
- ursqrte.2s v0, v0
- usqadd.8b v0, v0
- xtn.8b v0, v0
- shll.8h v1, v2, #8
- shll.4s v3, v4, #16
- shll.2d v5, v6, #32
- shll2.8h v7, v8, #8
- shll2.4s v9, v10, #16
- shll2.2d v11, v12, #32
- shll v1.8h, v2.8b, #8
- shll v1.4s, v2.4h, #16
- shll v1.2d, v2.2s, #32
- shll2 v1.8h, v2.16b, #8
- shll2 v1.4s, v2.8h, #16
- shll2 v1.2d, v2.4s, #32
-
-; CHECK: cls.8b v0, v0 ; encoding: [0x00,0x48,0x20,0x0e]
-; CHECK: clz.8b v0, v0 ; encoding: [0x00,0x48,0x20,0x2e]
-; CHECK: cnt.8b v0, v0 ; encoding: [0x00,0x58,0x20,0x0e]
-; CHECK: fabs.2s v0, v0 ; encoding: [0x00,0xf8,0xa0,0x0e]
-; CHECK: fneg.2s v0, v0 ; encoding: [0x00,0xf8,0xa0,0x2e]
-; CHECK: frecpe.2s v0, v0 ; encoding: [0x00,0xd8,0xa1,0x0e]
-; CHECK: frinta.2s v0, v0 ; encoding: [0x00,0x88,0x21,0x2e]
-; CHECK: frintx.2s v0, v0 ; encoding: [0x00,0x98,0x21,0x2e]
-; CHECK: frinti.2s v0, v0 ; encoding: [0x00,0x98,0xa1,0x2e]
-; CHECK: frintm.2s v0, v0 ; encoding: [0x00,0x98,0x21,0x0e]
-; CHECK: frintn.2s v0, v0 ; encoding: [0x00,0x88,0x21,0x0e]
-; CHECK: frintp.2s v0, v0 ; encoding: [0x00,0x88,0xa1,0x0e]
-; CHECK: frintz.2s v0, v0 ; encoding: [0x00,0x98,0xa1,0x0e]
-; CHECK: frsqrte.2s v0, v0 ; encoding: [0x00,0xd8,0xa1,0x2e]
-; CHECK: fsqrt.2s v0, v0 ; encoding: [0x00,0xf8,0xa1,0x2e]
-; CHECK: neg.8b v0, v0 ; encoding: [0x00,0xb8,0x20,0x2e]
-; CHECK: not.8b v0, v0 ; encoding: [0x00,0x58,0x20,0x2e]
-; CHECK: rbit.8b v0, v0 ; encoding: [0x00,0x58,0x60,0x2e]
-; CHECK: rev16.8b v0, v0 ; encoding: [0x00,0x18,0x20,0x0e]
-; CHECK: rev32.8b v0, v0 ; encoding: [0x00,0x08,0x20,0x2e]
-; CHECK: rev64.8b v0, v0 ; encoding: [0x00,0x08,0x20,0x0e]
-; CHECK: sadalp.4h v0, v0 ; encoding: [0x00,0x68,0x20,0x0e]
-; CHECK: saddlp.4h v0, v0 ; encoding: [0x00,0x28,0x20,0x0e]
-; CHECK: scvtf.2s v0, v0 ; encoding: [0x00,0xd8,0x21,0x0e]
-; CHECK: sqabs.8b v0, v0 ; encoding: [0x00,0x78,0x20,0x0e]
-; CHECK: sqneg.8b v0, v0 ; encoding: [0x00,0x78,0x20,0x2e]
-; CHECK: sqxtn.8b v0, v0 ; encoding: [0x00,0x48,0x21,0x0e]
-; CHECK: sqxtun.8b v0, v0 ; encoding: [0x00,0x28,0x21,0x2e]
-; CHECK: suqadd.8b v0, v0 ; encoding: [0x00,0x38,0x20,0x0e]
-; CHECK: uadalp.4h v0, v0 ; encoding: [0x00,0x68,0x20,0x2e]
-; CHECK: uaddlp.4h v0, v0 ; encoding: [0x00,0x28,0x20,0x2e]
-; CHECK: ucvtf.2s v0, v0 ; encoding: [0x00,0xd8,0x21,0x2e]
-; CHECK: uqxtn.8b v0, v0 ; encoding: [0x00,0x48,0x21,0x2e]
-; CHECK: urecpe.2s v0, v0 ; encoding: [0x00,0xc8,0xa1,0x0e]
-; CHECK: ursqrte.2s v0, v0 ; encoding: [0x00,0xc8,0xa1,0x2e]
-; CHECK: usqadd.8b v0, v0 ; encoding: [0x00,0x38,0x20,0x2e]
-; CHECK: xtn.8b v0, v0 ; encoding: [0x00,0x28,0x21,0x0e]
-; CHECK: shll.8h v1, v2, #8 ; encoding: [0x41,0x38,0x21,0x2e]
-; CHECK: shll.4s v3, v4, #16 ; encoding: [0x83,0x38,0x61,0x2e]
-; CHECK: shll.2d v5, v6, #32 ; encoding: [0xc5,0x38,0xa1,0x2e]
-; CHECK: shll2.8h v7, v8, #8 ; encoding: [0x07,0x39,0x21,0x6e]
-; CHECK: shll2.4s v9, v10, #16 ; encoding: [0x49,0x39,0x61,0x6e]
-; CHECK: shll2.2d v11, v12, #32 ; encoding: [0x8b,0x39,0xa1,0x6e]
-; CHECK: shll.8h v1, v2, #8 ; encoding: [0x41,0x38,0x21,0x2e]
-; CHECK: shll.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x2e]
-; CHECK: shll.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x2e]
-; CHECK: shll2.8h v1, v2, #8 ; encoding: [0x41,0x38,0x21,0x6e]
-; CHECK: shll2.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x6e]
-; CHECK: shll2.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x6e]
-
-
- cmeq.8b v0, v0, #0
- cmeq.16b v0, v0, #0
- cmeq.4h v0, v0, #0
- cmeq.8h v0, v0, #0
- cmeq.2s v0, v0, #0
- cmeq.4s v0, v0, #0
- cmeq.2d v0, v0, #0
-
-; CHECK: cmeq.8b v0, v0, #0 ; encoding: [0x00,0x98,0x20,0x0e]
-; CHECK: cmeq.16b v0, v0, #0 ; encoding: [0x00,0x98,0x20,0x4e]
-; CHECK: cmeq.4h v0, v0, #0 ; encoding: [0x00,0x98,0x60,0x0e]
-; CHECK: cmeq.8h v0, v0, #0 ; encoding: [0x00,0x98,0x60,0x4e]
-; CHECK: cmeq.2s v0, v0, #0 ; encoding: [0x00,0x98,0xa0,0x0e]
-; CHECK: cmeq.4s v0, v0, #0 ; encoding: [0x00,0x98,0xa0,0x4e]
-; CHECK: cmeq.2d v0, v0, #0 ; encoding: [0x00,0x98,0xe0,0x4e]
-
- cmge.8b v0, v0, #0
- cmgt.8b v0, v0, #0
- cmle.8b v0, v0, #0
- cmlt.8b v0, v0, #0
- fcmeq.2s v0, v0, #0
- fcmge.2s v0, v0, #0
- fcmgt.2s v0, v0, #0
- fcmle.2s v0, v0, #0
- fcmlt.2s v0, v0, #0
-
-; ARM verbose mode aliases
- cmlt v8.8b, v14.8b, #0
- cmlt v8.16b, v14.16b, #0
- cmlt v8.4h, v14.4h, #0
- cmlt v8.8h, v14.8h, #0
- cmlt v8.2s, v14.2s, #0
- cmlt v8.4s, v14.4s, #0
- cmlt v8.2d, v14.2d, #0
-
-; CHECK: cmge.8b v0, v0, #0 ; encoding: [0x00,0x88,0x20,0x2e]
-; CHECK: cmgt.8b v0, v0, #0 ; encoding: [0x00,0x88,0x20,0x0e]
-; CHECK: cmle.8b v0, v0, #0 ; encoding: [0x00,0x98,0x20,0x2e]
-; CHECK: cmlt.8b v0, v0, #0 ; encoding: [0x00,0xa8,0x20,0x0e]
-; CHECK: fcmeq.2s v0, v0, #0 ; encoding: [0x00,0xd8,0xa0,0x0e]
-; CHECK: fcmge.2s v0, v0, #0 ; encoding: [0x00,0xc8,0xa0,0x2e]
-; CHECK: fcmgt.2s v0, v0, #0 ; encoding: [0x00,0xc8,0xa0,0x0e]
-; CHECK: fcmle.2s v0, v0, #0 ; encoding: [0x00,0xd8,0xa0,0x2e]
-; CHECK: fcmlt.2s v0, v0, #0 ; encoding: [0x00,0xe8,0xa0,0x0e]
-; CHECK: cmlt.8b v8, v14, #0 ; encoding: [0xc8,0xa9,0x20,0x0e]
-; CHECK: cmlt.16b v8, v14, #0 ; encoding: [0xc8,0xa9,0x20,0x4e]
-; CHECK: cmlt.4h v8, v14, #0 ; encoding: [0xc8,0xa9,0x60,0x0e]
-; CHECK: cmlt.8h v8, v14, #0 ; encoding: [0xc8,0xa9,0x60,0x4e]
-; CHECK: cmlt.2s v8, v14, #0 ; encoding: [0xc8,0xa9,0xa0,0x0e]
-; CHECK: cmlt.4s v8, v14, #0 ; encoding: [0xc8,0xa9,0xa0,0x4e]
-; CHECK: cmlt.2d v8, v14, #0 ; encoding: [0xc8,0xa9,0xe0,0x4e]
-
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD Floating-point <-> Integer Conversions
-;===-------------------------------------------------------------------------===
-
- fcvtas.2s v0, v0
- fcvtas.4s v0, v0
- fcvtas.2d v0, v0
- fcvtas s0, s0
- fcvtas d0, d0
-
-; CHECK: fcvtas.2s v0, v0 ; encoding: [0x00,0xc8,0x21,0x0e]
-; CHECK: fcvtas.4s v0, v0 ; encoding: [0x00,0xc8,0x21,0x4e]
-; CHECK: fcvtas.2d v0, v0 ; encoding: [0x00,0xc8,0x61,0x4e]
-; CHECK: fcvtas s0, s0 ; encoding: [0x00,0xc8,0x21,0x5e]
-; CHECK: fcvtas d0, d0 ; encoding: [0x00,0xc8,0x61,0x5e]
-
- fcvtau.2s v0, v0
- fcvtau.4s v0, v0
- fcvtau.2d v0, v0
- fcvtau s0, s0
- fcvtau d0, d0
-
-; CHECK: fcvtau.2s v0, v0 ; encoding: [0x00,0xc8,0x21,0x2e]
-; CHECK: fcvtau.4s v0, v0 ; encoding: [0x00,0xc8,0x21,0x6e]
-; CHECK: fcvtau.2d v0, v0 ; encoding: [0x00,0xc8,0x61,0x6e]
-; CHECK: fcvtau s0, s0 ; encoding: [0x00,0xc8,0x21,0x7e]
-; CHECK: fcvtau d0, d0 ; encoding: [0x00,0xc8,0x61,0x7e]
-
- fcvtl v1.4s, v5.4h
- fcvtl v2.2d, v6.2s
- fcvtl2 v3.4s, v7.8h
- fcvtl2 v4.2d, v8.4s
-
-; CHECK: fcvtl v1.4s, v5.4h ; encoding: [0xa1,0x78,0x21,0x0e]
-; CHECK: fcvtl v2.2d, v6.2s ; encoding: [0xc2,0x78,0x61,0x0e]
-; CHECK: fcvtl2 v3.4s, v7.8h ; encoding: [0xe3,0x78,0x21,0x4e]
-; CHECK: fcvtl2 v4.2d, v8.4s ; encoding: [0x04,0x79,0x61,0x4e]
-
- fcvtms.2s v0, v0
- fcvtms.4s v0, v0
- fcvtms.2d v0, v0
- fcvtms s0, s0
- fcvtms d0, d0
-
-; CHECK: fcvtms.2s v0, v0 ; encoding: [0x00,0xb8,0x21,0x0e]
-; CHECK: fcvtms.4s v0, v0 ; encoding: [0x00,0xb8,0x21,0x4e]
-; CHECK: fcvtms.2d v0, v0 ; encoding: [0x00,0xb8,0x61,0x4e]
-; CHECK: fcvtms s0, s0 ; encoding: [0x00,0xb8,0x21,0x5e]
-; CHECK: fcvtms d0, d0 ; encoding: [0x00,0xb8,0x61,0x5e]
-
- fcvtmu.2s v0, v0
- fcvtmu.4s v0, v0
- fcvtmu.2d v0, v0
- fcvtmu s0, s0
- fcvtmu d0, d0
-
-; CHECK: fcvtmu.2s v0, v0 ; encoding: [0x00,0xb8,0x21,0x2e]
-; CHECK: fcvtmu.4s v0, v0 ; encoding: [0x00,0xb8,0x21,0x6e]
-; CHECK: fcvtmu.2d v0, v0 ; encoding: [0x00,0xb8,0x61,0x6e]
-; CHECK: fcvtmu s0, s0 ; encoding: [0x00,0xb8,0x21,0x7e]
-; CHECK: fcvtmu d0, d0 ; encoding: [0x00,0xb8,0x61,0x7e]
-
- fcvtns.2s v0, v0
- fcvtns.4s v0, v0
- fcvtns.2d v0, v0
- fcvtns s0, s0
- fcvtns d0, d0
-
-; CHECK: fcvtns.2s v0, v0 ; encoding: [0x00,0xa8,0x21,0x0e]
-; CHECK: fcvtns.4s v0, v0 ; encoding: [0x00,0xa8,0x21,0x4e]
-; CHECK: fcvtns.2d v0, v0 ; encoding: [0x00,0xa8,0x61,0x4e]
-; CHECK: fcvtns s0, s0 ; encoding: [0x00,0xa8,0x21,0x5e]
-; CHECK: fcvtns d0, d0 ; encoding: [0x00,0xa8,0x61,0x5e]
-
- fcvtnu.2s v0, v0
- fcvtnu.4s v0, v0
- fcvtnu.2d v0, v0
- fcvtnu s0, s0
- fcvtnu d0, d0
-
-; CHECK: fcvtnu.2s v0, v0 ; encoding: [0x00,0xa8,0x21,0x2e]
-; CHECK: fcvtnu.4s v0, v0 ; encoding: [0x00,0xa8,0x21,0x6e]
-; CHECK: fcvtnu.2d v0, v0 ; encoding: [0x00,0xa8,0x61,0x6e]
-; CHECK: fcvtnu s0, s0 ; encoding: [0x00,0xa8,0x21,0x7e]
-; CHECK: fcvtnu d0, d0 ; encoding: [0x00,0xa8,0x61,0x7e]
-
- fcvtn v2.4h, v4.4s
- fcvtn v3.2s, v5.2d
- fcvtn2 v4.8h, v6.4s
- fcvtn2 v5.4s, v7.2d
- fcvtxn v6.2s, v9.2d
- fcvtxn2 v7.4s, v8.2d
-
-; CHECK: fcvtn v2.4h, v4.4s ; encoding: [0x82,0x68,0x21,0x0e]
-; CHECK: fcvtn v3.2s, v5.2d ; encoding: [0xa3,0x68,0x61,0x0e]
-; CHECK: fcvtn2 v4.8h, v6.4s ; encoding: [0xc4,0x68,0x21,0x4e]
-; CHECK: fcvtn2 v5.4s, v7.2d ; encoding: [0xe5,0x68,0x61,0x4e]
-; CHECK: fcvtxn v6.2s, v9.2d ; encoding: [0x26,0x69,0x61,0x2e]
-; CHECK: fcvtxn2 v7.4s, v8.2d ; encoding: [0x07,0x69,0x61,0x6e]
-
- fcvtps.2s v0, v0
- fcvtps.4s v0, v0
- fcvtps.2d v0, v0
- fcvtps s0, s0
- fcvtps d0, d0
-
-; CHECK: fcvtps.2s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x0e]
-; CHECK: fcvtps.4s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x4e]
-; CHECK: fcvtps.2d v0, v0 ; encoding: [0x00,0xa8,0xe1,0x4e]
-; CHECK: fcvtps s0, s0 ; encoding: [0x00,0xa8,0xa1,0x5e]
-; CHECK: fcvtps d0, d0 ; encoding: [0x00,0xa8,0xe1,0x5e]
-
- fcvtpu.2s v0, v0
- fcvtpu.4s v0, v0
- fcvtpu.2d v0, v0
- fcvtpu s0, s0
- fcvtpu d0, d0
-
-; CHECK: fcvtpu.2s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x2e]
-; CHECK: fcvtpu.4s v0, v0 ; encoding: [0x00,0xa8,0xa1,0x6e]
-; CHECK: fcvtpu.2d v0, v0 ; encoding: [0x00,0xa8,0xe1,0x6e]
-; CHECK: fcvtpu s0, s0 ; encoding: [0x00,0xa8,0xa1,0x7e]
-; CHECK: fcvtpu d0, d0 ; encoding: [0x00,0xa8,0xe1,0x7e]
-
- fcvtzs.2s v0, v0
- fcvtzs.4s v0, v0
- fcvtzs.2d v0, v0
- fcvtzs s0, s0
- fcvtzs d0, d0
-
-; CHECK: fcvtzs.2s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x0e]
-; CHECK: fcvtzs.4s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x4e]
-; CHECK: fcvtzs.2d v0, v0 ; encoding: [0x00,0xb8,0xe1,0x4e]
-; CHECK: fcvtzs s0, s0 ; encoding: [0x00,0xb8,0xa1,0x5e]
-; CHECK: fcvtzs d0, d0 ; encoding: [0x00,0xb8,0xe1,0x5e]
-
- fcvtzu.2s v0, v0
- fcvtzu.4s v0, v0
- fcvtzu.2d v0, v0
- fcvtzu s0, s0
- fcvtzu d0, d0
-
-; CHECK: fcvtzu.2s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x2e]
-; CHECK: fcvtzu.4s v0, v0 ; encoding: [0x00,0xb8,0xa1,0x6e]
-; CHECK: fcvtzu.2d v0, v0 ; encoding: [0x00,0xb8,0xe1,0x6e]
-; CHECK: fcvtzu s0, s0 ; encoding: [0x00,0xb8,0xa1,0x7e]
-; CHECK: fcvtzu d0, d0 ; encoding: [0x00,0xb8,0xe1,0x7e]
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD modified immediate instructions
-;===-------------------------------------------------------------------------===
-
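-; An explicit "lsl #0" is the default shift and is printed without the shift
-; operand, which is why the first two CHECK lines of each group below match
-; the same text and encoding.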
- bic.2s v0, #1
- bic.2s v0, #1, lsl #0
- bic.2s v0, #1, lsl #8
- bic.2s v0, #1, lsl #16
- bic.2s v0, #1, lsl #24
-
-; CHECK: bic.2s v0, #1 ; encoding: [0x20,0x14,0x00,0x2f]
-; CHECK: bic.2s v0, #1 ; encoding: [0x20,0x14,0x00,0x2f]
-; CHECK: bic.2s v0, #1, lsl #8 ; encoding: [0x20,0x34,0x00,0x2f]
-; CHECK: bic.2s v0, #1, lsl #16 ; encoding: [0x20,0x54,0x00,0x2f]
-; CHECK: bic.2s v0, #1, lsl #24 ; encoding: [0x20,0x74,0x00,0x2f]
-
- bic.4h v0, #1
- bic.4h v0, #1, lsl #0
- bic.4h v0, #1, lsl #8
-
-; CHECK: bic.4h v0, #1 ; encoding: [0x20,0x94,0x00,0x2f]
-; CHECK: bic.4h v0, #1 ; encoding: [0x20,0x94,0x00,0x2f]
-; CHECK: bic.4h v0, #1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x2f]
-
- bic.4s v0, #1
- bic.4s v0, #1, lsl #0
- bic.4s v0, #1, lsl #8
- bic.4s v0, #1, lsl #16
- bic.4s v0, #1, lsl #24
-
-; CHECK: bic.4s v0, #1 ; encoding: [0x20,0x14,0x00,0x6f]
-; CHECK: bic.4s v0, #1 ; encoding: [0x20,0x14,0x00,0x6f]
-; CHECK: bic.4s v0, #1, lsl #8 ; encoding: [0x20,0x34,0x00,0x6f]
-; CHECK: bic.4s v0, #1, lsl #16 ; encoding: [0x20,0x54,0x00,0x6f]
-; CHECK: bic.4s v0, #1, lsl #24 ; encoding: [0x20,0x74,0x00,0x6f]
-
- bic.8h v0, #1
- bic.8h v0, #1, lsl #0
- bic.8h v0, #1, lsl #8
-
-; CHECK: bic.8h v0, #1 ; encoding: [0x20,0x94,0x00,0x6f]
-; CHECK: bic.8h v0, #1 ; encoding: [0x20,0x94,0x00,0x6f]
-; CHECK: bic.8h v0, #1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x6f]
-
- fmov.2d v0, #1.250000e-01
-
-; CHECK: fmov.2d v0, #1.250000e-01 ; encoding: [0x00,0xf4,0x02,0x6f]
-
- fmov.2s v0, #1.250000e-01
- fmov.4s v0, #1.250000e-01
-
-; CHECK: fmov.2s v0, #1.250000e-01 ; encoding: [0x00,0xf4,0x02,0x0f]
-; CHECK: fmov.4s v0, #1.250000e-01 ; encoding: [0x00,0xf4,0x02,0x4f]
-
- orr.2s v0, #1
- orr.2s v0, #1, lsl #0
- orr.2s v0, #1, lsl #8
- orr.2s v0, #1, lsl #16
- orr.2s v0, #1, lsl #24
-
-; CHECK: orr.2s v0, #1 ; encoding: [0x20,0x14,0x00,0x0f]
-; CHECK: orr.2s v0, #1 ; encoding: [0x20,0x14,0x00,0x0f]
-; CHECK: orr.2s v0, #1, lsl #8 ; encoding: [0x20,0x34,0x00,0x0f]
-; CHECK: orr.2s v0, #1, lsl #16 ; encoding: [0x20,0x54,0x00,0x0f]
-; CHECK: orr.2s v0, #1, lsl #24 ; encoding: [0x20,0x74,0x00,0x0f]
-
- orr.4h v0, #1
- orr.4h v0, #1, lsl #0
- orr.4h v0, #1, lsl #8
-
-; CHECK: orr.4h v0, #1 ; encoding: [0x20,0x94,0x00,0x0f]
-; CHECK: orr.4h v0, #1 ; encoding: [0x20,0x94,0x00,0x0f]
-; CHECK: orr.4h v0, #1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x0f]
-
- orr.4s v0, #1
- orr.4s v0, #1, lsl #0
- orr.4s v0, #1, lsl #8
- orr.4s v0, #1, lsl #16
- orr.4s v0, #1, lsl #24
-
-; CHECK: orr.4s v0, #1 ; encoding: [0x20,0x14,0x00,0x4f]
-; CHECK: orr.4s v0, #1 ; encoding: [0x20,0x14,0x00,0x4f]
-; CHECK: orr.4s v0, #1, lsl #8 ; encoding: [0x20,0x34,0x00,0x4f]
-; CHECK: orr.4s v0, #1, lsl #16 ; encoding: [0x20,0x54,0x00,0x4f]
-; CHECK: orr.4s v0, #1, lsl #24 ; encoding: [0x20,0x74,0x00,0x4f]
-
- orr.8h v0, #1
- orr.8h v0, #1, lsl #0
- orr.8h v0, #1, lsl #8
-
-; CHECK: orr.8h v0, #1 ; encoding: [0x20,0x94,0x00,0x4f]
-; CHECK: orr.8h v0, #1 ; encoding: [0x20,0x94,0x00,0x4f]
-; CHECK: orr.8h v0, #1, lsl #8 ; encoding: [0x20,0xb4,0x00,0x4f]
-
- movi d0, #0x000000000000ff
- movi.2d v0, #0x000000000000ff
-
-; CHECK: movi d0, #0x000000000000ff ; encoding: [0x20,0xe4,0x00,0x2f]
-; CHECK: movi.2d v0, #0x000000000000ff ; encoding: [0x20,0xe4,0x00,0x6f]
-
- movi.2s v0, #1
- movi.2s v0, #1, lsl #0
- movi.2s v0, #1, lsl #8
- movi.2s v0, #1, lsl #16
- movi.2s v0, #1, lsl #24
-
-; CHECK: movi.2s v0, #1 ; encoding: [0x20,0x04,0x00,0x0f]
-; CHECK: movi.2s v0, #1 ; encoding: [0x20,0x04,0x00,0x0f]
-; CHECK: movi.2s v0, #1, lsl #8 ; encoding: [0x20,0x24,0x00,0x0f]
-; CHECK: movi.2s v0, #1, lsl #16 ; encoding: [0x20,0x44,0x00,0x0f]
-; CHECK: movi.2s v0, #1, lsl #24 ; encoding: [0x20,0x64,0x00,0x0f]
-
- movi.4s v0, #1
- movi.4s v0, #1, lsl #0
- movi.4s v0, #1, lsl #8
- movi.4s v0, #1, lsl #16
- movi.4s v0, #1, lsl #24
-
-; CHECK: movi.4s v0, #1 ; encoding: [0x20,0x04,0x00,0x4f]
-; CHECK: movi.4s v0, #1 ; encoding: [0x20,0x04,0x00,0x4f]
-; CHECK: movi.4s v0, #1, lsl #8 ; encoding: [0x20,0x24,0x00,0x4f]
-; CHECK: movi.4s v0, #1, lsl #16 ; encoding: [0x20,0x44,0x00,0x4f]
-; CHECK: movi.4s v0, #1, lsl #24 ; encoding: [0x20,0x64,0x00,0x4f]
-
- movi.4h v0, #1
- movi.4h v0, #1, lsl #0
- movi.4h v0, #1, lsl #8
-
-; CHECK: movi.4h v0, #1 ; encoding: [0x20,0x84,0x00,0x0f]
-; CHECK: movi.4h v0, #1 ; encoding: [0x20,0x84,0x00,0x0f]
-; CHECK: movi.4h v0, #1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x0f]
-
- movi.8h v0, #1
- movi.8h v0, #1, lsl #0
- movi.8h v0, #1, lsl #8
-
-; CHECK: movi.8h v0, #1 ; encoding: [0x20,0x84,0x00,0x4f]
-; CHECK: movi.8h v0, #1 ; encoding: [0x20,0x84,0x00,0x4f]
-; CHECK: movi.8h v0, #1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x4f]
-
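-; "msl" is the modified shift left, which shifts ones (not zeros) into the
-; low bits; it is only available for the 32-bit element forms of movi/mvni.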
- movi.2s v0, #1, msl #8
- movi.2s v0, #1, msl #16
- movi.4s v0, #1, msl #8
- movi.4s v0, #1, msl #16
-
-; CHECK: movi.2s v0, #1, msl #8 ; encoding: [0x20,0xc4,0x00,0x0f]
-; CHECK: movi.2s v0, #1, msl #16 ; encoding: [0x20,0xd4,0x00,0x0f]
-; CHECK: movi.4s v0, #1, msl #8 ; encoding: [0x20,0xc4,0x00,0x4f]
-; CHECK: movi.4s v0, #1, msl #16 ; encoding: [0x20,0xd4,0x00,0x4f]
-
- movi.8b v0, #1
- movi.16b v0, #1
-
-; CHECK: movi.8b v0, #1 ; encoding: [0x20,0xe4,0x00,0x0f]
-; CHECK: movi.16b v0, #1 ; encoding: [0x20,0xe4,0x00,0x4f]
-
- mvni.2s v0, #1
- mvni.2s v0, #1, lsl #0
- mvni.2s v0, #1, lsl #8
- mvni.2s v0, #1, lsl #16
- mvni.2s v0, #1, lsl #24
-
-; CHECK: mvni.2s v0, #1 ; encoding: [0x20,0x04,0x00,0x2f]
-; CHECK: mvni.2s v0, #1 ; encoding: [0x20,0x04,0x00,0x2f]
-; CHECK: mvni.2s v0, #1, lsl #8 ; encoding: [0x20,0x24,0x00,0x2f]
-; CHECK: mvni.2s v0, #1, lsl #16 ; encoding: [0x20,0x44,0x00,0x2f]
-; CHECK: mvni.2s v0, #1, lsl #24 ; encoding: [0x20,0x64,0x00,0x2f]
-
- mvni.4s v0, #1
- mvni.4s v0, #1, lsl #0
- mvni.4s v0, #1, lsl #8
- mvni.4s v0, #1, lsl #16
- mvni.4s v0, #1, lsl #24
-
-; CHECK: mvni.4s v0, #1 ; encoding: [0x20,0x04,0x00,0x6f]
-; CHECK: mvni.4s v0, #1 ; encoding: [0x20,0x04,0x00,0x6f]
-; CHECK: mvni.4s v0, #1, lsl #8 ; encoding: [0x20,0x24,0x00,0x6f]
-; CHECK: mvni.4s v0, #1, lsl #16 ; encoding: [0x20,0x44,0x00,0x6f]
-; CHECK: mvni.4s v0, #1, lsl #24 ; encoding: [0x20,0x64,0x00,0x6f]
-
- mvni.4h v0, #1
- mvni.4h v0, #1, lsl #0
- mvni.4h v0, #1, lsl #8
-
-; CHECK: mvni.4h v0, #1 ; encoding: [0x20,0x84,0x00,0x2f]
-; CHECK: mvni.4h v0, #1 ; encoding: [0x20,0x84,0x00,0x2f]
-; CHECK: mvni.4h v0, #1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x2f]
-
- mvni.8h v0, #1
- mvni.8h v0, #1, lsl #0
- mvni.8h v0, #1, lsl #8
-
-; CHECK: mvni.8h v0, #1 ; encoding: [0x20,0x84,0x00,0x6f]
-; CHECK: mvni.8h v0, #1 ; encoding: [0x20,0x84,0x00,0x6f]
-; CHECK: mvni.8h v0, #1, lsl #8 ; encoding: [0x20,0xa4,0x00,0x6f]
-
- mvni.2s v0, #1, msl #8
- mvni.2s v0, #1, msl #16
- mvni.4s v0, #1, msl #8
- mvni.4s v0, #1, msl #16
-
-; CHECK: mvni.2s v0, #1, msl #8 ; encoding: [0x20,0xc4,0x00,0x2f]
-; CHECK: mvni.2s v0, #1, msl #16 ; encoding: [0x20,0xd4,0x00,0x2f]
-; CHECK: mvni.4s v0, #1, msl #8 ; encoding: [0x20,0xc4,0x00,0x6f]
-; CHECK: mvni.4s v0, #1, msl #16 ; encoding: [0x20,0xd4,0x00,0x6f]
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD scalar x index
-;===-------------------------------------------------------------------------===
-
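-; The legal lane index range depends on the element size: up to 7 for .h,
-; 3 for .s, and 1 for .d. The cases below use the top index for each size.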
- fmla.s s0, s0, v0[3]
- fmla.d d0, d0, v0[1]
- fmls.s s0, s0, v0[3]
- fmls.d d0, d0, v0[1]
- fmulx.s s0, s0, v0[3]
- fmulx.d d0, d0, v0[1]
- fmul.s s0, s0, v0[3]
- fmul.d d0, d0, v0[1]
- sqdmlal.h s0, h0, v0[7]
- sqdmlal.s d0, s0, v0[3]
- sqdmlsl.h s0, h0, v0[7]
- sqdmulh.h h0, h0, v0[7]
- sqdmulh.s s0, s0, v0[3]
- sqdmull.h s0, h0, v0[7]
- sqdmull.s d0, s0, v0[3]
- sqrdmulh.h h0, h0, v0[7]
- sqrdmulh.s s0, s0, v0[3]
-
-; CHECK: fmla.s s0, s0, v0[3] ; encoding: [0x00,0x18,0xa0,0x5f]
-; CHECK: fmla.d d0, d0, v0[1] ; encoding: [0x00,0x18,0xc0,0x5f]
-; CHECK: fmls.s s0, s0, v0[3] ; encoding: [0x00,0x58,0xa0,0x5f]
-; CHECK: fmls.d d0, d0, v0[1] ; encoding: [0x00,0x58,0xc0,0x5f]
-; CHECK: fmulx.s s0, s0, v0[3] ; encoding: [0x00,0x98,0xa0,0x7f]
-; CHECK: fmulx.d d0, d0, v0[1] ; encoding: [0x00,0x98,0xc0,0x7f]
-; CHECK: fmul.s s0, s0, v0[3] ; encoding: [0x00,0x98,0xa0,0x5f]
-; CHECK: fmul.d d0, d0, v0[1] ; encoding: [0x00,0x98,0xc0,0x5f]
-; CHECK: sqdmlal.h s0, h0, v0[7] ; encoding: [0x00,0x38,0x70,0x5f]
-; CHECK: sqdmlal.s d0, s0, v0[3] ; encoding: [0x00,0x38,0xa0,0x5f]
-; CHECK: sqdmlsl.h s0, h0, v0[7] ; encoding: [0x00,0x78,0x70,0x5f]
-; CHECK: sqdmulh.h h0, h0, v0[7] ; encoding: [0x00,0xc8,0x70,0x5f]
-; CHECK: sqdmulh.s s0, s0, v0[3] ; encoding: [0x00,0xc8,0xa0,0x5f]
-; CHECK: sqdmull.h s0, h0, v0[7] ; encoding: [0x00,0xb8,0x70,0x5f]
-; CHECK: sqdmull.s d0, s0, v0[3] ; encoding: [0x00,0xb8,0xa0,0x5f]
-; CHECK: sqrdmulh.h h0, h0, v0[7] ; encoding: [0x00,0xd8,0x70,0x5f]
-; CHECK: sqrdmulh.s s0, s0, v0[3] ; encoding: [0x00,0xd8,0xa0,0x5f]
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD SMLAL
-;===-------------------------------------------------------------------------===
- smlal.8h v1, v2, v3
- smlal.4s v1, v2, v3
- smlal.2d v1, v2, v3
- smlal2.8h v1, v2, v3
- smlal2.4s v1, v2, v3
- smlal2.2d v1, v2, v3
-
- smlal v13.8h, v8.8b, v0.8b
- smlal v13.4s, v8.4h, v0.4h
- smlal v13.2d, v8.2s, v0.2s
- smlal2 v13.8h, v8.16b, v0.16b
- smlal2 v13.4s, v8.8h, v0.8h
- smlal2 v13.2d, v8.4s, v0.4s
-
-; CHECK: smlal.8h v1, v2, v3 ; encoding: [0x41,0x80,0x23,0x0e]
-; CHECK: smlal.4s v1, v2, v3 ; encoding: [0x41,0x80,0x63,0x0e]
-; CHECK: smlal.2d v1, v2, v3 ; encoding: [0x41,0x80,0xa3,0x0e]
-; CHECK: smlal2.8h v1, v2, v3 ; encoding: [0x41,0x80,0x23,0x4e]
-; CHECK: smlal2.4s v1, v2, v3 ; encoding: [0x41,0x80,0x63,0x4e]
-; CHECK: smlal2.2d v1, v2, v3 ; encoding: [0x41,0x80,0xa3,0x4e]
-; CHECK: smlal.8h v13, v8, v0 ; encoding: [0x0d,0x81,0x20,0x0e]
-; CHECK: smlal.4s v13, v8, v0 ; encoding: [0x0d,0x81,0x60,0x0e]
-; CHECK: smlal.2d v13, v8, v0 ; encoding: [0x0d,0x81,0xa0,0x0e]
-; CHECK: smlal2.8h v13, v8, v0 ; encoding: [0x0d,0x81,0x20,0x4e]
-; CHECK: smlal2.4s v13, v8, v0 ; encoding: [0x0d,0x81,0x60,0x4e]
-; CHECK: smlal2.2d v13, v8, v0 ; encoding: [0x0d,0x81,0xa0,0x4e]
-
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD vector x index
-;===-------------------------------------------------------------------------===
-
- fmla.2s v0, v0, v0[0]
- fmla.4s v0, v0, v0[1]
- fmla.2d v0, v0, v0[1]
- fmls.2s v0, v0, v0[0]
- fmls.4s v0, v0, v0[1]
- fmls.2d v0, v0, v0[1]
- fmulx.2s v0, v0, v0[0]
- fmulx.4s v0, v0, v0[1]
- fmulx.2d v0, v0, v0[1]
- fmul.2s v0, v0, v0[0]
- fmul.4s v0, v0, v0[1]
- fmul.2d v0, v0, v0[1]
- mla.4h v0, v0, v0[0]
- mla.8h v0, v0, v0[1]
- mla.2s v0, v0, v0[2]
- mla.4s v0, v0, v0[3]
- mls.4h v0, v0, v0[0]
- mls.8h v0, v0, v0[1]
- mls.2s v0, v0, v0[2]
- mls.4s v0, v0, v0[3]
- mul.4h v0, v0, v0[0]
- mul.8h v0, v0, v0[1]
- mul.2s v0, v0, v0[2]
- mul.4s v0, v0, v0[3]
- smlal.4s v0, v0, v0[0]
- smlal2.4s v0, v0, v0[1]
- smlal.2d v0, v0, v0[2]
- smlal2.2d v0, v0, v0[3]
- smlsl.4s v0, v0, v0[0]
- smlsl2.4s v0, v0, v0[1]
- smlsl.2d v0, v0, v0[2]
- smlsl2.2d v0, v0, v0[3]
- smull.4s v0, v0, v0[0]
- smull2.4s v0, v0, v0[1]
- smull.2d v0, v0, v0[2]
- smull2.2d v0, v0, v0[3]
- sqdmlal.4s v0, v0, v0[0]
- sqdmlal2.4s v0, v0, v0[1]
- sqdmlal.2d v0, v0, v0[2]
- sqdmlal2.2d v0, v0, v0[3]
- sqdmlsl.4s v0, v0, v0[0]
- sqdmlsl2.4s v0, v0, v0[1]
- sqdmlsl.2d v0, v0, v0[2]
- sqdmlsl2.2d v0, v0, v0[3]
- sqdmulh.4h v0, v0, v0[0]
- sqdmulh.8h v0, v0, v0[1]
- sqdmulh.2s v0, v0, v0[2]
- sqdmulh.4s v0, v0, v0[3]
- sqdmull.4s v0, v0, v0[0]
- sqdmull2.4s v0, v0, v0[1]
- sqdmull.2d v0, v0, v0[2]
- sqdmull2.2d v0, v0, v0[3]
- sqrdmulh.4h v0, v0, v0[0]
- sqrdmulh.8h v0, v0, v0[1]
- sqrdmulh.2s v0, v0, v0[2]
- sqrdmulh.4s v0, v0, v0[3]
- umlal.4s v0, v0, v0[0]
- umlal2.4s v0, v0, v0[1]
- umlal.2d v0, v0, v0[2]
- umlal2.2d v0, v0, v0[3]
- umlsl.4s v0, v0, v0[0]
- umlsl2.4s v0, v0, v0[1]
- umlsl.2d v0, v0, v0[2]
- umlsl2.2d v0, v0, v0[3]
- umull.4s v0, v0, v0[0]
- umull2.4s v0, v0, v0[1]
- umull.2d v0, v0, v0[2]
- umull2.2d v0, v0, v0[3]
-
-; CHECK: fmla.2s v0, v0, v0[0] ; encoding: [0x00,0x10,0x80,0x0f]
-; CHECK: fmla.4s v0, v0, v0[1] ; encoding: [0x00,0x10,0xa0,0x4f]
-; CHECK: fmla.2d v0, v0, v0[1] ; encoding: [0x00,0x18,0xc0,0x4f]
-; CHECK: fmls.2s v0, v0, v0[0] ; encoding: [0x00,0x50,0x80,0x0f]
-; CHECK: fmls.4s v0, v0, v0[1] ; encoding: [0x00,0x50,0xa0,0x4f]
-; CHECK: fmls.2d v0, v0, v0[1] ; encoding: [0x00,0x58,0xc0,0x4f]
-; CHECK: fmulx.2s v0, v0, v0[0] ; encoding: [0x00,0x90,0x80,0x2f]
-; CHECK: fmulx.4s v0, v0, v0[1] ; encoding: [0x00,0x90,0xa0,0x6f]
-; CHECK: fmulx.2d v0, v0, v0[1] ; encoding: [0x00,0x98,0xc0,0x6f]
-; CHECK: fmul.2s v0, v0, v0[0] ; encoding: [0x00,0x90,0x80,0x0f]
-; CHECK: fmul.4s v0, v0, v0[1] ; encoding: [0x00,0x90,0xa0,0x4f]
-; CHECK: fmul.2d v0, v0, v0[1] ; encoding: [0x00,0x98,0xc0,0x4f]
-; CHECK: mla.4h v0, v0, v0[0] ; encoding: [0x00,0x00,0x40,0x2f]
-; CHECK: mla.8h v0, v0, v0[1] ; encoding: [0x00,0x00,0x50,0x6f]
-; CHECK: mla.2s v0, v0, v0[2] ; encoding: [0x00,0x08,0x80,0x2f]
-; CHECK: mla.4s v0, v0, v0[3] ; encoding: [0x00,0x08,0xa0,0x6f]
-; CHECK: mls.4h v0, v0, v0[0] ; encoding: [0x00,0x40,0x40,0x2f]
-; CHECK: mls.8h v0, v0, v0[1] ; encoding: [0x00,0x40,0x50,0x6f]
-; CHECK: mls.2s v0, v0, v0[2] ; encoding: [0x00,0x48,0x80,0x2f]
-; CHECK: mls.4s v0, v0, v0[3] ; encoding: [0x00,0x48,0xa0,0x6f]
-; CHECK: mul.4h v0, v0, v0[0] ; encoding: [0x00,0x80,0x40,0x0f]
-; CHECK: mul.8h v0, v0, v0[1] ; encoding: [0x00,0x80,0x50,0x4f]
-; CHECK: mul.2s v0, v0, v0[2] ; encoding: [0x00,0x88,0x80,0x0f]
-; CHECK: mul.4s v0, v0, v0[3] ; encoding: [0x00,0x88,0xa0,0x4f]
-; CHECK: smlal.4s v0, v0, v0[0] ; encoding: [0x00,0x20,0x40,0x0f]
-; CHECK: smlal2.4s v0, v0, v0[1] ; encoding: [0x00,0x20,0x50,0x4f]
-; CHECK: smlal.2d v0, v0, v0[2] ; encoding: [0x00,0x28,0x80,0x0f]
-; CHECK: smlal2.2d v0, v0, v0[3] ; encoding: [0x00,0x28,0xa0,0x4f]
-; CHECK: smlsl.4s v0, v0, v0[0] ; encoding: [0x00,0x60,0x40,0x0f]
-; CHECK: smlsl2.4s v0, v0, v0[1] ; encoding: [0x00,0x60,0x50,0x4f]
-; CHECK: smlsl.2d v0, v0, v0[2] ; encoding: [0x00,0x68,0x80,0x0f]
-; CHECK: smlsl2.2d v0, v0, v0[3] ; encoding: [0x00,0x68,0xa0,0x4f]
-; CHECK: smull.4s v0, v0, v0[0] ; encoding: [0x00,0xa0,0x40,0x0f]
-; CHECK: smull2.4s v0, v0, v0[1] ; encoding: [0x00,0xa0,0x50,0x4f]
-; CHECK: smull.2d v0, v0, v0[2] ; encoding: [0x00,0xa8,0x80,0x0f]
-; CHECK: smull2.2d v0, v0, v0[3] ; encoding: [0x00,0xa8,0xa0,0x4f]
-; CHECK: sqdmlal.4s v0, v0, v0[0] ; encoding: [0x00,0x30,0x40,0x0f]
-; CHECK: sqdmlal2.4s v0, v0, v0[1] ; encoding: [0x00,0x30,0x50,0x4f]
-; CHECK: sqdmlal.2d v0, v0, v0[2] ; encoding: [0x00,0x38,0x80,0x0f]
-; CHECK: sqdmlal2.2d v0, v0, v0[3] ; encoding: [0x00,0x38,0xa0,0x4f]
-; CHECK: sqdmlsl.4s v0, v0, v0[0] ; encoding: [0x00,0x70,0x40,0x0f]
-; CHECK: sqdmlsl2.4s v0, v0, v0[1] ; encoding: [0x00,0x70,0x50,0x4f]
-; CHECK: sqdmlsl.2d v0, v0, v0[2] ; encoding: [0x00,0x78,0x80,0x0f]
-; CHECK: sqdmlsl2.2d v0, v0, v0[3] ; encoding: [0x00,0x78,0xa0,0x4f]
-; CHECK: sqdmulh.4h v0, v0, v0[0] ; encoding: [0x00,0xc0,0x40,0x0f]
-; CHECK: sqdmulh.8h v0, v0, v0[1] ; encoding: [0x00,0xc0,0x50,0x4f]
-; CHECK: sqdmulh.2s v0, v0, v0[2] ; encoding: [0x00,0xc8,0x80,0x0f]
-; CHECK: sqdmulh.4s v0, v0, v0[3] ; encoding: [0x00,0xc8,0xa0,0x4f]
-; CHECK: sqdmull.4s v0, v0, v0[0] ; encoding: [0x00,0xb0,0x40,0x0f]
-; CHECK: sqdmull2.4s v0, v0, v0[1] ; encoding: [0x00,0xb0,0x50,0x4f]
-; CHECK: sqdmull.2d v0, v0, v0[2] ; encoding: [0x00,0xb8,0x80,0x0f]
-; CHECK: sqdmull2.2d v0, v0, v0[3] ; encoding: [0x00,0xb8,0xa0,0x4f]
-; CHECK: sqrdmulh.4h v0, v0, v0[0] ; encoding: [0x00,0xd0,0x40,0x0f]
-; CHECK: sqrdmulh.8h v0, v0, v0[1] ; encoding: [0x00,0xd0,0x50,0x4f]
-; CHECK: sqrdmulh.2s v0, v0, v0[2] ; encoding: [0x00,0xd8,0x80,0x0f]
-; CHECK: sqrdmulh.4s v0, v0, v0[3] ; encoding: [0x00,0xd8,0xa0,0x4f]
-; CHECK: umlal.4s v0, v0, v0[0] ; encoding: [0x00,0x20,0x40,0x2f]
-; CHECK: umlal2.4s v0, v0, v0[1] ; encoding: [0x00,0x20,0x50,0x6f]
-; CHECK: umlal.2d v0, v0, v0[2] ; encoding: [0x00,0x28,0x80,0x2f]
-; CHECK: umlal2.2d v0, v0, v0[3] ; encoding: [0x00,0x28,0xa0,0x6f]
-; CHECK: umlsl.4s v0, v0, v0[0] ; encoding: [0x00,0x60,0x40,0x2f]
-; CHECK: umlsl2.4s v0, v0, v0[1] ; encoding: [0x00,0x60,0x50,0x6f]
-; CHECK: umlsl.2d v0, v0, v0[2] ; encoding: [0x00,0x68,0x80,0x2f]
-; CHECK: umlsl2.2d v0, v0, v0[3] ; encoding: [0x00,0x68,0xa0,0x6f]
-; CHECK: umull.4s v0, v0, v0[0] ; encoding: [0x00,0xa0,0x40,0x2f]
-; CHECK: umull2.4s v0, v0, v0[1] ; encoding: [0x00,0xa0,0x50,0x6f]
-; CHECK: umull.2d v0, v0, v0[2] ; encoding: [0x00,0xa8,0x80,0x2f]
-; CHECK: umull2.2d v0, v0, v0[3] ; encoding: [0x00,0xa8,0xa0,0x6f]
-
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD scalar with shift
-;===-------------------------------------------------------------------------===
-
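-; Right-shift forms encode the immediate as (2 * elemsize - shift) in
-; immh:immb, while left shifts encode (elemsize + shift): compare the third
-; encoding byte of "sshr d0, d0, #1" (0x7f) with "shl d0, d0, #1" (0x41).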
- fcvtzs s0, s0, #1
- fcvtzs d0, d0, #2
- fcvtzu s0, s0, #1
- fcvtzu d0, d0, #2
- shl d0, d0, #1
- sli d0, d0, #1
- sqrshrn b0, h0, #1
- sqrshrn h0, s0, #2
- sqrshrn s0, d0, #3
- sqrshrun b0, h0, #1
- sqrshrun h0, s0, #2
- sqrshrun s0, d0, #3
- sqshlu b0, b0, #1
- sqshlu h0, h0, #2
- sqshlu s0, s0, #3
- sqshlu d0, d0, #4
- sqshl b0, b0, #1
- sqshl h0, h0, #2
- sqshl s0, s0, #3
- sqshl d0, d0, #4
- sqshrn b0, h0, #1
- sqshrn h0, s0, #2
- sqshrn s0, d0, #3
- sqshrun b0, h0, #1
- sqshrun h0, s0, #2
- sqshrun s0, d0, #3
- sri d0, d0, #1
- srshr d0, d0, #1
- srsra d0, d0, #1
- sshr d0, d0, #1
- ucvtf s0, s0, #1
- ucvtf d0, d0, #2
- scvtf s0, s0, #1
- scvtf d0, d0, #2
- uqrshrn b0, h0, #1
- uqrshrn h0, s0, #2
- uqrshrn s0, d0, #3
- uqshl b0, b0, #1
- uqshl h0, h0, #2
- uqshl s0, s0, #3
- uqshl d0, d0, #4
- uqshrn b0, h0, #1
- uqshrn h0, s0, #2
- uqshrn s0, d0, #3
- urshr d0, d0, #1
- ursra d0, d0, #1
- ushr d0, d0, #1
- usra d0, d0, #1
-
-; CHECK: fcvtzs s0, s0, #1 ; encoding: [0x00,0xfc,0x3f,0x5f]
-; CHECK: fcvtzs d0, d0, #2 ; encoding: [0x00,0xfc,0x7e,0x5f]
-; CHECK: fcvtzu s0, s0, #1 ; encoding: [0x00,0xfc,0x3f,0x7f]
-; CHECK: fcvtzu d0, d0, #2 ; encoding: [0x00,0xfc,0x7e,0x7f]
-; CHECK: shl d0, d0, #1 ; encoding: [0x00,0x54,0x41,0x5f]
-; CHECK: sli d0, d0, #1 ; encoding: [0x00,0x54,0x41,0x7f]
-; CHECK: sqrshrn b0, h0, #1 ; encoding: [0x00,0x9c,0x0f,0x5f]
-; CHECK: sqrshrn h0, s0, #2 ; encoding: [0x00,0x9c,0x1e,0x5f]
-; CHECK: sqrshrn s0, d0, #3 ; encoding: [0x00,0x9c,0x3d,0x5f]
-; CHECK: sqrshrun b0, h0, #1 ; encoding: [0x00,0x8c,0x0f,0x7f]
-; CHECK: sqrshrun h0, s0, #2 ; encoding: [0x00,0x8c,0x1e,0x7f]
-; CHECK: sqrshrun s0, d0, #3 ; encoding: [0x00,0x8c,0x3d,0x7f]
-; CHECK: sqshlu b0, b0, #1 ; encoding: [0x00,0x64,0x09,0x7f]
-; CHECK: sqshlu h0, h0, #2 ; encoding: [0x00,0x64,0x12,0x7f]
-; CHECK: sqshlu s0, s0, #3 ; encoding: [0x00,0x64,0x23,0x7f]
-; CHECK: sqshlu d0, d0, #4 ; encoding: [0x00,0x64,0x44,0x7f]
-; CHECK: sqshl b0, b0, #1 ; encoding: [0x00,0x74,0x09,0x5f]
-; CHECK: sqshl h0, h0, #2 ; encoding: [0x00,0x74,0x12,0x5f]
-; CHECK: sqshl s0, s0, #3 ; encoding: [0x00,0x74,0x23,0x5f]
-; CHECK: sqshl d0, d0, #4 ; encoding: [0x00,0x74,0x44,0x5f]
-; CHECK: sqshrn b0, h0, #1 ; encoding: [0x00,0x94,0x0f,0x5f]
-; CHECK: sqshrn h0, s0, #2 ; encoding: [0x00,0x94,0x1e,0x5f]
-; CHECK: sqshrn s0, d0, #3 ; encoding: [0x00,0x94,0x3d,0x5f]
-; CHECK: sqshrun b0, h0, #1 ; encoding: [0x00,0x84,0x0f,0x7f]
-; CHECK: sqshrun h0, s0, #2 ; encoding: [0x00,0x84,0x1e,0x7f]
-; CHECK: sqshrun s0, d0, #3 ; encoding: [0x00,0x84,0x3d,0x7f]
-; CHECK: sri d0, d0, #1 ; encoding: [0x00,0x44,0x7f,0x7f]
-; CHECK: srshr d0, d0, #1 ; encoding: [0x00,0x24,0x7f,0x5f]
-; CHECK: srsra d0, d0, #1 ; encoding: [0x00,0x34,0x7f,0x5f]
-; CHECK: sshr d0, d0, #1 ; encoding: [0x00,0x04,0x7f,0x5f]
-; CHECK: ucvtf s0, s0, #1 ; encoding: [0x00,0xe4,0x3f,0x7f]
-; CHECK: ucvtf d0, d0, #2 ; encoding: [0x00,0xe4,0x7e,0x7f]
-; CHECK: scvtf s0, s0, #1 ; encoding: [0x00,0xe4,0x3f,0x5f]
-; CHECK: scvtf d0, d0, #2 ; encoding: [0x00,0xe4,0x7e,0x5f]
-; CHECK: uqrshrn b0, h0, #1 ; encoding: [0x00,0x9c,0x0f,0x7f]
-; CHECK: uqrshrn h0, s0, #2 ; encoding: [0x00,0x9c,0x1e,0x7f]
-; CHECK: uqrshrn s0, d0, #3 ; encoding: [0x00,0x9c,0x3d,0x7f]
-; CHECK: uqshl b0, b0, #1 ; encoding: [0x00,0x74,0x09,0x7f]
-; CHECK: uqshl h0, h0, #2 ; encoding: [0x00,0x74,0x12,0x7f]
-; CHECK: uqshl s0, s0, #3 ; encoding: [0x00,0x74,0x23,0x7f]
-; CHECK: uqshl d0, d0, #4 ; encoding: [0x00,0x74,0x44,0x7f]
-; CHECK: uqshrn b0, h0, #1 ; encoding: [0x00,0x94,0x0f,0x7f]
-; CHECK: uqshrn h0, s0, #2 ; encoding: [0x00,0x94,0x1e,0x7f]
-; CHECK: uqshrn s0, d0, #3 ; encoding: [0x00,0x94,0x3d,0x7f]
-; CHECK: urshr d0, d0, #1 ; encoding: [0x00,0x24,0x7f,0x7f]
-; CHECK: ursra d0, d0, #1 ; encoding: [0x00,0x34,0x7f,0x7f]
-; CHECK: ushr d0, d0, #1 ; encoding: [0x00,0x04,0x7f,0x7f]
-; CHECK: usra d0, d0, #1 ; encoding: [0x00,0x14,0x7f,0x7f]
-
-
-;===-------------------------------------------------------------------------===
-; AdvSIMD vector with shift
-;===-------------------------------------------------------------------------===
-
- fcvtzs.2s v0, v0, #1
- fcvtzs.4s v0, v0, #2
- fcvtzs.2d v0, v0, #3
- fcvtzu.2s v0, v0, #1
- fcvtzu.4s v0, v0, #2
- fcvtzu.2d v0, v0, #3
- rshrn.8b v0, v0, #1
- rshrn2.16b v0, v0, #2
- rshrn.4h v0, v0, #3
- rshrn2.8h v0, v0, #4
- rshrn.2s v0, v0, #5
- rshrn2.4s v0, v0, #6
- scvtf.2s v0, v0, #1
- scvtf.4s v0, v0, #2
- scvtf.2d v0, v0, #3
- shl.8b v0, v0, #1
- shl.16b v0, v0, #2
- shl.4h v0, v0, #3
- shl.8h v0, v0, #4
- shl.2s v0, v0, #5
- shl.4s v0, v0, #6
- shl.2d v0, v0, #7
- shrn.8b v0, v0, #1
- shrn2.16b v0, v0, #2
- shrn.4h v0, v0, #3
- shrn2.8h v0, v0, #4
- shrn.2s v0, v0, #5
- shrn2.4s v0, v0, #6
- sli.8b v0, v0, #1
- sli.16b v0, v0, #2
- sli.4h v0, v0, #3
- sli.8h v0, v0, #4
- sli.2s v0, v0, #5
- sli.4s v0, v0, #6
- sli.2d v0, v0, #7
- sqrshrn.8b v0, v0, #1
- sqrshrn2.16b v0, v0, #2
- sqrshrn.4h v0, v0, #3
- sqrshrn2.8h v0, v0, #4
- sqrshrn.2s v0, v0, #5
- sqrshrn2.4s v0, v0, #6
- sqrshrun.8b v0, v0, #1
- sqrshrun2.16b v0, v0, #2
- sqrshrun.4h v0, v0, #3
- sqrshrun2.8h v0, v0, #4
- sqrshrun.2s v0, v0, #5
- sqrshrun2.4s v0, v0, #6
- sqshlu.8b v0, v0, #1
- sqshlu.16b v0, v0, #2
- sqshlu.4h v0, v0, #3
- sqshlu.8h v0, v0, #4
- sqshlu.2s v0, v0, #5
- sqshlu.4s v0, v0, #6
- sqshlu.2d v0, v0, #7
- sqshl.8b v0, v0, #1
- sqshl.16b v0, v0, #2
- sqshl.4h v0, v0, #3
- sqshl.8h v0, v0, #4
- sqshl.2s v0, v0, #5
- sqshl.4s v0, v0, #6
- sqshl.2d v0, v0, #7
- sqshrn.8b v0, v0, #1
- sqshrn2.16b v0, v0, #2
- sqshrn.4h v0, v0, #3
- sqshrn2.8h v0, v0, #4
- sqshrn.2s v0, v0, #5
- sqshrn2.4s v0, v0, #6
- sqshrun.8b v0, v0, #1
- sqshrun2.16b v0, v0, #2
- sqshrun.4h v0, v0, #3
- sqshrun2.8h v0, v0, #4
- sqshrun.2s v0, v0, #5
- sqshrun2.4s v0, v0, #6
- sri.8b v0, v0, #1
- sri.16b v0, v0, #2
- sri.4h v0, v0, #3
- sri.8h v0, v0, #4
- sri.2s v0, v0, #5
- sri.4s v0, v0, #6
- sri.2d v0, v0, #7
- srshr.8b v0, v0, #1
- srshr.16b v0, v0, #2
- srshr.4h v0, v0, #3
- srshr.8h v0, v0, #4
- srshr.2s v0, v0, #5
- srshr.4s v0, v0, #6
- srshr.2d v0, v0, #7
- srsra.8b v0, v0, #1
- srsra.16b v0, v0, #2
- srsra.4h v0, v0, #3
- srsra.8h v0, v0, #4
- srsra.2s v0, v0, #5
- srsra.4s v0, v0, #6
- srsra.2d v0, v0, #7
- sshll.8h v0, v0, #1
- sshll2.8h v0, v0, #2
- sshll.4s v0, v0, #3
- sshll2.4s v0, v0, #4
- sshll.2d v0, v0, #5
- sshll2.2d v0, v0, #6
- sshr.8b v0, v0, #1
- sshr.16b v0, v0, #2
- sshr.4h v0, v0, #3
- sshr.8h v0, v0, #4
- sshr.2s v0, v0, #5
- sshr.4s v0, v0, #6
- sshr.2d v0, v0, #7
- ssra.8b v0, v0, #1
- ssra.16b v0, v0, #2
- ssra.4h v0, v0, #3
- ssra.8h v0, v0, #4
- ssra.2s v0, v0, #5
- ssra.4s v0, v0, #6
- ssra.2d v0, v0, #7
- ssra d0, d0, #64
- ucvtf.2s v0, v0, #1
- ucvtf.4s v0, v0, #2
- ucvtf.2d v0, v0, #3
- uqrshrn.8b v0, v0, #1
- uqrshrn2.16b v0, v0, #2
- uqrshrn.4h v0, v0, #3
- uqrshrn2.8h v0, v0, #4
- uqrshrn.2s v0, v0, #5
- uqrshrn2.4s v0, v0, #6
- uqshl.8b v0, v0, #1
- uqshl.16b v0, v0, #2
- uqshl.4h v0, v0, #3
- uqshl.8h v0, v0, #4
- uqshl.2s v0, v0, #5
- uqshl.4s v0, v0, #6
- uqshl.2d v0, v0, #7
- uqshrn.8b v0, v0, #1
- uqshrn2.16b v0, v0, #2
- uqshrn.4h v0, v0, #3
- uqshrn2.8h v0, v0, #4
- uqshrn.2s v0, v0, #5
- uqshrn2.4s v0, v0, #6
- urshr.8b v0, v0, #1
- urshr.16b v0, v0, #2
- urshr.4h v0, v0, #3
- urshr.8h v0, v0, #4
- urshr.2s v0, v0, #5
- urshr.4s v0, v0, #6
- urshr.2d v0, v0, #7
- ursra.8b v0, v0, #1
- ursra.16b v0, v0, #2
- ursra.4h v0, v0, #3
- ursra.8h v0, v0, #4
- ursra.2s v0, v0, #5
- ursra.4s v0, v0, #6
- ursra.2d v0, v0, #7
- ushll.8h v0, v0, #1
- ushll2.8h v0, v0, #2
- ushll.4s v0, v0, #3
- ushll2.4s v0, v0, #4
- ushll.2d v0, v0, #5
- ushll2.2d v0, v0, #6
- ushr.8b v0, v0, #1
- ushr.16b v0, v0, #2
- ushr.4h v0, v0, #3
- ushr.8h v0, v0, #4
- ushr.2s v0, v0, #5
- ushr.4s v0, v0, #6
- ushr.2d v0, v0, #7
- usra.8b v0, v0, #1
- usra.16b v0, v0, #2
- usra.4h v0, v0, #3
- usra.8h v0, v0, #4
- usra.2s v0, v0, #5
- usra.4s v0, v0, #6
- usra.2d v0, v0, #7
-
-; CHECK: fcvtzs.2s v0, v0, #1 ; encoding: [0x00,0xfc,0x3f,0x0f]
-; CHECK: fcvtzs.4s v0, v0, #2 ; encoding: [0x00,0xfc,0x3e,0x4f]
-; CHECK: fcvtzs.2d v0, v0, #3 ; encoding: [0x00,0xfc,0x7d,0x4f]
-; CHECK: fcvtzu.2s v0, v0, #1 ; encoding: [0x00,0xfc,0x3f,0x2f]
-; CHECK: fcvtzu.4s v0, v0, #2 ; encoding: [0x00,0xfc,0x3e,0x6f]
-; CHECK: fcvtzu.2d v0, v0, #3 ; encoding: [0x00,0xfc,0x7d,0x6f]
-; CHECK: rshrn.8b v0, v0, #1 ; encoding: [0x00,0x8c,0x0f,0x0f]
-; CHECK: rshrn2.16b v0, v0, #2 ; encoding: [0x00,0x8c,0x0e,0x4f]
-; CHECK: rshrn.4h v0, v0, #3 ; encoding: [0x00,0x8c,0x1d,0x0f]
-; CHECK: rshrn2.8h v0, v0, #4 ; encoding: [0x00,0x8c,0x1c,0x4f]
-; CHECK: rshrn.2s v0, v0, #5 ; encoding: [0x00,0x8c,0x3b,0x0f]
-; CHECK: rshrn2.4s v0, v0, #6 ; encoding: [0x00,0x8c,0x3a,0x4f]
-; CHECK: scvtf.2s v0, v0, #1 ; encoding: [0x00,0xe4,0x3f,0x0f]
-; CHECK: scvtf.4s v0, v0, #2 ; encoding: [0x00,0xe4,0x3e,0x4f]
-; CHECK: scvtf.2d v0, v0, #3 ; encoding: [0x00,0xe4,0x7d,0x4f]
-; CHECK: shl.8b v0, v0, #1 ; encoding: [0x00,0x54,0x09,0x0f]
-; CHECK: shl.16b v0, v0, #2 ; encoding: [0x00,0x54,0x0a,0x4f]
-; CHECK: shl.4h v0, v0, #3 ; encoding: [0x00,0x54,0x13,0x0f]
-; CHECK: shl.8h v0, v0, #4 ; encoding: [0x00,0x54,0x14,0x4f]
-; CHECK: shl.2s v0, v0, #5 ; encoding: [0x00,0x54,0x25,0x0f]
-; CHECK: shl.4s v0, v0, #6 ; encoding: [0x00,0x54,0x26,0x4f]
-; CHECK: shl.2d v0, v0, #7 ; encoding: [0x00,0x54,0x47,0x4f]
-; CHECK: shrn.8b v0, v0, #1 ; encoding: [0x00,0x84,0x0f,0x0f]
-; CHECK: shrn2.16b v0, v0, #2 ; encoding: [0x00,0x84,0x0e,0x4f]
-; CHECK: shrn.4h v0, v0, #3 ; encoding: [0x00,0x84,0x1d,0x0f]
-; CHECK: shrn2.8h v0, v0, #4 ; encoding: [0x00,0x84,0x1c,0x4f]
-; CHECK: shrn.2s v0, v0, #5 ; encoding: [0x00,0x84,0x3b,0x0f]
-; CHECK: shrn2.4s v0, v0, #6 ; encoding: [0x00,0x84,0x3a,0x4f]
-; CHECK: sli.8b v0, v0, #1 ; encoding: [0x00,0x54,0x09,0x2f]
-; CHECK: sli.16b v0, v0, #2 ; encoding: [0x00,0x54,0x0a,0x6f]
-; CHECK: sli.4h v0, v0, #3 ; encoding: [0x00,0x54,0x13,0x2f]
-; CHECK: sli.8h v0, v0, #4 ; encoding: [0x00,0x54,0x14,0x6f]
-; CHECK: sli.2s v0, v0, #5 ; encoding: [0x00,0x54,0x25,0x2f]
-; CHECK: sli.4s v0, v0, #6 ; encoding: [0x00,0x54,0x26,0x6f]
-; CHECK: sli.2d v0, v0, #7 ; encoding: [0x00,0x54,0x47,0x6f]
-; CHECK: sqrshrn.8b v0, v0, #1 ; encoding: [0x00,0x9c,0x0f,0x0f]
-; CHECK: sqrshrn2.16b v0, v0, #2 ; encoding: [0x00,0x9c,0x0e,0x4f]
-; CHECK: sqrshrn.4h v0, v0, #3 ; encoding: [0x00,0x9c,0x1d,0x0f]
-; CHECK: sqrshrn2.8h v0, v0, #4 ; encoding: [0x00,0x9c,0x1c,0x4f]
-; CHECK: sqrshrn.2s v0, v0, #5 ; encoding: [0x00,0x9c,0x3b,0x0f]
-; CHECK: sqrshrn2.4s v0, v0, #6 ; encoding: [0x00,0x9c,0x3a,0x4f]
-; CHECK: sqrshrun.8b v0, v0, #1 ; encoding: [0x00,0x8c,0x0f,0x2f]
-; CHECK: sqrshrun2.16b v0, v0, #2 ; encoding: [0x00,0x8c,0x0e,0x6f]
-; CHECK: sqrshrun.4h v0, v0, #3 ; encoding: [0x00,0x8c,0x1d,0x2f]
-; CHECK: sqrshrun2.8h v0, v0, #4 ; encoding: [0x00,0x8c,0x1c,0x6f]
-; CHECK: sqrshrun.2s v0, v0, #5 ; encoding: [0x00,0x8c,0x3b,0x2f]
-; CHECK: sqrshrun2.4s v0, v0, #6 ; encoding: [0x00,0x8c,0x3a,0x6f]
-; CHECK: sqshlu.8b v0, v0, #1 ; encoding: [0x00,0x64,0x09,0x2f]
-; CHECK: sqshlu.16b v0, v0, #2 ; encoding: [0x00,0x64,0x0a,0x6f]
-; CHECK: sqshlu.4h v0, v0, #3 ; encoding: [0x00,0x64,0x13,0x2f]
-; CHECK: sqshlu.8h v0, v0, #4 ; encoding: [0x00,0x64,0x14,0x6f]
-; CHECK: sqshlu.2s v0, v0, #5 ; encoding: [0x00,0x64,0x25,0x2f]
-; CHECK: sqshlu.4s v0, v0, #6 ; encoding: [0x00,0x64,0x26,0x6f]
-; CHECK: sqshlu.2d v0, v0, #7 ; encoding: [0x00,0x64,0x47,0x6f]
-; CHECK: sqshl.8b v0, v0, #1 ; encoding: [0x00,0x74,0x09,0x0f]
-; CHECK: sqshl.16b v0, v0, #2 ; encoding: [0x00,0x74,0x0a,0x4f]
-; CHECK: sqshl.4h v0, v0, #3 ; encoding: [0x00,0x74,0x13,0x0f]
-; CHECK: sqshl.8h v0, v0, #4 ; encoding: [0x00,0x74,0x14,0x4f]
-; CHECK: sqshl.2s v0, v0, #5 ; encoding: [0x00,0x74,0x25,0x0f]
-; CHECK: sqshl.4s v0, v0, #6 ; encoding: [0x00,0x74,0x26,0x4f]
-; CHECK: sqshl.2d v0, v0, #7 ; encoding: [0x00,0x74,0x47,0x4f]
-; CHECK: sqshrn.8b v0, v0, #1 ; encoding: [0x00,0x94,0x0f,0x0f]
-; CHECK: sqshrn2.16b v0, v0, #2 ; encoding: [0x00,0x94,0x0e,0x4f]
-; CHECK: sqshrn.4h v0, v0, #3 ; encoding: [0x00,0x94,0x1d,0x0f]
-; CHECK: sqshrn2.8h v0, v0, #4 ; encoding: [0x00,0x94,0x1c,0x4f]
-; CHECK: sqshrn.2s v0, v0, #5 ; encoding: [0x00,0x94,0x3b,0x0f]
-; CHECK: sqshrn2.4s v0, v0, #6 ; encoding: [0x00,0x94,0x3a,0x4f]
-; CHECK: sqshrun.8b v0, v0, #1 ; encoding: [0x00,0x84,0x0f,0x2f]
-; CHECK: sqshrun2.16b v0, v0, #2 ; encoding: [0x00,0x84,0x0e,0x6f]
-; CHECK: sqshrun.4h v0, v0, #3 ; encoding: [0x00,0x84,0x1d,0x2f]
-; CHECK: sqshrun2.8h v0, v0, #4 ; encoding: [0x00,0x84,0x1c,0x6f]
-; CHECK: sqshrun.2s v0, v0, #5 ; encoding: [0x00,0x84,0x3b,0x2f]
-; CHECK: sqshrun2.4s v0, v0, #6 ; encoding: [0x00,0x84,0x3a,0x6f]
-; CHECK: sri.8b v0, v0, #1 ; encoding: [0x00,0x44,0x0f,0x2f]
-; CHECK: sri.16b v0, v0, #2 ; encoding: [0x00,0x44,0x0e,0x6f]
-; CHECK: sri.4h v0, v0, #3 ; encoding: [0x00,0x44,0x1d,0x2f]
-; CHECK: sri.8h v0, v0, #4 ; encoding: [0x00,0x44,0x1c,0x6f]
-; CHECK: sri.2s v0, v0, #5 ; encoding: [0x00,0x44,0x3b,0x2f]
-; CHECK: sri.4s v0, v0, #6 ; encoding: [0x00,0x44,0x3a,0x6f]
-; CHECK: sri.2d v0, v0, #7 ; encoding: [0x00,0x44,0x79,0x6f]
-; CHECK: srshr.8b v0, v0, #1 ; encoding: [0x00,0x24,0x0f,0x0f]
-; CHECK: srshr.16b v0, v0, #2 ; encoding: [0x00,0x24,0x0e,0x4f]
-; CHECK: srshr.4h v0, v0, #3 ; encoding: [0x00,0x24,0x1d,0x0f]
-; CHECK: srshr.8h v0, v0, #4 ; encoding: [0x00,0x24,0x1c,0x4f]
-; CHECK: srshr.2s v0, v0, #5 ; encoding: [0x00,0x24,0x3b,0x0f]
-; CHECK: srshr.4s v0, v0, #6 ; encoding: [0x00,0x24,0x3a,0x4f]
-; CHECK: srshr.2d v0, v0, #7 ; encoding: [0x00,0x24,0x79,0x4f]
-; CHECK: srsra.8b v0, v0, #1 ; encoding: [0x00,0x34,0x0f,0x0f]
-; CHECK: srsra.16b v0, v0, #2 ; encoding: [0x00,0x34,0x0e,0x4f]
-; CHECK: srsra.4h v0, v0, #3 ; encoding: [0x00,0x34,0x1d,0x0f]
-; CHECK: srsra.8h v0, v0, #4 ; encoding: [0x00,0x34,0x1c,0x4f]
-; CHECK: srsra.2s v0, v0, #5 ; encoding: [0x00,0x34,0x3b,0x0f]
-; CHECK: srsra.4s v0, v0, #6 ; encoding: [0x00,0x34,0x3a,0x4f]
-; CHECK: srsra.2d v0, v0, #7 ; encoding: [0x00,0x34,0x79,0x4f]
-; CHECK: sshll.8h v0, v0, #1 ; encoding: [0x00,0xa4,0x09,0x0f]
-; CHECK: sshll2.8h v0, v0, #2 ; encoding: [0x00,0xa4,0x0a,0x4f]
-; CHECK: sshll.4s v0, v0, #3 ; encoding: [0x00,0xa4,0x13,0x0f]
-; CHECK: sshll2.4s v0, v0, #4 ; encoding: [0x00,0xa4,0x14,0x4f]
-; CHECK: sshll.2d v0, v0, #5 ; encoding: [0x00,0xa4,0x25,0x0f]
-; CHECK: sshll2.2d v0, v0, #6 ; encoding: [0x00,0xa4,0x26,0x4f]
-; CHECK: sshr.8b v0, v0, #1 ; encoding: [0x00,0x04,0x0f,0x0f]
-; CHECK: sshr.16b v0, v0, #2 ; encoding: [0x00,0x04,0x0e,0x4f]
-; CHECK: sshr.4h v0, v0, #3 ; encoding: [0x00,0x04,0x1d,0x0f]
-; CHECK: sshr.8h v0, v0, #4 ; encoding: [0x00,0x04,0x1c,0x4f]
-; CHECK: sshr.2s v0, v0, #5 ; encoding: [0x00,0x04,0x3b,0x0f]
-; CHECK: sshr.4s v0, v0, #6 ; encoding: [0x00,0x04,0x3a,0x4f]
-; CHECK: sshr.2d v0, v0, #7 ; encoding: [0x00,0x04,0x79,0x4f]
-; CHECK: ssra.8b v0, v0, #1 ; encoding: [0x00,0x14,0x0f,0x0f]
-; CHECK: ssra.16b v0, v0, #2 ; encoding: [0x00,0x14,0x0e,0x4f]
-; CHECK: ssra.4h v0, v0, #3 ; encoding: [0x00,0x14,0x1d,0x0f]
-; CHECK: ssra.8h v0, v0, #4 ; encoding: [0x00,0x14,0x1c,0x4f]
-; CHECK: ssra.2s v0, v0, #5 ; encoding: [0x00,0x14,0x3b,0x0f]
-; CHECK: ssra.4s v0, v0, #6 ; encoding: [0x00,0x14,0x3a,0x4f]
-; CHECK: ssra.2d v0, v0, #7 ; encoding: [0x00,0x14,0x79,0x4f]
-; CHECK: ssra d0, d0, #64 ; encoding: [0x00,0x14,0x40,0x5f]
-; CHECK: ucvtf.2s v0, v0, #1 ; encoding: [0x00,0xe4,0x3f,0x2f]
-; CHECK: ucvtf.4s v0, v0, #2 ; encoding: [0x00,0xe4,0x3e,0x6f]
-; CHECK: ucvtf.2d v0, v0, #3 ; encoding: [0x00,0xe4,0x7d,0x6f]
-; CHECK: uqrshrn.8b v0, v0, #1 ; encoding: [0x00,0x9c,0x0f,0x2f]
-; CHECK: uqrshrn2.16b v0, v0, #2 ; encoding: [0x00,0x9c,0x0e,0x6f]
-; CHECK: uqrshrn.4h v0, v0, #3 ; encoding: [0x00,0x9c,0x1d,0x2f]
-; CHECK: uqrshrn2.8h v0, v0, #4 ; encoding: [0x00,0x9c,0x1c,0x6f]
-; CHECK: uqrshrn.2s v0, v0, #5 ; encoding: [0x00,0x9c,0x3b,0x2f]
-; CHECK: uqrshrn2.4s v0, v0, #6 ; encoding: [0x00,0x9c,0x3a,0x6f]
-; CHECK: uqshl.8b v0, v0, #1 ; encoding: [0x00,0x74,0x09,0x2f]
-; CHECK: uqshl.16b v0, v0, #2 ; encoding: [0x00,0x74,0x0a,0x6f]
-; CHECK: uqshl.4h v0, v0, #3 ; encoding: [0x00,0x74,0x13,0x2f]
-; CHECK: uqshl.8h v0, v0, #4 ; encoding: [0x00,0x74,0x14,0x6f]
-; CHECK: uqshl.2s v0, v0, #5 ; encoding: [0x00,0x74,0x25,0x2f]
-; CHECK: uqshl.4s v0, v0, #6 ; encoding: [0x00,0x74,0x26,0x6f]
-; CHECK: uqshl.2d v0, v0, #7 ; encoding: [0x00,0x74,0x47,0x6f]
-; CHECK: uqshrn.8b v0, v0, #1 ; encoding: [0x00,0x94,0x0f,0x2f]
-; CHECK: uqshrn2.16b v0, v0, #2 ; encoding: [0x00,0x94,0x0e,0x6f]
-; CHECK: uqshrn.4h v0, v0, #3 ; encoding: [0x00,0x94,0x1d,0x2f]
-; CHECK: uqshrn2.8h v0, v0, #4 ; encoding: [0x00,0x94,0x1c,0x6f]
-; CHECK: uqshrn.2s v0, v0, #5 ; encoding: [0x00,0x94,0x3b,0x2f]
-; CHECK: uqshrn2.4s v0, v0, #6 ; encoding: [0x00,0x94,0x3a,0x6f]
-; CHECK: urshr.8b v0, v0, #1 ; encoding: [0x00,0x24,0x0f,0x2f]
-; CHECK: urshr.16b v0, v0, #2 ; encoding: [0x00,0x24,0x0e,0x6f]
-; CHECK: urshr.4h v0, v0, #3 ; encoding: [0x00,0x24,0x1d,0x2f]
-; CHECK: urshr.8h v0, v0, #4 ; encoding: [0x00,0x24,0x1c,0x6f]
-; CHECK: urshr.2s v0, v0, #5 ; encoding: [0x00,0x24,0x3b,0x2f]
-; CHECK: urshr.4s v0, v0, #6 ; encoding: [0x00,0x24,0x3a,0x6f]
-; CHECK: urshr.2d v0, v0, #7 ; encoding: [0x00,0x24,0x79,0x6f]
-; CHECK: ursra.8b v0, v0, #1 ; encoding: [0x00,0x34,0x0f,0x2f]
-; CHECK: ursra.16b v0, v0, #2 ; encoding: [0x00,0x34,0x0e,0x6f]
-; CHECK: ursra.4h v0, v0, #3 ; encoding: [0x00,0x34,0x1d,0x2f]
-; CHECK: ursra.8h v0, v0, #4 ; encoding: [0x00,0x34,0x1c,0x6f]
-; CHECK: ursra.2s v0, v0, #5 ; encoding: [0x00,0x34,0x3b,0x2f]
-; CHECK: ursra.4s v0, v0, #6 ; encoding: [0x00,0x34,0x3a,0x6f]
-; CHECK: ursra.2d v0, v0, #7 ; encoding: [0x00,0x34,0x79,0x6f]
-; CHECK: ushll.8h v0, v0, #1 ; encoding: [0x00,0xa4,0x09,0x2f]
-; CHECK: ushll2.8h v0, v0, #2 ; encoding: [0x00,0xa4,0x0a,0x6f]
-; CHECK: ushll.4s v0, v0, #3 ; encoding: [0x00,0xa4,0x13,0x2f]
-; CHECK: ushll2.4s v0, v0, #4 ; encoding: [0x00,0xa4,0x14,0x6f]
-; CHECK: ushll.2d v0, v0, #5 ; encoding: [0x00,0xa4,0x25,0x2f]
-; CHECK: ushll2.2d v0, v0, #6 ; encoding: [0x00,0xa4,0x26,0x6f]
-; CHECK: ushr.8b v0, v0, #1 ; encoding: [0x00,0x04,0x0f,0x2f]
-; CHECK: ushr.16b v0, v0, #2 ; encoding: [0x00,0x04,0x0e,0x6f]
-; CHECK: ushr.4h v0, v0, #3 ; encoding: [0x00,0x04,0x1d,0x2f]
-; CHECK: ushr.8h v0, v0, #4 ; encoding: [0x00,0x04,0x1c,0x6f]
-; CHECK: ushr.2s v0, v0, #5 ; encoding: [0x00,0x04,0x3b,0x2f]
-; CHECK: ushr.4s v0, v0, #6 ; encoding: [0x00,0x04,0x3a,0x6f]
-; CHECK: ushr.2d v0, v0, #7 ; encoding: [0x00,0x04,0x79,0x6f]
-; CHECK: usra.8b v0, v0, #1 ; encoding: [0x00,0x14,0x0f,0x2f]
-; CHECK: usra.16b v0, v0, #2 ; encoding: [0x00,0x14,0x0e,0x6f]
-; CHECK: usra.4h v0, v0, #3 ; encoding: [0x00,0x14,0x1d,0x2f]
-; CHECK: usra.8h v0, v0, #4 ; encoding: [0x00,0x14,0x1c,0x6f]
-; CHECK: usra.2s v0, v0, #5 ; encoding: [0x00,0x14,0x3b,0x2f]
-; CHECK: usra.4s v0, v0, #6 ; encoding: [0x00,0x14,0x3a,0x6f]
-; CHECK: usra.2d v0, v0, #7 ; encoding: [0x00,0x14,0x79,0x6f]
-
-
-; ARM verbose syntax variants.
-
- rshrn v9.8b, v11.8h, #1
- rshrn2 v8.16b, v9.8h, #2
- rshrn v7.4h, v8.4s, #3
- rshrn2 v6.8h, v7.4s, #4
- rshrn v5.2s, v6.2d, #5
- rshrn2 v4.4s, v5.2d, #6
-
- shrn v9.8b, v11.8h, #1
- shrn2 v8.16b, v9.8h, #2
- shrn v7.4h, v8.4s, #3
- shrn2 v6.8h, v7.4s, #4
- shrn v5.2s, v6.2d, #5
- shrn2 v4.4s, v5.2d, #6
-
- sqrshrn v9.8b, v11.8h, #1
- sqrshrn2 v8.16b, v9.8h, #2
- sqrshrn v7.4h, v8.4s, #3
- sqrshrn2 v6.8h, v7.4s, #4
- sqrshrn v5.2s, v6.2d, #5
- sqrshrn2 v4.4s, v5.2d, #6
-
- sqshrn v9.8b, v11.8h, #1
- sqshrn2 v8.16b, v9.8h, #2
- sqshrn v7.4h, v8.4s, #3
- sqshrn2 v6.8h, v7.4s, #4
- sqshrn v5.2s, v6.2d, #5
- sqshrn2 v4.4s, v5.2d, #6
-
- sqrshrun v9.8b, v11.8h, #1
- sqrshrun2 v8.16b, v9.8h, #2
- sqrshrun v7.4h, v8.4s, #3
- sqrshrun2 v6.8h, v7.4s, #4
- sqrshrun v5.2s, v6.2d, #5
- sqrshrun2 v4.4s, v5.2d, #6
-
- sqshrun v9.8b, v11.8h, #1
- sqshrun2 v8.16b, v9.8h, #2
- sqshrun v7.4h, v8.4s, #3
- sqshrun2 v6.8h, v7.4s, #4
- sqshrun v5.2s, v6.2d, #5
- sqshrun2 v4.4s, v5.2d, #6
-
- uqrshrn v9.8b, v11.8h, #1
- uqrshrn2 v8.16b, v9.8h, #2
- uqrshrn v7.4h, v8.4s, #3
- uqrshrn2 v6.8h, v7.4s, #4
- uqrshrn v5.2s, v6.2d, #5
- uqrshrn2 v4.4s, v5.2d, #6
-
- uqshrn v9.8b, v11.8h, #1
- uqshrn2 v8.16b, v9.8h, #2
- uqshrn v7.4h, v8.4s, #3
- uqshrn2 v6.8h, v7.4s, #4
- uqshrn v5.2s, v6.2d, #5
- uqshrn2 v4.4s, v5.2d, #6
-
- sshll2 v10.8h, v3.16b, #6
- sshll2 v11.4s, v4.8h, #5
- sshll2 v12.2d, v5.4s, #4
- sshll v13.8h, v6.8b, #3
- sshll v14.4s, v7.4h, #2
- sshll v15.2d, v8.2s, #7
-
- ushll2 v10.8h, v3.16b, #6
- ushll2 v11.4s, v4.8h, #5
- ushll2 v12.2d, v5.4s, #4
- ushll v13.8h, v6.8b, #3
- ushll v14.4s, v7.4h, #2
- ushll v15.2d, v8.2s, #7
-
-
-; CHECK: rshrn.8b v9, v11, #1 ; encoding: [0x69,0x8d,0x0f,0x0f]
-; CHECK: rshrn2.16b v8, v9, #2 ; encoding: [0x28,0x8d,0x0e,0x4f]
-; CHECK: rshrn.4h v7, v8, #3 ; encoding: [0x07,0x8d,0x1d,0x0f]
-; CHECK: rshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x8c,0x1c,0x4f]
-; CHECK: rshrn.2s v5, v6, #5 ; encoding: [0xc5,0x8c,0x3b,0x0f]
-; CHECK: rshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x8c,0x3a,0x4f]
-; CHECK: shrn.8b v9, v11, #1 ; encoding: [0x69,0x85,0x0f,0x0f]
-; CHECK: shrn2.16b v8, v9, #2 ; encoding: [0x28,0x85,0x0e,0x4f]
-; CHECK: shrn.4h v7, v8, #3 ; encoding: [0x07,0x85,0x1d,0x0f]
-; CHECK: shrn2.8h v6, v7, #4 ; encoding: [0xe6,0x84,0x1c,0x4f]
-; CHECK: shrn.2s v5, v6, #5 ; encoding: [0xc5,0x84,0x3b,0x0f]
-; CHECK: shrn2.4s v4, v5, #6 ; encoding: [0xa4,0x84,0x3a,0x4f]
-; CHECK: sqrshrn.8b v9, v11, #1 ; encoding: [0x69,0x9d,0x0f,0x0f]
-; CHECK: sqrshrn2.16b v8, v9, #2 ; encoding: [0x28,0x9d,0x0e,0x4f]
-; CHECK: sqrshrn.4h v7, v8, #3 ; encoding: [0x07,0x9d,0x1d,0x0f]
-; CHECK: sqrshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x9c,0x1c,0x4f]
-; CHECK: sqrshrn.2s v5, v6, #5 ; encoding: [0xc5,0x9c,0x3b,0x0f]
-; CHECK: sqrshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x9c,0x3a,0x4f]
-; CHECK: sqshrn.8b v9, v11, #1 ; encoding: [0x69,0x95,0x0f,0x0f]
-; CHECK: sqshrn2.16b v8, v9, #2 ; encoding: [0x28,0x95,0x0e,0x4f]
-; CHECK: sqshrn.4h v7, v8, #3 ; encoding: [0x07,0x95,0x1d,0x0f]
-; CHECK: sqshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x94,0x1c,0x4f]
-; CHECK: sqshrn.2s v5, v6, #5 ; encoding: [0xc5,0x94,0x3b,0x0f]
-; CHECK: sqshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x94,0x3a,0x4f]
-; CHECK: sqrshrun.8b v9, v11, #1 ; encoding: [0x69,0x8d,0x0f,0x2f]
-; CHECK: sqrshrun2.16b v8, v9, #2 ; encoding: [0x28,0x8d,0x0e,0x6f]
-; CHECK: sqrshrun.4h v7, v8, #3 ; encoding: [0x07,0x8d,0x1d,0x2f]
-; CHECK: sqrshrun2.8h v6, v7, #4 ; encoding: [0xe6,0x8c,0x1c,0x6f]
-; CHECK: sqrshrun.2s v5, v6, #5 ; encoding: [0xc5,0x8c,0x3b,0x2f]
-; CHECK: sqrshrun2.4s v4, v5, #6 ; encoding: [0xa4,0x8c,0x3a,0x6f]
-; CHECK: sqshrun.8b v9, v11, #1 ; encoding: [0x69,0x85,0x0f,0x2f]
-; CHECK: sqshrun2.16b v8, v9, #2 ; encoding: [0x28,0x85,0x0e,0x6f]
-; CHECK: sqshrun.4h v7, v8, #3 ; encoding: [0x07,0x85,0x1d,0x2f]
-; CHECK: sqshrun2.8h v6, v7, #4 ; encoding: [0xe6,0x84,0x1c,0x6f]
-; CHECK: sqshrun.2s v5, v6, #5 ; encoding: [0xc5,0x84,0x3b,0x2f]
-; CHECK: sqshrun2.4s v4, v5, #6 ; encoding: [0xa4,0x84,0x3a,0x6f]
-; CHECK: uqrshrn.8b v9, v11, #1 ; encoding: [0x69,0x9d,0x0f,0x2f]
-; CHECK: uqrshrn2.16b v8, v9, #2 ; encoding: [0x28,0x9d,0x0e,0x6f]
-; CHECK: uqrshrn.4h v7, v8, #3 ; encoding: [0x07,0x9d,0x1d,0x2f]
-; CHECK: uqrshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x9c,0x1c,0x6f]
-; CHECK: uqrshrn.2s v5, v6, #5 ; encoding: [0xc5,0x9c,0x3b,0x2f]
-; CHECK: uqrshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x9c,0x3a,0x6f]
-; CHECK: uqshrn.8b v9, v11, #1 ; encoding: [0x69,0x95,0x0f,0x2f]
-; CHECK: uqshrn2.16b v8, v9, #2 ; encoding: [0x28,0x95,0x0e,0x6f]
-; CHECK: uqshrn.4h v7, v8, #3 ; encoding: [0x07,0x95,0x1d,0x2f]
-; CHECK: uqshrn2.8h v6, v7, #4 ; encoding: [0xe6,0x94,0x1c,0x6f]
-; CHECK: uqshrn.2s v5, v6, #5 ; encoding: [0xc5,0x94,0x3b,0x2f]
-; CHECK: uqshrn2.4s v4, v5, #6 ; encoding: [0xa4,0x94,0x3a,0x6f]
-; CHECK: sshll2.8h v10, v3, #6 ; encoding: [0x6a,0xa4,0x0e,0x4f]
-; CHECK: sshll2.4s v11, v4, #5 ; encoding: [0x8b,0xa4,0x15,0x4f]
-; CHECK: sshll2.2d v12, v5, #4 ; encoding: [0xac,0xa4,0x24,0x4f]
-; CHECK: sshll.8h v13, v6, #3 ; encoding: [0xcd,0xa4,0x0b,0x0f]
-; CHECK: sshll.4s v14, v7, #2 ; encoding: [0xee,0xa4,0x12,0x0f]
-; CHECK: sshll.2d v15, v8, #7 ; encoding: [0x0f,0xa5,0x27,0x0f]
-; CHECK: ushll2.8h v10, v3, #6 ; encoding: [0x6a,0xa4,0x0e,0x6f]
-; CHECK: ushll2.4s v11, v4, #5 ; encoding: [0x8b,0xa4,0x15,0x6f]
-; CHECK: ushll2.2d v12, v5, #4 ; encoding: [0xac,0xa4,0x24,0x6f]
-; CHECK: ushll.8h v13, v6, #3 ; encoding: [0xcd,0xa4,0x0b,0x2f]
-; CHECK: ushll.4s v14, v7, #2 ; encoding: [0xee,0xa4,0x12,0x2f]
-; CHECK: ushll.2d v15, v8, #7 ; encoding: [0x0f,0xa5,0x27,0x2f]
-
-
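-; pmull/pmull2 with the .1q arrangement are the 64x64 -> 128-bit polynomial
-; multiplies; these require the optional Crypto extension.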
- pmull.8h v0, v0, v0
- pmull2.8h v0, v0, v0
- pmull.1q v2, v3, v4
- pmull2.1q v2, v3, v4
- pmull v2.1q, v3.1d, v4.1d
- pmull2 v2.1q, v3.2d, v4.2d
-
-; CHECK: pmull.8h v0, v0, v0 ; encoding: [0x00,0xe0,0x20,0x0e]
-; CHECK: pmull2.8h v0, v0, v0 ; encoding: [0x00,0xe0,0x20,0x4e]
-; CHECK: pmull.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x0e]
-; CHECK: pmull2.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x4e]
-; CHECK: pmull.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x0e]
-; CHECK: pmull2.1q v2, v3, v4 ; encoding: [0x62,0xe0,0xe4,0x4e]
-
-
- faddp.2d d1, v2
- faddp.2s s3, v4
-; CHECK: faddp.2d d1, v2 ; encoding: [0x41,0xd8,0x70,0x7e]
-; CHECK: faddp.2s s3, v4 ; encoding: [0x83,0xd8,0x30,0x7e]
-
- tbl.16b v2, {v4,v5,v6,v7}, v1
- tbl.8b v0, {v4,v5,v6,v7}, v1
- tbl.16b v2, {v5}, v1
- tbl.8b v0, {v5}, v1
- tbl.16b v2, {v5,v6,v7}, v1
- tbl.8b v0, {v5,v6,v7}, v1
- tbl.16b v2, {v6,v7}, v1
- tbl.8b v0, {v6,v7}, v1
-; CHECK: tbl.16b v2, { v4, v5, v6, v7 }, v1 ; encoding: [0x82,0x60,0x01,0x4e]
-; CHECK: tbl.8b v0, { v4, v5, v6, v7 }, v1 ; encoding: [0x80,0x60,0x01,0x0e]
-; CHECK: tbl.16b v2, { v5 }, v1 ; encoding: [0xa2,0x00,0x01,0x4e]
-; CHECK: tbl.8b v0, { v5 }, v1 ; encoding: [0xa0,0x00,0x01,0x0e]
-; CHECK: tbl.16b v2, { v5, v6, v7 }, v1 ; encoding: [0xa2,0x40,0x01,0x4e]
-; CHECK: tbl.8b v0, { v5, v6, v7 }, v1 ; encoding: [0xa0,0x40,0x01,0x0e]
-; CHECK: tbl.16b v2, { v6, v7 }, v1 ; encoding: [0xc2,0x20,0x01,0x4e]
-; CHECK: tbl.8b v0, { v6, v7 }, v1 ; encoding: [0xc0,0x20,0x01,0x0e]
-
- tbl v2.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v1.16b
- tbl v0.8b, {v4.16b,v5.16b,v6.16b,v7.16b}, v1.8b
- tbl v2.16b, {v5.16b}, v1.16b
- tbl v0.8b, {v5.16b}, v1.8b
- tbl v2.16b, {v5.16b,v6.16b,v7.16b}, v1.16b
- tbl v0.8b, {v5.16b,v6.16b,v7.16b}, v1.8b
- tbl v2.16b, {v6.16b,v7.16b}, v1.16b
- tbl v0.8b, {v6.16b,v7.16b}, v1.8b
-; CHECK: tbl.16b v2, { v4, v5, v6, v7 }, v1 ; encoding: [0x82,0x60,0x01,0x4e]
-; CHECK: tbl.8b v0, { v4, v5, v6, v7 }, v1 ; encoding: [0x80,0x60,0x01,0x0e]
-; CHECK: tbl.16b v2, { v5 }, v1 ; encoding: [0xa2,0x00,0x01,0x4e]
-; CHECK: tbl.8b v0, { v5 }, v1 ; encoding: [0xa0,0x00,0x01,0x0e]
-; CHECK: tbl.16b v2, { v5, v6, v7 }, v1 ; encoding: [0xa2,0x40,0x01,0x4e]
-; CHECK: tbl.8b v0, { v5, v6, v7 }, v1 ; encoding: [0xa0,0x40,0x01,0x0e]
-; CHECK: tbl.16b v2, { v6, v7 }, v1 ; encoding: [0xc2,0x20,0x01,0x4e]
-; CHECK: tbl.8b v0, { v6, v7 }, v1 ; encoding: [0xc0,0x20,0x01,0x0e]
-
- sqdmull s0, h0, h0
- sqdmull d0, s0, s0
-; CHECK: sqdmull s0, h0, h0 ; encoding: [0x00,0xd0,0x60,0x5e]
-; CHECK: sqdmull d0, s0, s0 ; encoding: [0x00,0xd0,0xa0,0x5e]
-
- frsqrte s0, s0
- frsqrte d0, d0
-; CHECK: frsqrte s0, s0 ; encoding: [0x00,0xd8,0xa1,0x7e]
-; CHECK: frsqrte d0, d0 ; encoding: [0x00,0xd8,0xe1,0x7e]
-
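-; Vector register MOV is an alias for ORR with both source registers the
-; same, as the CHECK lines show.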
- mov.16b v0, v0
- mov.2s v0, v0
-; CHECK: orr.16b v0, v0, v0 ; encoding: [0x00,0x1c,0xa0,0x4e]
-; CHECK: orr.8b v0, v0, v0 ; encoding: [0x00,0x1c,0xa0,0x0e]
-
-
-; uadalp/sadalp verbose mode aliases.
- uadalp v14.4h, v25.8b
- uadalp v15.8h, v24.16b
- uadalp v16.2s, v23.4h
- uadalp v17.4s, v22.8h
- uadalp v18.1d, v21.2s
- uadalp v19.2d, v20.4s
-
- sadalp v1.4h, v11.8b
- sadalp v2.8h, v12.16b
- sadalp v3.2s, v13.4h
- sadalp v4.4s, v14.8h
- sadalp v5.1d, v15.2s
- sadalp v6.2d, v16.4s
-
-; CHECK: uadalp.4h v14, v25 ; encoding: [0x2e,0x6b,0x20,0x2e]
-; CHECK: uadalp.8h v15, v24 ; encoding: [0x0f,0x6b,0x20,0x6e]
-; CHECK: uadalp.2s v16, v23 ; encoding: [0xf0,0x6a,0x60,0x2e]
-; CHECK: uadalp.4s v17, v22 ; encoding: [0xd1,0x6a,0x60,0x6e]
-; CHECK: uadalp.1d v18, v21 ; encoding: [0xb2,0x6a,0xa0,0x2e]
-; CHECK: uadalp.2d v19, v20 ; encoding: [0x93,0x6a,0xa0,0x6e]
-; CHECK: sadalp.4h v1, v11 ; encoding: [0x61,0x69,0x20,0x0e]
-; CHECK: sadalp.8h v2, v12 ; encoding: [0x82,0x69,0x20,0x4e]
-; CHECK: sadalp.2s v3, v13 ; encoding: [0xa3,0x69,0x60,0x0e]
-; CHECK: sadalp.4s v4, v14 ; encoding: [0xc4,0x69,0x60,0x4e]
-; CHECK: sadalp.1d v5, v15 ; encoding: [0xe5,0x69,0xa0,0x0e]
-; CHECK: sadalp.2d v6, v16 ; encoding: [0x06,0x6a,0xa0,0x4e]
-
-; MVN is an alias for 'not'.
- mvn v1.8b, v4.8b
- mvn v19.16b, v17.16b
- mvn.8b v10, v6
- mvn.16b v11, v7
-
-; CHECK: not.8b v1, v4 ; encoding: [0x81,0x58,0x20,0x2e]
-; CHECK: not.16b v19, v17 ; encoding: [0x33,0x5a,0x20,0x6e]
-; CHECK: not.8b v10, v6 ; encoding: [0xca,0x58,0x20,0x2e]
-; CHECK: not.16b v11, v7 ; encoding: [0xeb,0x58,0x20,0x6e]
-
-; sqdmull verbose mode aliases
- sqdmull v10.4s, v12.4h, v12.4h
- sqdmull2 v10.4s, v13.8h, v13.8h
- sqdmull v10.2d, v13.2s, v13.2s
- sqdmull2 v10.2d, v13.4s, v13.4s
-; CHECK: sqdmull.4s v10, v12, v12 ; encoding: [0x8a,0xd1,0x6c,0x0e]
-; CHECK: sqdmull2.4s v10, v13, v13 ; encoding: [0xaa,0xd1,0x6d,0x4e]
-; CHECK: sqdmull.2d v10, v13, v13 ; encoding: [0xaa,0xd1,0xad,0x0e]
-; CHECK: sqdmull2.2d v10, v13, v13 ; encoding: [0xaa,0xd1,0xad,0x4e]
-
-; xtn verbose mode aliases
- xtn v14.8b, v14.8h
- xtn2 v14.16b, v14.8h
- xtn v14.4h, v14.4s
- xtn2 v14.8h, v14.4s
- xtn v14.2s, v14.2d
- xtn2 v14.4s, v14.2d
-; CHECK: xtn.8b v14, v14 ; encoding: [0xce,0x29,0x21,0x0e]
-; CHECK: xtn2.16b v14, v14 ; encoding: [0xce,0x29,0x21,0x4e]
-; CHECK: xtn.4h v14, v14 ; encoding: [0xce,0x29,0x61,0x0e]
-; CHECK: xtn2.8h v14, v14 ; encoding: [0xce,0x29,0x61,0x4e]
-; CHECK: xtn.2s v14, v14 ; encoding: [0xce,0x29,0xa1,0x0e]
-; CHECK: xtn2.4s v14, v14 ; encoding: [0xce,0x29,0xa1,0x4e]
-
-; uaddl verbose mode aliases
- uaddl v9.8h, v13.8b, v14.8b
- uaddl2 v9.8h, v13.16b, v14.16b
- uaddl v9.4s, v13.4h, v14.4h
- uaddl2 v9.4s, v13.8h, v14.8h
- uaddl v9.2d, v13.2s, v14.2s
- uaddl2 v9.2d, v13.4s, v14.4s
-; CHECK: uaddl.8h v9, v13, v14 ; encoding: [0xa9,0x01,0x2e,0x2e]
-; CHECK: uaddl2.8h v9, v13, v14 ; encoding: [0xa9,0x01,0x2e,0x6e]
-; CHECK: uaddl.4s v9, v13, v14 ; encoding: [0xa9,0x01,0x6e,0x2e]
-; CHECK: uaddl2.4s v9, v13, v14 ; encoding: [0xa9,0x01,0x6e,0x6e]
-; CHECK: uaddl.2d v9, v13, v14 ; encoding: [0xa9,0x01,0xae,0x2e]
-; CHECK: uaddl2.2d v9, v13, v14 ; encoding: [0xa9,0x01,0xae,0x6e]
-
-; bit verbose mode aliases
- bit v9.16b, v10.16b, v10.16b
- bit v9.8b, v10.8b, v10.8b
-; CHECK: bit.16b v9, v10, v10 ; encoding: [0x49,0x1d,0xaa,0x6e]
-; CHECK: bit.8b v9, v10, v10 ; encoding: [0x49,0x1d,0xaa,0x2e]
-
-; pmull verbose mode aliases
- pmull v8.8h, v8.8b, v8.8b
- pmull2 v8.8h, v8.16b, v8.16b
- pmull v8.1q, v8.1d, v8.1d
- pmull2 v8.1q, v8.2d, v8.2d
-; CHECK: pmull.8h v8, v8, v8 ; encoding: [0x08,0xe1,0x28,0x0e]
-; CHECK: pmull2.8h v8, v8, v8 ; encoding: [0x08,0xe1,0x28,0x4e]
-; CHECK: pmull.1q v8, v8, v8 ; encoding: [0x08,0xe1,0xe8,0x0e]
-; CHECK: pmull2.1q v8, v8, v8 ; encoding: [0x08,0xe1,0xe8,0x4e]
-
-; usubl verbose mode aliases
- usubl v9.8h, v13.8b, v14.8b
- usubl2 v9.8h, v13.16b, v14.16b
- usubl v9.4s, v13.4h, v14.4h
- usubl2 v9.4s, v13.8h, v14.8h
- usubl v9.2d, v13.2s, v14.2s
- usubl2 v9.2d, v13.4s, v14.4s
-; CHECK: usubl.8h v9, v13, v14 ; encoding: [0xa9,0x21,0x2e,0x2e]
-; CHECK: usubl2.8h v9, v13, v14 ; encoding: [0xa9,0x21,0x2e,0x6e]
-; CHECK: usubl.4s v9, v13, v14 ; encoding: [0xa9,0x21,0x6e,0x2e]
-; CHECK: usubl2.4s v9, v13, v14 ; encoding: [0xa9,0x21,0x6e,0x6e]
-; CHECK: usubl.2d v9, v13, v14 ; encoding: [0xa9,0x21,0xae,0x2e]
-; CHECK: usubl2.2d v9, v13, v14 ; encoding: [0xa9,0x21,0xae,0x6e]
-
-; uabdl verbose mode aliases
- uabdl v9.8h, v13.8b, v14.8b
- uabdl2 v9.8h, v13.16b, v14.16b
- uabdl v9.4s, v13.4h, v14.4h
- uabdl2 v9.4s, v13.8h, v14.8h
- uabdl v9.2d, v13.2s, v14.2s
- uabdl2 v9.2d, v13.4s, v14.4s
-; CHECK: uabdl.8h v9, v13, v14 ; encoding: [0xa9,0x71,0x2e,0x2e]
-; CHECK: uabdl2.8h v9, v13, v14 ; encoding: [0xa9,0x71,0x2e,0x6e]
-; CHECK: uabdl.4s v9, v13, v14 ; encoding: [0xa9,0x71,0x6e,0x2e]
-; CHECK: uabdl2.4s v9, v13, v14 ; encoding: [0xa9,0x71,0x6e,0x6e]
-; CHECK: uabdl.2d v9, v13, v14 ; encoding: [0xa9,0x71,0xae,0x2e]
-; CHECK: uabdl2.2d v9, v13, v14 ; encoding: [0xa9,0x71,0xae,0x6e]
-
-; umull verbose mode aliases
- umull v9.8h, v13.8b, v14.8b
- umull2 v9.8h, v13.16b, v14.16b
- umull v9.4s, v13.4h, v14.4h
- umull2 v9.4s, v13.8h, v14.8h
- umull v9.2d, v13.2s, v14.2s
- umull2 v9.2d, v13.4s, v14.4s
-; CHECK: umull.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x2e]
-; CHECK: umull2.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x6e]
-; CHECK: umull.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x2e]
-; CHECK: umull2.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x6e]
-; CHECK: umull.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x2e]
-; CHECK: umull2.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x6e]
-
-; smull verbose mode aliases
- smull v9.8h, v13.8b, v14.8b
- smull2 v9.8h, v13.16b, v14.16b
- smull v9.4s, v13.4h, v14.4h
- smull2 v9.4s, v13.8h, v14.8h
- smull v9.2d, v13.2s, v14.2s
- smull2 v9.2d, v13.4s, v14.4s
-; CHECK: smull.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x0e]
-; CHECK: smull2.8h v9, v13, v14 ; encoding: [0xa9,0xc1,0x2e,0x4e]
-; CHECK: smull.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x0e]
-; CHECK: smull2.4s v9, v13, v14 ; encoding: [0xa9,0xc1,0x6e,0x4e]
-; CHECK: smull.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x0e]
-; CHECK: smull2.2d v9, v13, v14 ; encoding: [0xa9,0xc1,0xae,0x4e]
diff --git a/test/MC/ARM64/aliases.s b/test/MC/ARM64/aliases.s
deleted file mode 100644
index 055edb5..0000000
--- a/test/MC/ARM64/aliases.s
+++ /dev/null
@@ -1,733 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 -show-encoding < %s | FileCheck %s
-
-foo:
-;-----------------------------------------------------------------------------
-; ADD #0 to/from SP/WSP is a MOV
-;-----------------------------------------------------------------------------
- add x1, sp, #0
-; CHECK: mov x1, sp
- add sp, x2, #0
-; CHECK: mov sp, x2
- add w3, wsp, #0
-; CHECK: mov w3, wsp
- add wsp, w4, #0
-; CHECK: mov wsp, w4
- mov x5, sp
-; CHECK: mov x5, sp
- mov sp, x6
-; CHECK: mov sp, x6
- mov w7, wsp
-; CHECK: mov w7, wsp
- mov wsp, w8
-; CHECK: mov wsp, w8
-
-;-----------------------------------------------------------------------------
-; ORR Rd, Rn, Rn is a MOV
-;-----------------------------------------------------------------------------
- orr x2, xzr, x9
-; CHECK: mov x2, x9
- orr w2, wzr, w9
-; CHECK: mov w2, w9
- mov x3, x4
-; CHECK: mov x3, x4
- mov w5, w6
-; CHECK: mov w5, w6
-
-;-----------------------------------------------------------------------------
-; TST Xn, #<imm>
-;-----------------------------------------------------------------------------
- tst w1, #3
- tst x1, #3
- tst w1, w2
- tst x1, x2
- ands wzr, w1, w2, lsl #2
- ands xzr, x1, x2, lsl #3
- tst w3, w7, lsl #31
- tst x2, x20, asr #0
-
-; CHECK: tst w1, #0x3 ; encoding: [0x3f,0x04,0x00,0x72]
-; CHECK: tst x1, #0x3 ; encoding: [0x3f,0x04,0x40,0xf2]
-; CHECK: tst w1, w2 ; encoding: [0x3f,0x00,0x02,0x6a]
-; CHECK: tst x1, x2 ; encoding: [0x3f,0x00,0x02,0xea]
-; CHECK: tst w1, w2, lsl #2 ; encoding: [0x3f,0x08,0x02,0x6a]
-; CHECK: tst x1, x2, lsl #3 ; encoding: [0x3f,0x0c,0x02,0xea]
-; CHECK: tst w3, w7, lsl #31 ; encoding: [0x7f,0x7c,0x07,0x6a]
-; CHECK: tst x2, x20, asr #0 ; encoding: [0x5f,0x00,0x94,0xea]
-
-;-----------------------------------------------------------------------------
-; ADDS to WZR/XZR is a CMN
-;-----------------------------------------------------------------------------
- cmn w1, #3, lsl #0
- cmn x2, #4194304
- cmn w4, w5
- cmn x6, x7
- cmn w8, w9, asr #3
- cmn x2, x3, lsr #4
- cmn x2, w3, uxtb #1
- cmn x4, x5, uxtx #1
-
-; CHECK: cmn w1, #3 ; encoding: [0x3f,0x0c,0x00,0x31]
-; CHECK: cmn x2, #4194304 ; encoding: [0x5f,0x00,0x50,0xb1]
-; CHECK: cmn w4, w5 ; encoding: [0x9f,0x00,0x05,0x2b]
-; CHECK: cmn x6, x7 ; encoding: [0xdf,0x00,0x07,0xab]
-; CHECK: cmn w8, w9, asr #3 ; encoding: [0x1f,0x0d,0x89,0x2b]
-; CHECK: cmn x2, x3, lsr #4 ; encoding: [0x5f,0x10,0x43,0xab]
-; CHECK: cmn x2, w3, uxtb #1 ; encoding: [0x5f,0x04,0x23,0xab]
-; CHECK: cmn x4, x5, uxtx #1 ; encoding: [0x9f,0x64,0x25,0xab]
-
-
-;-----------------------------------------------------------------------------
-; SUBS to WZR/XZR is a CMP
-;-----------------------------------------------------------------------------
- cmp w1, #1024, lsl #12
- cmp x2, #1024
- cmp w4, w5
- cmp x6, x7
- cmp w8, w9, asr #3
- cmp x2, x3, lsr #4
- cmp x2, w3, uxth #2
- cmp x4, x5, uxtx
- cmp wzr, w1
- cmp x8, w8, uxtw
- cmp w9, w8, uxtw
- cmp wsp, w9, lsl #0
-
-; CHECK: cmp w1, #4194304 ; encoding: [0x3f,0x00,0x50,0x71]
-; CHECK: cmp x2, #1024 ; encoding: [0x5f,0x00,0x10,0xf1]
-; CHECK: cmp w4, w5 ; encoding: [0x9f,0x00,0x05,0x6b]
-; CHECK: cmp x6, x7 ; encoding: [0xdf,0x00,0x07,0xeb]
-; CHECK: cmp w8, w9, asr #3 ; encoding: [0x1f,0x0d,0x89,0x6b]
-; CHECK: cmp x2, x3, lsr #4 ; encoding: [0x5f,0x10,0x43,0xeb]
-; CHECK: cmp x2, w3, uxth #2 ; encoding: [0x5f,0x28,0x23,0xeb]
-; CHECK: cmp x4, x5, uxtx ; encoding: [0x9f,0x60,0x25,0xeb]
-; CHECK: cmp wzr, w1 ; encoding: [0xff,0x03,0x01,0x6b]
-; CHECK: cmp x8, w8, uxtw ; encoding: [0x1f,0x41,0x28,0xeb]
-; CHECK: cmp w9, w8, uxtw ; encoding: [0x3f,0x41,0x28,0x6b]
-; CHECK: cmp wsp, w9 ; encoding: [0xff,0x63,0x29,0x6b]
-
-
-;-----------------------------------------------------------------------------
-; SUB/SUBS from WZR/XZR is a NEG
-;-----------------------------------------------------------------------------
-
- neg w0, w1
-; CHECK: neg w0, w1
- neg w0, w1, lsl #1
-; CHECK: sub w0, wzr, w1, lsl #1
- neg x0, x1
-; CHECK: neg x0, x1
- neg x0, x1, asr #1
-; CHECK: sub x0, xzr, x1, asr #1
- negs w0, w1
-; CHECK: negs w0, w1
- negs w0, w1, lsl #1
-; CHECK: subs w0, wzr, w1, lsl #1
- negs x0, x1
-; CHECK: negs x0, x1
- negs x0, x1, asr #1
-; CHECK: subs x0, xzr, x1, asr #1
-
-;-----------------------------------------------------------------------------
-; MOV aliases
-;-----------------------------------------------------------------------------
-
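-; Wide MOV immediates assemble as MOVZ or MOVN depending on whether the value
-; or its bitwise complement fits in a single shifted 16-bit chunk.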
- mov x0, #281470681743360
- mov x0, #18446744073709486080
-
-; CHECK: movz x0, #65535, lsl #32
-; CHECK: movn x0, #65535
-
- mov w0, #0xffffffff
- mov w0, #0xffffff00
-
-; CHECK: movn w0, #0
-; CHECK: movn w0, #255
-
-;-----------------------------------------------------------------------------
-; MVN aliases
-;-----------------------------------------------------------------------------
-
- mvn w4, w9
- mvn x2, x3
- orn w4, wzr, w9
-
-; CHECK: mvn w4, w9 ; encoding: [0xe4,0x03,0x29,0x2a]
-; CHECK: mvn x2, x3 ; encoding: [0xe2,0x03,0x23,0xaa]
-; CHECK: mvn w4, w9 ; encoding: [0xe4,0x03,0x29,0x2a]
-
-;-----------------------------------------------------------------------------
-; Bitfield aliases
-;-----------------------------------------------------------------------------
-
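-; BFI/SBFIZ/UBFIZ map #lsb, #width to #((regsize - lsb) mod regsize) and
-; #(width - 1); BFXIL/SBFX/UBFX map them to #lsb and #(lsb + width - 1).
-; Compare each instruction with its CHECK line below.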
- bfi w0, w0, #1, #4
- bfi x0, x0, #1, #4
- bfi w0, w0, #0, #2
- bfi x0, x0, #0, #2
- bfxil w0, w0, #2, #3
- bfxil x0, x0, #2, #3
- sbfiz w0, w0, #1, #4
- sbfiz x0, x0, #1, #4
- sbfx w0, w0, #2, #3
- sbfx x0, x0, #2, #3
- ubfiz w0, w0, #1, #4
- ubfiz x0, x0, #1, #4
- ubfx w0, w0, #2, #3
- ubfx x0, x0, #2, #3
-
-; CHECK: bfm w0, w0, #31, #3
-; CHECK: bfm x0, x0, #63, #3
-; CHECK: bfm w0, w0, #0, #1
-; CHECK: bfm x0, x0, #0, #1
-; CHECK: bfm w0, w0, #2, #4
-; CHECK: bfm x0, x0, #2, #4
-; CHECK: sbfm w0, w0, #31, #3
-; CHECK: sbfm x0, x0, #63, #3
-; CHECK: sbfm w0, w0, #2, #4
-; CHECK: sbfm x0, x0, #2, #4
-; CHECK: ubfm w0, w0, #31, #3
-; CHECK: ubfm x0, x0, #63, #3
-; CHECK: ubfm w0, w0, #2, #4
-; CHECK: ubfm x0, x0, #2, #4
-
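The alias-to-BFM mapping is pure arithmetic: BFI Rd, Rn, #lsb, #width becomes BFM with immr = (regsize - lsb) mod regsize and imms = width - 1, while BFXIL uses immr = lsb and imms = lsb + width - 1; the SBFIZ/SBFX and UBFIZ/UBFX pairs follow the same scheme on SBFM/UBFM. Worked through the first test above:

    bfi w0, w0, #1, #4    ; immr = (32 - 1) mod 32 = 31, imms = 4 - 1 = 3
    bfm w0, w0, #31, #3   ; matching the CHECK line
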
-;-----------------------------------------------------------------------------
-; Shift (immediate) aliases
-;-----------------------------------------------------------------------------
-
-; CHECK: asr w1, w3, #13
-; CHECK: asr x1, x3, #13
-; CHECK: lsl w0, w0, #1
-; CHECK: lsl x0, x0, #1
-; CHECK: lsr w0, w0, #4
-; CHECK: lsr x0, x0, #4
-
- sbfm w1, w3, #13, #31
- sbfm x1, x3, #13, #63
- ubfm w0, w0, #31, #30
- ubfm x0, x0, #63, #62
- ubfm w0, w0, #4, #31
- ubfm x0, x0, #4, #63
-; CHECK: extr w1, w3, w3, #5
-; CHECK: extr x1, x3, x3, #5
- ror w1, w3, #5
- ror x1, x3, #5
-; CHECK: lsl w1, wzr, #3
- lsl w1, wzr, #3
-
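The immediate-shift aliases use the same trick: LSR and ASR are UBFM and SBFM with immr = shift and imms = regsize - 1, LSL is UBFM with immr = (regsize - shift) mod regsize and imms = regsize - 1 - shift, and ROR is EXTR with both source registers equal. Worked for one case above:

    ubfm w0, w0, #4, #31   ; immr = 4, imms = 31
    lsr w0, w0, #4         ; the alias the printer produces
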
-;-----------------------------------------------------------------------------
-; Sign/Zero extend aliases
-;-----------------------------------------------------------------------------
-
- sxtb w1, w2
- sxth w1, w2
- uxtb w1, w2
- uxth w1, w2
-
-; CHECK: sxtb w1, w2
-; CHECK: sxth w1, w2
-; CHECK: uxtb w1, w2
-; CHECK: uxth w1, w2
-
- sxtb x1, x2
- sxth x1, x2
- sxtw x1, x2
- uxtb x1, x2
- uxth x1, x2
- uxtw x1, x2
-
-; CHECK: sxtb x1, x2
-; CHECK: sxth x1, x2
-; CHECK: sxtw x1, x2
-; CHECK: uxtb x1, x2
-; CHECK: uxth x1, x2
-; CHECK: uxtw x1, x2
-
-;-----------------------------------------------------------------------------
-; Negate with carry
-;-----------------------------------------------------------------------------
-
- ngc w1, w2
- ngc x1, x2
- ngcs w1, w2
- ngcs x1, x2
-
-; CHECK: ngc w1, w2
-; CHECK: ngc x1, x2
-; CHECK: ngcs w1, w2
-; CHECK: ngcs x1, x2
-
-;-----------------------------------------------------------------------------
-; 6.6.1 Multiply aliases
-;-----------------------------------------------------------------------------
-
- mneg w1, w2, w3
- mneg x1, x2, x3
- mul w1, w2, w3
- mul x1, x2, x3
- smnegl x1, w2, w3
- umnegl x1, w2, w3
- smull x1, w2, w3
- umull x1, w2, w3
-
-; CHECK: mneg w1, w2, w3
-; CHECK: mneg x1, x2, x3
-; CHECK: mul w1, w2, w3
-; CHECK: mul x1, x2, x3
-; CHECK: smnegl x1, w2, w3
-; CHECK: umnegl x1, w2, w3
-; CHECK: smull x1, w2, w3
-; CHECK: umull x1, w2, w3
-
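These multiply aliases drop the accumulator operand: MUL and MNEG are MADD and MSUB with the zero register as the addend, and the widening SMULL/SMNEGL and UMULL/UMNEGL forms are SMADDL/SMSUBL and UMADDL/UMSUBL with XZR. A minimal sketch:

    mneg w1, w2, w3        ; assembles identically to...
    msub w1, w2, w3, wzr   ; ...MSUB with WZR as the accumulator
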
-;-----------------------------------------------------------------------------
-; Conditional select aliases
-;-----------------------------------------------------------------------------
-
- cset w1, eq
- cset x1, eq
- csetm w1, ne
- csetm x1, ne
- cinc w1, w2, lt
- cinc x1, x2, lt
- cinv w1, w2, mi
- cinv x1, x2, mi
-
-; CHECK: csinc w1, wzr, wzr, ne
-; CHECK: csinc x1, xzr, xzr, ne
-; CHECK: csinv w1, wzr, wzr, eq
-; CHECK: csinv x1, xzr, xzr, eq
-; CHECK: csinc w1, w2, w2, ge
-; CHECK: csinc x1, x2, x2, ge
-; CHECK: csinv w1, w2, w2, pl
-; CHECK: csinv x1, x2, x2, pl
-
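Note the condition inversion in the expansions above: CSET Rd, cond is CSINC Rd, zr, zr, invert(cond), and CSETM/CINC/CINV likewise invert the condition (eq becomes ne, lt becomes ge, mi becomes pl), with CINC/CINV duplicating the source register. A minimal sketch:

    cset w1, eq              ; assembles identically to...
    csinc w1, wzr, wzr, ne   ; ...CSINC with the inverted condition
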
-;-----------------------------------------------------------------------------
-; SYS aliases
-;-----------------------------------------------------------------------------
-
- sys #0, c7, c1, #0
-; CHECK: ic ialluis
- sys #0, c7, c5, #0
-; CHECK: ic iallu
- sys #3, c7, c5, #1
-; CHECK: ic ivau
-
- sys #3, c7, c4, #1
-; CHECK: dc zva
- sys #0, c7, c6, #1
-; CHECK: dc ivac
- sys #0, c7, c6, #2
-; CHECK: dc isw
- sys #3, c7, c10, #1
-; CHECK: dc cvac
- sys #0, c7, c10, #2
-; CHECK: dc csw
- sys #3, c7, c11, #1
-; CHECK: dc cvau
- sys #3, c7, c14, #1
-; CHECK: dc civac
- sys #0, c7, c14, #2
-; CHECK: dc cisw
-
- sys #0, c7, c8, #0
-; CHECK: at s1e1r
- sys #4, c7, c8, #0
-; CHECK: at s1e2r
- sys #6, c7, c8, #0
-; CHECK: at s1e3r
- sys #0, c7, c8, #1
-; CHECK: at s1e1w
- sys #4, c7, c8, #1
-; CHECK: at s1e2w
- sys #6, c7, c8, #1
-; CHECK: at s1e3w
- sys #0, c7, c8, #2
-; CHECK: at s1e0r
- sys #0, c7, c8, #3
-; CHECK: at s1e0w
- sys #4, c7, c8, #4
-; CHECK: at s12e1r
- sys #4, c7, c8, #5
-; CHECK: at s12e1w
- sys #4, c7, c8, #6
-; CHECK: at s12e0r
- sys #4, c7, c8, #7
-; CHECK: at s12e0w
-
- sys #0, c8, c3, #0
-; CHECK: tlbi vmalle1is
- sys #4, c8, c3, #0
-; CHECK: tlbi alle2is
- sys #6, c8, c3, #0
-; CHECK: tlbi alle3is
- sys #0, c8, c3, #1
-; CHECK: tlbi vae1is
- sys #4, c8, c3, #1
-; CHECK: tlbi vae2is
- sys #6, c8, c3, #1
-; CHECK: tlbi vae3is
- sys #0, c8, c3, #2
-; CHECK: tlbi aside1is
- sys #0, c8, c3, #3
-; CHECK: tlbi vaae1is
- sys #4, c8, c3, #4
-; CHECK: tlbi alle1is
- sys #0, c8, c3, #5
-; CHECK: tlbi vale1is
- sys #0, c8, c3, #7
-; CHECK: tlbi vaale1is
- sys #0, c8, c7, #0
-; CHECK: tlbi vmalle1
- sys #4, c8, c7, #0
-; CHECK: tlbi alle2
- sys #4, c8, c3, #5
-; CHECK: tlbi vale2is
- sys #6, c8, c3, #5
-; CHECK: tlbi vale3is
- sys #6, c8, c7, #0
-; CHECK: tlbi alle3
- sys #0, c8, c7, #1
-; CHECK: tlbi vae1
- sys #4, c8, c7, #1
-; CHECK: tlbi vae2
- sys #6, c8, c7, #1
-; CHECK: tlbi vae3
- sys #0, c8, c7, #2
-; CHECK: tlbi aside1
- sys #0, c8, c7, #3
-; CHECK: tlbi vaae1
- sys #4, c8, c7, #4
-; CHECK: tlbi alle1
- sys #0, c8, c7, #5
-; CHECK: tlbi vale1
- sys #4, c8, c7, #5
-; CHECK: tlbi vale2
- sys #6, c8, c7, #5
-; CHECK: tlbi vale3
- sys #0, c8, c7, #7
-; CHECK: tlbi vaale1
- sys #4, c8, c4, #1
-; CHECK: tlbi ipas2e1
- sys #4, c8, c4, #5
-; CHECK: tlbi ipas2le1
- sys #4, c8, c7, #6
-; CHECK: tlbi vmalls12e1
- sys #4, c8, c3, #6
-; CHECK: tlbi vmalls12e1is
-
- ic ialluis
-; CHECK: ic ialluis
- ic iallu
-; CHECK: ic iallu
- ic ivau
-; CHECK: ic ivau
-
- dc zva
-; CHECK: dc zva
- dc ivac
-; CHECK: dc ivac
- dc isw
-; CHECK: dc isw
- dc cvac
-; CHECK: dc cvac
- dc csw
-; CHECK: dc csw
- dc cvau
-; CHECK: dc cvau
- dc civac
-; CHECK: dc civac
- dc cisw
-; CHECK: dc cisw
-
- at s1e1r
-; CHECK: at s1e1r
- at s1e2r
-; CHECK: at s1e2r
- at s1e3r
-; CHECK: at s1e3r
- at s1e1w
-; CHECK: at s1e1w
- at s1e2w
-; CHECK: at s1e2w
- at s1e3w
-; CHECK: at s1e3w
- at s1e0r
-; CHECK: at s1e0r
- at s1e0w
-; CHECK: at s1e0w
- at s12e1r
-; CHECK: at s12e1r
- at s12e1w
-; CHECK: at s12e1w
- at s12e0r
-; CHECK: at s12e0r
- at s12e0w
-; CHECK: at s12e0w
-
- tlbi vmalle1is
-; CHECK: tlbi vmalle1is
- tlbi alle2is
-; CHECK: tlbi alle2is
- tlbi alle3is
-; CHECK: tlbi alle3is
- tlbi vae1is
-; CHECK: tlbi vae1is
- tlbi vae2is
-; CHECK: tlbi vae2is
- tlbi vae3is
-; CHECK: tlbi vae3is
- tlbi aside1is
-; CHECK: tlbi aside1is
- tlbi vaae1is
-; CHECK: tlbi vaae1is
- tlbi alle1is
-; CHECK: tlbi alle1is
- tlbi vale1is
-; CHECK: tlbi vale1is
- tlbi vaale1is
-; CHECK: tlbi vaale1is
- tlbi vmalle1
-; CHECK: tlbi vmalle1
- tlbi alle2
-; CHECK: tlbi alle2
- tlbi vale2is
-; CHECK: tlbi vale2is
- tlbi vale3is
-; CHECK: tlbi vale3is
- tlbi alle3
-; CHECK: tlbi alle3
- tlbi vae1
-; CHECK: tlbi vae1
- tlbi vae2
-; CHECK: tlbi vae2
- tlbi vae3
-; CHECK: tlbi vae3
- tlbi aside1
-; CHECK: tlbi aside1
- tlbi vaae1
-; CHECK: tlbi vaae1
- tlbi alle1
-; CHECK: tlbi alle1
- tlbi vale1
-; CHECK: tlbi vale1
- tlbi vale2
-; CHECK: tlbi vale2
- tlbi vale3
-; CHECK: tlbi vale3
- tlbi vaale1
-; CHECK: tlbi vaale1
- tlbi ipas2e1, x10
-; CHECK: tlbi ipas2e1, x10
- tlbi ipas2le1, x1
-; CHECK: tlbi ipas2le1, x1
- tlbi vmalls12e1
-; CHECK: tlbi vmalls12e1
- tlbi vmalls12e1is
-; CHECK: tlbi vmalls12e1is
-
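The IC/DC/AT/TLBI groups above are all aliases of SYS with fixed (op1, CRn, CRm, op2) tuples: CRn = c7 selects the cache-maintenance and address-translation operations, CRn = c8 the TLB operations. Operations that act on an address take it in Xt, for example (register choice arbitrary):

    sys #3, c7, c4, #1, x0   ; the general form of...
    dc zva, x0               ; ...DC ZVA on the address in x0
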
-;-----------------------------------------------------------------------------
-; 5.8.5 Vector Arithmetic aliases
-;-----------------------------------------------------------------------------
-
- cmls.8b v0, v2, v1
- cmls.16b v0, v2, v1
- cmls.4h v0, v2, v1
- cmls.8h v0, v2, v1
- cmls.2s v0, v2, v1
- cmls.4s v0, v2, v1
- cmls.2d v0, v2, v1
-; CHECK: cmhs.8b v0, v1, v2
-; CHECK: cmhs.16b v0, v1, v2
-; CHECK: cmhs.4h v0, v1, v2
-; CHECK: cmhs.8h v0, v1, v2
-; CHECK: cmhs.2s v0, v1, v2
-; CHECK: cmhs.4s v0, v1, v2
-; CHECK: cmhs.2d v0, v1, v2
-
- cmlo.8b v0, v2, v1
- cmlo.16b v0, v2, v1
- cmlo.4h v0, v2, v1
- cmlo.8h v0, v2, v1
- cmlo.2s v0, v2, v1
- cmlo.4s v0, v2, v1
- cmlo.2d v0, v2, v1
-; CHECK: cmhi.8b v0, v1, v2
-; CHECK: cmhi.16b v0, v1, v2
-; CHECK: cmhi.4h v0, v1, v2
-; CHECK: cmhi.8h v0, v1, v2
-; CHECK: cmhi.2s v0, v1, v2
-; CHECK: cmhi.4s v0, v1, v2
-; CHECK: cmhi.2d v0, v1, v2
-
- cmle.8b v0, v2, v1
- cmle.16b v0, v2, v1
- cmle.4h v0, v2, v1
- cmle.8h v0, v2, v1
- cmle.2s v0, v2, v1
- cmle.4s v0, v2, v1
- cmle.2d v0, v2, v1
-; CHECK: cmge.8b v0, v1, v2
-; CHECK: cmge.16b v0, v1, v2
-; CHECK: cmge.4h v0, v1, v2
-; CHECK: cmge.8h v0, v1, v2
-; CHECK: cmge.2s v0, v1, v2
-; CHECK: cmge.4s v0, v1, v2
-; CHECK: cmge.2d v0, v1, v2
-
- cmlt.8b v0, v2, v1
- cmlt.16b v0, v2, v1
- cmlt.4h v0, v2, v1
- cmlt.8h v0, v2, v1
- cmlt.2s v0, v2, v1
- cmlt.4s v0, v2, v1
- cmlt.2d v0, v2, v1
-; CHECK: cmgt.8b v0, v1, v2
-; CHECK: cmgt.16b v0, v1, v2
-; CHECK: cmgt.4h v0, v1, v2
-; CHECK: cmgt.8h v0, v1, v2
-; CHECK: cmgt.2s v0, v1, v2
-; CHECK: cmgt.4s v0, v1, v2
-; CHECK: cmgt.2d v0, v1, v2
-
- fcmle.2s v0, v2, v1
- fcmle.4s v0, v2, v1
- fcmle.2d v0, v2, v1
-; CHECK: fcmge.2s v0, v1, v2
-; CHECK: fcmge.4s v0, v1, v2
-; CHECK: fcmge.2d v0, v1, v2
-
- fcmlt.2s v0, v2, v1
- fcmlt.4s v0, v2, v1
- fcmlt.2d v0, v2, v1
-; CHECK: fcmgt.2s v0, v1, v2
-; CHECK: fcmgt.4s v0, v1, v2
-; CHECK: fcmgt.2d v0, v1, v2
-
- facle.2s v0, v2, v1
- facle.4s v0, v2, v1
- facle.2d v0, v2, v1
-; CHECK: facge.2s v0, v1, v2
-; CHECK: facge.4s v0, v1, v2
-; CHECK: facge.2d v0, v1, v2
-
- faclt.2s v0, v2, v1
- faclt.4s v0, v2, v1
- faclt.2d v0, v2, v1
-; CHECK: facgt.2s v0, v1, v2
-; CHECK: facgt.4s v0, v1, v2
-; CHECK: facgt.2d v0, v1, v2
-
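There are no dedicated lower/less-than vector compare encodings here: CMLS, CMLO, CMLE, and CMLT (and the FCMLE/FCMLT and FACLE/FACLT forms) are handled by swapping the two source operands of CMHS, CMHI, CMGE, and CMGT, which is why every CHECK line above shows v1 and v2 reversed. A minimal sketch:

    cmls.8b v0, v2, v1   ; "v2 lower-or-same v1" is expressed as...
    cmhs.8b v0, v1, v2   ; ..."v1 higher-or-same v2" (same encoding)
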
-;-----------------------------------------------------------------------------
-; 5.8.6 Scalar Arithmetic aliases
-;-----------------------------------------------------------------------------
-
- cmls d0, d2, d1
-; CHECK: cmhs d0, d1, d2
-
- cmle d0, d2, d1
-; CHECK: cmge d0, d1, d2
-
- cmlo d0, d2, d1
-; CHECK: cmhi d0, d1, d2
-
- cmlt d0, d2, d1
-; CHECK: cmgt d0, d1, d2
-
- fcmle s0, s2, s1
- fcmle d0, d2, d1
-; CHECK: fcmge s0, s1, s2
-; CHECK: fcmge d0, d1, d2
-
- fcmlt s0, s2, s1
- fcmlt d0, d2, d1
-; CHECK: fcmgt s0, s1, s2
-; CHECK: fcmgt d0, d1, d2
-
- facle s0, s2, s1
- facle d0, d2, d1
-; CHECK: facge s0, s1, s2
-; CHECK: facge d0, d1, d2
-
- faclt s0, s2, s1
- faclt d0, d2, d1
-; CHECK: facgt s0, s1, s2
-; CHECK: facgt d0, d1, d2
-
-;-----------------------------------------------------------------------------
-; 5.8.14 Vector Shift (immediate)
-;-----------------------------------------------------------------------------
- sxtl v1.8h, v2.8b
-; CHECK: sshll.8h v1, v2, #0
- sxtl.8h v1, v2
-; CHECK: sshll.8h v1, v2, #0
-
- sxtl v1.4s, v2.4h
-; CHECK: sshll.4s v1, v2, #0
- sxtl.4s v1, v2
-; CHECK: sshll.4s v1, v2, #0
-
- sxtl v1.2d, v2.2s
-; CHECK: sshll.2d v1, v2, #0
- sxtl.2d v1, v2
-; CHECK: sshll.2d v1, v2, #0
-
- sxtl2 v1.8h, v2.16b
-; CHECK: sshll2.8h v1, v2, #0
- sxtl2.8h v1, v2
-; CHECK: sshll2.8h v1, v2, #0
-
- sxtl2 v1.4s, v2.8h
-; CHECK: sshll2.4s v1, v2, #0
- sxtl2.4s v1, v2
-; CHECK: sshll2.4s v1, v2, #0
-
- sxtl2 v1.2d, v2.4s
-; CHECK: sshll2.2d v1, v2, #0
- sxtl2.2d v1, v2
-; CHECK: sshll2.2d v1, v2, #0
-
- uxtl v1.8h, v2.8b
-; CHECK: ushll.8h v1, v2, #0
- uxtl.8h v1, v2
-; CHECK: ushll.8h v1, v2, #0
-
- uxtl v1.4s, v2.4h
-; CHECK: ushll.4s v1, v2, #0
- uxtl.4s v1, v2
-; CHECK: ushll.4s v1, v2, #0
-
- uxtl v1.2d, v2.2s
-; CHECK: ushll.2d v1, v2, #0
- uxtl.2d v1, v2
-; CHECK: ushll.2d v1, v2, #0
-
- uxtl2 v1.8h, v2.16b
-; CHECK: ushll2.8h v1, v2, #0
- uxtl2.8h v1, v2
-; CHECK: ushll2.8h v1, v2, #0
-
- uxtl2 v1.4s, v2.8h
-; CHECK: ushll2.4s v1, v2, #0
- uxtl2.4s v1, v2
-; CHECK: ushll2.4s v1, v2, #0
-
- uxtl2 v1.2d, v2.4s
-; CHECK: ushll2.2d v1, v2, #0
- uxtl2.2d v1, v2
-; CHECK: ushll2.2d v1, v2, #0
-
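SXTL and UXTL are the shift-by-zero cases of the widening shift instructions: SXTL is SSHLL #0 and UXTL is USHLL #0, in both the suffix-on-mnemonic and fully qualified syntaxes. A minimal sketch:

    sxtl v1.8h, v2.8b        ; assembles identically to...
    sshll v1.8h, v2.8b, #0   ; ...SSHLL with a zero shift
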
-
-;-----------------------------------------------------------------------------
-; MOVI verbose syntax with shift operand omitted.
-;-----------------------------------------------------------------------------
- movi v4.16b, #0x00
- movi v4.16B, #0x01
- movi v4.8b, #0x02
- movi v4.8B, #0x03
- movi v1.2d, #0x000000000000ff
- movi v2.2D, #0x000000000000ff
-
-; CHECK: movi.16b v4, #0 ; encoding: [0x04,0xe4,0x00,0x4f]
-; CHECK: movi.16b v4, #1 ; encoding: [0x24,0xe4,0x00,0x4f]
-; CHECK: movi.8b v4, #2 ; encoding: [0x44,0xe4,0x00,0x0f]
-; CHECK: movi.8b v4, #3 ; encoding: [0x64,0xe4,0x00,0x0f]
-; CHECK: movi.2d v1, #0x000000000000ff ; encoding: [0x21,0xe4,0x00,0x6f]
-; CHECK: movi.2d v2, #0x000000000000ff ; encoding: [0x22,0xe4,0x00,0x6f]
diff --git a/test/MC/ARM64/arithmetic-encoding.s b/test/MC/ARM64/arithmetic-encoding.s
deleted file mode 100644
index 7c89244..0000000
--- a/test/MC/ARM64/arithmetic-encoding.s
+++ /dev/null
@@ -1,631 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
-
-foo:
-;==---------------------------------------------------------------------------==
-; Add/Subtract with carry/borrow
-;==---------------------------------------------------------------------------==
-
- adc w1, w2, w3
- adc x1, x2, x3
- adcs w5, w4, w3
- adcs x5, x4, x3
-
-; CHECK: adc w1, w2, w3 ; encoding: [0x41,0x00,0x03,0x1a]
-; CHECK: adc x1, x2, x3 ; encoding: [0x41,0x00,0x03,0x9a]
-; CHECK: adcs w5, w4, w3 ; encoding: [0x85,0x00,0x03,0x3a]
-; CHECK: adcs x5, x4, x3 ; encoding: [0x85,0x00,0x03,0xba]
-
- sbc w1, w2, w3
- sbc x1, x2, x3
- sbcs w1, w2, w3
- sbcs x1, x2, x3
-
-; CHECK: sbc w1, w2, w3 ; encoding: [0x41,0x00,0x03,0x5a]
-; CHECK: sbc x1, x2, x3 ; encoding: [0x41,0x00,0x03,0xda]
-; CHECK: sbcs w1, w2, w3 ; encoding: [0x41,0x00,0x03,0x7a]
-; CHECK: sbcs x1, x2, x3 ; encoding: [0x41,0x00,0x03,0xfa]
-
-;==---------------------------------------------------------------------------==
-; Add/Subtract with (optionally shifted) immediate
-;==---------------------------------------------------------------------------==
-
- add w3, w4, #1024
- add w3, w4, #1024, lsl #0
- add x3, x4, #1024
- add x3, x4, #1024, lsl #0
-
-; CHECK: add w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x11]
-; CHECK: add w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x11]
-; CHECK: add x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0x91]
-; CHECK: add x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0x91]
-
- add w3, w4, #1024, lsl #12
- add w3, w4, #4194304
- add w3, w4, #0, lsl #12
- add x3, x4, #1024, lsl #12
- add x3, x4, #4194304
- add x3, x4, #0, lsl #12
- add sp, sp, #32
-
-; CHECK: add w3, w4, #4194304 ; encoding: [0x83,0x00,0x50,0x11]
-; CHECK: add w3, w4, #4194304 ; encoding: [0x83,0x00,0x50,0x11]
-; CHECK: add w3, w4, #0, lsl #12 ; encoding: [0x83,0x00,0x40,0x11]
-; CHECK: add x3, x4, #4194304 ; encoding: [0x83,0x00,0x50,0x91]
-; CHECK: add x3, x4, #4194304 ; encoding: [0x83,0x00,0x50,0x91]
-; CHECK: add x3, x4, #0, lsl #12 ; encoding: [0x83,0x00,0x40,0x91]
-; CHECK: add sp, sp, #32 ; encoding: [0xff,0x83,0x00,0x91]
-
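The add/sub immediate field is 12 bits wide with one optional left shift by 12, so "#1024, lsl #12" and the pre-shifted "#4194304" (1024 * 4096) are the same instruction, as the repeated encodings above show. A minimal sketch:

    add w3, w4, #1024, lsl #12   ; encodes the same as...
    add w3, w4, #4194304         ; ...the pre-shifted immediate
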
- adds w3, w4, #1024
- adds w3, w4, #1024, lsl #0
- adds w3, w4, #1024, lsl #12
- adds x3, x4, #1024
- adds x3, x4, #1024, lsl #0
- adds x3, x4, #1024, lsl #12
-
-; CHECK: adds w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x31]
-; CHECK: adds w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x31]
-; CHECK: adds w3, w4, #4194304 ; encoding: [0x83,0x00,0x50,0x31]
-; CHECK: adds x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xb1]
-; CHECK: adds x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xb1]
-; CHECK: adds x3, x4, #4194304 ; encoding: [0x83,0x00,0x50,0xb1]
-
- sub w3, w4, #1024
- sub w3, w4, #1024, lsl #0
- sub w3, w4, #1024, lsl #12
- sub x3, x4, #1024
- sub x3, x4, #1024, lsl #0
- sub x3, x4, #1024, lsl #12
- sub sp, sp, #32
-
-; CHECK: sub w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x51]
-; CHECK: sub w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x51]
-; CHECK: sub w3, w4, #4194304 ; encoding: [0x83,0x00,0x50,0x51]
-; CHECK: sub x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xd1]
-; CHECK: sub x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xd1]
-; CHECK: sub x3, x4, #4194304 ; encoding: [0x83,0x00,0x50,0xd1]
-; CHECK: sub sp, sp, #32 ; encoding: [0xff,0x83,0x00,0xd1]
-
- subs w3, w4, #1024
- subs w3, w4, #1024, lsl #0
- subs w3, w4, #1024, lsl #12
- subs x3, x4, #1024
- subs x3, x4, #1024, lsl #0
- subs x3, x4, #1024, lsl #12
-
-; CHECK: subs w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x71]
-; CHECK: subs w3, w4, #1024 ; encoding: [0x83,0x00,0x10,0x71]
-; CHECK: subs w3, w4, #4194304 ; encoding: [0x83,0x00,0x50,0x71]
-; CHECK: subs x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xf1]
-; CHECK: subs x3, x4, #1024 ; encoding: [0x83,0x00,0x10,0xf1]
-; CHECK: subs x3, x4, #4194304 ; encoding: [0x83,0x00,0x50,0xf1]
-
-;==---------------------------------------------------------------------------==
-; Add/Subtract register with (optional) shift
-;==---------------------------------------------------------------------------==
-
- add w12, w13, w14
- add x12, x13, x14
- add w12, w13, w14, lsl #12
- add x12, x13, x14, lsl #12
- add w12, w13, w14, lsr #42
- add x12, x13, x14, lsr #42
- add w12, w13, w14, asr #39
- add x12, x13, x14, asr #39
-
-; CHECK: add w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x0b]
-; CHECK: add x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0x8b]
-; CHECK: add w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x0b]
-; CHECK: add x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x8b]
-; CHECK: add w12, w13, w14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0x0b]
-; CHECK: add x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0x8b]
-; CHECK: add w12, w13, w14, asr #39 ; encoding: [0xac,0x9d,0x8e,0x0b]
-; CHECK: add x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0x8b]
-
- sub w12, w13, w14
- sub x12, x13, x14
- sub w12, w13, w14, lsl #12
- sub x12, x13, x14, lsl #12
- sub w12, w13, w14, lsr #42
- sub x12, x13, x14, lsr #42
- sub w12, w13, w14, asr #39
- sub x12, x13, x14, asr #39
-
-; CHECK: sub w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x4b]
-; CHECK: sub x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0xcb]
-; CHECK: sub w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x4b]
-; CHECK: sub x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0xcb]
-; CHECK: sub w12, w13, w14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0x4b]
-; CHECK: sub x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0xcb]
-; CHECK: sub w12, w13, w14, asr #39 ; encoding: [0xac,0x9d,0x8e,0x4b]
-; CHECK: sub x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0xcb]
-
- adds w12, w13, w14
- adds x12, x13, x14
- adds w12, w13, w14, lsl #12
- adds x12, x13, x14, lsl #12
- adds w12, w13, w14, lsr #42
- adds x12, x13, x14, lsr #42
- adds w12, w13, w14, asr #39
- adds x12, x13, x14, asr #39
-
-; CHECK: adds w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x2b]
-; CHECK: adds x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0xab]
-; CHECK: adds w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x2b]
-; CHECK: adds x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0xab]
-; CHECK: adds w12, w13, w14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0x2b]
-; CHECK: adds x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0xab]
-; CHECK: adds w12, w13, w14, asr #39 ; encoding: [0xac,0x9d,0x8e,0x2b]
-; CHECK: adds x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0xab]
-
- subs w12, w13, w14
- subs x12, x13, x14
- subs w12, w13, w14, lsl #12
- subs x12, x13, x14, lsl #12
- subs w12, w13, w14, lsr #42
- subs x12, x13, x14, lsr #42
- subs w12, w13, w14, asr #39
- subs x12, x13, x14, asr #39
-
-; CHECK: subs w12, w13, w14 ; encoding: [0xac,0x01,0x0e,0x6b]
-; CHECK: subs x12, x13, x14 ; encoding: [0xac,0x01,0x0e,0xeb]
-; CHECK: subs w12, w13, w14, lsl #12 ; encoding: [0xac,0x31,0x0e,0x6b]
-; CHECK: subs x12, x13, x14, lsl #12 ; encoding: [0xac,0x31,0x0e,0xeb]
-; CHECK: subs w12, w13, w14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0x6b]
-; CHECK: subs x12, x13, x14, lsr #42 ; encoding: [0xac,0xa9,0x4e,0xeb]
-; CHECK: subs w12, w13, w14, asr #39 ; encoding: [0xac,0x9d,0x8e,0x6b]
-; CHECK: subs x12, x13, x14, asr #39 ; encoding: [0xac,0x9d,0x8e,0xeb]
-
-; Check use of upper case register names. rdar://14354073
- add X2, X2, X2
-; CHECK: add x2, x2, x2 ; encoding: [0x42,0x00,0x02,0x8b]
-
-;==---------------------------------------------------------------------------==
-; Add/Subtract with (optional) extend
-;==---------------------------------------------------------------------------==
-
- add w1, w2, w3, uxtb
- add w1, w2, w3, uxth
- add w1, w2, w3, uxtw
- add w1, w2, w3, uxtx
- add w1, w2, w3, sxtb
- add w1, w2, w3, sxth
- add w1, w2, w3, sxtw
- add w1, w2, w3, sxtx
-
-; CHECK: add w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x0b]
-; CHECK: add w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x0b]
-; CHECK: add w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x0b]
-; CHECK: add w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x0b]
-; CHECK: add w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x0b]
-; CHECK: add w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x0b]
-; CHECK: add w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x0b]
-; CHECK: add w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x0b]
-
- add x1, x2, w3, uxtb
- add x1, x2, w3, uxth
- add x1, x2, w3, uxtw
- add x1, x2, w3, sxtb
- add x1, x2, w3, sxth
- add x1, x2, w3, sxtw
-
-; CHECK: add x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x8b]
-; CHECK: add x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0x8b]
-; CHECK: add x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x8b]
-; CHECK: add x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x8b]
-; CHECK: add x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x8b]
-; CHECK: add x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x8b]
-
- add w1, wsp, w3
- add w1, wsp, w3, uxtw #0
- add w2, wsp, w3, lsl #1
- add sp, x2, x3
- add sp, x2, x3, uxtx #0
-
-; CHECK: add w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x0b]
-; CHECK: add w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x0b]
-; CHECK: add w2, wsp, w3, lsl #1 ; encoding: [0xe2,0x67,0x23,0x0b]
-; CHECK: add sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0x8b]
-; CHECK: add sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0x8b]
-
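When sp or wsp is an operand, the shifted-register encoding is unavailable, so the assembler falls back to the extended-register form (UXTX or UXTW), and the canonical printer then omits the redundant extend; that is why the plain and ", uxtx #0" spellings above produce identical encodings. A minimal sketch:

    add sp, x2, x3           ; assembles as the extended form...
    add sp, x2, x3, uxtx #0  ; ...with the extend left implicit when printed
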
- sub w1, w2, w3, uxtb
- sub w1, w2, w3, uxth
- sub w1, w2, w3, uxtw
- sub w1, w2, w3, uxtx
- sub w1, w2, w3, sxtb
- sub w1, w2, w3, sxth
- sub w1, w2, w3, sxtw
- sub w1, w2, w3, sxtx
-
-; CHECK: sub w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x4b]
-; CHECK: sub w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x4b]
-; CHECK: sub w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x4b]
-; CHECK: sub w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x4b]
-; CHECK: sub w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x4b]
-; CHECK: sub w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x4b]
-; CHECK: sub w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x4b]
-; CHECK: sub w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x4b]
-
- sub x1, x2, w3, uxtb
- sub x1, x2, w3, uxth
- sub x1, x2, w3, uxtw
- sub x1, x2, w3, sxtb
- sub x1, x2, w3, sxth
- sub x1, x2, w3, sxtw
-
-; CHECK: sub x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0xcb]
-; CHECK: sub x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0xcb]
-; CHECK: sub x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0xcb]
-; CHECK: sub x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0xcb]
-; CHECK: sub x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0xcb]
-; CHECK: sub x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0xcb]
-
- sub w1, wsp, w3
- sub w1, wsp, w3, uxtw #0
- sub sp, x2, x3
- sub sp, x2, x3, uxtx #0
- sub sp, x3, x7, lsl #4
-
-; CHECK: sub w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x4b]
-; CHECK: sub w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x4b]
-; CHECK: sub sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0xcb]
-; CHECK: sub sp, x2, x3 ; encoding: [0x5f,0x60,0x23,0xcb]
-; CHECK: sub sp, x3, x7, lsl #4     ; encoding: [0x7f,0x70,0x27,0xcb]
-
- adds w1, w2, w3, uxtb
- adds w1, w2, w3, uxth
- adds w1, w2, w3, uxtw
- adds w1, w2, w3, uxtx
- adds w1, w2, w3, sxtb
- adds w1, w2, w3, sxth
- adds w1, w2, w3, sxtw
- adds w1, w2, w3, sxtx
-
-; CHECK: adds w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x2b]
-; CHECK: adds w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x2b]
-; CHECK: adds w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x2b]
-; CHECK: adds w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x2b]
-; CHECK: adds w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x2b]
-; CHECK: adds w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x2b]
-; CHECK: adds w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x2b]
-; CHECK: adds w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x2b]
-
- adds x1, x2, w3, uxtb
- adds x1, x2, w3, uxth
- adds x1, x2, w3, uxtw
- adds x1, x2, w3, uxtx
- adds x1, x2, w3, sxtb
- adds x1, x2, w3, sxth
- adds x1, x2, w3, sxtw
- adds x1, x2, w3, sxtx
-
-; CHECK: adds x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0xab]
-; CHECK: adds x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0xab]
-; CHECK: adds x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0xab]
-; CHECK: adds x1, x2, w3, uxtx ; encoding: [0x41,0x60,0x23,0xab]
-; CHECK: adds x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0xab]
-; CHECK: adds x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0xab]
-; CHECK: adds x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0xab]
-; CHECK: adds x1, x2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0xab]
-
- adds w1, wsp, w3
- adds w1, wsp, w3, uxtw #0
- adds wzr, wsp, w3, lsl #4
-
-; CHECK: adds w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x2b]
-; CHECK: adds w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x2b]
-; CHECK: adds wzr, wsp, w3, lsl #4 ; encoding: [0xff,0x73,0x23,0x2b]
-
- subs w1, w2, w3, uxtb
- subs w1, w2, w3, uxth
- subs w1, w2, w3, uxtw
- subs w1, w2, w3, uxtx
- subs w1, w2, w3, sxtb
- subs w1, w2, w3, sxth
- subs w1, w2, w3, sxtw
- subs w1, w2, w3, sxtx
-
-; CHECK: subs w1, w2, w3, uxtb ; encoding: [0x41,0x00,0x23,0x6b]
-; CHECK: subs w1, w2, w3, uxth ; encoding: [0x41,0x20,0x23,0x6b]
-; CHECK: subs w1, w2, w3, uxtw ; encoding: [0x41,0x40,0x23,0x6b]
-; CHECK: subs w1, w2, w3, uxtx ; encoding: [0x41,0x60,0x23,0x6b]
-; CHECK: subs w1, w2, w3, sxtb ; encoding: [0x41,0x80,0x23,0x6b]
-; CHECK: subs w1, w2, w3, sxth ; encoding: [0x41,0xa0,0x23,0x6b]
-; CHECK: subs w1, w2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0x6b]
-; CHECK: subs w1, w2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0x6b]
-
- subs x1, x2, w3, uxtb
- subs x1, x2, w3, uxth
- subs x1, x2, w3, uxtw
- subs x1, x2, w3, uxtx
- subs x1, x2, w3, sxtb
- subs x1, x2, w3, sxth
- subs x1, x2, w3, sxtw
- subs x1, x2, w3, sxtx
-
-; CHECK: subs x1, x2, w3, uxtb ; encoding: [0x41,0x00,0x23,0xeb]
-; CHECK: subs x1, x2, w3, uxth ; encoding: [0x41,0x20,0x23,0xeb]
-; CHECK: subs x1, x2, w3, uxtw ; encoding: [0x41,0x40,0x23,0xeb]
-; CHECK: subs x1, x2, w3, uxtx ; encoding: [0x41,0x60,0x23,0xeb]
-; CHECK: subs x1, x2, w3, sxtb ; encoding: [0x41,0x80,0x23,0xeb]
-; CHECK: subs x1, x2, w3, sxth ; encoding: [0x41,0xa0,0x23,0xeb]
-; CHECK: subs x1, x2, w3, sxtw ; encoding: [0x41,0xc0,0x23,0xeb]
-; CHECK: subs x1, x2, w3, sxtx ; encoding: [0x41,0xe0,0x23,0xeb]
-
- subs w1, wsp, w3
- subs w1, wsp, w3, uxtw #0
-
-; CHECK: subs w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x6b]
-; CHECK: subs w1, wsp, w3 ; encoding: [0xe1,0x43,0x23,0x6b]
-
- cmp wsp, w9, lsl #0
- subs x3, sp, x9, lsl #2
- cmp wsp, w8, uxtw
- subs wzr, wsp, w8, uxtw
- cmp sp, w8, uxtw
- subs xzr, sp, w8, uxtw
-
-; CHECK: cmp wsp, w9 ; encoding: [0xff,0x63,0x29,0x6b]
-; CHECK: subs x3, sp, x9, lsl #2 ; encoding: [0xe3,0x6b,0x29,0xeb]
-; CHECK: cmp wsp, w8 ; encoding: [0xff,0x43,0x28,0x6b]
-; CHECK: cmp wsp, w8 ; encoding: [0xff,0x43,0x28,0x6b]
-; CHECK: cmp sp, w8 ; encoding: [0xff,0x43,0x28,0xeb]
-; CHECK: cmp sp, w8 ; encoding: [0xff,0x43,0x28,0xeb]
-
- sub wsp, w9, w8, uxtw
- sub w1, wsp, w8, uxtw
- sub wsp, wsp, w8, uxtw
- sub sp, x9, w8, uxtw
- sub x1, sp, w8, uxtw
- sub sp, sp, w8, uxtw
- subs w1, wsp, w8, uxtw
- subs x1, sp, w8, uxtw
-
-; CHECK: sub wsp, w9, w8 ; encoding: [0x3f,0x41,0x28,0x4b]
-; CHECK: sub w1, wsp, w8 ; encoding: [0xe1,0x43,0x28,0x4b]
-; CHECK: sub wsp, wsp, w8 ; encoding: [0xff,0x43,0x28,0x4b]
-; CHECK: sub sp, x9, w8 ; encoding: [0x3f,0x41,0x28,0xcb]
-; CHECK: sub x1, sp, w8 ; encoding: [0xe1,0x43,0x28,0xcb]
-; CHECK: sub sp, sp, w8 ; encoding: [0xff,0x43,0x28,0xcb]
-; CHECK: subs w1, wsp, w8 ; encoding: [0xe1,0x43,0x28,0x6b]
-; CHECK: subs x1, sp, w8 ; encoding: [0xe1,0x43,0x28,0xeb]
-
-;==---------------------------------------------------------------------------==
-; Signed/Unsigned divide
-;==---------------------------------------------------------------------------==
-
- sdiv w1, w2, w3
- sdiv x1, x2, x3
- udiv w1, w2, w3
- udiv x1, x2, x3
-
-; CHECK: sdiv w1, w2, w3 ; encoding: [0x41,0x0c,0xc3,0x1a]
-; CHECK: sdiv x1, x2, x3 ; encoding: [0x41,0x0c,0xc3,0x9a]
-; CHECK: udiv w1, w2, w3 ; encoding: [0x41,0x08,0xc3,0x1a]
-; CHECK: udiv x1, x2, x3 ; encoding: [0x41,0x08,0xc3,0x9a]
-
-;==---------------------------------------------------------------------------==
-; Variable shifts
-;==---------------------------------------------------------------------------==
-
- asrv w1, w2, w3
- asrv x1, x2, x3
- asr w1, w2, w3
- asr x1, x2, x3
- lslv w1, w2, w3
- lslv x1, x2, x3
- lsl w1, w2, w3
- lsl x1, x2, x3
- lsrv w1, w2, w3
- lsrv x1, x2, x3
- lsr w1, w2, w3
- lsr x1, x2, x3
- rorv w1, w2, w3
- rorv x1, x2, x3
- ror w1, w2, w3
- ror x1, x2, x3
-
-; CHECK: encoding: [0x41,0x28,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x28,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x28,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x28,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x20,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x20,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x20,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x20,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x24,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x24,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x24,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x24,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x2c,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x2c,0xc3,0x9a]
-; CHECK: encoding: [0x41,0x2c,0xc3,0x1a]
-; CHECK: encoding: [0x41,0x2c,0xc3,0x9a]
-
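The plain ASR/LSL/LSR/ROR register forms are aliases of the ASRV/LSLV/LSRV/RORV variable-shift instructions, which is why the encodings above come in identical pairs. A minimal sketch:

    asrv w1, w2, w3   ; canonical name, identical encoding to...
    asr w1, w2, w3    ; ...the alias
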
-;==---------------------------------------------------------------------------==
-; One operand instructions
-;==---------------------------------------------------------------------------==
-
- cls w1, w2
- cls x1, x2
- clz w1, w2
- clz x1, x2
- rbit w1, w2
- rbit x1, x2
- rev w1, w2
- rev x1, x2
- rev16 w1, w2
- rev16 x1, x2
- rev32 x1, x2
-
-; CHECK: encoding: [0x41,0x14,0xc0,0x5a]
-; CHECK: encoding: [0x41,0x14,0xc0,0xda]
-; CHECK: encoding: [0x41,0x10,0xc0,0x5a]
-; CHECK: encoding: [0x41,0x10,0xc0,0xda]
-; CHECK: encoding: [0x41,0x00,0xc0,0x5a]
-; CHECK: encoding: [0x41,0x00,0xc0,0xda]
-; CHECK: encoding: [0x41,0x08,0xc0,0x5a]
-; CHECK: encoding: [0x41,0x0c,0xc0,0xda]
-; CHECK: encoding: [0x41,0x04,0xc0,0x5a]
-; CHECK: encoding: [0x41,0x04,0xc0,0xda]
-; CHECK: encoding: [0x41,0x08,0xc0,0xda]
-
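The reverse family differs only in lane size: RBIT reverses bits, REV reverses every byte in the register, REV16 reverses bytes within each halfword, and REV32 (64-bit only) within each word. A worked example (input value assumed for illustration):

    ; with w2 = 0xAABBCCDD:
    rev   w1, w2   ; w1 = 0xDDCCBBAA
    rev16 w1, w2   ; w1 = 0xBBAADDCC
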
-;==---------------------------------------------------------------------------==
-; 6.6.1 Multiply-add instructions
-;==---------------------------------------------------------------------------==
-
- madd w1, w2, w3, w4
- madd x1, x2, x3, x4
- msub w1, w2, w3, w4
- msub x1, x2, x3, x4
- smaddl x1, w2, w3, x4
- smsubl x1, w2, w3, x4
- umaddl x1, w2, w3, x4
- umsubl x1, w2, w3, x4
-
-; CHECK: madd w1, w2, w3, w4 ; encoding: [0x41,0x10,0x03,0x1b]
-; CHECK: madd x1, x2, x3, x4 ; encoding: [0x41,0x10,0x03,0x9b]
-; CHECK: msub w1, w2, w3, w4 ; encoding: [0x41,0x90,0x03,0x1b]
-; CHECK: msub x1, x2, x3, x4 ; encoding: [0x41,0x90,0x03,0x9b]
-; CHECK: smaddl x1, w2, w3, x4 ; encoding: [0x41,0x10,0x23,0x9b]
-; CHECK: smsubl x1, w2, w3, x4 ; encoding: [0x41,0x90,0x23,0x9b]
-; CHECK: umaddl x1, w2, w3, x4 ; encoding: [0x41,0x10,0xa3,0x9b]
-; CHECK: umsubl x1, w2, w3, x4 ; encoding: [0x41,0x90,0xa3,0x9b]
-
-;==---------------------------------------------------------------------------==
-; Multiply-high instructions
-;==---------------------------------------------------------------------------==
-
- smulh x1, x2, x3
- umulh x1, x2, x3
-
-; CHECK: smulh x1, x2, x3 ; encoding: [0x41,0x7c,0x43,0x9b]
-; CHECK: umulh x1, x2, x3 ; encoding: [0x41,0x7c,0xc3,0x9b]
-
-;==---------------------------------------------------------------------------==
-; Move immediate instructions
-;==---------------------------------------------------------------------------==
-
- movz w0, #1
- movz x0, #1
- movz w0, #1, lsl #16
- movz x0, #1, lsl #16
-
-; CHECK: movz w0, #1 ; encoding: [0x20,0x00,0x80,0x52]
-; CHECK: movz x0, #1 ; encoding: [0x20,0x00,0x80,0xd2]
-; CHECK: movz w0, #1, lsl #16 ; encoding: [0x20,0x00,0xa0,0x52]
-; CHECK: movz x0, #1, lsl #16 ; encoding: [0x20,0x00,0xa0,0xd2]
-
- movn w0, #2
- movn x0, #2
- movn w0, #2, lsl #16
- movn x0, #2, lsl #16
-
-; CHECK: movn w0, #2 ; encoding: [0x40,0x00,0x80,0x12]
-; CHECK: movn x0, #2 ; encoding: [0x40,0x00,0x80,0x92]
-; CHECK: movn w0, #2, lsl #16 ; encoding: [0x40,0x00,0xa0,0x12]
-; CHECK: movn x0, #2, lsl #16 ; encoding: [0x40,0x00,0xa0,0x92]
-
- movk w0, #1
- movk x0, #1
- movk w0, #1, lsl #16
- movk x0, #1, lsl #16
-
-; CHECK: movk w0, #1 ; encoding: [0x20,0x00,0x80,0x72]
-; CHECK: movk x0, #1 ; encoding: [0x20,0x00,0x80,0xf2]
-; CHECK: movk w0, #1, lsl #16 ; encoding: [0x20,0x00,0xa0,0x72]
-; CHECK: movk x0, #1, lsl #16 ; encoding: [0x20,0x00,0xa0,0xf2]
-
-;==---------------------------------------------------------------------------==
-; Conditionally set flags instructions
-;==---------------------------------------------------------------------------==
-
- ccmn w1, #2, #3, eq
- ccmn x1, #2, #3, eq
- ccmp w1, #2, #3, eq
- ccmp x1, #2, #3, eq
-
-; CHECK: encoding: [0x23,0x08,0x42,0x3a]
-; CHECK: encoding: [0x23,0x08,0x42,0xba]
-; CHECK: encoding: [0x23,0x08,0x42,0x7a]
-; CHECK: encoding: [0x23,0x08,0x42,0xfa]
-
- ccmn w1, w2, #3, eq
- ccmn x1, x2, #3, eq
- ccmp w1, w2, #3, eq
- ccmp x1, x2, #3, eq
-
-; CHECK: encoding: [0x23,0x00,0x42,0x3a]
-; CHECK: encoding: [0x23,0x00,0x42,0xba]
-; CHECK: encoding: [0x23,0x00,0x42,0x7a]
-; CHECK: encoding: [0x23,0x00,0x42,0xfa]
-
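CCMP and CCMN perform a conditional compare: if the condition holds, NZCV is set from Rn - op2 (or Rn + op2 for CCMN); otherwise NZCV is loaded directly from the 4-bit immediate, bit 3 = N down to bit 0 = V. A minimal sketch for the first CCMP above:

    ccmp w1, #2, #3, eq   ; if eq holds: flags from (w1 - 2)
                          ; otherwise:   NZCV = 0b0011 (C = 1, V = 1)
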
-;==---------------------------------------------------------------------------==
-; Conditional select instructions
-;==---------------------------------------------------------------------------==
-
- csel w1, w2, w3, eq
- csel x1, x2, x3, eq
- csinc w1, w2, w3, eq
- csinc x1, x2, x3, eq
- csinv w1, w2, w3, eq
- csinv x1, x2, x3, eq
- csneg w1, w2, w3, eq
- csneg x1, x2, x3, eq
-
-; CHECK: encoding: [0x41,0x00,0x83,0x1a]
-; CHECK: encoding: [0x41,0x00,0x83,0x9a]
-; CHECK: encoding: [0x41,0x04,0x83,0x1a]
-; CHECK: encoding: [0x41,0x04,0x83,0x9a]
-; CHECK: encoding: [0x41,0x00,0x83,0x5a]
-; CHECK: encoding: [0x41,0x00,0x83,0xda]
-; CHECK: encoding: [0x41,0x04,0x83,0x5a]
-; CHECK: encoding: [0x41,0x04,0x83,0xda]
-
-; Make sure we handle upper case, too. In particular, condition codes.
- CSEL W16, W7, W27, EQ
- CSEL W15, W6, W26, NE
- CSEL W14, W5, W25, CS
- CSEL W13, W4, W24, HS
- csel w12, w3, w23, CC
- csel w11, w2, w22, LO
- csel w10, w1, w21, MI
- csel x9, x9, x1, PL
- csel x8, x8, x2, VS
- CSEL X7, X7, X3, VC
- CSEL X6, X7, X4, HI
- CSEL X5, X6, X5, LS
- CSEL X4, X5, X6, GE
- csel x3, x4, x7, LT
- csel x2, x3, x8, GT
- csel x1, x2, x9, LE
- csel x10, x1, x20, AL
-
-; CHECK: csel w16, w7, w27, eq ; encoding: [0xf0,0x00,0x9b,0x1a]
-; CHECK: csel w15, w6, w26, ne ; encoding: [0xcf,0x10,0x9a,0x1a]
-; CHECK: csel w14, w5, w25, cs ; encoding: [0xae,0x20,0x99,0x1a]
-; CHECK: csel w13, w4, w24, cs ; encoding: [0x8d,0x20,0x98,0x1a]
-; CHECK: csel w12, w3, w23, cc ; encoding: [0x6c,0x30,0x97,0x1a]
-; CHECK: csel w11, w2, w22, cc ; encoding: [0x4b,0x30,0x96,0x1a]
-; CHECK: csel w10, w1, w21, mi ; encoding: [0x2a,0x40,0x95,0x1a]
-; CHECK: csel x9, x9, x1, pl ; encoding: [0x29,0x51,0x81,0x9a]
-; CHECK: csel x8, x8, x2, vs ; encoding: [0x08,0x61,0x82,0x9a]
-; CHECK: csel x7, x7, x3, vc ; encoding: [0xe7,0x70,0x83,0x9a]
-; CHECK: csel x6, x7, x4, hi ; encoding: [0xe6,0x80,0x84,0x9a]
-; CHECK: csel x5, x6, x5, ls ; encoding: [0xc5,0x90,0x85,0x9a]
-; CHECK: csel x4, x5, x6, ge ; encoding: [0xa4,0xa0,0x86,0x9a]
-; CHECK: csel x3, x4, x7, lt ; encoding: [0x83,0xb0,0x87,0x9a]
-; CHECK: csel x2, x3, x8, gt ; encoding: [0x62,0xc0,0x88,0x9a]
-; CHECK: csel x1, x2, x9, le ; encoding: [0x41,0xd0,0x89,0x9a]
-; CHECK: csel x10, x1, x20, al ; encoding: [0x2a,0xe0,0x94,0x9a]
-
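As the CHECK lines show, HS and LO are accepted as synonyms but print in the canonical CS and CC spellings, and register and condition-code case is normalized throughout. A minimal sketch:

    CSEL W13, W4, W24, HS   ; accepted, prints as...
    csel w13, w4, w24, cs   ; ...the canonical lower-case form
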
-
-;==---------------------------------------------------------------------------==
-; Scalar saturating arithmetic
-;==---------------------------------------------------------------------------==
- uqxtn b4, h2
- uqxtn h2, s3
- uqxtn s9, d2
-
-; CHECK: uqxtn b4, h2 ; encoding: [0x44,0x48,0x21,0x7e]
-; CHECK: uqxtn h2, s3 ; encoding: [0x62,0x48,0x61,0x7e]
-; CHECK: uqxtn s9, d2 ; encoding: [0x49,0x48,0xa1,0x7e]
diff --git a/test/MC/ARM64/arm64-fixup.s b/test/MC/ARM64/arm64-fixup.s
deleted file mode 100644
index eae6f68..0000000
--- a/test/MC/ARM64/arm64-fixup.s
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llvm-mc < %s -triple arm64-apple-darwin --show-encoding | FileCheck %s
-
-foo:
- adr x3, Lbar
-; CHECK: adr x3, Lbar ; encoding: [0x03'A',A,A,0x10'A']
-; CHECK: fixup A - offset: 0, value: Lbar, kind: fixup_arm64_pcrel_adr_imm21
-Lbar:
- adrp x3, _printf@page
-; CHECK: adrp x3, _printf@PAGE ; encoding: [0x03'A',A,A,0x90'A']
-; CHECK: fixup A - offset: 0, value: _printf@PAGE, kind: fixup_arm64_pcrel_adrp_imm21
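
ADR reaches about +/-1 MiB through its 21-bit byte offset, while ADRP shifts a 21-bit field left by 12 to address 4 KiB pages within roughly +/-4 GiB; the @page/@pageoff operators split a symbol address across such a pair. A minimal sketch (continuing the _printf example above):

    adrp x3, _printf@page          ; page of _printf, relocated via the adrp fixup
    add  x3, x3, _printf@pageoff   ; offset of _printf within that page
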
diff --git a/test/MC/ARM64/basic-a64-instructions.s b/test/MC/ARM64/basic-a64-instructions.s
deleted file mode 100644
index 99b438d..0000000
--- a/test/MC/ARM64/basic-a64-instructions.s
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: llvm-mc -triple arm64 -show-encoding < %s | FileCheck %s
-
- crc32b w5, w7, w20
- crc32h w28, wzr, w30
- crc32w w0, w1, w2
- crc32x w7, w9, x20
- crc32cb w9, w5, w4
- crc32ch w13, w17, w25
- crc32cw wzr, w3, w5
- crc32cx w18, w16, xzr
-// CHECK: crc32b w5, w7, w20 // encoding: [0xe5,0x40,0xd4,0x1a]
-// CHECK: crc32h w28, wzr, w30 // encoding: [0xfc,0x47,0xde,0x1a]
-// CHECK: crc32w w0, w1, w2 // encoding: [0x20,0x48,0xc2,0x1a]
-// CHECK: crc32x w7, w9, x20 // encoding: [0x27,0x4d,0xd4,0x9a]
-// CHECK: crc32cb w9, w5, w4 // encoding: [0xa9,0x50,0xc4,0x1a]
-// CHECK: crc32ch w13, w17, w25 // encoding: [0x2d,0x56,0xd9,0x1a]
-// CHECK: crc32cw wzr, w3, w5 // encoding: [0x7f,0x58,0xc5,0x1a]
-// CHECK: crc32cx w18, w16, xzr // encoding: [0x12,0x5e,0xdf,0x9a]
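
Note the register widths in this group: the CRC accumulator and result are always 32-bit W registers, and only the data operand widens with the chunk size, so crc32x/crc32cx take an X-register source while still writing a W destination.

    crc32x w7, w9, x20   // 64-bit data chunk folded into a 32-bit running CRC
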
diff --git a/test/MC/ARM64/bitfield-encoding.s b/test/MC/ARM64/bitfield-encoding.s
deleted file mode 100644
index cdbac08..0000000
--- a/test/MC/ARM64/bitfield-encoding.s
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
-
-foo:
-;==---------------------------------------------------------------------------==
-; 5.4.4 Bitfield Operations
-;==---------------------------------------------------------------------------==
-
- bfm w1, w2, #1, #15
- bfm x1, x2, #1, #15
- sbfm w1, w2, #1, #15
- sbfm x1, x2, #1, #15
- ubfm w1, w2, #1, #15
- ubfm x1, x2, #1, #15
-
-; CHECK: bfm w1, w2, #1, #15 ; encoding: [0x41,0x3c,0x01,0x33]
-; CHECK: bfm x1, x2, #1, #15 ; encoding: [0x41,0x3c,0x41,0xb3]
-; CHECK: sbfm w1, w2, #1, #15 ; encoding: [0x41,0x3c,0x01,0x13]
-; CHECK: sbfm x1, x2, #1, #15 ; encoding: [0x41,0x3c,0x41,0x93]
-; CHECK: ubfm w1, w2, #1, #15 ; encoding: [0x41,0x3c,0x01,0x53]
-; CHECK: ubfm x1, x2, #1, #15 ; encoding: [0x41,0x3c,0x41,0xd3]
-
-;==---------------------------------------------------------------------------==
-; 5.4.5 Extract (immediate)
-;==---------------------------------------------------------------------------==
-
- extr w1, w2, w3, #15
- extr x2, x3, x4, #1
-
-; CHECK: extr w1, w2, w3, #15 ; encoding: [0x41,0x3c,0x83,0x13]
-; CHECK: extr x2, x3, x4, #1 ; encoding: [0x62,0x04,0xc4,0x93]
diff --git a/test/MC/ARM64/branch-encoding.s b/test/MC/ARM64/branch-encoding.s
deleted file mode 100644
index 7857fea..0000000
--- a/test/MC/ARM64/branch-encoding.s
+++ /dev/null
@@ -1,159 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
-
-foo:
-
-;-----------------------------------------------------------------------------
-; Unconditional branch (register) instructions.
-;-----------------------------------------------------------------------------
-
- ret
-; CHECK: encoding: [0xc0,0x03,0x5f,0xd6]
- ret x1
-; CHECK: encoding: [0x20,0x00,0x5f,0xd6]
- drps
-; CHECK: encoding: [0xe0,0x03,0xbf,0xd6]
- eret
-; CHECK: encoding: [0xe0,0x03,0x9f,0xd6]
- br x5
-; CHECK: encoding: [0xa0,0x00,0x1f,0xd6]
- blr x9
-; CHECK: encoding: [0x20,0x01,0x3f,0xd6]
- bl L1
-; CHECK: bl L1 ; encoding: [A,A,A,0b100101AA]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_call26
-
-;-----------------------------------------------------------------------------
-; Conditional branch instructions.
-;-----------------------------------------------------------------------------
-
- b L1
-; CHECK: b L1 ; encoding: [A,A,A,0b000101AA]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_branch26
- b.eq L1
-; CHECK: b.eq L1 ; encoding: [0bAAA00000,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.ne L1
-; CHECK: b.ne L1 ; encoding: [0bAAA00001,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.cs L1
-; CHECK: b.cs L1 ; encoding: [0bAAA00010,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.cc L1
-; CHECK: b.cc L1 ; encoding: [0bAAA00011,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.mi L1
-; CHECK: b.mi L1 ; encoding: [0bAAA00100,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.pl L1
-; CHECK: b.pl L1 ; encoding: [0bAAA00101,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.vs L1
-; CHECK: b.vs L1 ; encoding: [0bAAA00110,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.vc L1
-; CHECK: b.vc L1 ; encoding: [0bAAA00111,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.hi L1
-; CHECK: b.hi L1 ; encoding: [0bAAA01000,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.ls L1
-; CHECK: b.ls L1 ; encoding: [0bAAA01001,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.ge L1
-; CHECK: b.ge L1 ; encoding: [0bAAA01010,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.lt L1
-; CHECK: b.lt L1 ; encoding: [0bAAA01011,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.gt L1
-; CHECK: b.gt L1 ; encoding: [0bAAA01100,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.le L1
-; CHECK: b.le L1 ; encoding: [0bAAA01101,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
- b.al L1
-; CHECK: b L1 ; encoding: [0bAAA01110,A,A,0x54]
-; CHECK: fixup A - offset: 0, value: L1, kind: fixup_arm64_pcrel_imm19
-L1:
- b #28
-; CHECK: b #28
- b.lt #28
-; CHECK: b.lt #28
- b.cc #1048572
-; CHECK: b.cc #1048572 ; encoding: [0xe3,0xff,0x7f,0x54]
- b #134217724
-; CHECK: b #134217724 ; encoding: [0xff,0xff,0xff,0x15]
- b #-134217728
-; CHECK: b #-134217728 ; encoding: [0x00,0x00,0x00,0x16]
-
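Those extremes fall straight out of the encodings: B and BL carry a signed 26-bit word offset, giving a reach of -2^27 to 2^27 - 4 bytes, hence #134217724 and #-134217728 above; the conditional branches carry 19 bits (about +/-1 MiB, hence #1048572) and, further below, TBZ/TBNZ carry 14 bits (about +/-32 KiB). A minimal check of the arithmetic:

    b #134217724    ; (2^25 - 1) * 4, the largest encodable forward offset
    b #-134217728   ; -(2^25) * 4, the largest backward offset
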
-;-----------------------------------------------------------------------------
-; Compare-and-branch instructions.
-;-----------------------------------------------------------------------------
-
- cbz w1, foo
-; CHECK: encoding: [0bAAA00001,A,A,0x34]
- cbz x1, foo
-; CHECK: encoding: [0bAAA00001,A,A,0xb4]
- cbnz w2, foo
-; CHECK: encoding: [0bAAA00010,A,A,0x35]
- cbnz x2, foo
-; CHECK: encoding: [0bAAA00010,A,A,0xb5]
- cbz w1, #28
-; CHECK: cbz w1, #28
- cbz w20, #1048572
-; CHECK: cbz w20, #1048572 ; encoding: [0xf4,0xff,0x7f,0x34]
- cbnz x2, #-1048576
-; CHECK: cbnz x2, #-1048576 ; encoding: [0x02,0x00,0x80,0xb5]
-
-
-;-----------------------------------------------------------------------------
-; Bit-test-and-branch instructions.
-;-----------------------------------------------------------------------------
-
- tbz x1, #3, foo
-; CHECK: encoding: [0bAAA00001,A,0b00011AAA,0x36]
- tbnz x1, #63, foo
-; CHECK: encoding: [0bAAA00001,A,0b11111AAA,0xb7]
-
- tbz w1, #3, foo
-; CHECK: encoding: [0bAAA00001,A,0b00011AAA,0x36]
- tbnz w1, #31, foo
-; CHECK: encoding: [0bAAA00001,A,0b11111AAA,0x37]
-
- tbz w1, #3, #28
-; CHECK: tbz w1, #3, #28
- tbz w3, #5, #32764
-; CHECK: tbz w3, #5, #32764 ; encoding: [0xe3,0xff,0x2b,0x36]
- tbnz x3, #8, #-32768
-; CHECK: tbnz w3, #8, #-32768 ; encoding: [0x03,0x00,0x44,0x37]
-
-;-----------------------------------------------------------------------------
-; Exception generation instructions.
-;-----------------------------------------------------------------------------
-
- brk #1
-; CHECK: encoding: [0x20,0x00,0x20,0xd4]
- dcps1 #2
-; CHECK: encoding: [0x41,0x00,0xa0,0xd4]
- dcps2 #3
-; CHECK: encoding: [0x62,0x00,0xa0,0xd4]
- dcps3 #4
-; CHECK: encoding: [0x83,0x00,0xa0,0xd4]
- hlt #5
-; CHECK: encoding: [0xa0,0x00,0x40,0xd4]
- hvc #6
-; CHECK: encoding: [0xc2,0x00,0x00,0xd4]
- smc #7
-; CHECK: encoding: [0xe3,0x00,0x00,0xd4]
- svc #8
-; CHECK: encoding: [0x01,0x01,0x00,0xd4]
-
-; The immediate defaults to zero for DCPSn
- dcps1
- dcps2
- dcps3
-
-; CHECK: dcps1 ; encoding: [0x01,0x00,0xa0,0xd4]
-; CHECK: dcps2 ; encoding: [0x02,0x00,0xa0,0xd4]
-; CHECK: dcps3 ; encoding: [0x03,0x00,0xa0,0xd4]
-
diff --git a/test/MC/ARM64/crypto.s b/test/MC/ARM64/crypto.s
deleted file mode 100644
index d7c4ec3..0000000
--- a/test/MC/ARM64/crypto.s
+++ /dev/null
@@ -1,66 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding -output-asm-variant=1 < %s | FileCheck %s
-
-foo:
- aese.16b v0, v1
- aesd.16b v0, v1
- aesmc.16b v0, v1
- aesimc.16b v0, v1
-
- sha1c.4s q0, s1, v2
- sha1p.4s q0, s1, v2
- sha1m.4s q0, s1, v2
- sha1su0.4s v0, v1, v2
- sha256h.4s q0, q1, v2
- sha256h2.4s q0, q1, v2
- sha256su1.4s v0, v1, v2
- sha1h s0, s1
- sha1su1.4s v0, v1
- sha256su0.4s v0, v1
-
-; CHECK: aese.16b v0, v1 ; encoding: [0x20,0x48,0x28,0x4e]
-; CHECK: aesd.16b v0, v1 ; encoding: [0x20,0x58,0x28,0x4e]
-; CHECK: aesmc.16b v0, v1 ; encoding: [0x20,0x68,0x28,0x4e]
-; CHECK: aesimc.16b v0, v1 ; encoding: [0x20,0x78,0x28,0x4e]
-
-; CHECK: sha1c.4s q0, s1, v2 ; encoding: [0x20,0x00,0x02,0x5e]
-; CHECK: sha1p.4s q0, s1, v2 ; encoding: [0x20,0x10,0x02,0x5e]
-; CHECK: sha1m.4s q0, s1, v2 ; encoding: [0x20,0x20,0x02,0x5e]
-; CHECK: sha1su0.4s v0, v1, v2 ; encoding: [0x20,0x30,0x02,0x5e]
-; CHECK: sha256h.4s q0, q1, v2 ; encoding: [0x20,0x40,0x02,0x5e]
-; CHECK: sha256h2.4s q0, q1, v2 ; encoding: [0x20,0x50,0x02,0x5e]
-; CHECK: sha256su1.4s v0, v1, v2 ; encoding: [0x20,0x60,0x02,0x5e]
-; CHECK: sha1h s0, s1 ; encoding: [0x20,0x08,0x28,0x5e]
-; CHECK: sha1su1.4s v0, v1 ; encoding: [0x20,0x18,0x28,0x5e]
-; CHECK: sha256su0.4s v0, v1 ; encoding: [0x20,0x28,0x28,0x5e]
-
- aese v2.16b, v3.16b
- aesd v5.16b, v7.16b
- aesmc v11.16b, v13.16b
- aesimc v17.16b, v19.16b
-
-; CHECK: aese.16b v2, v3 ; encoding: [0x62,0x48,0x28,0x4e]
-; CHECK: aesd.16b v5, v7 ; encoding: [0xe5,0x58,0x28,0x4e]
-; CHECK: aesmc.16b v11, v13 ; encoding: [0xab,0x69,0x28,0x4e]
-; CHECK: aesimc.16b v17, v19 ; encoding: [0x71,0x7a,0x28,0x4e]
-
- sha1c q23, s29, v3.4s
- sha1p q14, s15, v9.4s
- sha1m q2, s6, v5.4s
- sha1su0 v3.4s, v5.4s, v9.4s
- sha256h q2, q7, v18.4s
- sha256h2 q28, q18, v28.4s
- sha256su1 v4.4s, v5.4s, v9.4s
- sha1h s30, s0
- sha1su1 v10.4s, v21.4s
- sha256su0 v2.4s, v31.4s
-
-; CHECK: sha1c.4s q23, s29, v3 ; encoding: [0xb7,0x03,0x03,0x5e]
-; CHECK: sha1p.4s q14, s15, v9 ; encoding: [0xee,0x11,0x09,0x5e]
-; CHECK: sha1m.4s q2, s6, v5 ; encoding: [0xc2,0x20,0x05,0x5e]
-; CHECK: sha1su0.4s v3, v5, v9 ; encoding: [0xa3,0x30,0x09,0x5e]
-; CHECK: sha256h.4s q2, q7, v18 ; encoding: [0xe2,0x40,0x12,0x5e]
-; CHECK: sha256h2.4s q28, q18, v28 ; encoding: [0x5c,0x52,0x1c,0x5e]
-; CHECK: sha256su1.4s v4, v5, v9 ; encoding: [0xa4,0x60,0x09,0x5e]
-; CHECK: sha1h s30, s0 ; encoding: [0x1e,0x08,0x28,0x5e]
-; CHECK: sha1su1.4s v10, v21 ; encoding: [0xaa,0x1a,0x28,0x5e]
-; CHECK: sha256su0.4s v2, v31 ; encoding: [0xe2,0x2b,0x28,0x5e]
diff --git a/test/MC/ARM64/diags.s b/test/MC/ARM64/diags.s
deleted file mode 100644
index d857fe1..0000000
--- a/test/MC/ARM64/diags.s
+++ /dev/null
@@ -1,242 +0,0 @@
-; RUN: not llvm-mc -triple arm64-apple-darwin -show-encoding < %s 2> %t | FileCheck %s
-; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
-
-foo:
-
-; The first should encode as an expression. The second should be rejected
-; with a "register expected" error.
- ldr x3, (foo + 4)
- ldr x3, [foo + 4]
-; CHECK: ldr x3, foo+4 ; encoding: [0bAAA00011,A,A,0x58]
-; CHECK: ; fixup A - offset: 0, value: foo+4, kind: fixup_arm64_pcrel_imm19
-; CHECK-ERRORS: error: register expected
-
-; The last argument should be flagged as an error. rdar://9576009
- ld4.8b {v0, v1, v2, v3}, [x0], #33
-; CHECK-ERRORS: error: invalid operand for instruction
-; CHECK-ERRORS: ld4.8b {v0, v1, v2, v3}, [x0], #33
-
-
- ldr x0, [x0, #804]
- ldr w0, [x0, #802]
- ldr x0, [x0, #804]!
- ldr w0, [w0, #301]!
- ldr x0, [x0], #804
- ldr w0, [w0], #301
-
- ldp w3, w4, [x5, #11]!
- ldp x3, x4, [x5, #12]!
- ldp q3, q4, [x5, #12]!
- ldp w3, w4, [x5], #11
- ldp x3, x4, [x5], #12
- ldp q3, q4, [x5], #12
-
- ldur x0, [x1, #-257]
-
-; CHECK-ERRORS: error: index must be a multiple of 8 in range [0,32760].
-; CHECK-ERRORS: ldr x0, [x0, #804]
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 4 in range [0,16380].
-; CHECK-ERRORS: ldr w0, [x0, #802]
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be an integer in range [-256,255].
-; CHECK-ERRORS: ldr x0, [x0, #804]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be an integer in range [-256,255].
-; CHECK-ERRORS: ldr w0, [w0, #301]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be an integer in range [-256,255].
-; CHECK-ERRORS: ldr x0, [x0], #804
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be an integer in range [-256,255].
-; CHECK-ERRORS: ldr w0, [w0], #301
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 4 in range [-256,252].
-; CHECK-ERRORS: ldp w3, w4, [x5, #11]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512,504].
-; CHECK-ERRORS: ldp x3, x4, [x5, #12]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 16 in range [-1024,1008].
-; CHECK-ERRORS: ldp q3, q4, [x5, #12]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 4 in range [-256,252].
-; CHECK-ERRORS: ldp w3, w4, [x5], #11
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512,504].
-; CHECK-ERRORS: ldp x3, x4, [x5], #12
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be a multiple of 8 in range [-512,504].
-; CHECK-ERRORS: ldp q3, q4, [x5], #12
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: index must be an integer in range [-256,255].
-; CHECK-ERRORS: ldur x0, [x1, #-257]
-; CHECK-ERRORS: ^
-
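The ranges in these diagnostics follow from the immediate fields: the unsigned-offset LDR/STR forms scale a 12-bit immediate by the access size (8 * 4095 = 32760 for X, 4 * 4095 = 16380 for W), the pre/post-indexed and LDUR forms use an unscaled signed 9-bit immediate ([-256,255]), and LDP scales a signed 7-bit immediate by the register size (4 * [-64,63] = [-256,252] for W, 8 * [-64,63] = [-512,504] for X). A minimal pair at the boundaries:

    ldr x0, [x0, #32760]   ; largest legal scaled offset for a 64-bit load
    ldur x0, [x1, #-256]   ; smallest legal unscaled offset
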
-
-
-; Range checking of shift immediates.
- sqrshrn b4, h9, #10
- rshrn v9.8b, v11.8h, #17
- sqrshrn v7.4h, v8.4s, #39
- uqshrn2 v4.4s, v5.2d, #67
-
-; CHECK-ERRORS: error: immediate must be an integer in range [1,8].
-; CHECK-ERRORS: sqrshrn b4, h9, #10
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: immediate must be an integer in range [1,8].
-; CHECK-ERRORS: rshrn v9.8b, v11.8h, #17
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: immediate must be an integer in range [1,16].
-; CHECK-ERRORS: sqrshrn v7.4h, v8.4s, #39
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: immediate must be an integer in range [1,32].
-; CHECK-ERRORS: uqshrn2 v4.4s, v5.2d, #67
-; CHECK-ERRORS: ^
-
-
- st1.s4 {v14, v15}, [x2], #32
-; CHECK-ERRORS: error: invalid type suffix for instruction
-; CHECK-ERRORS: st1.s4 {v14, v15}, [x2], #32
-; CHECK-ERRORS: ^
-
-
-
-; Load pair instructions where Rt==Rt2 and writeback load/store instructions
-; where Rt==Rn or Rt2==Rn are unpredictable.
- ldp x1, x2, [x2], #16
- ldp x2, x2, [x2], #16
- ldp w1, w2, [x2], #16
- ldp w2, w2, [x2], #16
- ldp x1, x1, [x2]
-
- ldr x2, [x2], #8
- ldr x2, [x2, #8]!
- ldr w2, [x2], #8
- ldr w2, [x2, #8]!
-
- str x2, [x2], #8
- str x2, [x2, #8]!
- str w2, [x2], #8
- str w2, [x2, #8]!
-
-; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
-; CHECK-ERRORS: ldp x1, x2, [x2], #16
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
-; CHECK-ERRORS: ldp x2, x2, [x2], #16
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
-; CHECK-ERRORS: ldp w1, w2, [x2], #16
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDP instruction, writeback base is also a destination
-; CHECK-ERRORS: ldp w2, w2, [x2], #16
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDP instruction, Rt2==Rt
-; CHECK-ERRORS: ldp x1, x1, [x2]
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
-; CHECK-ERRORS: ldr x2, [x2], #8
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
-; CHECK-ERRORS: ldr x2, [x2, #8]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
-; CHECK-ERRORS: ldr w2, [x2], #8
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable LDR instruction, writeback base is also a source
-; CHECK-ERRORS: ldr w2, [x2, #8]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
-; CHECK-ERRORS: str x2, [x2], #8
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
-; CHECK-ERRORS: str x2, [x2, #8]!
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
-; CHECK-ERRORS: str w2, [x2], #8
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: unpredictable STR instruction, writeback base is also a source
-; CHECK-ERRORS: str w2, [x2, #8]!
-; CHECK-ERRORS: ^
-
-; Validity checking for shifted-immediate operands, where the immediate
-; is out of range. rdar://13174476
- add w1, w2, w3, lsr #75
-
-; CHECK-ERRORS: error: immediate value too large for shifter operand
-; CHECK-ERRORS: add w1, w2, w3, lsr #75
-; CHECK-ERRORS: ^
-
-; logical instructions on 32-bit regs with shift > 31 are not legal
-orr w0, w0, w0, lsl #32
-; CHECK-ERRORS: error: shift value out of range
-; CHECK-ERRORS: orr w0, w0, w0, lsl #32
-; CHECK-ERRORS: ^
-eor w0, w0, w0, lsl #32
-; CHECK-ERRORS: error: shift value out of range
-; CHECK-ERRORS: eor w0, w0, w0, lsl #32
-; CHECK-ERRORS: ^
-and w0, w0, w0, lsl #32
-; CHECK-ERRORS: error: shift value out of range
-; CHECK-ERRORS: and w0, w0, w0, lsl #32
-; CHECK-ERRORS: ^
-ands w0, w0, w0, lsl #32
-; CHECK-ERRORS: error: shift value out of range
-; CHECK-ERRORS: ands w0, w0, w0, lsl #32
-; CHECK-ERRORS: ^
-
-; Relocated expressions should not be accepted for 32-bit adds or sub (imm)
-add w3, w5, sym@PAGEOFF
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: add w3, w5, sym@PAGEOFF
-; CHECK-ERRORS: ^
-
-adds w3, w5, sym@PAGEOFF
-adds x9, x12, sym@PAGEOFF
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: adds w3, w5, sym@PAGEOFF
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: adds x9, x12, sym@PAGEOFF
-; CHECK-ERRORS: ^
-
-sub x3, x5, sym@PAGEOFF
-sub w20, w30, sym@PAGEOFF
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: sub x3, x5, sym@PAGEOFF
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: sub w20, w30, sym@PAGEOFF
-; CHECK-ERRORS: ^
-
-subs w9, w10, sym@PAGEOFF
-subs x20, x30, sym@PAGEOFF
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: subs w9, w10, sym@PAGEOFF
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: invalid immediate expression
-; CHECK-ERRORS: subs x20, x30, sym@PAGEOFF
-; CHECK-ERRORS: ^
-
-tbl v0.8b, { v1 }, v0.8b
-tbl v0.16b, { v1.8b, v2.8b, v3.8b }, v0.16b
-tbx v3.16b, { v12.8b, v13.8b, v14.8b }, v6.8b
-tbx v2.8b, { v0 }, v6.8b
-; CHECK-ERRORS: error: invalid operand for instruction
-; CHECK-ERRORS: tbl v0.8b, { v1 }, v0.8b
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: invalid operand for instruction
-; CHECK-ERRORS: tbl v0.16b, { v1.8b, v2.8b, v3.8b }, v0.16b
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: invalid operand for instruction
-; CHECK-ERRORS: tbx v3.16b, { v12.8b, v13.8b, v14.8b }, v6.8b
-; CHECK-ERRORS: ^
-; CHECK-ERRORS: error: invalid operand for instruction
-; CHECK-ERRORS: tbx v2.8b, { v0 }, v6.8b
-; CHECK-ERRORS: ^
-
-b.c #0x4
-; CHECK-ERRORS: error: invalid condition code
-; CHECK-ERRORS: b.c #0x4
-; CHECK-ERRORS: ^
diff --git a/test/MC/ARM64/elf-relocs.s b/test/MC/ARM64/elf-relocs.s
deleted file mode 100644
index 31446ff..0000000
--- a/test/MC/ARM64/elf-relocs.s
+++ /dev/null
@@ -1,249 +0,0 @@
-// RUN: llvm-mc -triple=arm64-linux-gnu -o - < %s | FileCheck %s
-// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj < %s | llvm-objdump -triple=arm64-linux-gnu - -r | FileCheck %s --check-prefix=CHECK-OBJ
-
- add x0, x2, #:lo12:sym
-// CHECK: add x0, x2, :lo12:sym
-// CHECK-OBJ: 0 R_AARCH64_ADD_ABS_LO12_NC sym
-
- add x5, x7, #:dtprel_lo12:sym
-// CHECK: add x5, x7, :dtprel_lo12:sym
-// CHECK-OBJ: 4 R_AARCH64_TLSLD_ADD_DTPREL_LO12 sym
-
- add x9, x12, #:dtprel_lo12_nc:sym
-// CHECK: add x9, x12, :dtprel_lo12_nc:sym
-// CHECK-OBJ: 8 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC sym
-
- add x20, x30, #:tprel_lo12:sym
-// CHECK: add x20, lr, :tprel_lo12:sym
-// CHECK-OBJ: c R_AARCH64_TLSLE_ADD_TPREL_LO12 sym
-
- add x9, x12, #:tprel_lo12_nc:sym
-// CHECK: add x9, x12, :tprel_lo12_nc:sym
-// CHECK-OBJ: 10 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC sym
-
- add x5, x0, #:tlsdesc_lo12:sym
-// CHECK: add x5, x0, :tlsdesc_lo12:sym
-// CHECK-OBJ: 14 R_AARCH64_TLSDESC_ADD_LO12_NC sym
-
- add x0, x2, #:lo12:sym+8
-// CHECK: add x0, x2, :lo12:sym
-// CHECK-OBJ: 18 R_AARCH64_ADD_ABS_LO12_NC sym+8
-
- add x5, x7, #:dtprel_lo12:sym+1
-// CHECK: add x5, x7, :dtprel_lo12:sym+1
-// CHECK-OBJ: 1c R_AARCH64_TLSLD_ADD_DTPREL_LO12 sym+1
-
- add x9, x12, #:dtprel_lo12_nc:sym+2
-// CHECK: add x9, x12, :dtprel_lo12_nc:sym+2
-// CHECK-OBJ: 20 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC sym+2
-
- add x20, x30, #:tprel_lo12:sym+12
-// CHECK: add x20, lr, :tprel_lo12:sym+12
-// CHECK-OBJ: 24 R_AARCH64_TLSLE_ADD_TPREL_LO12 sym+12
-
- add x9, x12, #:tprel_lo12_nc:sym+54
-// CHECK: add x9, x12, :tprel_lo12_nc:sym+54
-// CHECK-OBJ: 28 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC sym+54
-
- add x5, x0, #:tlsdesc_lo12:sym+70
-// CHECK: add x5, x0, :tlsdesc_lo12:sym+70
-// CHECK-OBJ: 2c R_AARCH64_TLSDESC_ADD_LO12_NC sym+70
-
- .hword sym + 4 - .
-// CHECK-OBJ: 30 R_AARCH64_PREL16 sym+4
- .word sym - . + 8
-// CHECK-OBJ: 32 R_AARCH64_PREL32 sym+8
- .xword sym-.
-// CHECK-OBJ: 36 R_AARCH64_PREL64 sym{{$}}
-
- .hword sym
-// CHECK-OBJ: 3e R_AARCH64_ABS16 sym
- .word sym+1
-// CHECK-OBJ: 40 R_AARCH64_ABS32 sym+1
- .xword sym+16
-// CHECK-OBJ: 44 R_AARCH64_ABS64 sym+16
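-// Data directives that subtract '.' are PC-relative and get the PREL
-// relocations above; bare symbol references get the absolute ABS forms.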
-
- adrp x0, sym
-// CHECK: adrp x0, sym
-// CHECK-OBJ: 4c R_AARCH64_ADR_PREL_PG_HI21 sym
-
- adrp x15, :got:sym
-// CHECK: adrp x15, :got:sym
-// CHECK-OBJ: 50 R_AARCH64_ADR_GOT_PAGE sym
-
- adrp x29, :gottprel:sym
-// CHECK: adrp fp, :gottprel:sym
-// CHECK-OBJ: 54 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
-
- adrp x2, :tlsdesc:sym
-// CHECK: adrp x2, :tlsdesc:sym
-// CHECK-OBJ: 58 R_AARCH64_TLSDESC_ADR_PAGE sym
-
- // LLVM cannot resolve this reference at assembly time because the
- // page boundary could fall anywhere after linking, so a relocation
- // is needed.
- adrp x3, trickQuestion
- .global trickQuestion
-trickQuestion:
-// CHECK: adrp x3, trickQuestion
-// CHECK-OBJ: 5c R_AARCH64_ADR_PREL_PG_HI21 trickQuestion
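-// adrp only materializes the 4KiB-page portion of the PC-relative address,
-// so even a symbol defined on the very next line needs the linker to
-// compute the page delta.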
-
- ldrb w2, [x3, #:lo12:sym]
- ldrsb w5, [x7, #:lo12:sym]
- ldrsb x11, [x13, #:lo12:sym]
- ldr b17, [x19, #:lo12:sym]
-// CHECK: ldrb w2, [x3, :lo12:sym]
-// CHECK: ldrsb w5, [x7, :lo12:sym]
-// CHECK: ldrsb x11, [x13, :lo12:sym]
-// CHECK: ldr b17, [x19, :lo12:sym]
-// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST8_ABS_LO12_NC sym
-
- ldrb w23, [x29, #:dtprel_lo12_nc:sym]
- ldrsb w23, [x19, #:dtprel_lo12:sym]
- ldrsb x17, [x13, #:dtprel_lo12_nc:sym]
- ldr b11, [x7, #:dtprel_lo12:sym]
-// CHECK: ldrb w23, [fp, :dtprel_lo12_nc:sym]
-// CHECK: ldrsb w23, [x19, :dtprel_lo12:sym]
-// CHECK: ldrsb x17, [x13, :dtprel_lo12_nc:sym]
-// CHECK: ldr b11, [x7, :dtprel_lo12:sym]
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 sym
-
- ldrb w1, [x2, #:tprel_lo12:sym]
- ldrsb w3, [x4, #:tprel_lo12_nc:sym]
- ldrsb x5, [x6, #:tprel_lo12:sym]
- ldr b7, [x8, #:tprel_lo12_nc:sym]
-// CHECK: ldrb w1, [x2, :tprel_lo12:sym]
-// CHECK: ldrsb w3, [x4, :tprel_lo12_nc:sym]
-// CHECK: ldrsb x5, [x6, :tprel_lo12:sym]
-// CHECK: ldr b7, [x8, :tprel_lo12_nc:sym]
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC sym
-
- ldrh w2, [x3, #:lo12:sym]
- ldrsh w5, [x7, #:lo12:sym]
- ldrsh x11, [x13, #:lo12:sym]
- ldr h17, [x19, #:lo12:sym]
-// CHECK: ldrh w2, [x3, :lo12:sym]
-// CHECK: ldrsh w5, [x7, :lo12:sym]
-// CHECK: ldrsh x11, [x13, :lo12:sym]
-// CHECK: ldr h17, [x19, :lo12:sym]
-// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST16_ABS_LO12_NC sym
-
- ldrh w23, [x29, #:dtprel_lo12_nc:sym]
- ldrsh w23, [x19, #:dtprel_lo12:sym]
- ldrsh x17, [x13, #:dtprel_lo12_nc:sym]
- ldr h11, [x7, #:dtprel_lo12:sym]
-// CHECK: ldrh w23, [fp, :dtprel_lo12_nc:sym]
-// CHECK: ldrsh w23, [x19, :dtprel_lo12:sym]
-// CHECK: ldrsh x17, [x13, :dtprel_lo12_nc:sym]
-// CHECK: ldr h11, [x7, :dtprel_lo12:sym]
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 sym
-
- ldrh w1, [x2, #:tprel_lo12:sym]
- ldrsh w3, [x4, #:tprel_lo12_nc:sym]
- ldrsh x5, [x6, #:tprel_lo12:sym]
- ldr h7, [x8, #:tprel_lo12_nc:sym]
-// CHECK: ldrh w1, [x2, :tprel_lo12:sym]
-// CHECK: ldrsh w3, [x4, :tprel_lo12_nc:sym]
-// CHECK: ldrsh x5, [x6, :tprel_lo12:sym]
-// CHECK: ldr h7, [x8, :tprel_lo12_nc:sym]
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC sym
-
- ldr w1, [x2, #:lo12:sym]
- ldrsw x3, [x4, #:lo12:sym]
- ldr s4, [x5, #:lo12:sym]
-// CHECK: ldr w1, [x2, :lo12:sym]
-// CHECK: ldrsw x3, [x4, :lo12:sym]
-// CHECK: ldr s4, [x5, :lo12:sym]
-// CHECK-OBJ: R_AARCH64_LDST32_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST32_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST32_ABS_LO12_NC sym
-
- ldr w1, [x2, #:dtprel_lo12:sym]
- ldrsw x3, [x4, #:dtprel_lo12_nc:sym]
- ldr s4, [x5, #:dtprel_lo12_nc:sym]
-// CHECK: ldr w1, [x2, :dtprel_lo12:sym]
-// CHECK: ldrsw x3, [x4, :dtprel_lo12_nc:sym]
-// CHECK: ldr s4, [x5, :dtprel_lo12_nc:sym]
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC sym
-
-
- ldr w1, [x2, #:tprel_lo12:sym]
- ldrsw x3, [x4, #:tprel_lo12_nc:sym]
- ldr s4, [x5, #:tprel_lo12_nc:sym]
-// CHECK: ldr w1, [x2, :tprel_lo12:sym]
-// CHECK: ldrsw x3, [x4, :tprel_lo12_nc:sym]
-// CHECK: ldr s4, [x5, :tprel_lo12_nc:sym]
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST32_TPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC sym
-
- ldr x28, [x27, #:lo12:sym]
- ldr d26, [x25, #:lo12:sym]
-// CHECK: ldr x28, [x27, :lo12:sym]
-// CHECK: ldr d26, [x25, :lo12:sym]
-// CHECK-OBJ: R_AARCH64_LDST64_ABS_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LDST64_ABS_LO12_NC sym
-
- ldr x24, [x23, #:got_lo12:sym]
- ldr d22, [x21, #:got_lo12:sym]
-// CHECK: ldr x24, [x23, :got_lo12:sym]
-// CHECK: ldr d22, [x21, :got_lo12:sym]
-// CHECK-OBJ: R_AARCH64_LD64_GOT_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_LD64_GOT_LO12_NC sym
-
- ldr x24, [x23, #:dtprel_lo12_nc:sym]
- ldr d22, [x21, #:dtprel_lo12:sym]
-// CHECK: ldr x24, [x23, :dtprel_lo12_nc:sym]
-// CHECK: ldr d22, [x21, :dtprel_lo12:sym]
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 sym
-
- ldr x24, [x23, #:tprel_lo12:sym]
- ldr d22, [x21, #:tprel_lo12_nc:sym]
-// CHECK: ldr x24, [x23, :tprel_lo12:sym]
-// CHECK: ldr d22, [x21, :tprel_lo12_nc:sym]
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST64_TPREL_LO12 sym
-// CHECK-OBJ: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC sym
-
- ldr x24, [x23, #:gottprel_lo12:sym]
- ldr d22, [x21, #:gottprel_lo12:sym]
-// CHECK: ldr x24, [x23, :gottprel_lo12:sym]
-// CHECK: ldr d22, [x21, :gottprel_lo12:sym]
-// CHECK-OBJ: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC sym
-
- ldr x24, [x23, #:tlsdesc_lo12:sym]
- ldr d22, [x21, #:tlsdesc_lo12:sym]
-// CHECK: ldr x24, [x23, :tlsdesc_lo12:sym]
-// CHECK: ldr d22, [x21, :tlsdesc_lo12:sym]
-// CHECK-OBJ: R_AARCH64_TLSDESC_LD64_LO12_NC sym
-// CHECK-OBJ: R_AARCH64_TLSDESC_LD64_LO12_NC sym
-
- ldr q20, [x19, #:lo12:sym]
-// CHECK: ldr q20, [x19, :lo12:sym]
-// CHECK-OBJ: R_AARCH64_LDST128_ABS_LO12_NC sym
-
-// Since relocated instructions print without a '#', that syntax should
-// certainly be accepted when assembling.
- add x3, x5, :lo12:imm
-// CHECK: add x3, x5, :lo12:imm
diff --git a/test/MC/ARM64/fp-encoding.s b/test/MC/ARM64/fp-encoding.s
deleted file mode 100644
index 25474c1..0000000
--- a/test/MC/ARM64/fp-encoding.s
+++ /dev/null
@@ -1,507 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
-
-foo:
-;-----------------------------------------------------------------------------
-; Floating-point arithmetic
-;-----------------------------------------------------------------------------
-
- fabs s1, s2
- fabs d1, d2
-
-; CHECK: fabs s1, s2 ; encoding: [0x41,0xc0,0x20,0x1e]
-; CHECK: fabs d1, d2 ; encoding: [0x41,0xc0,0x60,0x1e]
-
- fadd s1, s2, s3
- fadd d1, d2, d3
-
-; CHECK: fadd s1, s2, s3 ; encoding: [0x41,0x28,0x23,0x1e]
-; CHECK: fadd d1, d2, d3 ; encoding: [0x41,0x28,0x63,0x1e]
-
- fdiv s1, s2, s3
- fdiv d1, d2, d3
-
-; CHECK: fdiv s1, s2, s3 ; encoding: [0x41,0x18,0x23,0x1e]
-; CHECK: fdiv d1, d2, d3 ; encoding: [0x41,0x18,0x63,0x1e]
-
- fmadd s1, s2, s3, s4
- fmadd d1, d2, d3, d4
-
-; CHECK: fmadd s1, s2, s3, s4 ; encoding: [0x41,0x10,0x03,0x1f]
-; CHECK: fmadd d1, d2, d3, d4 ; encoding: [0x41,0x10,0x43,0x1f]
-
- fmax s1, s2, s3
- fmax d1, d2, d3
- fmaxnm s1, s2, s3
- fmaxnm d1, d2, d3
-
-; CHECK: fmax s1, s2, s3 ; encoding: [0x41,0x48,0x23,0x1e]
-; CHECK: fmax d1, d2, d3 ; encoding: [0x41,0x48,0x63,0x1e]
-; CHECK: fmaxnm s1, s2, s3 ; encoding: [0x41,0x68,0x23,0x1e]
-; CHECK: fmaxnm d1, d2, d3 ; encoding: [0x41,0x68,0x63,0x1e]
-
- fmin s1, s2, s3
- fmin d1, d2, d3
- fminnm s1, s2, s3
- fminnm d1, d2, d3
-
-; CHECK: fmin s1, s2, s3 ; encoding: [0x41,0x58,0x23,0x1e]
-; CHECK: fmin d1, d2, d3 ; encoding: [0x41,0x58,0x63,0x1e]
-; CHECK: fminnm s1, s2, s3 ; encoding: [0x41,0x78,0x23,0x1e]
-; CHECK: fminnm d1, d2, d3 ; encoding: [0x41,0x78,0x63,0x1e]
-
- fmsub s1, s2, s3, s4
- fmsub d1, d2, d3, d4
-
-; CHECK: fmsub s1, s2, s3, s4 ; encoding: [0x41,0x90,0x03,0x1f]
-; CHECK: fmsub d1, d2, d3, d4 ; encoding: [0x41,0x90,0x43,0x1f]
-
- fmul s1, s2, s3
- fmul d1, d2, d3
-
-; CHECK: fmul s1, s2, s3 ; encoding: [0x41,0x08,0x23,0x1e]
-; CHECK: fmul d1, d2, d3 ; encoding: [0x41,0x08,0x63,0x1e]
-
- fneg s1, s2
- fneg d1, d2
-
-; CHECK: fneg s1, s2 ; encoding: [0x41,0x40,0x21,0x1e]
-; CHECK: fneg d1, d2 ; encoding: [0x41,0x40,0x61,0x1e]
-
- fnmadd s1, s2, s3, s4
- fnmadd d1, d2, d3, d4
-
-; CHECK: fnmadd s1, s2, s3, s4 ; encoding: [0x41,0x10,0x23,0x1f]
-; CHECK: fnmadd d1, d2, d3, d4 ; encoding: [0x41,0x10,0x63,0x1f]
-
- fnmsub s1, s2, s3, s4
- fnmsub d1, d2, d3, d4
-
-; CHECK: fnmsub s1, s2, s3, s4 ; encoding: [0x41,0x90,0x23,0x1f]
-; CHECK: fnmsub d1, d2, d3, d4 ; encoding: [0x41,0x90,0x63,0x1f]
-
- fnmul s1, s2, s3
- fnmul d1, d2, d3
-
-; CHECK: fnmul s1, s2, s3 ; encoding: [0x41,0x88,0x23,0x1e]
-; CHECK: fnmul d1, d2, d3 ; encoding: [0x41,0x88,0x63,0x1e]
-
- fsqrt s1, s2
- fsqrt d1, d2
-
-; CHECK: fsqrt s1, s2 ; encoding: [0x41,0xc0,0x21,0x1e]
-; CHECK: fsqrt d1, d2 ; encoding: [0x41,0xc0,0x61,0x1e]
-
- fsub s1, s2, s3
- fsub d1, d2, d3
-
-; CHECK: fsub s1, s2, s3 ; encoding: [0x41,0x38,0x23,0x1e]
-; CHECK: fsub d1, d2, d3 ; encoding: [0x41,0x38,0x63,0x1e]
-
-;-----------------------------------------------------------------------------
-; Floating-point comparison
-;-----------------------------------------------------------------------------
-
- fccmp s1, s2, #0, eq
- fccmp d1, d2, #0, eq
- fccmpe s1, s2, #0, eq
- fccmpe d1, d2, #0, eq
-
-; CHECK: fccmp s1, s2, #0, eq ; encoding: [0x20,0x04,0x22,0x1e]
-; CHECK: fccmp d1, d2, #0, eq ; encoding: [0x20,0x04,0x62,0x1e]
-; CHECK: fccmpe s1, s2, #0, eq ; encoding: [0x30,0x04,0x22,0x1e]
-; CHECK: fccmpe d1, d2, #0, eq ; encoding: [0x30,0x04,0x62,0x1e]
-
- fcmp s1, s2
- fcmp d1, d2
- fcmp s1, #0.0
- fcmp d1, #0.0
- fcmpe s1, s2
- fcmpe d1, d2
- fcmpe s1, #0.0
- fcmpe d1, #0.0
-
-; CHECK: fcmp s1, s2 ; encoding: [0x20,0x20,0x22,0x1e]
-; CHECK: fcmp d1, d2 ; encoding: [0x20,0x20,0x62,0x1e]
-; CHECK: fcmp s1, #0.0 ; encoding: [0x28,0x20,0x20,0x1e]
-; CHECK: fcmp d1, #0.0 ; encoding: [0x28,0x20,0x60,0x1e]
-; CHECK: fcmpe s1, s2 ; encoding: [0x30,0x20,0x22,0x1e]
-; CHECK: fcmpe d1, d2 ; encoding: [0x30,0x20,0x62,0x1e]
-; CHECK: fcmpe s1, #0.0 ; encoding: [0x38,0x20,0x20,0x1e]
-; CHECK: fcmpe d1, #0.0 ; encoding: [0x38,0x20,0x60,0x1e]
-
-;-----------------------------------------------------------------------------
-; Floating-point conditional select
-;-----------------------------------------------------------------------------
-
- fcsel s1, s2, s3, eq
- fcsel d1, d2, d3, eq
-
-; CHECK: fcsel s1, s2, s3, eq ; encoding: [0x41,0x0c,0x23,0x1e]
-; CHECK: fcsel d1, d2, d3, eq ; encoding: [0x41,0x0c,0x63,0x1e]
-
-;-----------------------------------------------------------------------------
-; Floating-point convert
-;-----------------------------------------------------------------------------
-
- fcvt h1, d2
- fcvt s1, d2
- fcvt d1, h2
- fcvt s1, h2
- fcvt d1, s2
- fcvt h1, s2
-
-; CHECK: fcvt h1, d2 ; encoding: [0x41,0xc0,0x63,0x1e]
-; CHECK: fcvt s1, d2 ; encoding: [0x41,0x40,0x62,0x1e]
-; CHECK: fcvt d1, h2 ; encoding: [0x41,0xc0,0xe2,0x1e]
-; CHECK: fcvt s1, h2 ; encoding: [0x41,0x40,0xe2,0x1e]
-; CHECK: fcvt d1, s2 ; encoding: [0x41,0xc0,0x22,0x1e]
-; CHECK: fcvt h1, s2 ; encoding: [0x41,0xc0,0x23,0x1e]
-
- fcvtas w1, d2
- fcvtas w1, d2, #1
- fcvtas x1, d2
- fcvtas x1, d2, #1
- fcvtas w1, s2
- fcvtas w1, s2, #1
- fcvtas x1, s2
- fcvtas x1, s2, #1
-
-; CHECK: fcvtas w1, d2 ; encoding: [0x41,0x00,0x64,0x1e]
-; CHECK: fcvtas w1, d2, #1 ; encoding: [0x41,0xfc,0x44,0x1e]
-; CHECK: fcvtas x1, d2 ; encoding: [0x41,0x00,0x64,0x9e]
-; CHECK: fcvtas x1, d2, #1 ; encoding: [0x41,0xfc,0x44,0x9e]
-; CHECK: fcvtas w1, s2 ; encoding: [0x41,0x00,0x24,0x1e]
-; CHECK: fcvtas w1, s2, #1 ; encoding: [0x41,0xfc,0x04,0x1e]
-; CHECK: fcvtas x1, s2 ; encoding: [0x41,0x00,0x24,0x9e]
-; CHECK: fcvtas x1, s2, #1 ; encoding: [0x41,0xfc,0x04,0x9e]
-
- fcvtau w1, s2
- fcvtau w1, s2, #1
- fcvtau w1, d2
- fcvtau w1, d2, #1
- fcvtau x1, s2
- fcvtau x1, s2, #1
- fcvtau x1, d2
- fcvtau x1, d2, #1
-
-; CHECK: fcvtau w1, s2 ; encoding: [0x41,0x00,0x25,0x1e]
-; CHECK: fcvtau w1, s2, #1 ; encoding: [0x41,0xfc,0x05,0x1e]
-; CHECK: fcvtau w1, d2 ; encoding: [0x41,0x00,0x65,0x1e]
-; CHECK: fcvtau w1, d2, #1 ; encoding: [0x41,0xfc,0x45,0x1e]
-; CHECK: fcvtau x1, s2 ; encoding: [0x41,0x00,0x25,0x9e]
-; CHECK: fcvtau x1, s2, #1 ; encoding: [0x41,0xfc,0x05,0x9e]
-; CHECK: fcvtau x1, d2 ; encoding: [0x41,0x00,0x65,0x9e]
-; CHECK: fcvtau x1, d2, #1 ; encoding: [0x41,0xfc,0x45,0x9e]
-
- fcvtms w1, s2
- fcvtms w1, s2, #1
- fcvtms w1, d2
- fcvtms w1, d2, #1
- fcvtms x1, s2
- fcvtms x1, s2, #1
- fcvtms x1, d2
- fcvtms x1, d2, #1
-
-; CHECK: fcvtms w1, s2 ; encoding: [0x41,0x00,0x30,0x1e]
-; CHECK: fcvtms w1, s2, #1 ; encoding: [0x41,0xfc,0x10,0x1e]
-; CHECK: fcvtms w1, d2 ; encoding: [0x41,0x00,0x70,0x1e]
-; CHECK: fcvtms w1, d2, #1 ; encoding: [0x41,0xfc,0x50,0x1e]
-; CHECK: fcvtms x1, s2 ; encoding: [0x41,0x00,0x30,0x9e]
-; CHECK: fcvtms x1, s2, #1 ; encoding: [0x41,0xfc,0x10,0x9e]
-; CHECK: fcvtms x1, d2 ; encoding: [0x41,0x00,0x70,0x9e]
-; CHECK: fcvtms x1, d2, #1 ; encoding: [0x41,0xfc,0x50,0x9e]
-
- fcvtmu w1, s2
- fcvtmu w1, s2, #1
- fcvtmu w1, d2
- fcvtmu w1, d2, #1
- fcvtmu x1, s2
- fcvtmu x1, s2, #1
- fcvtmu x1, d2
- fcvtmu x1, d2, #1
-
-; CHECK: fcvtmu w1, s2 ; encoding: [0x41,0x00,0x31,0x1e]
-; CHECK: fcvtmu w1, s2, #1 ; encoding: [0x41,0xfc,0x11,0x1e]
-; CHECK: fcvtmu w1, d2 ; encoding: [0x41,0x00,0x71,0x1e]
-; CHECK: fcvtmu w1, d2, #1 ; encoding: [0x41,0xfc,0x51,0x1e]
-; CHECK: fcvtmu x1, s2 ; encoding: [0x41,0x00,0x31,0x9e]
-; CHECK: fcvtmu x1, s2, #1 ; encoding: [0x41,0xfc,0x11,0x9e]
-; CHECK: fcvtmu x1, d2 ; encoding: [0x41,0x00,0x71,0x9e]
-; CHECK: fcvtmu x1, d2, #1 ; encoding: [0x41,0xfc,0x51,0x9e]
-
- fcvtns w1, s2
- fcvtns w1, s2, #1
- fcvtns w1, d2
- fcvtns w1, d2, #1
- fcvtns x1, s2
- fcvtns x1, s2, #1
- fcvtns x1, d2
- fcvtns x1, d2, #1
-
-; CHECK: fcvtns w1, s2 ; encoding: [0x41,0x00,0x20,0x1e]
-; CHECK: fcvtns w1, s2, #1 ; encoding: [0x41,0xfc,0x00,0x1e]
-; CHECK: fcvtns w1, d2 ; encoding: [0x41,0x00,0x60,0x1e]
-; CHECK: fcvtns w1, d2, #1 ; encoding: [0x41,0xfc,0x40,0x1e]
-; CHECK: fcvtns x1, s2 ; encoding: [0x41,0x00,0x20,0x9e]
-; CHECK: fcvtns x1, s2, #1 ; encoding: [0x41,0xfc,0x00,0x9e]
-; CHECK: fcvtns x1, d2 ; encoding: [0x41,0x00,0x60,0x9e]
-; CHECK: fcvtns x1, d2, #1 ; encoding: [0x41,0xfc,0x40,0x9e]
-
- fcvtnu w1, s2
- fcvtnu w1, s2, #1
- fcvtnu w1, d2
- fcvtnu w1, d2, #1
- fcvtnu x1, s2
- fcvtnu x1, s2, #1
- fcvtnu x1, d2
- fcvtnu x1, d2, #1
-
-; CHECK: fcvtnu w1, s2 ; encoding: [0x41,0x00,0x21,0x1e]
-; CHECK: fcvtnu w1, s2, #1 ; encoding: [0x41,0xfc,0x01,0x1e]
-; CHECK: fcvtnu w1, d2 ; encoding: [0x41,0x00,0x61,0x1e]
-; CHECK: fcvtnu w1, d2, #1 ; encoding: [0x41,0xfc,0x41,0x1e]
-; CHECK: fcvtnu x1, s2 ; encoding: [0x41,0x00,0x21,0x9e]
-; CHECK: fcvtnu x1, s2, #1 ; encoding: [0x41,0xfc,0x01,0x9e]
-; CHECK: fcvtnu x1, d2 ; encoding: [0x41,0x00,0x61,0x9e]
-; CHECK: fcvtnu x1, d2, #1 ; encoding: [0x41,0xfc,0x41,0x9e]
-
- fcvtps w1, s2
- fcvtps w1, s2, #1
- fcvtps w1, d2
- fcvtps w1, d2, #1
- fcvtps x1, s2
- fcvtps x1, s2, #1
- fcvtps x1, d2
- fcvtps x1, d2, #1
-
-; CHECK: fcvtps w1, s2 ; encoding: [0x41,0x00,0x28,0x1e]
-; CHECK: fcvtps w1, s2, #1 ; encoding: [0x41,0xfc,0x08,0x1e]
-; CHECK: fcvtps w1, d2 ; encoding: [0x41,0x00,0x68,0x1e]
-; CHECK: fcvtps w1, d2, #1 ; encoding: [0x41,0xfc,0x48,0x1e]
-; CHECK: fcvtps x1, s2 ; encoding: [0x41,0x00,0x28,0x9e]
-; CHECK: fcvtps x1, s2, #1 ; encoding: [0x41,0xfc,0x08,0x9e]
-; CHECK: fcvtps x1, d2 ; encoding: [0x41,0x00,0x68,0x9e]
-; CHECK: fcvtps x1, d2, #1 ; encoding: [0x41,0xfc,0x48,0x9e]
-
- fcvtpu w1, s2
- fcvtpu w1, s2, #1
- fcvtpu w1, d2
- fcvtpu w1, d2, #1
- fcvtpu x1, s2
- fcvtpu x1, s2, #1
- fcvtpu x1, d2
- fcvtpu x1, d2, #1
-
-; CHECK: fcvtpu w1, s2 ; encoding: [0x41,0x00,0x29,0x1e]
-; CHECK: fcvtpu w1, s2, #1 ; encoding: [0x41,0xfc,0x09,0x1e]
-; CHECK: fcvtpu w1, d2 ; encoding: [0x41,0x00,0x69,0x1e]
-; CHECK: fcvtpu w1, d2, #1 ; encoding: [0x41,0xfc,0x49,0x1e]
-; CHECK: fcvtpu x1, s2 ; encoding: [0x41,0x00,0x29,0x9e]
-; CHECK: fcvtpu x1, s2, #1 ; encoding: [0x41,0xfc,0x09,0x9e]
-; CHECK: fcvtpu x1, d2 ; encoding: [0x41,0x00,0x69,0x9e]
-; CHECK: fcvtpu x1, d2, #1 ; encoding: [0x41,0xfc,0x49,0x9e]
-
- fcvtzs w1, s2
- fcvtzs w1, s2, #1
- fcvtzs w1, d2
- fcvtzs w1, d2, #1
- fcvtzs x1, s2
- fcvtzs x1, s2, #1
- fcvtzs x1, d2
- fcvtzs x1, d2, #1
-
-; CHECK: fcvtzs w1, s2 ; encoding: [0x41,0x00,0x38,0x1e]
-; CHECK: fcvtzs w1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x1e]
-; CHECK: fcvtzs w1, d2 ; encoding: [0x41,0x00,0x78,0x1e]
-; CHECK: fcvtzs w1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x1e]
-; CHECK: fcvtzs x1, s2 ; encoding: [0x41,0x00,0x38,0x9e]
-; CHECK: fcvtzs x1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x9e]
-; CHECK: fcvtzs x1, d2 ; encoding: [0x41,0x00,0x78,0x9e]
-; CHECK: fcvtzs x1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x9e]
-
- fcvtzu w1, s2
- fcvtzu w1, s2, #1
- fcvtzu w1, d2
- fcvtzu w1, d2, #1
- fcvtzu x1, s2
- fcvtzu x1, s2, #1
- fcvtzu x1, d2
- fcvtzu x1, d2, #1
-
-; CHECK: fcvtzu w1, s2 ; encoding: [0x41,0x00,0x39,0x1e]
-; CHECK: fcvtzu w1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x1e]
-; CHECK: fcvtzu w1, d2 ; encoding: [0x41,0x00,0x79,0x1e]
-; CHECK: fcvtzu w1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x1e]
-; CHECK: fcvtzu x1, s2 ; encoding: [0x41,0x00,0x39,0x9e]
-; CHECK: fcvtzu x1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x9e]
-; CHECK: fcvtzu x1, d2 ; encoding: [0x41,0x00,0x79,0x9e]
-; CHECK: fcvtzu x1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x9e]
-
- scvtf s1, w2
- scvtf s1, w2, #1
- scvtf d1, w2
- scvtf d1, w2, #1
- scvtf s1, x2
- scvtf s1, x2, #1
- scvtf d1, x2
- scvtf d1, x2, #1
-
-; CHECK: scvtf s1, w2 ; encoding: [0x41,0x00,0x22,0x1e]
-; CHECK: scvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x02,0x1e]
-; CHECK: scvtf d1, w2 ; encoding: [0x41,0x00,0x62,0x1e]
-; CHECK: scvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x42,0x1e]
-; CHECK: scvtf s1, x2 ; encoding: [0x41,0x00,0x22,0x9e]
-; CHECK: scvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x02,0x9e]
-; CHECK: scvtf d1, x2 ; encoding: [0x41,0x00,0x62,0x9e]
-; CHECK: scvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x42,0x9e]
-
- ucvtf s1, w2
- ucvtf s1, w2, #1
- ucvtf d1, w2
- ucvtf d1, w2, #1
- ucvtf s1, x2
- ucvtf s1, x2, #1
- ucvtf d1, x2
- ucvtf d1, x2, #1
-
-; CHECK: ucvtf s1, w2 ; encoding: [0x41,0x00,0x23,0x1e]
-; CHECK: ucvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x03,0x1e]
-; CHECK: ucvtf d1, w2 ; encoding: [0x41,0x00,0x63,0x1e]
-; CHECK: ucvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x43,0x1e]
-; CHECK: ucvtf s1, x2 ; encoding: [0x41,0x00,0x23,0x9e]
-; CHECK: ucvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x03,0x9e]
-; CHECK: ucvtf d1, x2 ; encoding: [0x41,0x00,0x63,0x9e]
-; CHECK: ucvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x43,0x9e]
-
-;-----------------------------------------------------------------------------
-; Floating-point move
-;-----------------------------------------------------------------------------
-
- fmov s1, w2
- fmov w1, s2
- fmov d1, x2
- fmov x1, d2
-
-; CHECK: fmov s1, w2 ; encoding: [0x41,0x00,0x27,0x1e]
-; CHECK: fmov w1, s2 ; encoding: [0x41,0x00,0x26,0x1e]
-; CHECK: fmov d1, x2 ; encoding: [0x41,0x00,0x67,0x9e]
-; CHECK: fmov x1, d2 ; encoding: [0x41,0x00,0x66,0x9e]
-
- fmov s1, #0.125
- fmov s1, #0x40
- fmov d1, #0.125
- fmov d1, #0x40
- fmov d1, #-4.843750e-01
- fmov d1, #4.843750e-01
- fmov d3, #3
- fmov s2, #0.0
- fmov d2, #0.0
-
-; CHECK: fmov s1, #1.250000e-01 ; encoding: [0x01,0x10,0x28,0x1e]
-; CHECK: fmov s1, #1.250000e-01 ; encoding: [0x01,0x10,0x28,0x1e]
-; CHECK: fmov d1, #1.250000e-01 ; encoding: [0x01,0x10,0x68,0x1e]
-; CHECK: fmov d1, #1.250000e-01 ; encoding: [0x01,0x10,0x68,0x1e]
-; CHECK: fmov d1, #-4.843750e-01 ; encoding: [0x01,0xf0,0x7b,0x1e]
-; CHECK: fmov d1, #4.843750e-01 ; encoding: [0x01,0xf0,0x6b,0x1e]
-; CHECK: fmov d3, #3.000000e+00 ; encoding: [0x03,0x10,0x61,0x1e]
-; CHECK: fmov s2, wzr ; encoding: [0xe2,0x03,0x27,0x1e]
-; CHECK: fmov d2, xzr ; encoding: [0xe2,0x03,0x67,0x9e]
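-; Note: a hex immediate supplies the raw 8-bit FP encoding (0x40 encodes
-; 0.125, matching the #0.125 bytes above), and #0.0 is not encodable as an
-; 8-bit immediate, so it is materialized as a move from wzr/xzr.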
-
- fmov s1, s2
- fmov d1, d2
-
-; CHECK: fmov s1, s2 ; encoding: [0x41,0x40,0x20,0x1e]
-; CHECK: fmov d1, d2 ; encoding: [0x41,0x40,0x60,0x1e]
-
-
- fmov x2, v5.d[1]
- fmov.d x9, v7[1]
- fmov v1.d[1], x1
- fmov.d v8[1], x6
-
-; CHECK: fmov.d x2, v5[1] ; encoding: [0xa2,0x00,0xae,0x9e]
-; CHECK: fmov.d x9, v7[1] ; encoding: [0xe9,0x00,0xae,0x9e]
-; CHECK: fmov.d v1[1], x1 ; encoding: [0x21,0x00,0xaf,0x9e]
-; CHECK: fmov.d v8[1], x6 ; encoding: [0xc8,0x00,0xaf,0x9e]
-
-
-;-----------------------------------------------------------------------------
-; Floating-point round to integral
-;-----------------------------------------------------------------------------
-
- frinta s1, s2
- frinta d1, d2
-
-; CHECK: frinta s1, s2 ; encoding: [0x41,0x40,0x26,0x1e]
-; CHECK: frinta d1, d2 ; encoding: [0x41,0x40,0x66,0x1e]
-
- frinti s1, s2
- frinti d1, d2
-
-; CHECK: frinti s1, s2 ; encoding: [0x41,0xc0,0x27,0x1e]
-; CHECK: frinti d1, d2 ; encoding: [0x41,0xc0,0x67,0x1e]
-
- frintm s1, s2
- frintm d1, d2
-
-; CHECK: frintm s1, s2 ; encoding: [0x41,0x40,0x25,0x1e]
-; CHECK: frintm d1, d2 ; encoding: [0x41,0x40,0x65,0x1e]
-
- frintn s1, s2
- frintn d1, d2
-
-; CHECK: frintn s1, s2 ; encoding: [0x41,0x40,0x24,0x1e]
-; CHECK: frintn d1, d2 ; encoding: [0x41,0x40,0x64,0x1e]
-
- frintp s1, s2
- frintp d1, d2
-
-; CHECK: frintp s1, s2 ; encoding: [0x41,0xc0,0x24,0x1e]
-; CHECK: frintp d1, d2 ; encoding: [0x41,0xc0,0x64,0x1e]
-
- frintx s1, s2
- frintx d1, d2
-
-; CHECK: frintx s1, s2 ; encoding: [0x41,0x40,0x27,0x1e]
-; CHECK: frintx d1, d2 ; encoding: [0x41,0x40,0x67,0x1e]
-
- frintz s1, s2
- frintz d1, d2
-
-; CHECK: frintz s1, s2 ; encoding: [0x41,0xc0,0x25,0x1e]
-; CHECK: frintz d1, d2 ; encoding: [0x41,0xc0,0x65,0x1e]
-
-;-----------------------------------------------------------------------------
-; Scalar integer compare
-;-----------------------------------------------------------------------------
-
- cmhs d0, d0, d0
- cmtst d0, d0, d0
-
-; CHECK: cmhs d0, d0, d0 ; encoding: [0x00,0x3c,0xe0,0x7e]
-; CHECK: cmtst d0, d0, d0 ; encoding: [0x00,0x8c,0xe0,0x5e]
-
-
-
-;-----------------------------------------------------------------------------
-; Floating-point extract and narrow
-;-----------------------------------------------------------------------------
- sqxtn b4, h2
- sqxtn h2, s3
- sqxtn s9, d2
-
-; CHECK: sqxtn b4, h2 ; encoding: [0x44,0x48,0x21,0x5e]
-; CHECK: sqxtn h2, s3 ; encoding: [0x62,0x48,0x61,0x5e]
-; CHECK: sqxtn s9, d2 ; encoding: [0x49,0x48,0xa1,0x5e]
-
- sqxtun b4, h2
- sqxtun h2, s3
- sqxtun s9, d2
-
-; CHECK: sqxtun b4, h2 ; encoding: [0x44,0x28,0x21,0x7e]
-; CHECK: sqxtun h2, s3 ; encoding: [0x62,0x28,0x61,0x7e]
-; CHECK: sqxtun s9, d2 ; encoding: [0x49,0x28,0xa1,0x7e]
-
- uqxtn b4, h2
- uqxtn h2, s3
- uqxtn s9, d2
-
-; CHECK: uqxtn b4, h2 ; encoding: [0x44,0x48,0x21,0x7e]
-; CHECK: uqxtn h2, s3 ; encoding: [0x62,0x48,0x61,0x7e]
-; CHECK: uqxtn s9, d2 ; encoding: [0x49,0x48,0xa1,0x7e]
diff --git a/test/MC/ARM64/large-relocs.s b/test/MC/ARM64/large-relocs.s
deleted file mode 100644
index 348ceb6..0000000
--- a/test/MC/ARM64/large-relocs.s
+++ /dev/null
@@ -1,38 +0,0 @@
-// RUN: llvm-mc -triple=arm64-linux-gnu -show-encoding -o - %s | FileCheck %s
-// RUN: llvm-mc -triple=arm64-linux-gnu -show-encoding -filetype=obj -o - %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-OBJ %s
-
- movz x2, #:abs_g0:sym
- movk w3, #:abs_g0_nc:sym
-// CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw
-// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_arm64_movw
-
-// CHECK-OBJ: 0 R_AARCH64_MOVW_UABS_G0 sym
-// CHECK-OBJ: 4 R_AARCH64_MOVW_UABS_G0_NC sym
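-// Each :abs_gN: modifier selects the 16-bit chunk at bit 16*N of the
-// symbol's absolute address; the _nc variants skip the overflow check,
-// hence the distinct relocation types.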
-
- movz x4, #:abs_g1:sym
- movk w5, #:abs_g1_nc:sym
-// CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw
-// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0bAAA00101,A,0b101AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_arm64_movw
-
-// CHECK-OBJ: 8 R_AARCH64_MOVW_UABS_G1 sym
-// CHECK-OBJ: c R_AARCH64_MOVW_UABS_G1_NC sym
-
- movz x6, #:abs_g2:sym
- movk x7, #:abs_g2_nc:sym
-// CHECK: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw
-// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0bAAA00111,A,0b110AAAAA,0xf2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_arm64_movw
-
-// CHECK-OBJ: 10 R_AARCH64_MOVW_UABS_G2 sym
-// CHECK-OBJ: 14 R_AARCH64_MOVW_UABS_G2_NC sym
-
- movz x8, #:abs_g3:sym
-// CHECK: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw
-
-// CHECK-OBJ: 18 R_AARCH64_MOVW_UABS_G3 sym
diff --git a/test/MC/ARM64/lit.local.cfg b/test/MC/ARM64/lit.local.cfg
deleted file mode 100644
index 49447af..0000000
--- a/test/MC/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp', '.s']
-
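-# If the ARM64 backend is not among the targets being built, mark every
-# test in this directory as unsupported rather than letting it fail.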
-targets = set(config.root.targets_to_build.split())
-if 'ARM64' not in targets:
- config.unsupported = True
-
diff --git a/test/MC/ARM64/memory.s b/test/MC/ARM64/memory.s
deleted file mode 100644
index 0e8f1d5..0000000
--- a/test/MC/ARM64/memory.s
+++ /dev/null
@@ -1,634 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
-
-foo:
-;-----------------------------------------------------------------------------
-; Indexed loads
-;-----------------------------------------------------------------------------
-
- ldr w5, [x4, #20]
- ldr x4, [x3]
- ldr x2, [sp, #32]
- ldr b5, [sp, #1]
- ldr h6, [sp, #2]
- ldr s7, [sp, #4]
- ldr d8, [sp, #8]
- ldr q9, [sp, #16]
- ldrb w4, [x3]
- ldrb w5, [x4, #20]
- ldrb w2, [x3, _foo@pageoff]
- ldrb w3, [x2, "+[Test method].var"@PAGEOFF]
- ldrsb w9, [x3]
- ldrsb x2, [sp, #128]
- ldrh w2, [sp, #32]
- ldrsh w3, [sp, #32]
- ldrsh x5, [x9, #24]
- ldrsw x9, [sp, #512]
-
- prfm #5, [sp, #32]
- prfm #31, [sp, #32]
- prfm pldl1keep, [x2]
- prfm pldl1strm, [x2]
- prfm pldl2keep, [x2]
- prfm pldl2strm, [x2]
- prfm pldl3keep, [x2]
- prfm pldl3strm, [x2]
- prfm pstl1keep, [x2]
- prfm pstl1strm, [x2]
- prfm pstl2keep, [x2]
- prfm pstl2strm, [x2]
- prfm pstl3keep, [x2]
- prfm pstl3strm, [x2]
- prfm pstl3strm, [x4, x5, lsl #3]
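-; Note: #5 is the encoding of pldl3strm and round-trips to the named form,
-; while #31 names no defined prefetch op and prints numerically (see the
-; checks below).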
-
-; CHECK: ldr w5, [x4, #20] ; encoding: [0x85,0x14,0x40,0xb9]
-; CHECK: ldr x4, [x3] ; encoding: [0x64,0x00,0x40,0xf9]
-; CHECK: ldr x2, [sp, #32] ; encoding: [0xe2,0x13,0x40,0xf9]
-; CHECK: ldr b5, [sp, #1] ; encoding: [0xe5,0x07,0x40,0x3d]
-; CHECK: ldr h6, [sp, #2] ; encoding: [0xe6,0x07,0x40,0x7d]
-; CHECK: ldr s7, [sp, #4] ; encoding: [0xe7,0x07,0x40,0xbd]
-; CHECK: ldr d8, [sp, #8] ; encoding: [0xe8,0x07,0x40,0xfd]
-; CHECK: ldr q9, [sp, #16] ; encoding: [0xe9,0x07,0xc0,0x3d]
-; CHECK: ldrb w4, [x3] ; encoding: [0x64,0x00,0x40,0x39]
-; CHECK: ldrb w5, [x4, #20] ; encoding: [0x85,0x50,0x40,0x39]
-; CHECK: ldrb w2, [x3, _foo@PAGEOFF] ; encoding: [0x62,0bAAAAAA00,0b01AAAAAA,0x39]
-; CHECK: ldrb w3, [x2, "+[Test method].var"@PAGEOFF] ; encoding: [0x43,0bAAAAAA00,0b01AAAAAA,0x39]
-; CHECK: ldrsb w9, [x3] ; encoding: [0x69,0x00,0xc0,0x39]
-; CHECK: ldrsb x2, [sp, #128] ; encoding: [0xe2,0x03,0x82,0x39]
-; CHECK: ldrh w2, [sp, #32] ; encoding: [0xe2,0x43,0x40,0x79]
-; CHECK: ldrsh w3, [sp, #32] ; encoding: [0xe3,0x43,0xc0,0x79]
-; CHECK: ldrsh x5, [x9, #24] ; encoding: [0x25,0x31,0x80,0x79]
-; CHECK: ldrsw x9, [sp, #512] ; encoding: [0xe9,0x03,0x82,0xb9]
-; CHECK: prfm pldl3strm, [sp, #32] ; encoding: [0xe5,0x13,0x80,0xf9]
-; CHECK: prfm #31, [sp, #32] ; encoding: [0xff,0x13,0x80,0xf9]
-; CHECK: prfm pldl1keep, [x2] ; encoding: [0x40,0x00,0x80,0xf9]
-; CHECK: prfm pldl1strm, [x2] ; encoding: [0x41,0x00,0x80,0xf9]
-; CHECK: prfm pldl2keep, [x2] ; encoding: [0x42,0x00,0x80,0xf9]
-; CHECK: prfm pldl2strm, [x2] ; encoding: [0x43,0x00,0x80,0xf9]
-; CHECK: prfm pldl3keep, [x2] ; encoding: [0x44,0x00,0x80,0xf9]
-; CHECK: prfm pldl3strm, [x2] ; encoding: [0x45,0x00,0x80,0xf9]
-; CHECK: prfm pstl1keep, [x2] ; encoding: [0x50,0x00,0x80,0xf9]
-; CHECK: prfm pstl1strm, [x2] ; encoding: [0x51,0x00,0x80,0xf9]
-; CHECK: prfm pstl2keep, [x2] ; encoding: [0x52,0x00,0x80,0xf9]
-; CHECK: prfm pstl2strm, [x2] ; encoding: [0x53,0x00,0x80,0xf9]
-; CHECK: prfm pstl3keep, [x2] ; encoding: [0x54,0x00,0x80,0xf9]
-; CHECK: prfm pstl3strm, [x2] ; encoding: [0x55,0x00,0x80,0xf9]
-; CHECK: prfm pstl3strm, [x4, x5, lsl #3] ; encoding: [0x95,0x78,0xa5,0xf8]
-
-;-----------------------------------------------------------------------------
-; Indexed stores
-;-----------------------------------------------------------------------------
-
- str x4, [x3]
- str x2, [sp, #32]
- str w5, [x4, #20]
- str b5, [sp, #1]
- str h6, [sp, #2]
- str s7, [sp, #4]
- str d8, [sp, #8]
- str q9, [sp, #16]
- strb w4, [x3]
- strb w5, [x4, #20]
- strh w2, [sp, #32]
-
-; CHECK: str x4, [x3] ; encoding: [0x64,0x00,0x00,0xf9]
-; CHECK: str x2, [sp, #32] ; encoding: [0xe2,0x13,0x00,0xf9]
-; CHECK: str w5, [x4, #20] ; encoding: [0x85,0x14,0x00,0xb9]
-; CHECK: str b5, [sp, #1] ; encoding: [0xe5,0x07,0x00,0x3d]
-; CHECK: str h6, [sp, #2] ; encoding: [0xe6,0x07,0x00,0x7d]
-; CHECK: str s7, [sp, #4] ; encoding: [0xe7,0x07,0x00,0xbd]
-; CHECK: str d8, [sp, #8] ; encoding: [0xe8,0x07,0x00,0xfd]
-; CHECK: str q9, [sp, #16] ; encoding: [0xe9,0x07,0x80,0x3d]
-; CHECK: strb w4, [x3] ; encoding: [0x64,0x00,0x00,0x39]
-; CHECK: strb w5, [x4, #20] ; encoding: [0x85,0x50,0x00,0x39]
-; CHECK: strh w2, [sp, #32] ; encoding: [0xe2,0x43,0x00,0x79]
-
-;-----------------------------------------------------------------------------
-; Unscaled immediate loads and stores
-;-----------------------------------------------------------------------------
-
- ldur w2, [x3]
- ldur w2, [sp, #24]
- ldur x2, [x3]
- ldur x2, [sp, #24]
- ldur b5, [sp, #1]
- ldur h6, [sp, #2]
- ldur s7, [sp, #4]
- ldur d8, [sp, #8]
- ldur q9, [sp, #16]
- ldursb w9, [x3]
- ldursb x2, [sp, #128]
- ldursh w3, [sp, #32]
- ldursh x5, [x9, #24]
- ldursw x9, [sp, #-128]
-
-; CHECK: ldur w2, [x3] ; encoding: [0x62,0x00,0x40,0xb8]
-; CHECK: ldur w2, [sp, #24] ; encoding: [0xe2,0x83,0x41,0xb8]
-; CHECK: ldur x2, [x3] ; encoding: [0x62,0x00,0x40,0xf8]
-; CHECK: ldur x2, [sp, #24] ; encoding: [0xe2,0x83,0x41,0xf8]
-; CHECK: ldur b5, [sp, #1] ; encoding: [0xe5,0x13,0x40,0x3c]
-; CHECK: ldur h6, [sp, #2] ; encoding: [0xe6,0x23,0x40,0x7c]
-; CHECK: ldur s7, [sp, #4] ; encoding: [0xe7,0x43,0x40,0xbc]
-; CHECK: ldur d8, [sp, #8] ; encoding: [0xe8,0x83,0x40,0xfc]
-; CHECK: ldur q9, [sp, #16] ; encoding: [0xe9,0x03,0xc1,0x3c]
-; CHECK: ldursb w9, [x3] ; encoding: [0x69,0x00,0xc0,0x38]
-; CHECK: ldursb x2, [sp, #128] ; encoding: [0xe2,0x03,0x88,0x38]
-; CHECK: ldursh w3, [sp, #32] ; encoding: [0xe3,0x03,0xc2,0x78]
-; CHECK: ldursh x5, [x9, #24] ; encoding: [0x25,0x81,0x81,0x78]
-; CHECK: ldursw x9, [sp, #-128] ; encoding: [0xe9,0x03,0x98,0xb8]
-
- stur w4, [x3]
- stur w2, [sp, #32]
- stur x4, [x3]
- stur x2, [sp, #32]
- stur w5, [x4, #20]
- stur b5, [sp, #1]
- stur h6, [sp, #2]
- stur s7, [sp, #4]
- stur d8, [sp, #8]
- stur q9, [sp, #16]
- sturb w4, [x3]
- sturb w5, [x4, #20]
- sturh w2, [sp, #32]
- prfum #5, [sp, #32]
-
-; CHECK: stur w4, [x3] ; encoding: [0x64,0x00,0x00,0xb8]
-; CHECK: stur w2, [sp, #32] ; encoding: [0xe2,0x03,0x02,0xb8]
-; CHECK: stur x4, [x3] ; encoding: [0x64,0x00,0x00,0xf8]
-; CHECK: stur x2, [sp, #32] ; encoding: [0xe2,0x03,0x02,0xf8]
-; CHECK: stur w5, [x4, #20] ; encoding: [0x85,0x40,0x01,0xb8]
-; CHECK: stur b5, [sp, #1] ; encoding: [0xe5,0x13,0x00,0x3c]
-; CHECK: stur h6, [sp, #2] ; encoding: [0xe6,0x23,0x00,0x7c]
-; CHECK: stur s7, [sp, #4] ; encoding: [0xe7,0x43,0x00,0xbc]
-; CHECK: stur d8, [sp, #8] ; encoding: [0xe8,0x83,0x00,0xfc]
-; CHECK: stur q9, [sp, #16] ; encoding: [0xe9,0x03,0x81,0x3c]
-; CHECK: sturb w4, [x3] ; encoding: [0x64,0x00,0x00,0x38]
-; CHECK: sturb w5, [x4, #20] ; encoding: [0x85,0x40,0x01,0x38]
-; CHECK: sturh w2, [sp, #32] ; encoding: [0xe2,0x03,0x02,0x78]
-; CHECK: prfum pldl3strm, [sp, #32] ; encoding: [0xe5,0x03,0x82,0xf8]
-
-;-----------------------------------------------------------------------------
-; Unprivileged loads and stores
-;-----------------------------------------------------------------------------
-
- ldtr w3, [x4, #16]
- ldtr x3, [x4, #16]
- ldtrb w3, [x4, #16]
- ldtrsb w9, [x3]
- ldtrsb x2, [sp, #128]
- ldtrh w3, [x4, #16]
- ldtrsh w3, [sp, #32]
- ldtrsh x5, [x9, #24]
- ldtrsw x9, [sp, #-128]
-
-; CHECK: ldtr w3, [x4, #16] ; encoding: [0x83,0x08,0x41,0xb8]
-; CHECK: ldtr x3, [x4, #16] ; encoding: [0x83,0x08,0x41,0xf8]
-; CHECK: ldtrb w3, [x4, #16] ; encoding: [0x83,0x08,0x41,0x38]
-; CHECK: ldtrsb w9, [x3] ; encoding: [0x69,0x08,0xc0,0x38]
-; CHECK: ldtrsb x2, [sp, #128] ; encoding: [0xe2,0x0b,0x88,0x38]
-; CHECK: ldtrh w3, [x4, #16] ; encoding: [0x83,0x08,0x41,0x78]
-; CHECK: ldtrsh w3, [sp, #32] ; encoding: [0xe3,0x0b,0xc2,0x78]
-; CHECK: ldtrsh x5, [x9, #24] ; encoding: [0x25,0x89,0x81,0x78]
-; CHECK: ldtrsw x9, [sp, #-128] ; encoding: [0xe9,0x0b,0x98,0xb8]
-
- sttr w5, [x4, #20]
- sttr x4, [x3]
- sttr x2, [sp, #32]
- sttrb w4, [x3]
- sttrb w5, [x4, #20]
- sttrh w2, [sp, #32]
-
-; CHECK: sttr w5, [x4, #20] ; encoding: [0x85,0x48,0x01,0xb8]
-; CHECK: sttr x4, [x3] ; encoding: [0x64,0x08,0x00,0xf8]
-; CHECK: sttr x2, [sp, #32] ; encoding: [0xe2,0x0b,0x02,0xf8]
-; CHECK: sttrb w4, [x3] ; encoding: [0x64,0x08,0x00,0x38]
-; CHECK: sttrb w5, [x4, #20] ; encoding: [0x85,0x48,0x01,0x38]
-; CHECK: sttrh w2, [sp, #32] ; encoding: [0xe2,0x0b,0x02,0x78]
-
-;-----------------------------------------------------------------------------
-; Pre-indexed loads and stores
-;-----------------------------------------------------------------------------
-
- ldr fp, [x7, #8]!
- ldr lr, [x7, #8]!
- ldr b5, [x0, #1]!
- ldr h6, [x0, #2]!
- ldr s7, [x0, #4]!
- ldr d8, [x0, #8]!
- ldr q9, [x0, #16]!
-
- str lr, [x7, #-8]!
- str fp, [x7, #-8]!
- str b5, [x0, #-1]!
- str h6, [x0, #-2]!
- str s7, [x0, #-4]!
- str d8, [x0, #-8]!
- str q9, [x0, #-16]!
-
-; CHECK: ldr fp, [x7, #8]! ; encoding: [0xfd,0x8c,0x40,0xf8]
-; CHECK: ldr lr, [x7, #8]! ; encoding: [0xfe,0x8c,0x40,0xf8]
-; CHECK: ldr b5, [x0, #1]! ; encoding: [0x05,0x1c,0x40,0x3c]
-; CHECK: ldr h6, [x0, #2]! ; encoding: [0x06,0x2c,0x40,0x7c]
-; CHECK: ldr s7, [x0, #4]! ; encoding: [0x07,0x4c,0x40,0xbc]
-; CHECK: ldr d8, [x0, #8]! ; encoding: [0x08,0x8c,0x40,0xfc]
-; CHECK: ldr q9, [x0, #16]! ; encoding: [0x09,0x0c,0xc1,0x3c]
-
-; CHECK: str lr, [x7, #-8]! ; encoding: [0xfe,0x8c,0x1f,0xf8]
-; CHECK: str fp, [x7, #-8]! ; encoding: [0xfd,0x8c,0x1f,0xf8]
-; CHECK: str b5, [x0, #-1]! ; encoding: [0x05,0xfc,0x1f,0x3c]
-; CHECK: str h6, [x0, #-2]! ; encoding: [0x06,0xec,0x1f,0x7c]
-; CHECK: str s7, [x0, #-4]! ; encoding: [0x07,0xcc,0x1f,0xbc]
-; CHECK: str d8, [x0, #-8]! ; encoding: [0x08,0x8c,0x1f,0xfc]
-; CHECK: str q9, [x0, #-16]! ; encoding: [0x09,0x0c,0x9f,0x3c]
-
-;-----------------------------------------------------------------------------
-; post-indexed loads and stores
-;-----------------------------------------------------------------------------
- str lr, [x7], #-8
- str fp, [x7], #-8
- str b5, [x0], #-1
- str h6, [x0], #-2
- str s7, [x0], #-4
- str d8, [x0], #-8
- str q9, [x0], #-16
-
- ldr fp, [x7], #8
- ldr lr, [x7], #8
- ldr b5, [x0], #1
- ldr h6, [x0], #2
- ldr s7, [x0], #4
- ldr d8, [x0], #8
- ldr q9, [x0], #16
-
-; CHECK: str lr, [x7], #-8 ; encoding: [0xfe,0x84,0x1f,0xf8]
-; CHECK: str fp, [x7], #-8 ; encoding: [0xfd,0x84,0x1f,0xf8]
-; CHECK: str b5, [x0], #-1 ; encoding: [0x05,0xf4,0x1f,0x3c]
-; CHECK: str h6, [x0], #-2 ; encoding: [0x06,0xe4,0x1f,0x7c]
-; CHECK: str s7, [x0], #-4 ; encoding: [0x07,0xc4,0x1f,0xbc]
-; CHECK: str d8, [x0], #-8 ; encoding: [0x08,0x84,0x1f,0xfc]
-; CHECK: str q9, [x0], #-16 ; encoding: [0x09,0x04,0x9f,0x3c]
-
-; CHECK: ldr fp, [x7], #8 ; encoding: [0xfd,0x84,0x40,0xf8]
-; CHECK: ldr lr, [x7], #8 ; encoding: [0xfe,0x84,0x40,0xf8]
-; CHECK: ldr b5, [x0], #1 ; encoding: [0x05,0x14,0x40,0x3c]
-; CHECK: ldr h6, [x0], #2 ; encoding: [0x06,0x24,0x40,0x7c]
-; CHECK: ldr s7, [x0], #4 ; encoding: [0x07,0x44,0x40,0xbc]
-; CHECK: ldr d8, [x0], #8 ; encoding: [0x08,0x84,0x40,0xfc]
-; CHECK: ldr q9, [x0], #16 ; encoding: [0x09,0x04,0xc1,0x3c]
-
-;-----------------------------------------------------------------------------
-; Load/Store pair (indexed, offset)
-;-----------------------------------------------------------------------------
-
- ldp w3, w2, [x15, #16]
- ldp x4, x9, [sp, #-16]
- ldpsw x2, x3, [x14, #16]
- ldpsw x2, x3, [sp, #-16]
- ldp s10, s1, [x2, #64]
- ldp d10, d1, [x2]
- ldp q2, q3, [x0, #32]
-
-; CHECK: ldp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x42,0x29]
-; CHECK: ldp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x7f,0xa9]
-; CHECK: ldpsw x2, x3, [x14, #16] ; encoding: [0xc2,0x0d,0x42,0x69]
-; CHECK: ldpsw x2, x3, [sp, #-16] ; encoding: [0xe2,0x0f,0x7e,0x69]
-; CHECK: ldp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x48,0x2d]
-; CHECK: ldp d10, d1, [x2] ; encoding: [0x4a,0x04,0x40,0x6d]
-; CHECK: ldp q2, q3, [x0, #32] ; encoding: [0x02,0x0c,0x41,0xad]
-
- stp w3, w2, [x15, #16]
- stp x4, x9, [sp, #-16]
- stp s10, s1, [x2, #64]
- stp d10, d1, [x2]
- stp q2, q3, [x0, #32]
-
-; CHECK: stp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x02,0x29]
-; CHECK: stp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x3f,0xa9]
-; CHECK: stp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x08,0x2d]
-; CHECK: stp d10, d1, [x2] ; encoding: [0x4a,0x04,0x00,0x6d]
-; CHECK: stp q2, q3, [x0, #32] ; encoding: [0x02,0x0c,0x01,0xad]
-
-;-----------------------------------------------------------------------------
-; Load/Store pair (pre-indexed)
-;-----------------------------------------------------------------------------
-
- ldp w3, w2, [x15, #16]!
- ldp x4, x9, [sp, #-16]!
- ldpsw x2, x3, [x14, #16]!
- ldpsw x2, x3, [sp, #-16]!
- ldp s10, s1, [x2, #64]!
- ldp d10, d1, [x2, #16]!
-
-; CHECK: ldp w3, w2, [x15, #16]! ; encoding: [0xe3,0x09,0xc2,0x29]
-; CHECK: ldp x4, x9, [sp, #-16]! ; encoding: [0xe4,0x27,0xff,0xa9]
-; CHECK: ldpsw x2, x3, [x14, #16]! ; encoding: [0xc2,0x0d,0xc2,0x69]
-; CHECK: ldpsw x2, x3, [sp, #-16]! ; encoding: [0xe2,0x0f,0xfe,0x69]
-; CHECK: ldp s10, s1, [x2, #64]! ; encoding: [0x4a,0x04,0xc8,0x2d]
-; CHECK: ldp d10, d1, [x2, #16]! ; encoding: [0x4a,0x04,0xc1,0x6d]
-
- stp w3, w2, [x15, #16]!
- stp x4, x9, [sp, #-16]!
- stp s10, s1, [x2, #64]!
- stp d10, d1, [x2, #16]!
-
-; CHECK: stp w3, w2, [x15, #16]! ; encoding: [0xe3,0x09,0x82,0x29]
-; CHECK: stp x4, x9, [sp, #-16]! ; encoding: [0xe4,0x27,0xbf,0xa9]
-; CHECK: stp s10, s1, [x2, #64]! ; encoding: [0x4a,0x04,0x88,0x2d]
-; CHECK: stp d10, d1, [x2, #16]! ; encoding: [0x4a,0x04,0x81,0x6d]
-
-;-----------------------------------------------------------------------------
-; Load/Store pair (post-indexed)
-;-----------------------------------------------------------------------------
-
- ldp w3, w2, [x15], #16
- ldp x4, x9, [sp], #-16
- ldpsw x2, x3, [x14], #16
- ldpsw x2, x3, [sp], #-16
- ldp s10, s1, [x2], #64
- ldp d10, d1, [x2], #16
-
-; CHECK: ldp w3, w2, [x15], #16 ; encoding: [0xe3,0x09,0xc2,0x28]
-; CHECK: ldp x4, x9, [sp], #-16 ; encoding: [0xe4,0x27,0xff,0xa8]
-; CHECK: ldpsw x2, x3, [x14], #16 ; encoding: [0xc2,0x0d,0xc2,0x68]
-; CHECK: ldpsw x2, x3, [sp], #-16 ; encoding: [0xe2,0x0f,0xfe,0x68]
-; CHECK: ldp s10, s1, [x2], #64 ; encoding: [0x4a,0x04,0xc8,0x2c]
-; CHECK: ldp d10, d1, [x2], #16 ; encoding: [0x4a,0x04,0xc1,0x6c]
-
- stp w3, w2, [x15], #16
- stp x4, x9, [sp], #-16
- stp s10, s1, [x2], #64
- stp d10, d1, [x2], #16
-
-; CHECK: stp w3, w2, [x15], #16 ; encoding: [0xe3,0x09,0x82,0x28]
-; CHECK: stp x4, x9, [sp], #-16 ; encoding: [0xe4,0x27,0xbf,0xa8]
-; CHECK: stp s10, s1, [x2], #64 ; encoding: [0x4a,0x04,0x88,0x2c]
-; CHECK: stp d10, d1, [x2], #16 ; encoding: [0x4a,0x04,0x81,0x6c]
-
-;-----------------------------------------------------------------------------
-; Load/Store pair (no-allocate)
-;-----------------------------------------------------------------------------
-
- ldnp w3, w2, [x15, #16]
- ldnp x4, x9, [sp, #-16]
- ldnp s10, s1, [x2, #64]
- ldnp d10, d1, [x2]
-
-; CHECK: ldnp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x42,0x28]
-; CHECK: ldnp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x7f,0xa8]
-; CHECK: ldnp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x48,0x2c]
-; CHECK: ldnp d10, d1, [x2] ; encoding: [0x4a,0x04,0x40,0x6c]
-
- stnp w3, w2, [x15, #16]
- stnp x4, x9, [sp, #-16]
- stnp s10, s1, [x2, #64]
- stnp d10, d1, [x2]
-
-; CHECK: stnp w3, w2, [x15, #16] ; encoding: [0xe3,0x09,0x02,0x28]
-; CHECK: stnp x4, x9, [sp, #-16] ; encoding: [0xe4,0x27,0x3f,0xa8]
-; CHECK: stnp s10, s1, [x2, #64] ; encoding: [0x4a,0x04,0x08,0x2c]
-; CHECK: stnp d10, d1, [x2] ; encoding: [0x4a,0x04,0x00,0x6c]
-
-;-----------------------------------------------------------------------------
-; Load/Store register offset
-;-----------------------------------------------------------------------------
-
- ldr w0, [x0, x0]
- ldr w0, [x0, x0, lsl #2]
- ldr x0, [x0, x0]
- ldr x0, [x0, x0, lsl #3]
- ldr x0, [x0, x0, sxtx]
-
-; CHECK: ldr w0, [x0, x0] ; encoding: [0x00,0x68,0x60,0xb8]
-; CHECK: ldr w0, [x0, x0, lsl #2] ; encoding: [0x00,0x78,0x60,0xb8]
-; CHECK: ldr x0, [x0, x0] ; encoding: [0x00,0x68,0x60,0xf8]
-; CHECK: ldr x0, [x0, x0, lsl #3] ; encoding: [0x00,0x78,0x60,0xf8]
-; CHECK: ldr x0, [x0, x0, sxtx] ; encoding: [0x00,0xe8,0x60,0xf8]
-
- ldr b1, [x1, x2]
- ldr b1, [x1, x2, lsl #0]
- ldr h1, [x1, x2]
- ldr h1, [x1, x2, lsl #1]
- ldr s1, [x1, x2]
- ldr s1, [x1, x2, lsl #2]
- ldr d1, [x1, x2]
- ldr d1, [x1, x2, lsl #3]
- ldr q1, [x1, x2]
- ldr q1, [x1, x2, lsl #4]
-
-; CHECK: ldr b1, [x1, x2] ; encoding: [0x21,0x68,0x62,0x3c]
-; CHECK: ldr b1, [x1, x2, lsl #0] ; encoding: [0x21,0x78,0x62,0x3c]
-; CHECK: ldr h1, [x1, x2] ; encoding: [0x21,0x68,0x62,0x7c]
-; CHECK: ldr h1, [x1, x2, lsl #1] ; encoding: [0x21,0x78,0x62,0x7c]
-; CHECK: ldr s1, [x1, x2] ; encoding: [0x21,0x68,0x62,0xbc]
-; CHECK: ldr s1, [x1, x2, lsl #2] ; encoding: [0x21,0x78,0x62,0xbc]
-; CHECK: ldr d1, [x1, x2] ; encoding: [0x21,0x68,0x62,0xfc]
-; CHECK: ldr d1, [x1, x2, lsl #3] ; encoding: [0x21,0x78,0x62,0xfc]
-; CHECK: ldr q1, [x1, x2] ; encoding: [0x21,0x68,0xe2,0x3c]
-; CHECK: ldr q1, [x1, x2, lsl #4] ; encoding: [0x21,0x78,0xe2,0x3c]
-
- str d1, [sp, x3]
- str d1, [sp, x3, uxtw #3]
- str q1, [sp, x3]
- str q1, [sp, x3, uxtw #4]
-
-; CHECK: str d1, [sp, x3] ; encoding: [0xe1,0x6b,0x23,0xfc]
-; CHECK: str d1, [sp, x3, uxtw #3] ; encoding: [0xe1,0x5b,0x23,0xfc]
-; CHECK: str q1, [sp, x3] ; encoding: [0xe1,0x6b,0xa3,0x3c]
-; CHECK: str q1, [sp, x3, uxtw #4] ; encoding: [0xe1,0x5b,0xa3,0x3c]
-
-;-----------------------------------------------------------------------------
-; Load literal
-;-----------------------------------------------------------------------------
-
- ldr w5, foo
- ldr x4, foo
- ldrsw x9, foo
- prfm #5, foo
-
-; CHECK: ldr w5, foo ; encoding: [0bAAA00101,A,A,0x18]
-; CHECK: ldr x4, foo ; encoding: [0bAAA00100,A,A,0x58]
-; CHECK: ldrsw x9, foo ; encoding: [0bAAA01001,A,A,0x98]
-; CHECK: prfm pldl3strm, foo ; encoding: [0bAAA00101,A,A,0xd8]
-
-;-----------------------------------------------------------------------------
-; Load/Store exclusive
-;-----------------------------------------------------------------------------
-
- ldxr w6, [x1]
- ldxr x6, [x1]
- ldxrb w6, [x1]
- ldxrh w6, [x1]
- ldxp w7, w3, [x9]
- ldxp x7, x3, [x9]
-
-; CHECK: ldxr w6, [x1]  ; encoding: [0x26,0x7c,0x5f,0x88]
-; CHECK: ldxr x6, [x1]  ; encoding: [0x26,0x7c,0x5f,0xc8]
-; CHECK: ldxrb w6, [x1] ; encoding: [0x26,0x7c,0x5f,0x08]
-; CHECK: ldxrh w6, [x1] ; encoding: [0x26,0x7c,0x5f,0x48]
-; CHECK: ldxp w7, w3, [x9] ; encoding: [0x27,0x0d,0x7f,0x88]
-; CHECK: ldxp x7, x3, [x9] ; encoding: [0x27,0x0d,0x7f,0xc8]
-
- stxr w1, x4, [x3]
- stxr w1, w4, [x3]
- stxrb w1, w4, [x3]
- stxrh w1, w4, [x3]
- stxp w1, x2, x6, [x1]
- stxp w1, w2, w6, [x1]
-
-; CHECK: stxr w1, x4, [x3] ; encoding: [0x64,0x7c,0x01,0xc8]
-; CHECK: stxr w1, w4, [x3] ; encoding: [0x64,0x7c,0x01,0x88]
-; CHECK: stxrb w1, w4, [x3] ; encoding: [0x64,0x7c,0x01,0x08]
-; CHECK: stxrh w1, w4, [x3] ; encoding: [0x64,0x7c,0x01,0x48]
-; CHECK: stxp w1, x2, x6, [x1] ; encoding: [0x22,0x18,0x21,0xc8]
-; CHECK: stxp w1, w2, w6, [x1] ; encoding: [0x22,0x18,0x21,0x88]
-
-;-----------------------------------------------------------------------------
-; Load-acquire/Store-release non-exclusive
-;-----------------------------------------------------------------------------
-
- ldar w4, [sp]
- ldar x4, [sp, #0]
- ldarb w4, [sp]
- ldarh w4, [sp]
-
-; CHECK: ldar w4, [sp] ; encoding: [0xe4,0xff,0xdf,0x88]
-; CHECK: ldar x4, [sp] ; encoding: [0xe4,0xff,0xdf,0xc8]
-; CHECK: ldarb w4, [sp] ; encoding: [0xe4,0xff,0xdf,0x08]
-; CHECK: ldarh w4, [sp] ; encoding: [0xe4,0xff,0xdf,0x48]
-
- stlr w3, [x6]
- stlr x3, [x6]
- stlrb w3, [x6]
- stlrh w3, [x6]
-
-; CHECK: stlr w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x88]
-; CHECK: stlr x3, [x6] ; encoding: [0xc3,0xfc,0x9f,0xc8]
-; CHECK: stlrb w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x08]
-; CHECK: stlrh w3, [x6] ; encoding: [0xc3,0xfc,0x9f,0x48]
-
-;-----------------------------------------------------------------------------
-; Load-acquire/Store-release exclusive
-;-----------------------------------------------------------------------------
-
- ldaxr w2, [x4]
- ldaxr x2, [x4]
- ldaxrb w2, [x4, #0]
- ldaxrh w2, [x4]
- ldaxp w2, w6, [x1]
- ldaxp x2, x6, [x1]
-
-; CHECK: ldaxr w2, [x4] ; encoding: [0x82,0xfc,0x5f,0x88]
-; CHECK: ldaxr x2, [x4] ; encoding: [0x82,0xfc,0x5f,0xc8]
-; CHECK: ldaxrb w2, [x4] ; encoding: [0x82,0xfc,0x5f,0x08]
-; CHECK: ldaxrh w2, [x4] ; encoding: [0x82,0xfc,0x5f,0x48]
-; CHECK: ldaxp w2, w6, [x1] ; encoding: [0x22,0x98,0x7f,0x88]
-; CHECK: ldaxp x2, x6, [x1] ; encoding: [0x22,0x98,0x7f,0xc8]
-
- stlxr w8, x7, [x1]
- stlxr w8, w7, [x1]
- stlxrb w8, w7, [x1]
- stlxrh w8, w7, [x1]
- stlxp w1, x2, x6, [x1]
- stlxp w1, w2, w6, [x1]
-
-; CHECK: stlxr w8, x7, [x1] ; encoding: [0x27,0xfc,0x08,0xc8]
-; CHECK: stlxr w8, w7, [x1] ; encoding: [0x27,0xfc,0x08,0x88]
-; CHECK: stlxrb w8, w7, [x1] ; encoding: [0x27,0xfc,0x08,0x08]
-; CHECK: stlxrh w8, w7, [x1] ; encoding: [0x27,0xfc,0x08,0x48]
-; CHECK: stlxp w1, x2, x6, [x1] ; encoding: [0x22,0x98,0x21,0xc8]
-; CHECK: stlxp w1, w2, w6, [x1] ; encoding: [0x22,0x98,0x21,0x88]
-
-
-;-----------------------------------------------------------------------------
-; LDUR/STUR aliases for negative and unaligned LDR/STR instructions.
-;
-; According to the ARM ISA documentation:
-; "A programmer-friendly assembler should also generate these instructions
-; in response to the standard LDR/STR mnemonics when the immediate offset is
-; unambiguous, i.e. negative or unaligned."
-;-----------------------------------------------------------------------------
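-; For example, an x-register ldr/str scales its unsigned immediate by 8,
-; so "#7" below is unaligned and "#-8" is negative; both must use the
-; unscaled ldur/stur encodings.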
-
- ldr x11, [fp, #-8]
- ldr x11, [fp, #7]
- ldr w0, [x0, #2]
- ldr w0, [x0, #-256]
- ldr b2, [x1, #-2]
- ldr h3, [x2, #3]
- ldr h3, [x3, #-4]
- ldr s3, [x4, #3]
- ldr s3, [x5, #-4]
- ldr d4, [x6, #4]
- ldr d4, [x7, #-8]
- ldr q5, [x8, #8]
- ldr q5, [x9, #-16]
-
-; CHECK: ldur x11, [fp, #-8] ; encoding: [0xab,0x83,0x5f,0xf8]
-; CHECK: ldur x11, [fp, #7] ; encoding: [0xab,0x73,0x40,0xf8]
-; CHECK: ldur w0, [x0, #2] ; encoding: [0x00,0x20,0x40,0xb8]
-; CHECK: ldur w0, [x0, #-256] ; encoding: [0x00,0x00,0x50,0xb8]
-; CHECK: ldur b2, [x1, #-2] ; encoding: [0x22,0xe0,0x5f,0x3c]
-; CHECK: ldur h3, [x2, #3] ; encoding: [0x43,0x30,0x40,0x7c]
-; CHECK: ldur h3, [x3, #-4] ; encoding: [0x63,0xc0,0x5f,0x7c]
-; CHECK: ldur s3, [x4, #3] ; encoding: [0x83,0x30,0x40,0xbc]
-; CHECK: ldur s3, [x5, #-4] ; encoding: [0xa3,0xc0,0x5f,0xbc]
-; CHECK: ldur d4, [x6, #4] ; encoding: [0xc4,0x40,0x40,0xfc]
-; CHECK: ldur d4, [x7, #-8] ; encoding: [0xe4,0x80,0x5f,0xfc]
-; CHECK: ldur q5, [x8, #8] ; encoding: [0x05,0x81,0xc0,0x3c]
-; CHECK: ldur q5, [x9, #-16] ; encoding: [0x25,0x01,0xdf,0x3c]
-
- str x11, [fp, #-8]
- str x11, [fp, #7]
- str w0, [x0, #2]
- str w0, [x0, #-256]
- str b2, [x1, #-2]
- str h3, [x2, #3]
- str h3, [x3, #-4]
- str s3, [x4, #3]
- str s3, [x5, #-4]
- str d4, [x6, #4]
- str d4, [x7, #-8]
- str q5, [x8, #8]
- str q5, [x9, #-16]
-
-; CHECK: stur x11, [fp, #-8] ; encoding: [0xab,0x83,0x1f,0xf8]
-; CHECK: stur x11, [fp, #7] ; encoding: [0xab,0x73,0x00,0xf8]
-; CHECK: stur w0, [x0, #2] ; encoding: [0x00,0x20,0x00,0xb8]
-; CHECK: stur w0, [x0, #-256] ; encoding: [0x00,0x00,0x10,0xb8]
-; CHECK: stur b2, [x1, #-2] ; encoding: [0x22,0xe0,0x1f,0x3c]
-; CHECK: stur h3, [x2, #3] ; encoding: [0x43,0x30,0x00,0x7c]
-; CHECK: stur h3, [x3, #-4] ; encoding: [0x63,0xc0,0x1f,0x7c]
-; CHECK: stur s3, [x4, #3] ; encoding: [0x83,0x30,0x00,0xbc]
-; CHECK: stur s3, [x5, #-4] ; encoding: [0xa3,0xc0,0x1f,0xbc]
-; CHECK: stur d4, [x6, #4] ; encoding: [0xc4,0x40,0x00,0xfc]
-; CHECK: stur d4, [x7, #-8] ; encoding: [0xe4,0x80,0x1f,0xfc]
-; CHECK: stur q5, [x8, #8] ; encoding: [0x05,0x81,0x80,0x3c]
-; CHECK: stur q5, [x9, #-16] ; encoding: [0x25,0x01,0x9f,0x3c]
-
- ldrb w3, [x1, #-1]
- ldrh w4, [x2, #1]
- ldrh w5, [x3, #-1]
- ldrsb w6, [x4, #-1]
- ldrsb x7, [x5, #-1]
- ldrsh w8, [x6, #1]
- ldrsh w9, [x7, #-1]
- ldrsh x1, [x8, #1]
- ldrsh x2, [x9, #-1]
- ldrsw x3, [x10, #10]
- ldrsw x4, [x11, #-1]
-
-; CHECK: ldurb w3, [x1, #-1] ; encoding: [0x23,0xf0,0x5f,0x38]
-; CHECK: ldurh w4, [x2, #1] ; encoding: [0x44,0x10,0x40,0x78]
-; CHECK: ldurh w5, [x3, #-1] ; encoding: [0x65,0xf0,0x5f,0x78]
-; CHECK: ldursb w6, [x4, #-1] ; encoding: [0x86,0xf0,0xdf,0x38]
-; CHECK: ldursb x7, [x5, #-1] ; encoding: [0xa7,0xf0,0x9f,0x38]
-; CHECK: ldursh w8, [x6, #1] ; encoding: [0xc8,0x10,0xc0,0x78]
-; CHECK: ldursh w9, [x7, #-1] ; encoding: [0xe9,0xf0,0xdf,0x78]
-; CHECK: ldursh x1, [x8, #1] ; encoding: [0x01,0x11,0x80,0x78]
-; CHECK: ldursh x2, [x9, #-1] ; encoding: [0x22,0xf1,0x9f,0x78]
-; CHECK: ldursw x3, [x10, #10] ; encoding: [0x43,0xa1,0x80,0xb8]
-; CHECK: ldursw x4, [x11, #-1] ; encoding: [0x64,0xf1,0x9f,0xb8]
-
- strb w3, [x1, #-1]
- strh w4, [x2, #1]
- strh w5, [x3, #-1]
-
-; CHECK: sturb w3, [x1, #-1] ; encoding: [0x23,0xf0,0x1f,0x38]
-; CHECK: sturh w4, [x2, #1] ; encoding: [0x44,0x10,0x00,0x78]
-; CHECK: sturh w5, [x3, #-1] ; encoding: [0x65,0xf0,0x1f,0x78]
diff --git a/test/MC/ARM64/separator.s b/test/MC/ARM64/separator.s
deleted file mode 100644
index 18f34b9..0000000
--- a/test/MC/ARM64/separator.s
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -show-encoding < %s | FileCheck %s
-
-; ARM64 uses a multi-character statement separator, "%%". Check that we lex
-; it properly and recognize the multiple assembly statements on the line.
-
-; To make sure the output assembly correctly handles the instructions, we
-; tell the assembler to show encodings. That will result in the two 'mov'
-; instructions
-; being on separate lines in the output. We look for the "; encoding" string
-; to verify that. For this test, we don't care what the encoding is, just that
-; there is one for each 'mov' instruction.
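-; The "%%"-separated line below is thus equivalent to writing the two 'mov'
-; statements on separate lines.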
-
-
-_foo:
-; CHECK: foo
-; CHECK: mov x0, x1 ; encoding
-; CHECK: mov x1, x0 ; encoding
- mov x0, x1 %% mov x1, x0
- ret lr
-
-
diff --git a/test/MC/ARM64/simd-ldst.s b/test/MC/ARM64/simd-ldst.s
deleted file mode 100644
index a754c72..0000000
--- a/test/MC/ARM64/simd-ldst.s
+++ /dev/null
@@ -1,2404 +0,0 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 -show-encoding < %s | FileCheck %s
-
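-; (-output-asm-variant=1 selects the printing variant that folds the vector
-; arrangement into the mnemonic, e.g. "ld1.8b", which is what the checks
-; below match.)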
-_ld1st1_multiple:
- ld1.8b {v0}, [x1]
- ld1.8b {v0, v1}, [x1]
- ld1.8b {v0, v1, v2}, [x1]
- ld1.8b {v0, v1, v2, v3}, [x1]
-
- ld1.8b {v3}, [x1]
- ld1.8b {v3, v4}, [x2]
- ld1.8b {v4, v5, v6}, [x3]
- ld1.8b {v7, v8, v9, v10}, [x4]
-
- ld1.16b {v0}, [x1]
- ld1.16b {v0, v1}, [x1]
- ld1.16b {v0, v1, v2}, [x1]
- ld1.16b {v0, v1, v2, v3}, [x1]
-
- ld1.4h {v0}, [x1]
- ld1.4h {v0, v1}, [x1]
- ld1.4h {v0, v1, v2}, [x1]
- ld1.4h {v0, v1, v2, v3}, [x1]
-
- ld1.8h {v0}, [x1]
- ld1.8h {v0, v1}, [x1]
- ld1.8h {v0, v1, v2}, [x1]
- ld1.8h {v0, v1, v2, v3}, [x1]
-
- ld1.2s {v0}, [x1]
- ld1.2s {v0, v1}, [x1]
- ld1.2s {v0, v1, v2}, [x1]
- ld1.2s {v0, v1, v2, v3}, [x1]
-
- ld1.4s {v0}, [x1]
- ld1.4s {v0, v1}, [x1]
- ld1.4s {v0, v1, v2}, [x1]
- ld1.4s {v0, v1, v2, v3}, [x1]
-
- ld1.1d {v0}, [x1]
- ld1.1d {v0, v1}, [x1]
- ld1.1d {v0, v1, v2}, [x1]
- ld1.1d {v0, v1, v2, v3}, [x1]
-
- ld1.2d {v0}, [x1]
- ld1.2d {v0, v1}, [x1]
- ld1.2d {v0, v1, v2}, [x1]
- ld1.2d {v0, v1, v2, v3}, [x1]
-
- st1.8b {v0}, [x1]
- st1.8b {v0, v1}, [x1]
- st1.8b {v0, v1, v2}, [x1]
- st1.8b {v0, v1, v2, v3}, [x1]
-
- st1.16b {v0}, [x1]
- st1.16b {v0, v1}, [x1]
- st1.16b {v0, v1, v2}, [x1]
- st1.16b {v0, v1, v2, v3}, [x1]
-
- st1.4h {v0}, [x1]
- st1.4h {v0, v1}, [x1]
- st1.4h {v0, v1, v2}, [x1]
- st1.4h {v0, v1, v2, v3}, [x1]
-
- st1.8h {v0}, [x1]
- st1.8h {v0, v1}, [x1]
- st1.8h {v0, v1, v2}, [x1]
- st1.8h {v0, v1, v2, v3}, [x1]
-
- st1.2s {v0}, [x1]
- st1.2s {v0, v1}, [x1]
- st1.2s {v0, v1, v2}, [x1]
- st1.2s {v0, v1, v2, v3}, [x1]
-
- st1.4s {v0}, [x1]
- st1.4s {v0, v1}, [x1]
- st1.4s {v0, v1, v2}, [x1]
- st1.4s {v0, v1, v2, v3}, [x1]
-
- st1.1d {v0}, [x1]
- st1.1d {v0, v1}, [x1]
- st1.1d {v0, v1, v2}, [x1]
- st1.1d {v0, v1, v2, v3}, [x1]
-
- st1.2d {v0}, [x1]
- st1.2d {v0, v1}, [x1]
- st1.2d {v0, v1, v2}, [x1]
- st1.2d {v0, v1, v2, v3}, [x1]
-
- st1.2d {v5}, [x1]
- st1.2d {v7, v8}, [x10]
- st1.2d {v11, v12, v13}, [x1]
- st1.2d {v28, v29, v30, v31}, [x13]
-
-; CHECK: _ld1st1_multiple:
-; CHECK: ld1.8b { v0 }, [x1] ; encoding: [0x20,0x70,0x40,0x0c]
-; CHECK: ld1.8b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x40,0x0c]
-; CHECK: ld1.8b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x40,0x0c]
-; CHECK: ld1.8b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x40,0x0c]
-
-; CHECK: ld1.8b { v3 }, [x1] ; encoding: [0x23,0x70,0x40,0x0c]
-; CHECK: ld1.8b { v3, v4 }, [x2] ; encoding: [0x43,0xa0,0x40,0x0c]
-; CHECK: ld1.8b { v4, v5, v6 }, [x3] ; encoding: [0x64,0x60,0x40,0x0c]
-; CHECK: ld1.8b { v7, v8, v9, v10 }, [x4] ; encoding: [0x87,0x20,0x40,0x0c]
-
-; CHECK: ld1.16b { v0 }, [x1] ; encoding: [0x20,0x70,0x40,0x4c]
-; CHECK: ld1.16b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x40,0x4c]
-; CHECK: ld1.16b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x40,0x4c]
-; CHECK: ld1.16b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x40,0x4c]
-
-; CHECK: ld1.4h { v0 }, [x1] ; encoding: [0x20,0x74,0x40,0x0c]
-; CHECK: ld1.4h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x40,0x0c]
-; CHECK: ld1.4h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x40,0x0c]
-; CHECK: ld1.4h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x40,0x0c]
-
-; CHECK: ld1.8h { v0 }, [x1] ; encoding: [0x20,0x74,0x40,0x4c]
-; CHECK: ld1.8h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x40,0x4c]
-; CHECK: ld1.8h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x40,0x4c]
-; CHECK: ld1.8h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x40,0x4c]
-
-; CHECK: ld1.2s { v0 }, [x1] ; encoding: [0x20,0x78,0x40,0x0c]
-; CHECK: ld1.2s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x40,0x0c]
-; CHECK: ld1.2s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x40,0x0c]
-; CHECK: ld1.2s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x40,0x0c]
-
-; CHECK: ld1.4s { v0 }, [x1] ; encoding: [0x20,0x78,0x40,0x4c]
-; CHECK: ld1.4s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x40,0x4c]
-; CHECK: ld1.4s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x40,0x4c]
-; CHECK: ld1.4s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x40,0x4c]
-
-; CHECK: ld1.1d { v0 }, [x1] ; encoding: [0x20,0x7c,0x40,0x0c]
-; CHECK: ld1.1d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x40,0x0c]
-; CHECK: ld1.1d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x40,0x0c]
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x40,0x0c]
-
-; CHECK: ld1.2d { v0 }, [x1] ; encoding: [0x20,0x7c,0x40,0x4c]
-; CHECK: ld1.2d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x40,0x4c]
-; CHECK: ld1.2d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x40,0x4c]
-; CHECK: ld1.2d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x40,0x4c]
-
-
-; CHECK: st1.8b { v0 }, [x1] ; encoding: [0x20,0x70,0x00,0x0c]
-; CHECK: st1.8b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x00,0x0c]
-; CHECK: st1.8b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x00,0x0c]
-; CHECK: st1.8b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x00,0x0c]
-
-; CHECK: st1.16b { v0 }, [x1] ; encoding: [0x20,0x70,0x00,0x4c]
-; CHECK: st1.16b { v0, v1 }, [x1] ; encoding: [0x20,0xa0,0x00,0x4c]
-; CHECK: st1.16b { v0, v1, v2 }, [x1] ; encoding: [0x20,0x60,0x00,0x4c]
-; CHECK: st1.16b { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x20,0x00,0x4c]
-
-; CHECK: st1.4h { v0 }, [x1] ; encoding: [0x20,0x74,0x00,0x0c]
-; CHECK: st1.4h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x00,0x0c]
-; CHECK: st1.4h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x00,0x0c]
-; CHECK: st1.4h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x00,0x0c]
-
-; CHECK: st1.8h { v0 }, [x1] ; encoding: [0x20,0x74,0x00,0x4c]
-; CHECK: st1.8h { v0, v1 }, [x1] ; encoding: [0x20,0xa4,0x00,0x4c]
-; CHECK: st1.8h { v0, v1, v2 }, [x1] ; encoding: [0x20,0x64,0x00,0x4c]
-; CHECK: st1.8h { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x24,0x00,0x4c]
-
-; CHECK: st1.2s { v0 }, [x1] ; encoding: [0x20,0x78,0x00,0x0c]
-; CHECK: st1.2s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x00,0x0c]
-; CHECK: st1.2s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x00,0x0c]
-; CHECK: st1.2s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x00,0x0c]
-
-; CHECK: st1.4s { v0 }, [x1] ; encoding: [0x20,0x78,0x00,0x4c]
-; CHECK: st1.4s { v0, v1 }, [x1] ; encoding: [0x20,0xa8,0x00,0x4c]
-; CHECK: st1.4s { v0, v1, v2 }, [x1] ; encoding: [0x20,0x68,0x00,0x4c]
-; CHECK: st1.4s { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x28,0x00,0x4c]
-
-; CHECK: st1.1d { v0 }, [x1] ; encoding: [0x20,0x7c,0x00,0x0c]
-; CHECK: st1.1d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x00,0x0c]
-; CHECK: st1.1d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x00,0x0c]
-; CHECK: st1.1d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x00,0x0c]
-
-; CHECK: st1.2d { v0 }, [x1] ; encoding: [0x20,0x7c,0x00,0x4c]
-; CHECK: st1.2d { v0, v1 }, [x1] ; encoding: [0x20,0xac,0x00,0x4c]
-; CHECK: st1.2d { v0, v1, v2 }, [x1] ; encoding: [0x20,0x6c,0x00,0x4c]
-; CHECK: st1.2d { v0, v1, v2, v3 }, [x1] ; encoding: [0x20,0x2c,0x00,0x4c]
-
-; CHECK: st1.2d { v5 }, [x1] ; encoding: [0x25,0x7c,0x00,0x4c]
-; CHECK: st1.2d { v7, v8 }, [x10] ; encoding: [0x47,0xad,0x00,0x4c]
-; CHECK: st1.2d { v11, v12, v13 }, [x1] ; encoding: [0x2b,0x6c,0x00,0x4c]
-; CHECK: st1.2d { v28, v29, v30, v31 }, [x13] ; encoding: [0xbc,0x2d,0x00,0x4c]
-
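-; ld2/st2 multiple structures: two registers, elements interleaved in memory.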
-_ld2st2_multiple:
- ld2.8b {v4, v5}, [x19]
- ld2.16b {v4, v5}, [x19]
- ld2.4h {v4, v5}, [x19]
- ld2.8h {v4, v5}, [x19]
- ld2.2s {v4, v5}, [x19]
- ld2.4s {v4, v5}, [x19]
- ld2.2d {v4, v5}, [x19]
-
- st2.8b {v4, v5}, [x19]
- st2.16b {v4, v5}, [x19]
- st2.4h {v4, v5}, [x19]
- st2.8h {v4, v5}, [x19]
- st2.2s {v4, v5}, [x19]
- st2.4s {v4, v5}, [x19]
- st2.2d {v4, v5}, [x19]
-
-
-; CHECK: _ld2st2_multiple:
-; CHECK: ld2.8b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x40,0x0c]
-; CHECK: ld2.16b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x40,0x4c]
-; CHECK: ld2.4h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x40,0x0c]
-; CHECK: ld2.8h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x40,0x4c]
-; CHECK: ld2.2s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x40,0x0c]
-; CHECK: ld2.4s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x40,0x4c]
-; CHECK: ld2.2d { v4, v5 }, [x19] ; encoding: [0x64,0x8e,0x40,0x4c]
-
-; CHECK: st2.8b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x00,0x0c]
-; CHECK: st2.16b { v4, v5 }, [x19] ; encoding: [0x64,0x82,0x00,0x4c]
-; CHECK: st2.4h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x00,0x0c]
-; CHECK: st2.8h { v4, v5 }, [x19] ; encoding: [0x64,0x86,0x00,0x4c]
-; CHECK: st2.2s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x00,0x0c]
-; CHECK: st2.4s { v4, v5 }, [x19] ; encoding: [0x64,0x8a,0x00,0x4c]
-; CHECK: st2.2d { v4, v5 }, [x19] ; encoding: [0x64,0x8e,0x00,0x4c]
-
-
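-; ld3/st3 multiple structures: three registers of interleaved elements.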
-ld3st3_multiple:
- ld3.8b {v4, v5, v6}, [x19]
- ld3.16b {v4, v5, v6}, [x19]
- ld3.4h {v4, v5, v6}, [x19]
- ld3.8h {v4, v5, v6}, [x19]
- ld3.2s {v4, v5, v6}, [x19]
- ld3.4s {v4, v5, v6}, [x19]
- ld3.2d {v4, v5, v6}, [x19]
-
- ld3.8b {v9, v10, v11}, [x9]
- ld3.16b {v14, v15, v16}, [x19]
- ld3.4h {v24, v25, v26}, [x29]
- ld3.8h {v30, v31, v0}, [x9]
- ld3.2s {v2, v3, v4}, [x19]
- ld3.4s {v4, v5, v6}, [x29]
- ld3.2d {v7, v8, v9}, [x9]
-
- st3.8b {v4, v5, v6}, [x19]
- st3.16b {v4, v5, v6}, [x19]
- st3.4h {v4, v5, v6}, [x19]
- st3.8h {v4, v5, v6}, [x19]
- st3.2s {v4, v5, v6}, [x19]
- st3.4s {v4, v5, v6}, [x19]
- st3.2d {v4, v5, v6}, [x19]
-
- st3.8b {v10, v11, v12}, [x9]
- st3.16b {v14, v15, v16}, [x19]
- st3.4h {v24, v25, v26}, [x29]
- st3.8h {v30, v31, v0}, [x9]
- st3.2s {v2, v3, v4}, [x19]
- st3.4s {v7, v8, v9}, [x29]
- st3.2d {v4, v5, v6}, [x9]
-
-; CHECK: ld3st3_multiple:
-; CHECK: ld3.8b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x40,0x0c]
-; CHECK: ld3.16b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x40,0x4c]
-; CHECK: ld3.4h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x40,0x0c]
-; CHECK: ld3.8h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x40,0x4c]
-; CHECK: ld3.2s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x40,0x0c]
-; CHECK: ld3.4s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x40,0x4c]
-; CHECK: ld3.2d { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4e,0x40,0x4c]
-
-; CHECK: ld3.8b { v9, v10, v11 }, [x9] ; encoding: [0x29,0x41,0x40,0x0c]
-; CHECK: ld3.16b { v14, v15, v16 }, [x19] ; encoding: [0x6e,0x42,0x40,0x4c]
-; CHECK: ld3.4h { v24, v25, v26 }, [fp] ; encoding: [0xb8,0x47,0x40,0x0c]
-; CHECK: ld3.8h { v30, v31, v0 }, [x9] ; encoding: [0x3e,0x45,0x40,0x4c]
-; CHECK: ld3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x40,0x0c]
-; CHECK: ld3.4s { v4, v5, v6 }, [fp] ; encoding: [0xa4,0x4b,0x40,0x4c]
-; CHECK: ld3.2d { v7, v8, v9 }, [x9] ; encoding: [0x27,0x4d,0x40,0x4c]
-
-; CHECK: st3.8b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x00,0x0c]
-; CHECK: st3.16b { v4, v5, v6 }, [x19] ; encoding: [0x64,0x42,0x00,0x4c]
-; CHECK: st3.4h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x00,0x0c]
-; CHECK: st3.8h { v4, v5, v6 }, [x19] ; encoding: [0x64,0x46,0x00,0x4c]
-; CHECK: st3.2s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x00,0x0c]
-; CHECK: st3.4s { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4a,0x00,0x4c]
-; CHECK: st3.2d { v4, v5, v6 }, [x19] ; encoding: [0x64,0x4e,0x00,0x4c]
-
-; CHECK: st3.8b { v10, v11, v12 }, [x9] ; encoding: [0x2a,0x41,0x00,0x0c]
-; CHECK: st3.16b { v14, v15, v16 }, [x19] ; encoding: [0x6e,0x42,0x00,0x4c]
-; CHECK: st3.4h { v24, v25, v26 }, [fp] ; encoding: [0xb8,0x47,0x00,0x0c]
-; CHECK: st3.8h { v30, v31, v0 }, [x9] ; encoding: [0x3e,0x45,0x00,0x4c]
-; CHECK: st3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x00,0x0c]
-; CHECK: st3.4s { v7, v8, v9 }, [fp] ; encoding: [0xa7,0x4b,0x00,0x4c]
-; CHECK: st3.2d { v4, v5, v6 }, [x9] ; encoding: [0x24,0x4d,0x00,0x4c]
-
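-; ld4/st4 multiple structures: four registers of interleaved elements.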
-ld4st4_multiple:
- ld4.8b {v4, v5, v6, v7}, [x19]
- ld4.16b {v4, v5, v6, v7}, [x19]
- ld4.4h {v4, v5, v6, v7}, [x19]
- ld4.8h {v4, v5, v6, v7}, [x19]
- ld4.2s {v4, v5, v6, v7}, [x19]
- ld4.4s {v4, v5, v6, v7}, [x19]
- ld4.2d {v4, v5, v6, v7}, [x19]
-
- st4.8b {v4, v5, v6, v7}, [x19]
- st4.16b {v4, v5, v6, v7}, [x19]
- st4.4h {v4, v5, v6, v7}, [x19]
- st4.8h {v4, v5, v6, v7}, [x19]
- st4.2s {v4, v5, v6, v7}, [x19]
- st4.4s {v4, v5, v6, v7}, [x19]
- st4.2d {v4, v5, v6, v7}, [x19]
-
-; CHECK: ld4st4_multiple:
-; CHECK: ld4.8b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x40,0x0c]
-; CHECK: ld4.16b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x40,0x4c]
-; CHECK: ld4.4h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x40,0x0c]
-; CHECK: ld4.8h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x40,0x4c]
-; CHECK: ld4.2s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x40,0x0c]
-; CHECK: ld4.4s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x40,0x4c]
-; CHECK: ld4.2d { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0e,0x40,0x4c]
-
-; CHECK: st4.8b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x00,0x0c]
-; CHECK: st4.16b { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x02,0x00,0x4c]
-; CHECK: st4.4h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x00,0x0c]
-; CHECK: st4.8h { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x06,0x00,0x4c]
-; CHECK: st4.2s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x00,0x0c]
-; CHECK: st4.4s { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0a,0x00,0x4c]
-; CHECK: st4.2d { v4, v5, v6, v7 }, [x19] ; encoding: [0x64,0x0e,0x00,0x4c]
-
-;-----------------------------------------------------------------------------
-; Post-increment versions.
-;-----------------------------------------------------------------------------
-
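-; Each post-index form either adds a register (here x15) to the base or an
-; immediate equal to the total number of bytes transferred.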
-_ld1st1_multiple_post:
- ld1.8b {v0}, [x1], x15
- ld1.8b {v0, v1}, [x1], x15
- ld1.8b {v0, v1, v2}, [x1], x15
- ld1.8b {v0, v1, v2, v3}, [x1], x15
-
- ld1.16b {v0}, [x1], x15
- ld1.16b {v0, v1}, [x1], x15
- ld1.16b {v0, v1, v2}, [x1], x15
- ld1.16b {v0, v1, v2, v3}, [x1], x15
-
- ld1.4h {v0}, [x1], x15
- ld1.4h {v0, v1}, [x1], x15
- ld1.4h {v0, v1, v2}, [x1], x15
- ld1.4h {v0, v1, v2, v3}, [x1], x15
-
- ld1.8h {v0}, [x1], x15
- ld1.8h {v0, v1}, [x1], x15
- ld1.8h {v0, v1, v2}, [x1], x15
- ld1.8h {v0, v1, v2, v3}, [x1], x15
-
- ld1.2s {v0}, [x1], x15
- ld1.2s {v0, v1}, [x1], x15
- ld1.2s {v0, v1, v2}, [x1], x15
- ld1.2s {v0, v1, v2, v3}, [x1], x15
-
- ld1.4s {v0}, [x1], x15
- ld1.4s {v0, v1}, [x1], x15
- ld1.4s {v0, v1, v2}, [x1], x15
- ld1.4s {v0, v1, v2, v3}, [x1], x15
-
- ld1.1d {v0}, [x1], x15
- ld1.1d {v0, v1}, [x1], x15
- ld1.1d {v0, v1, v2}, [x1], x15
- ld1.1d {v0, v1, v2, v3}, [x1], x15
-
- ld1.2d {v0}, [x1], x15
- ld1.2d {v0, v1}, [x1], x15
- ld1.2d {v0, v1, v2}, [x1], x15
- ld1.2d {v0, v1, v2, v3}, [x1], x15
-
- st1.8b {v0}, [x1], x15
- st1.8b {v0, v1}, [x1], x15
- st1.8b {v0, v1, v2}, [x1], x15
- st1.8b {v0, v1, v2, v3}, [x1], x15
-
- st1.16b {v0}, [x1], x15
- st1.16b {v0, v1}, [x1], x15
- st1.16b {v0, v1, v2}, [x1], x15
- st1.16b {v0, v1, v2, v3}, [x1], x15
-
- st1.4h {v0}, [x1], x15
- st1.4h {v0, v1}, [x1], x15
- st1.4h {v0, v1, v2}, [x1], x15
- st1.4h {v0, v1, v2, v3}, [x1], x15
-
- st1.8h {v0}, [x1], x15
- st1.8h {v0, v1}, [x1], x15
- st1.8h {v0, v1, v2}, [x1], x15
- st1.8h {v0, v1, v2, v3}, [x1], x15
-
- st1.2s {v0}, [x1], x15
- st1.2s {v0, v1}, [x1], x15
- st1.2s {v0, v1, v2}, [x1], x15
- st1.2s {v0, v1, v2, v3}, [x1], x15
-
- st1.4s {v0}, [x1], x15
- st1.4s {v0, v1}, [x1], x15
- st1.4s {v0, v1, v2}, [x1], x15
- st1.4s {v0, v1, v2, v3}, [x1], x15
-
- st1.1d {v0}, [x1], x15
- st1.1d {v0, v1}, [x1], x15
- st1.1d {v0, v1, v2}, [x1], x15
- st1.1d {v0, v1, v2, v3}, [x1], x15
-
- st1.2d {v0}, [x1], x15
- st1.2d {v0, v1}, [x1], x15
- st1.2d {v0, v1, v2}, [x1], x15
- st1.2d {v0, v1, v2, v3}, [x1], x15
-
- ld1.8b {v0}, [x1], #8
- ld1.8b {v0, v1}, [x1], #16
- ld1.8b {v0, v1, v2}, [x1], #24
- ld1.8b {v0, v1, v2, v3}, [x1], #32
-
- ld1.16b {v0}, [x1], #16
- ld1.16b {v0, v1}, [x1], #32
- ld1.16b {v0, v1, v2}, [x1], #48
- ld1.16b {v0, v1, v2, v3}, [x1], #64
-
- ld1.4h {v0}, [x1], #8
- ld1.4h {v0, v1}, [x1], #16
- ld1.4h {v0, v1, v2}, [x1], #24
- ld1.4h {v0, v1, v2, v3}, [x1], #32
-
- ld1.8h {v0}, [x1], #16
- ld1.8h {v0, v1}, [x1], #32
- ld1.8h {v0, v1, v2}, [x1], #48
- ld1.8h {v0, v1, v2, v3}, [x1], #64
-
- ld1.2s {v0}, [x1], #8
- ld1.2s {v0, v1}, [x1], #16
- ld1.2s {v0, v1, v2}, [x1], #24
- ld1.2s {v0, v1, v2, v3}, [x1], #32
-
- ld1.4s {v0}, [x1], #16
- ld1.4s {v0, v1}, [x1], #32
- ld1.4s {v0, v1, v2}, [x1], #48
- ld1.4s {v0, v1, v2, v3}, [x1], #64
-
- ld1.1d {v0}, [x1], #8
- ld1.1d {v0, v1}, [x1], #16
- ld1.1d {v0, v1, v2}, [x1], #24
- ld1.1d {v0, v1, v2, v3}, [x1], #32
-
- ld1.2d {v0}, [x1], #16
- ld1.2d {v0, v1}, [x1], #32
- ld1.2d {v0, v1, v2}, [x1], #48
- ld1.2d {v0, v1, v2, v3}, [x1], #64
-
- st1.8b {v0}, [x1], #8
- st1.8b {v0, v1}, [x1], #16
- st1.8b {v0, v1, v2}, [x1], #24
- st1.8b {v0, v1, v2, v3}, [x1], #32
-
- st1.16b {v0}, [x1], #16
- st1.16b {v0, v1}, [x1], #32
- st1.16b {v0, v1, v2}, [x1], #48
- st1.16b {v0, v1, v2, v3}, [x1], #64
-
- st1.4h {v0}, [x1], #8
- st1.4h {v0, v1}, [x1], #16
- st1.4h {v0, v1, v2}, [x1], #24
- st1.4h {v0, v1, v2, v3}, [x1], #32
-
- st1.8h {v0}, [x1], #16
- st1.8h {v0, v1}, [x1], #32
- st1.8h {v0, v1, v2}, [x1], #48
- st1.8h {v0, v1, v2, v3}, [x1], #64
-
- st1.2s {v0}, [x1], #8
- st1.2s {v0, v1}, [x1], #16
- st1.2s {v0, v1, v2}, [x1], #24
- st1.2s {v0, v1, v2, v3}, [x1], #32
-
- st1.4s {v0}, [x1], #16
- st1.4s {v0, v1}, [x1], #32
- st1.4s {v0, v1, v2}, [x1], #48
- st1.4s {v0, v1, v2, v3}, [x1], #64
-
- st1.1d {v0}, [x1], #8
- st1.1d {v0, v1}, [x1], #16
- st1.1d {v0, v1, v2}, [x1], #24
- st1.1d {v0, v1, v2, v3}, [x1], #32
-
- st1.2d {v0}, [x1], #16
- st1.2d {v0, v1}, [x1], #32
- st1.2d {v0, v1, v2}, [x1], #48
- st1.2d {v0, v1, v2, v3}, [x1], #64
-
-; CHECK: ld1st1_multiple_post:
-; CHECK: ld1.8b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0xcf,0x0c]
-; CHECK: ld1.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0xcf,0x0c]
-; CHECK: ld1.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0xcf,0x0c]
-; CHECK: ld1.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0xcf,0x0c]
-
-; CHECK: ld1.16b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0xcf,0x4c]
-; CHECK: ld1.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0xcf,0x4c]
-; CHECK: ld1.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0xcf,0x4c]
-; CHECK: ld1.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0xcf,0x4c]
-
-; CHECK: ld1.4h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0xcf,0x0c]
-; CHECK: ld1.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0xcf,0x0c]
-; CHECK: ld1.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0xcf,0x0c]
-; CHECK: ld1.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0xcf,0x0c]
-
-; CHECK: ld1.8h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0xcf,0x4c]
-; CHECK: ld1.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0xcf,0x4c]
-; CHECK: ld1.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0xcf,0x4c]
-; CHECK: ld1.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0xcf,0x4c]
-
-; CHECK: ld1.2s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0xcf,0x0c]
-; CHECK: ld1.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0xcf,0x0c]
-; CHECK: ld1.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0xcf,0x0c]
-; CHECK: ld1.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0xcf,0x0c]
-
-; CHECK: ld1.4s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0xcf,0x4c]
-; CHECK: ld1.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0xcf,0x4c]
-; CHECK: ld1.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0xcf,0x4c]
-; CHECK: ld1.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0xcf,0x4c]
-
-; CHECK: ld1.1d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0xcf,0x0c]
-; CHECK: ld1.1d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0xcf,0x0c]
-; CHECK: ld1.1d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0xcf,0x0c]
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0xcf,0x0c]
-
-; CHECK: ld1.2d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0xcf,0x4c]
-; CHECK: ld1.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0xcf,0x4c]
-; CHECK: ld1.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0xcf,0x4c]
-; CHECK: ld1.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0xcf,0x4c]
-
-; CHECK: st1.8b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0x8f,0x0c]
-; CHECK: st1.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0x8f,0x0c]
-; CHECK: st1.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0x8f,0x0c]
-; CHECK: st1.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0x8f,0x0c]
-
-; CHECK: st1.16b { v0 }, [x1], x15 ; encoding: [0x20,0x70,0x8f,0x4c]
-; CHECK: st1.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa0,0x8f,0x4c]
-; CHECK: st1.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x60,0x8f,0x4c]
-; CHECK: st1.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x20,0x8f,0x4c]
-
-; CHECK: st1.4h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0x8f,0x0c]
-; CHECK: st1.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0x8f,0x0c]
-; CHECK: st1.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0x8f,0x0c]
-; CHECK: st1.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0x8f,0x0c]
-
-; CHECK: st1.8h { v0 }, [x1], x15 ; encoding: [0x20,0x74,0x8f,0x4c]
-; CHECK: st1.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa4,0x8f,0x4c]
-; CHECK: st1.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x64,0x8f,0x4c]
-; CHECK: st1.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x24,0x8f,0x4c]
-
-; CHECK: st1.2s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0x8f,0x0c]
-; CHECK: st1.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0x8f,0x0c]
-; CHECK: st1.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0x8f,0x0c]
-; CHECK: st1.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0x8f,0x0c]
-
-; CHECK: st1.4s { v0 }, [x1], x15 ; encoding: [0x20,0x78,0x8f,0x4c]
-; CHECK: st1.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0xa8,0x8f,0x4c]
-; CHECK: st1.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x68,0x8f,0x4c]
-; CHECK: st1.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x28,0x8f,0x4c]
-
-; CHECK: st1.1d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0x8f,0x0c]
-; CHECK: st1.1d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0x8f,0x0c]
-; CHECK: st1.1d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0x8f,0x0c]
-; CHECK: st1.1d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0x8f,0x0c]
-
-; CHECK: st1.2d { v0 }, [x1], x15 ; encoding: [0x20,0x7c,0x8f,0x4c]
-; CHECK: st1.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0xac,0x8f,0x4c]
-; CHECK: st1.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x6c,0x8f,0x4c]
-; CHECK: st1.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x2c,0x8f,0x4c]
-
-; CHECK: ld1.8b { v0 }, [x1], #8 ; encoding: [0x20,0x70,0xdf,0x0c]
-; CHECK: ld1.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa0,0xdf,0x0c]
-; CHECK: ld1.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x60,0xdf,0x0c]
-; CHECK: ld1.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x20,0xdf,0x0c]
-
-; CHECK: ld1.16b { v0 }, [x1], #16 ; encoding: [0x20,0x70,0xdf,0x4c]
-; CHECK: ld1.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa0,0xdf,0x4c]
-; CHECK: ld1.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x60,0xdf,0x4c]
-; CHECK: ld1.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x20,0xdf,0x4c]
-
-; CHECK: ld1.4h { v0 }, [x1], #8 ; encoding: [0x20,0x74,0xdf,0x0c]
-; CHECK: ld1.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa4,0xdf,0x0c]
-; CHECK: ld1.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x64,0xdf,0x0c]
-; CHECK: ld1.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x24,0xdf,0x0c]
-
-; CHECK: ld1.8h { v0 }, [x1], #16 ; encoding: [0x20,0x74,0xdf,0x4c]
-; CHECK: ld1.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa4,0xdf,0x4c]
-; CHECK: ld1.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x64,0xdf,0x4c]
-; CHECK: ld1.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x24,0xdf,0x4c]
-
-; CHECK: ld1.2s { v0 }, [x1], #8 ; encoding: [0x20,0x78,0xdf,0x0c]
-; CHECK: ld1.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa8,0xdf,0x0c]
-; CHECK: ld1.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x68,0xdf,0x0c]
-; CHECK: ld1.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x28,0xdf,0x0c]
-
-; CHECK: ld1.4s { v0 }, [x1], #16 ; encoding: [0x20,0x78,0xdf,0x4c]
-; CHECK: ld1.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa8,0xdf,0x4c]
-; CHECK: ld1.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x68,0xdf,0x4c]
-; CHECK: ld1.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x28,0xdf,0x4c]
-
-; CHECK: ld1.1d { v0 }, [x1], #8 ; encoding: [0x20,0x7c,0xdf,0x0c]
-; CHECK: ld1.1d { v0, v1 }, [x1], #16 ; encoding: [0x20,0xac,0xdf,0x0c]
-; CHECK: ld1.1d { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x6c,0xdf,0x0c]
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x2c,0xdf,0x0c]
-
-; CHECK: ld1.2d { v0 }, [x1], #16 ; encoding: [0x20,0x7c,0xdf,0x4c]
-; CHECK: ld1.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0xac,0xdf,0x4c]
-; CHECK: ld1.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x6c,0xdf,0x4c]
-; CHECK: ld1.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x2c,0xdf,0x4c]
-
-; CHECK: st1.8b { v0 }, [x1], #8 ; encoding: [0x20,0x70,0x9f,0x0c]
-; CHECK: st1.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa0,0x9f,0x0c]
-; CHECK: st1.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x60,0x9f,0x0c]
-; CHECK: st1.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x20,0x9f,0x0c]
-
-; CHECK: st1.16b { v0 }, [x1], #16 ; encoding: [0x20,0x70,0x9f,0x4c]
-; CHECK: st1.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa0,0x9f,0x4c]
-; CHECK: st1.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x60,0x9f,0x4c]
-; CHECK: st1.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x20,0x9f,0x4c]
-
-; CHECK: st1.4h { v0 }, [x1], #8 ; encoding: [0x20,0x74,0x9f,0x0c]
-; CHECK: st1.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa4,0x9f,0x0c]
-; CHECK: st1.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x64,0x9f,0x0c]
-; CHECK: st1.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x24,0x9f,0x0c]
-
-; CHECK: st1.8h { v0 }, [x1], #16 ; encoding: [0x20,0x74,0x9f,0x4c]
-; CHECK: st1.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa4,0x9f,0x4c]
-; CHECK: st1.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x64,0x9f,0x4c]
-; CHECK: st1.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x24,0x9f,0x4c]
-
-; CHECK: st1.2s { v0 }, [x1], #8 ; encoding: [0x20,0x78,0x9f,0x0c]
-; CHECK: st1.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0xa8,0x9f,0x0c]
-; CHECK: st1.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x68,0x9f,0x0c]
-; CHECK: st1.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x28,0x9f,0x0c]
-
-; CHECK: st1.4s { v0 }, [x1], #16 ; encoding: [0x20,0x78,0x9f,0x4c]
-; CHECK: st1.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0xa8,0x9f,0x4c]
-; CHECK: st1.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x68,0x9f,0x4c]
-; CHECK: st1.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x28,0x9f,0x4c]
-
-; CHECK: st1.1d { v0 }, [x1], #8 ; encoding: [0x20,0x7c,0x9f,0x0c]
-; CHECK: st1.1d { v0, v1 }, [x1], #16 ; encoding: [0x20,0xac,0x9f,0x0c]
-; CHECK: st1.1d { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x6c,0x9f,0x0c]
-; CHECK: st1.1d { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x2c,0x9f,0x0c]
-
-; CHECK: st1.2d { v0 }, [x1], #16 ; encoding: [0x20,0x7c,0x9f,0x4c]
-; CHECK: st1.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0xac,0x9f,0x4c]
-; CHECK: st1.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x6c,0x9f,0x4c]
-; CHECK: st1.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x2c,0x9f,0x4c]
-
-
-_ld2st2_multiple_post:
- ld2.8b {v0, v1}, [x1], x15
- ld2.16b {v0, v1}, [x1], x15
- ld2.4h {v0, v1}, [x1], x15
- ld2.8h {v0, v1}, [x1], x15
- ld2.2s {v0, v1}, [x1], x15
- ld2.4s {v0, v1}, [x1], x15
- ld2.2d {v0, v1}, [x1], x15
-
- st2.8b {v0, v1}, [x1], x15
- st2.16b {v0, v1}, [x1], x15
- st2.4h {v0, v1}, [x1], x15
- st2.8h {v0, v1}, [x1], x15
- st2.2s {v0, v1}, [x1], x15
- st2.4s {v0, v1}, [x1], x15
- st2.2d {v0, v1}, [x1], x15
-
- ld2.8b {v0, v1}, [x1], #16
- ld2.16b {v0, v1}, [x1], #32
- ld2.4h {v0, v1}, [x1], #16
- ld2.8h {v0, v1}, [x1], #32
- ld2.2s {v0, v1}, [x1], #16
- ld2.4s {v0, v1}, [x1], #32
- ld2.2d {v0, v1}, [x1], #32
-
- st2.8b {v0, v1}, [x1], #16
- st2.16b {v0, v1}, [x1], #32
- st2.4h {v0, v1}, [x1], #16
- st2.8h {v0, v1}, [x1], #32
- st2.2s {v0, v1}, [x1], #16
- st2.4s {v0, v1}, [x1], #32
- st2.2d {v0, v1}, [x1], #32
-
-
-; CHECK: ld2st2_multiple_post:
-; CHECK: ld2.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0xcf,0x0c]
-; CHECK: ld2.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0xcf,0x4c]
-; CHECK: ld2.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0xcf,0x0c]
-; CHECK: ld2.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0xcf,0x4c]
-; CHECK: ld2.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0xcf,0x0c]
-; CHECK: ld2.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0xcf,0x4c]
-; CHECK: ld2.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0x8c,0xcf,0x4c]
-
-; CHECK: st2.8b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0x8f,0x0c]
-; CHECK: st2.16b { v0, v1 }, [x1], x15 ; encoding: [0x20,0x80,0x8f,0x4c]
-; CHECK: st2.4h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0x8f,0x0c]
-; CHECK: st2.8h { v0, v1 }, [x1], x15 ; encoding: [0x20,0x84,0x8f,0x4c]
-; CHECK: st2.2s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0x8f,0x0c]
-; CHECK: st2.4s { v0, v1 }, [x1], x15 ; encoding: [0x20,0x88,0x8f,0x4c]
-; CHECK: st2.2d { v0, v1 }, [x1], x15 ; encoding: [0x20,0x8c,0x8f,0x4c]
-
-; CHECK: ld2.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0x80,0xdf,0x0c]
-; CHECK: ld2.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0x80,0xdf,0x4c]
-; CHECK: ld2.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0x84,0xdf,0x0c]
-; CHECK: ld2.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0x84,0xdf,0x4c]
-; CHECK: ld2.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0x88,0xdf,0x0c]
-; CHECK: ld2.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0x88,0xdf,0x4c]
-; CHECK: ld2.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0x8c,0xdf,0x4c]
-
-; CHECK: st2.8b { v0, v1 }, [x1], #16 ; encoding: [0x20,0x80,0x9f,0x0c]
-; CHECK: st2.16b { v0, v1 }, [x1], #32 ; encoding: [0x20,0x80,0x9f,0x4c]
-; CHECK: st2.4h { v0, v1 }, [x1], #16 ; encoding: [0x20,0x84,0x9f,0x0c]
-; CHECK: st2.8h { v0, v1 }, [x1], #32 ; encoding: [0x20,0x84,0x9f,0x4c]
-; CHECK: st2.2s { v0, v1 }, [x1], #16 ; encoding: [0x20,0x88,0x9f,0x0c]
-; CHECK: st2.4s { v0, v1 }, [x1], #32 ; encoding: [0x20,0x88,0x9f,0x4c]
-; CHECK: st2.2d { v0, v1 }, [x1], #32 ; encoding: [0x20,0x8c,0x9f,0x4c]
-
-
-_ld3st3_multiple_post:
- ld3.8b {v0, v1, v2}, [x1], x15
- ld3.16b {v0, v1, v2}, [x1], x15
- ld3.4h {v0, v1, v2}, [x1], x15
- ld3.8h {v0, v1, v2}, [x1], x15
- ld3.2s {v0, v1, v2}, [x1], x15
- ld3.4s {v0, v1, v2}, [x1], x15
- ld3.2d {v0, v1, v2}, [x1], x15
-
- st3.8b {v0, v1, v2}, [x1], x15
- st3.16b {v0, v1, v2}, [x1], x15
- st3.4h {v0, v1, v2}, [x1], x15
- st3.8h {v0, v1, v2}, [x1], x15
- st3.2s {v0, v1, v2}, [x1], x15
- st3.4s {v0, v1, v2}, [x1], x15
- st3.2d {v0, v1, v2}, [x1], x15
-
- ld3.8b {v0, v1, v2}, [x1], #24
- ld3.16b {v0, v1, v2}, [x1], #48
- ld3.4h {v0, v1, v2}, [x1], #24
- ld3.8h {v0, v1, v2}, [x1], #48
- ld3.2s {v0, v1, v2}, [x1], #24
- ld3.4s {v0, v1, v2}, [x1], #48
- ld3.2d {v0, v1, v2}, [x1], #48
-
- st3.8b {v0, v1, v2}, [x1], #24
- st3.16b {v0, v1, v2}, [x1], #48
- st3.4h {v0, v1, v2}, [x1], #24
- st3.8h {v0, v1, v2}, [x1], #48
- st3.2s {v0, v1, v2}, [x1], #24
- st3.4s {v0, v1, v2}, [x1], #48
- st3.2d {v0, v1, v2}, [x1], #48
-
-; CHECK: ld3st3_multiple_post:
-; CHECK: ld3.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0xcf,0x0c]
-; CHECK: ld3.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0xcf,0x4c]
-; CHECK: ld3.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0xcf,0x0c]
-; CHECK: ld3.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0xcf,0x4c]
-; CHECK: ld3.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0xcf,0x0c]
-; CHECK: ld3.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0xcf,0x4c]
-; CHECK: ld3.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x4c,0xcf,0x4c]
-
-; CHECK: st3.8b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0x8f,0x0c]
-; CHECK: st3.16b { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x40,0x8f,0x4c]
-; CHECK: st3.4h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0x8f,0x0c]
-; CHECK: st3.8h { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x44,0x8f,0x4c]
-; CHECK: st3.2s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0x8f,0x0c]
-; CHECK: st3.4s { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x48,0x8f,0x4c]
-; CHECK: st3.2d { v0, v1, v2 }, [x1], x15 ; encoding: [0x20,0x4c,0x8f,0x4c]
-
-; CHECK: ld3.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x40,0xdf,0x0c]
-; CHECK: ld3.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x40,0xdf,0x4c]
-; CHECK: ld3.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x44,0xdf,0x0c]
-; CHECK: ld3.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x44,0xdf,0x4c]
-; CHECK: ld3.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x48,0xdf,0x0c]
-; CHECK: ld3.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x48,0xdf,0x4c]
-; CHECK: ld3.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x4c,0xdf,0x4c]
-
-; CHECK: st3.8b { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x40,0x9f,0x0c]
-; CHECK: st3.16b { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x40,0x9f,0x4c]
-; CHECK: st3.4h { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x44,0x9f,0x0c]
-; CHECK: st3.8h { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x44,0x9f,0x4c]
-; CHECK: st3.2s { v0, v1, v2 }, [x1], #24 ; encoding: [0x20,0x48,0x9f,0x0c]
-; CHECK: st3.4s { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x48,0x9f,0x4c]
-; CHECK: st3.2d { v0, v1, v2 }, [x1], #48 ; encoding: [0x20,0x4c,0x9f,0x4c]
-
-_ld4st4_multiple_post:
- ld4.8b {v0, v1, v2, v3}, [x1], x15
- ld4.16b {v0, v1, v2, v3}, [x1], x15
- ld4.4h {v0, v1, v2, v3}, [x1], x15
- ld4.8h {v0, v1, v2, v3}, [x1], x15
- ld4.2s {v0, v1, v2, v3}, [x1], x15
- ld4.4s {v0, v1, v2, v3}, [x1], x15
- ld4.2d {v0, v1, v2, v3}, [x1], x15
-
- st4.8b {v0, v1, v2, v3}, [x1], x15
- st4.16b {v0, v1, v2, v3}, [x1], x15
- st4.4h {v0, v1, v2, v3}, [x1], x15
- st4.8h {v0, v1, v2, v3}, [x1], x15
- st4.2s {v0, v1, v2, v3}, [x1], x15
- st4.4s {v0, v1, v2, v3}, [x1], x15
- st4.2d {v0, v1, v2, v3}, [x1], x15
-
- ld4.8b {v0, v1, v2, v3}, [x1], #32
- ld4.16b {v0, v1, v2, v3}, [x1], #64
- ld4.4h {v0, v1, v2, v3}, [x1], #32
- ld4.8h {v0, v1, v2, v3}, [x1], #64
- ld4.2s {v0, v1, v2, v3}, [x1], #32
- ld4.4s {v0, v1, v2, v3}, [x1], #64
- ld4.2d {v0, v1, v2, v3}, [x1], #64
-
- st4.8b {v0, v1, v2, v3}, [x1], #32
- st4.16b {v0, v1, v2, v3}, [x1], #64
- st4.4h {v0, v1, v2, v3}, [x1], #32
- st4.8h {v0, v1, v2, v3}, [x1], #64
- st4.2s {v0, v1, v2, v3}, [x1], #32
- st4.4s {v0, v1, v2, v3}, [x1], #64
- st4.2d {v0, v1, v2, v3}, [x1], #64
-
-
-; CHECK: ld4st4_multiple_post:
-; CHECK: ld4.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0xcf,0x0c]
-; CHECK: ld4.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0xcf,0x4c]
-; CHECK: ld4.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0xcf,0x0c]
-; CHECK: ld4.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0xcf,0x4c]
-; CHECK: ld4.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0xcf,0x0c]
-; CHECK: ld4.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0xcf,0x4c]
-; CHECK: ld4.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x0c,0xcf,0x4c]
-
-; CHECK: st4.8b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0x8f,0x0c]
-; CHECK: st4.16b { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x00,0x8f,0x4c]
-; CHECK: st4.4h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0x8f,0x0c]
-; CHECK: st4.8h { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x04,0x8f,0x4c]
-; CHECK: st4.2s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0x8f,0x0c]
-; CHECK: st4.4s { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x08,0x8f,0x4c]
-; CHECK: st4.2d { v0, v1, v2, v3 }, [x1], x15 ; encoding: [0x20,0x0c,0x8f,0x4c]
-
-; CHECK: ld4.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x00,0xdf,0x0c]
-; CHECK: ld4.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x00,0xdf,0x4c]
-; CHECK: ld4.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x04,0xdf,0x0c]
-; CHECK: ld4.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x04,0xdf,0x4c]
-; CHECK: ld4.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x08,0xdf,0x0c]
-; CHECK: ld4.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x08,0xdf,0x4c]
-; CHECK: ld4.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x0c,0xdf,0x4c]
-
-; CHECK: st4.8b { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x00,0x9f,0x0c]
-; CHECK: st4.16b { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x00,0x9f,0x4c]
-; CHECK: st4.4h { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x04,0x9f,0x0c]
-; CHECK: st4.8h { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x04,0x9f,0x4c]
-; CHECK: st4.2s { v0, v1, v2, v3 }, [x1], #32 ; encoding: [0x20,0x08,0x9f,0x0c]
-; CHECK: st4.4s { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x08,0x9f,0x4c]
-; CHECK: st4.2d { v0, v1, v2, v3 }, [x1], #64 ; encoding: [0x20,0x0c,0x9f,0x4c]
-
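-; ldNr: load a single element and replicate it to every lane of N registers;
-; the post-index immediate equals the element size times N.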
-ld1r:
- ld1r.8b {v4}, [x2]
- ld1r.8b {v4}, [x2], x3
- ld1r.16b {v4}, [x2]
- ld1r.16b {v4}, [x2], x3
- ld1r.4h {v4}, [x2]
- ld1r.4h {v4}, [x2], x3
- ld1r.8h {v4}, [x2]
- ld1r.8h {v4}, [x2], x3
- ld1r.2s {v4}, [x2]
- ld1r.2s {v4}, [x2], x3
- ld1r.4s {v4}, [x2]
- ld1r.4s {v4}, [x2], x3
- ld1r.1d {v4}, [x2]
- ld1r.1d {v4}, [x2], x3
- ld1r.2d {v4}, [x2]
- ld1r.2d {v4}, [x2], x3
-
- ld1r.8b {v4}, [x2], #1
- ld1r.16b {v4}, [x2], #1
- ld1r.4h {v4}, [x2], #2
- ld1r.8h {v4}, [x2], #2
- ld1r.2s {v4}, [x2], #4
- ld1r.4s {v4}, [x2], #4
- ld1r.1d {v4}, [x2], #8
- ld1r.2d {v4}, [x2], #8
-
-; CHECK: ld1r:
-; CHECK: ld1r.8b { v4 }, [x2] ; encoding: [0x44,0xc0,0x40,0x0d]
-; CHECK: ld1r.8b { v4 }, [x2], x3 ; encoding: [0x44,0xc0,0xc3,0x0d]
-; CHECK: ld1r.16b { v4 }, [x2] ; encoding: [0x44,0xc0,0x40,0x4d]
-; CHECK: ld1r.16b { v4 }, [x2], x3 ; encoding: [0x44,0xc0,0xc3,0x4d]
-; CHECK: ld1r.4h { v4 }, [x2] ; encoding: [0x44,0xc4,0x40,0x0d]
-; CHECK: ld1r.4h { v4 }, [x2], x3 ; encoding: [0x44,0xc4,0xc3,0x0d]
-; CHECK: ld1r.8h { v4 }, [x2] ; encoding: [0x44,0xc4,0x40,0x4d]
-; CHECK: ld1r.8h { v4 }, [x2], x3 ; encoding: [0x44,0xc4,0xc3,0x4d]
-; CHECK: ld1r.2s { v4 }, [x2] ; encoding: [0x44,0xc8,0x40,0x0d]
-; CHECK: ld1r.2s { v4 }, [x2], x3 ; encoding: [0x44,0xc8,0xc3,0x0d]
-; CHECK: ld1r.4s { v4 }, [x2] ; encoding: [0x44,0xc8,0x40,0x4d]
-; CHECK: ld1r.4s { v4 }, [x2], x3 ; encoding: [0x44,0xc8,0xc3,0x4d]
-; CHECK: ld1r.1d { v4 }, [x2] ; encoding: [0x44,0xcc,0x40,0x0d]
-; CHECK: ld1r.1d { v4 }, [x2], x3 ; encoding: [0x44,0xcc,0xc3,0x0d]
-; CHECK: ld1r.2d { v4 }, [x2] ; encoding: [0x44,0xcc,0x40,0x4d]
-; CHECK: ld1r.2d { v4 }, [x2], x3 ; encoding: [0x44,0xcc,0xc3,0x4d]
-
-; CHECK: ld1r.8b { v4 }, [x2], #1 ; encoding: [0x44,0xc0,0xdf,0x0d]
-; CHECK: ld1r.16b { v4 }, [x2], #1 ; encoding: [0x44,0xc0,0xdf,0x4d]
-; CHECK: ld1r.4h { v4 }, [x2], #2 ; encoding: [0x44,0xc4,0xdf,0x0d]
-; CHECK: ld1r.8h { v4 }, [x2], #2 ; encoding: [0x44,0xc4,0xdf,0x4d]
-; CHECK: ld1r.2s { v4 }, [x2], #4 ; encoding: [0x44,0xc8,0xdf,0x0d]
-; CHECK: ld1r.4s { v4 }, [x2], #4 ; encoding: [0x44,0xc8,0xdf,0x4d]
-; CHECK: ld1r.1d { v4 }, [x2], #8 ; encoding: [0x44,0xcc,0xdf,0x0d]
-; CHECK: ld1r.2d { v4 }, [x2], #8 ; encoding: [0x44,0xcc,0xdf,0x4d]
-
-ld2r:
- ld2r.8b {v4, v5}, [x2]
- ld2r.8b {v4, v5}, [x2], x3
- ld2r.16b {v4, v5}, [x2]
- ld2r.16b {v4, v5}, [x2], x3
- ld2r.4h {v4, v5}, [x2]
- ld2r.4h {v4, v5}, [x2], x3
- ld2r.8h {v4, v5}, [x2]
- ld2r.8h {v4, v5}, [x2], x3
- ld2r.2s {v4, v5}, [x2]
- ld2r.2s {v4, v5}, [x2], x3
- ld2r.4s {v4, v5}, [x2]
- ld2r.4s {v4, v5}, [x2], x3
- ld2r.1d {v4, v5}, [x2]
- ld2r.1d {v4, v5}, [x2], x3
- ld2r.2d {v4, v5}, [x2]
- ld2r.2d {v4, v5}, [x2], x3
-
- ld2r.8b {v4, v5}, [x2], #2
- ld2r.16b {v4, v5}, [x2], #2
- ld2r.4h {v4, v5}, [x2], #4
- ld2r.8h {v4, v5}, [x2], #4
- ld2r.2s {v4, v5}, [x2], #8
- ld2r.4s {v4, v5}, [x2], #8
- ld2r.1d {v4, v5}, [x2], #16
- ld2r.2d {v4, v5}, [x2], #16
-
-; CHECK: ld2r:
-; CHECK: ld2r.8b { v4, v5 }, [x2] ; encoding: [0x44,0xc0,0x60,0x0d]
-; CHECK: ld2r.8b { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc0,0xe3,0x0d]
-; CHECK: ld2r.16b { v4, v5 }, [x2] ; encoding: [0x44,0xc0,0x60,0x4d]
-; CHECK: ld2r.16b { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc0,0xe3,0x4d]
-; CHECK: ld2r.4h { v4, v5 }, [x2] ; encoding: [0x44,0xc4,0x60,0x0d]
-; CHECK: ld2r.4h { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc4,0xe3,0x0d]
-; CHECK: ld2r.8h { v4, v5 }, [x2] ; encoding: [0x44,0xc4,0x60,0x4d]
-; CHECK: ld2r.8h { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc4,0xe3,0x4d]
-; CHECK: ld2r.2s { v4, v5 }, [x2] ; encoding: [0x44,0xc8,0x60,0x0d]
-; CHECK: ld2r.2s { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc8,0xe3,0x0d]
-; CHECK: ld2r.4s { v4, v5 }, [x2] ; encoding: [0x44,0xc8,0x60,0x4d]
-; CHECK: ld2r.4s { v4, v5 }, [x2], x3 ; encoding: [0x44,0xc8,0xe3,0x4d]
-; CHECK: ld2r.1d { v4, v5 }, [x2] ; encoding: [0x44,0xcc,0x60,0x0d]
-; CHECK: ld2r.1d { v4, v5 }, [x2], x3 ; encoding: [0x44,0xcc,0xe3,0x0d]
-; CHECK: ld2r.2d { v4, v5 }, [x2] ; encoding: [0x44,0xcc,0x60,0x4d]
-; CHECK: ld2r.2d { v4, v5 }, [x2], x3 ; encoding: [0x44,0xcc,0xe3,0x4d]
-
-; CHECK: ld2r.8b { v4, v5 }, [x2], #2 ; encoding: [0x44,0xc0,0xff,0x0d]
-; CHECK: ld2r.16b { v4, v5 }, [x2], #2 ; encoding: [0x44,0xc0,0xff,0x4d]
-; CHECK: ld2r.4h { v4, v5 }, [x2], #4 ; encoding: [0x44,0xc4,0xff,0x0d]
-; CHECK: ld2r.8h { v4, v5 }, [x2], #4 ; encoding: [0x44,0xc4,0xff,0x4d]
-; CHECK: ld2r.2s { v4, v5 }, [x2], #8 ; encoding: [0x44,0xc8,0xff,0x0d]
-; CHECK: ld2r.4s { v4, v5 }, [x2], #8 ; encoding: [0x44,0xc8,0xff,0x4d]
-; CHECK: ld2r.1d { v4, v5 }, [x2], #16 ; encoding: [0x44,0xcc,0xff,0x0d]
-; CHECK: ld2r.2d { v4, v5 }, [x2], #16 ; encoding: [0x44,0xcc,0xff,0x4d]
-
-ld3r:
- ld3r.8b {v4, v5, v6}, [x2]
- ld3r.8b {v4, v5, v6}, [x2], x3
- ld3r.16b {v4, v5, v6}, [x2]
- ld3r.16b {v4, v5, v6}, [x2], x3
- ld3r.4h {v4, v5, v6}, [x2]
- ld3r.4h {v4, v5, v6}, [x2], x3
- ld3r.8h {v4, v5, v6}, [x2]
- ld3r.8h {v4, v5, v6}, [x2], x3
- ld3r.2s {v4, v5, v6}, [x2]
- ld3r.2s {v4, v5, v6}, [x2], x3
- ld3r.4s {v4, v5, v6}, [x2]
- ld3r.4s {v4, v5, v6}, [x2], x3
- ld3r.1d {v4, v5, v6}, [x2]
- ld3r.1d {v4, v5, v6}, [x2], x3
- ld3r.2d {v4, v5, v6}, [x2]
- ld3r.2d {v4, v5, v6}, [x2], x3
-
- ld3r.8b {v4, v5, v6}, [x2], #3
- ld3r.16b {v4, v5, v6}, [x2], #3
- ld3r.4h {v4, v5, v6}, [x2], #6
- ld3r.8h {v4, v5, v6}, [x2], #6
- ld3r.2s {v4, v5, v6}, [x2], #12
- ld3r.4s {v4, v5, v6}, [x2], #12
- ld3r.1d {v4, v5, v6}, [x2], #24
- ld3r.2d {v4, v5, v6}, [x2], #24
-
-; CHECK: ld3r:
-; CHECK: ld3r.8b { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe0,0x40,0x0d]
-; CHECK: ld3r.8b { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe0,0xc3,0x0d]
-; CHECK: ld3r.16b { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe0,0x40,0x4d]
-; CHECK: ld3r.16b { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe0,0xc3,0x4d]
-; CHECK: ld3r.4h { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe4,0x40,0x0d]
-; CHECK: ld3r.4h { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe4,0xc3,0x0d]
-; CHECK: ld3r.8h { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe4,0x40,0x4d]
-; CHECK: ld3r.8h { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe4,0xc3,0x4d]
-; CHECK: ld3r.2s { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe8,0x40,0x0d]
-; CHECK: ld3r.2s { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe8,0xc3,0x0d]
-; CHECK: ld3r.4s { v4, v5, v6 }, [x2] ; encoding: [0x44,0xe8,0x40,0x4d]
-; CHECK: ld3r.4s { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xe8,0xc3,0x4d]
-; CHECK: ld3r.1d { v4, v5, v6 }, [x2] ; encoding: [0x44,0xec,0x40,0x0d]
-; CHECK: ld3r.1d { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xec,0xc3,0x0d]
-; CHECK: ld3r.2d { v4, v5, v6 }, [x2] ; encoding: [0x44,0xec,0x40,0x4d]
-; CHECK: ld3r.2d { v4, v5, v6 }, [x2], x3 ; encoding: [0x44,0xec,0xc3,0x4d]
-
-; CHECK: ld3r.8b { v4, v5, v6 }, [x2], #3 ; encoding: [0x44,0xe0,0xdf,0x0d]
-; CHECK: ld3r.16b { v4, v5, v6 }, [x2], #3 ; encoding: [0x44,0xe0,0xdf,0x4d]
-; CHECK: ld3r.4h { v4, v5, v6 }, [x2], #6 ; encoding: [0x44,0xe4,0xdf,0x0d]
-; CHECK: ld3r.8h { v4, v5, v6 }, [x2], #6 ; encoding: [0x44,0xe4,0xdf,0x4d]
-; CHECK: ld3r.2s { v4, v5, v6 }, [x2], #12 ; encoding: [0x44,0xe8,0xdf,0x0d]
-; CHECK: ld3r.4s { v4, v5, v6 }, [x2], #12 ; encoding: [0x44,0xe8,0xdf,0x4d]
-; CHECK: ld3r.1d { v4, v5, v6 }, [x2], #24 ; encoding: [0x44,0xec,0xdf,0x0d]
-; CHECK: ld3r.2d { v4, v5, v6 }, [x2], #24 ; encoding: [0x44,0xec,0xdf,0x4d]
-
-ld4r:
- ld4r.8b {v4, v5, v6, v7}, [x2]
- ld4r.8b {v4, v5, v6, v7}, [x2], x3
- ld4r.16b {v4, v5, v6, v7}, [x2]
- ld4r.16b {v4, v5, v6, v7}, [x2], x3
- ld4r.4h {v4, v5, v6, v7}, [x2]
- ld4r.4h {v4, v5, v6, v7}, [x2], x3
- ld4r.8h {v4, v5, v6, v7}, [x2]
- ld4r.8h {v4, v5, v6, v7}, [x2], x3
- ld4r.2s {v4, v5, v6, v7}, [x2]
- ld4r.2s {v4, v5, v6, v7}, [x2], x3
- ld4r.4s {v4, v5, v6, v7}, [x2]
- ld4r.4s {v4, v5, v6, v7}, [x2], x3
- ld4r.1d {v4, v5, v6, v7}, [x2]
- ld4r.1d {v4, v5, v6, v7}, [x2], x3
- ld4r.2d {v4, v5, v6, v7}, [x2]
- ld4r.2d {v4, v5, v6, v7}, [x2], x3
-
- ld4r.8b {v4, v5, v6, v7}, [x2], #4
- ld4r.16b {v5, v6, v7, v8}, [x2], #4
- ld4r.4h {v6, v7, v8, v9}, [x2], #8
- ld4r.8h {v1, v2, v3, v4}, [x2], #8
- ld4r.2s {v2, v3, v4, v5}, [x2], #16
- ld4r.4s {v3, v4, v5, v6}, [x2], #16
- ld4r.1d {v0, v1, v2, v3}, [x2], #32
- ld4r.2d {v4, v5, v6, v7}, [x2], #32
-
-; CHECK: ld4r:
-; CHECK: ld4r.8b { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe0,0x60,0x0d]
-; CHECK: ld4r.8b { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe0,0xe3,0x0d]
-; CHECK: ld4r.16b { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe0,0x60,0x4d]
-; CHECK: ld4r.16b { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe0,0xe3,0x4d]
-; CHECK: ld4r.4h { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe4,0x60,0x0d]
-; CHECK: ld4r.4h { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe4,0xe3,0x0d]
-; CHECK: ld4r.8h { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe4,0x60,0x4d]
-; CHECK: ld4r.8h { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe4,0xe3,0x4d]
-; CHECK: ld4r.2s { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe8,0x60,0x0d]
-; CHECK: ld4r.2s { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe8,0xe3,0x0d]
-; CHECK: ld4r.4s { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xe8,0x60,0x4d]
-; CHECK: ld4r.4s { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xe8,0xe3,0x4d]
-; CHECK: ld4r.1d { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xec,0x60,0x0d]
-; CHECK: ld4r.1d { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xec,0xe3,0x0d]
-; CHECK: ld4r.2d { v4, v5, v6, v7 }, [x2] ; encoding: [0x44,0xec,0x60,0x4d]
-; CHECK: ld4r.2d { v4, v5, v6, v7 }, [x2], x3 ; encoding: [0x44,0xec,0xe3,0x4d]
-
-; CHECK: ld4r.8b { v4, v5, v6, v7 }, [x2], #4 ; encoding: [0x44,0xe0,0xff,0x0d]
-; CHECK: ld4r.16b { v5, v6, v7, v8 }, [x2], #4 ; encoding: [0x45,0xe0,0xff,0x4d]
-; CHECK: ld4r.4h { v6, v7, v8, v9 }, [x2], #8 ; encoding: [0x46,0xe4,0xff,0x0d]
-; CHECK: ld4r.8h { v1, v2, v3, v4 }, [x2], #8 ; encoding: [0x41,0xe4,0xff,0x4d]
-; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2], #16 ; encoding: [0x42,0xe8,0xff,0x0d]
-; CHECK: ld4r.4s { v3, v4, v5, v6 }, [x2], #16 ; encoding: [0x43,0xe8,0xff,0x4d]
-; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x2], #32 ; encoding: [0x40,0xec,0xff,0x0d]
-; CHECK: ld4r.2d { v4, v5, v6, v7 }, [x2], #32 ; encoding: [0x44,0xec,0xff,0x4d]
-
-
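-; Lane-indexed single-structure forms: one element per register, at lane [i].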
-_ld1:
- ld1.b {v4}[13], [x3]
- ld1.h {v4}[2], [x3]
- ld1.s {v4}[2], [x3]
- ld1.d {v4}[1], [x3]
- ld1.b {v4}[13], [x3], x5
- ld1.h {v4}[2], [x3], x5
- ld1.s {v4}[2], [x3], x5
- ld1.d {v4}[1], [x3], x5
- ld1.b {v4}[13], [x3], #1
- ld1.h {v4}[2], [x3], #2
- ld1.s {v4}[2], [x3], #4
- ld1.d {v4}[1], [x3], #8
-
-; CHECK: _ld1:
-; CHECK: ld1.b { v4 }[13], [x3] ; encoding: [0x64,0x14,0x40,0x4d]
-; CHECK: ld1.h { v4 }[2], [x3] ; encoding: [0x64,0x50,0x40,0x0d]
-; CHECK: ld1.s { v4 }[2], [x3] ; encoding: [0x64,0x80,0x40,0x4d]
-; CHECK: ld1.d { v4 }[1], [x3] ; encoding: [0x64,0x84,0x40,0x4d]
-; CHECK: ld1.b { v4 }[13], [x3], x5 ; encoding: [0x64,0x14,0xc5,0x4d]
-; CHECK: ld1.h { v4 }[2], [x3], x5 ; encoding: [0x64,0x50,0xc5,0x0d]
-; CHECK: ld1.s { v4 }[2], [x3], x5 ; encoding: [0x64,0x80,0xc5,0x4d]
-; CHECK: ld1.d { v4 }[1], [x3], x5 ; encoding: [0x64,0x84,0xc5,0x4d]
-; CHECK: ld1.b { v4 }[13], [x3], #1 ; encoding: [0x64,0x14,0xdf,0x4d]
-; CHECK: ld1.h { v4 }[2], [x3], #2 ; encoding: [0x64,0x50,0xdf,0x0d]
-; CHECK: ld1.s { v4 }[2], [x3], #4 ; encoding: [0x64,0x80,0xdf,0x4d]
-; CHECK: ld1.d { v4 }[1], [x3], #8 ; encoding: [0x64,0x84,0xdf,0x4d]
-
-_ld2:
- ld2.b {v4, v5}[13], [x3]
- ld2.h {v4, v5}[2], [x3]
- ld2.s {v4, v5}[2], [x3]
- ld2.d {v4, v5}[1], [x3]
- ld2.b {v4, v5}[13], [x3], x5
- ld2.h {v4, v5}[2], [x3], x5
- ld2.s {v4, v5}[2], [x3], x5
- ld2.d {v4, v5}[1], [x3], x5
- ld2.b {v4, v5}[13], [x3], #2
- ld2.h {v4, v5}[2], [x3], #4
- ld2.s {v4, v5}[2], [x3], #8
- ld2.d {v4, v5}[1], [x3], #16
-
-
-; CHECK: _ld2:
-; CHECK: ld2.b { v4, v5 }[13], [x3] ; encoding: [0x64,0x14,0x60,0x4d]
-; CHECK: ld2.h { v4, v5 }[2], [x3] ; encoding: [0x64,0x50,0x60,0x0d]
-; CHECK: ld2.s { v4, v5 }[2], [x3] ; encoding: [0x64,0x80,0x60,0x4d]
-; CHECK: ld2.d { v4, v5 }[1], [x3] ; encoding: [0x64,0x84,0x60,0x4d]
-; CHECK: ld2.b { v4, v5 }[13], [x3], x5 ; encoding: [0x64,0x14,0xe5,0x4d]
-; CHECK: ld2.h { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x50,0xe5,0x0d]
-; CHECK: ld2.s { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x80,0xe5,0x4d]
-; CHECK: ld2.d { v4, v5 }[1], [x3], x5 ; encoding: [0x64,0x84,0xe5,0x4d]
-; CHECK: ld2.b { v4, v5 }[13], [x3], #2 ; encoding: [0x64,0x14,0xff,0x4d]
-; CHECK: ld2.h { v4, v5 }[2], [x3], #4 ; encoding: [0x64,0x50,0xff,0x0d]
-; CHECK: ld2.s { v4, v5 }[2], [x3], #8 ; encoding: [0x64,0x80,0xff,0x4d]
-; CHECK: ld2.d { v4, v5 }[1], [x3], #16 ; encoding: [0x64,0x84,0xff,0x4d]
-
-
-_ld3:
- ld3.b {v4, v5, v6}[13], [x3]
- ld3.h {v4, v5, v6}[2], [x3]
- ld3.s {v4, v5, v6}[2], [x3]
- ld3.d {v4, v5, v6}[1], [x3]
- ld3.b {v4, v5, v6}[13], [x3], x5
- ld3.h {v4, v5, v6}[2], [x3], x5
- ld3.s {v4, v5, v6}[2], [x3], x5
- ld3.d {v4, v5, v6}[1], [x3], x5
- ld3.b {v4, v5, v6}[13], [x3], #3
- ld3.h {v4, v5, v6}[2], [x3], #6
- ld3.s {v4, v5, v6}[2], [x3], #12
- ld3.d {v4, v5, v6}[1], [x3], #24
-
-
-; CHECK: _ld3:
-; CHECK: ld3.b { v4, v5, v6 }[13], [x3] ; encoding: [0x64,0x34,0x40,0x4d]
-; CHECK: ld3.h { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0x70,0x40,0x0d]
-; CHECK: ld3.s { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0xa0,0x40,0x4d]
-; CHECK: ld3.d { v4, v5, v6 }[1], [x3] ; encoding: [0x64,0xa4,0x40,0x4d]
-; CHECK: ld3.b { v4, v5, v6 }[13], [x3], x5 ; encoding: [0x64,0x34,0xc5,0x4d]
-; CHECK: ld3.h { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0x70,0xc5,0x0d]
-; CHECK: ld3.s { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0xa0,0xc5,0x4d]
-; CHECK: ld3.d { v4, v5, v6 }[1], [x3], x5 ; encoding: [0x64,0xa4,0xc5,0x4d]
-; CHECK: ld3.b { v4, v5, v6 }[13], [x3], #3 ; encoding: [0x64,0x34,0xdf,0x4d]
-; CHECK: ld3.h { v4, v5, v6 }[2], [x3], #6 ; encoding: [0x64,0x70,0xdf,0x0d]
-; CHECK: ld3.s { v4, v5, v6 }[2], [x3], #12 ; encoding: [0x64,0xa0,0xdf,0x4d]
-; CHECK: ld3.d { v4, v5, v6 }[1], [x3], #24 ; encoding: [0x64,0xa4,0xdf,0x4d]
-
-
-_ld4:
- ld4.b {v4, v5, v6, v7}[13], [x3]
- ld4.h {v4, v5, v6, v7}[2], [x3]
- ld4.s {v4, v5, v6, v7}[2], [x3]
- ld4.d {v4, v5, v6, v7}[1], [x3]
- ld4.b {v4, v5, v6, v7}[13], [x3], x5
- ld4.h {v4, v5, v6, v7}[2], [x3], x5
- ld4.s {v4, v5, v6, v7}[2], [x3], x5
- ld4.d {v4, v5, v6, v7}[1], [x3], x5
- ld4.b {v4, v5, v6, v7}[13], [x3], #4
- ld4.h {v4, v5, v6, v7}[2], [x3], #8
- ld4.s {v4, v5, v6, v7}[2], [x3], #16
- ld4.d {v4, v5, v6, v7}[1], [x3], #32
-
-; CHECK: _ld4:
-; CHECK: ld4.b { v4, v5, v6, v7 }[13], [x3] ; encoding: [0x64,0x34,0x60,0x4d]
-; CHECK: ld4.h { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0x70,0x60,0x0d]
-; CHECK: ld4.s { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0xa0,0x60,0x4d]
-; CHECK: ld4.d { v4, v5, v6, v7 }[1], [x3] ; encoding: [0x64,0xa4,0x60,0x4d]
-; CHECK: ld4.b { v4, v5, v6, v7 }[13], [x3], x5 ; encoding: [0x64,0x34,0xe5,0x4d]
-; CHECK: ld4.h { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0x70,0xe5,0x0d]
-; CHECK: ld4.s { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0xa0,0xe5,0x4d]
-; CHECK: ld4.d { v4, v5, v6, v7 }[1], [x3], x5 ; encoding: [0x64,0xa4,0xe5,0x4d]
-; CHECK: ld4.b { v4, v5, v6, v7 }[13], [x3], #4 ; encoding: [0x64,0x34,0xff,0x4d]
-; CHECK: ld4.h { v4, v5, v6, v7 }[2], [x3], #8 ; encoding: [0x64,0x70,0xff,0x0d]
-; CHECK: ld4.s { v4, v5, v6, v7 }[2], [x3], #16 ; encoding: [0x64,0xa0,0xff,0x4d]
-; CHECK: ld4.d { v4, v5, v6, v7 }[1], [x3], #32 ; encoding: [0x64,0xa4,0xff,0x4d]
-
-_st1:
- st1.b {v4}[13], [x3]
- st1.h {v4}[2], [x3]
- st1.s {v4}[2], [x3]
- st1.d {v4}[1], [x3]
- st1.b {v4}[13], [x3], x5
- st1.h {v4}[2], [x3], x5
- st1.s {v4}[2], [x3], x5
- st1.d {v4}[1], [x3], x5
- st1.b {v4}[13], [x3], #1
- st1.h {v4}[2], [x3], #2
- st1.s {v4}[2], [x3], #4
- st1.d {v4}[1], [x3], #8
-
-; CHECK: _st1:
-; CHECK: st1.b { v4 }[13], [x3] ; encoding: [0x64,0x14,0x00,0x4d]
-; CHECK: st1.h { v4 }[2], [x3] ; encoding: [0x64,0x50,0x00,0x0d]
-; CHECK: st1.s { v4 }[2], [x3] ; encoding: [0x64,0x80,0x00,0x4d]
-; CHECK: st1.d { v4 }[1], [x3] ; encoding: [0x64,0x84,0x00,0x4d]
-; CHECK: st1.b { v4 }[13], [x3], x5 ; encoding: [0x64,0x14,0x85,0x4d]
-; CHECK: st1.h { v4 }[2], [x3], x5 ; encoding: [0x64,0x50,0x85,0x0d]
-; CHECK: st1.s { v4 }[2], [x3], x5 ; encoding: [0x64,0x80,0x85,0x4d]
-; CHECK: st1.d { v4 }[1], [x3], x5 ; encoding: [0x64,0x84,0x85,0x4d]
-; CHECK: st1.b { v4 }[13], [x3], #1 ; encoding: [0x64,0x14,0x9f,0x4d]
-; CHECK: st1.h { v4 }[2], [x3], #2 ; encoding: [0x64,0x50,0x9f,0x0d]
-; CHECK: st1.s { v4 }[2], [x3], #4 ; encoding: [0x64,0x80,0x9f,0x4d]
-; CHECK: st1.d { v4 }[1], [x3], #8 ; encoding: [0x64,0x84,0x9f,0x4d]
-
-_st2:
- st2.b {v4, v5}[13], [x3]
- st2.h {v4, v5}[2], [x3]
- st2.s {v4, v5}[2], [x3]
- st2.d {v4, v5}[1], [x3]
- st2.b {v4, v5}[13], [x3], x5
- st2.h {v4, v5}[2], [x3], x5
- st2.s {v4, v5}[2], [x3], x5
- st2.d {v4, v5}[1], [x3], x5
- st2.b {v4, v5}[13], [x3], #2
- st2.h {v4, v5}[2], [x3], #4
- st2.s {v4, v5}[2], [x3], #8
- st2.d {v4, v5}[1], [x3], #16
-
-; CHECK: _st2:
-; CHECK: st2.b { v4, v5 }[13], [x3] ; encoding: [0x64,0x14,0x20,0x4d]
-; CHECK: st2.h { v4, v5 }[2], [x3] ; encoding: [0x64,0x50,0x20,0x0d]
-; CHECK: st2.s { v4, v5 }[2], [x3] ; encoding: [0x64,0x80,0x20,0x4d]
-; CHECK: st2.d { v4, v5 }[1], [x3] ; encoding: [0x64,0x84,0x20,0x4d]
-; CHECK: st2.b { v4, v5 }[13], [x3], x5 ; encoding: [0x64,0x14,0xa5,0x4d]
-; CHECK: st2.h { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x50,0xa5,0x0d]
-; CHECK: st2.s { v4, v5 }[2], [x3], x5 ; encoding: [0x64,0x80,0xa5,0x4d]
-; CHECK: st2.d { v4, v5 }[1], [x3], x5 ; encoding: [0x64,0x84,0xa5,0x4d]
-; CHECK: st2.b { v4, v5 }[13], [x3], #2 ; encoding: [0x64,0x14,0xbf,0x4d]
-; CHECK: st2.h { v4, v5 }[2], [x3], #4 ; encoding: [0x64,0x50,0xbf,0x0d]
-; CHECK: st2.s { v4, v5 }[2], [x3], #8 ; encoding: [0x64,0x80,0xbf,0x4d]
-; CHECK: st2.d { v4, v5 }[1], [x3], #16 ; encoding: [0x64,0x84,0xbf,0x4d]
-
-
-_st3:
- st3.b {v4, v5, v6}[13], [x3]
- st3.h {v4, v5, v6}[2], [x3]
- st3.s {v4, v5, v6}[2], [x3]
- st3.d {v4, v5, v6}[1], [x3]
- st3.b {v4, v5, v6}[13], [x3], x5
- st3.h {v4, v5, v6}[2], [x3], x5
- st3.s {v4, v5, v6}[2], [x3], x5
- st3.d {v4, v5, v6}[1], [x3], x5
- st3.b {v4, v5, v6}[13], [x3], #3
- st3.h {v4, v5, v6}[2], [x3], #6
- st3.s {v4, v5, v6}[2], [x3], #12
- st3.d {v4, v5, v6}[1], [x3], #24
-
-; CHECK: _st3:
-; CHECK: st3.b { v4, v5, v6 }[13], [x3] ; encoding: [0x64,0x34,0x00,0x4d]
-; CHECK: st3.h { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0x70,0x00,0x0d]
-; CHECK: st3.s { v4, v5, v6 }[2], [x3] ; encoding: [0x64,0xa0,0x00,0x4d]
-; CHECK: st3.d { v4, v5, v6 }[1], [x3] ; encoding: [0x64,0xa4,0x00,0x4d]
-; CHECK: st3.b { v4, v5, v6 }[13], [x3], x5 ; encoding: [0x64,0x34,0x85,0x4d]
-; CHECK: st3.h { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0x70,0x85,0x0d]
-; CHECK: st3.s { v4, v5, v6 }[2], [x3], x5 ; encoding: [0x64,0xa0,0x85,0x4d]
-; CHECK: st3.d { v4, v5, v6 }[1], [x3], x5 ; encoding: [0x64,0xa4,0x85,0x4d]
-; CHECK: st3.b { v4, v5, v6 }[13], [x3], #3 ; encoding: [0x64,0x34,0x9f,0x4d]
-; CHECK: st3.h { v4, v5, v6 }[2], [x3], #6 ; encoding: [0x64,0x70,0x9f,0x0d]
-; CHECK: st3.s { v4, v5, v6 }[2], [x3], #12 ; encoding: [0x64,0xa0,0x9f,0x4d]
-; CHECK: st3.d { v4, v5, v6 }[1], [x3], #24 ; encoding: [0x64,0xa4,0x9f,0x4d]
-
-_st4:
- st4.b {v4, v5, v6, v7}[13], [x3]
- st4.h {v4, v5, v6, v7}[2], [x3]
- st4.s {v4, v5, v6, v7}[2], [x3]
- st4.d {v4, v5, v6, v7}[1], [x3]
- st4.b {v4, v5, v6, v7}[13], [x3], x5
- st4.h {v4, v5, v6, v7}[2], [x3], x5
- st4.s {v4, v5, v6, v7}[2], [x3], x5
- st4.d {v4, v5, v6, v7}[1], [x3], x5
- st4.b {v4, v5, v6, v7}[13], [x3], #4
- st4.h {v4, v5, v6, v7}[2], [x3], #8
- st4.s {v4, v5, v6, v7}[2], [x3], #16
- st4.d {v4, v5, v6, v7}[1], [x3], #32
-
-; CHECK: _st4:
-; CHECK: st4.b { v4, v5, v6, v7 }[13], [x3] ; encoding: [0x64,0x34,0x20,0x4d]
-; CHECK: st4.h { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0x70,0x20,0x0d]
-; CHECK: st4.s { v4, v5, v6, v7 }[2], [x3] ; encoding: [0x64,0xa0,0x20,0x4d]
-; CHECK: st4.d { v4, v5, v6, v7 }[1], [x3] ; encoding: [0x64,0xa4,0x20,0x4d]
-; CHECK: st4.b { v4, v5, v6, v7 }[13], [x3], x5 ; encoding: [0x64,0x34,0xa5,0x4d]
-; CHECK: st4.h { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0x70,0xa5,0x0d]
-; CHECK: st4.s { v4, v5, v6, v7 }[2], [x3], x5 ; encoding: [0x64,0xa0,0xa5,0x4d]
-; CHECK: st4.d { v4, v5, v6, v7 }[1], [x3], x5 ; encoding: [0x64,0xa4,0xa5,0x4d]
-; CHECK: st4.b { v4, v5, v6, v7 }[13], [x3], #4 ; encoding: [0x64,0x34,0xbf,0x4d]
-; CHECK: st4.h { v4, v5, v6, v7 }[2], [x3], #8 ; encoding: [0x64,0x70,0xbf,0x0d]
-; CHECK: st4.s { v4, v5, v6, v7 }[2], [x3], #16 ; encoding: [0x64,0xa0,0xbf,0x4d]
-; CHECK: st4.d { v4, v5, v6, v7 }[1], [x3], #32 ; encoding: [0x64,0xa4,0xbf,0x4d]
-
-
-;---------
-; ARM verbose syntax equivalents to the above.
-;---------
-verbose_syntax:
-
- ld1 { v1.8b }, [x1]
- ld1 { v2.8b, v3.8b }, [x1]
- ld1 { v3.8b, v4.8b, v5.8b }, [x1]
- ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1]
-
- ld1 { v1.16b }, [x1]
- ld1 { v2.16b, v3.16b }, [x1]
- ld1 { v3.16b, v4.16b, v5.16b }, [x1]
- ld1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1]
-
- ld1 { v1.4h }, [x1]
- ld1 { v2.4h, v3.4h }, [x1]
- ld1 { v3.4h, v4.4h, v5.4h }, [x1]
- ld1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1]
-
- ld1 { v1.8h }, [x1]
- ld1 { v2.8h, v3.8h }, [x1]
- ld1 { v3.8h, v4.8h, v5.8h }, [x1]
- ld1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1]
-
- ld1 { v1.2s }, [x1]
- ld1 { v2.2s, v3.2s }, [x1]
- ld1 { v3.2s, v4.2s, v5.2s }, [x1]
- ld1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1]
-
- ld1 { v1.4s }, [x1]
- ld1 { v2.4s, v3.4s }, [x1]
- ld1 { v3.4s, v4.4s, v5.4s }, [x1]
- ld1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1]
-
- ld1 { v1.1d }, [x1]
- ld1 { v2.1d, v3.1d }, [x1]
- ld1 { v3.1d, v4.1d, v5.1d }, [x1]
- ld1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1]
-
- ld1 { v1.2d }, [x1]
- ld1 { v2.2d, v3.2d }, [x1]
- ld1 { v3.2d, v4.2d, v5.2d }, [x1]
- ld1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1]
-
- st1 { v1.8b }, [x1]
- st1 { v2.8b, v3.8b }, [x1]
- st1 { v3.8b, v4.8b, v5.8b }, [x1]
- st1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1]
-
- st1 { v1.16b }, [x1]
- st1 { v2.16b, v3.16b }, [x1]
- st1 { v3.16b, v4.16b, v5.16b }, [x1]
- st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1]
-
- st1 { v1.4h }, [x1]
- st1 { v2.4h, v3.4h }, [x1]
- st1 { v3.4h, v4.4h, v5.4h }, [x1]
- st1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1]
-
- st1 { v1.8h }, [x1]
- st1 { v2.8h, v3.8h }, [x1]
- st1 { v3.8h, v4.8h, v5.8h }, [x1]
- st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1]
-
- st1 { v1.2s }, [x1]
- st1 { v2.2s, v3.2s }, [x1]
- st1 { v3.2s, v4.2s, v5.2s }, [x1]
- st1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1]
-
- st1 { v1.4s }, [x1]
- st1 { v2.4s, v3.4s }, [x1]
- st1 { v3.4s, v4.4s, v5.4s }, [x1]
- st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1]
-
- st1 { v1.1d }, [x1]
- st1 { v2.1d, v3.1d }, [x1]
- st1 { v3.1d, v4.1d, v5.1d }, [x1]
- st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1]
-
- st1 { v1.2d }, [x1]
- st1 { v2.2d, v3.2d }, [x1]
- st1 { v3.2d, v4.2d, v5.2d }, [x1]
- st1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1]
-
- ld2 { v3.8b, v4.8b }, [x19]
- ld2 { v3.16b, v4.16b }, [x19]
- ld2 { v3.4h, v4.4h }, [x19]
- ld2 { v3.8h, v4.8h }, [x19]
- ld2 { v3.2s, v4.2s }, [x19]
- ld2 { v3.4s, v4.4s }, [x19]
- ld2 { v3.2d, v4.2d }, [x19]
-
- st2 { v3.8b, v4.8b }, [x19]
- st2 { v3.16b, v4.16b }, [x19]
- st2 { v3.4h, v4.4h }, [x19]
- st2 { v3.8h, v4.8h }, [x19]
- st2 { v3.2s, v4.2s }, [x19]
- st2 { v3.4s, v4.4s }, [x19]
- st2 { v3.2d, v4.2d }, [x19]
-
- ld3 { v2.8b, v3.8b, v4.8b }, [x19]
- ld3 { v2.16b, v3.16b, v4.16b }, [x19]
- ld3 { v2.4h, v3.4h, v4.4h }, [x19]
- ld3 { v2.8h, v3.8h, v4.8h }, [x19]
- ld3 { v2.2s, v3.2s, v4.2s }, [x19]
- ld3 { v2.4s, v3.4s, v4.4s }, [x19]
- ld3 { v2.2d, v3.2d, v4.2d }, [x19]
-
- st3 { v2.8b, v3.8b, v4.8b }, [x19]
- st3 { v2.16b, v3.16b, v4.16b }, [x19]
- st3 { v2.4h, v3.4h, v4.4h }, [x19]
- st3 { v2.8h, v3.8h, v4.8h }, [x19]
- st3 { v2.2s, v3.2s, v4.2s }, [x19]
- st3 { v2.4s, v3.4s, v4.4s }, [x19]
- st3 { v2.2d, v3.2d, v4.2d }, [x19]
-
- ld4 { v2.8b, v3.8b, v4.8b, v5.8b }, [x19]
- ld4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x19]
- ld4 { v2.4h, v3.4h, v4.4h, v5.4h }, [x19]
- ld4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x19]
- ld4 { v2.2s, v3.2s, v4.2s, v5.2s }, [x19]
- ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x19]
- ld4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x19]
-
- st4 { v2.8b, v3.8b, v4.8b, v5.8b }, [x19]
- st4 { v2.16b, v3.16b, v4.16b, v5.16b }, [x19]
- st4 { v2.4h, v3.4h, v4.4h, v5.4h }, [x19]
- st4 { v2.8h, v3.8h, v4.8h, v5.8h }, [x19]
- st4 { v2.2s, v3.2s, v4.2s, v5.2s }, [x19]
- st4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x19]
- st4 { v2.2d, v3.2d, v4.2d, v5.2d }, [x19]
-
- ld1 { v1.8b }, [x1], x15
- ld1 { v2.8b, v3.8b }, [x1], x15
- ld1 { v3.8b, v4.8b, v5.8b }, [x1], x15
- ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
-
- ld1 { v1.16b }, [x1], x15
- ld1 { v2.16b, v3.16b }, [x1], x15
- ld1 { v3.16b, v4.16b, v5.16b }, [x1], x15
- ld1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
-
- ld1 { v1.4h }, [x1], x15
- ld1 { v2.4h, v3.4h }, [x1], x15
- ld1 { v3.4h, v4.4h, v5.4h }, [x1], x15
- ld1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
-
- ld1 { v1.8h }, [x1], x15
- ld1 { v2.8h, v3.8h }, [x1], x15
- ld1 { v3.8h, v4.8h, v5.8h }, [x1], x15
- ld1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
-
- ld1 { v1.2s }, [x1], x15
- ld1 { v2.2s, v3.2s }, [x1], x15
- ld1 { v3.2s, v4.2s, v5.2s }, [x1], x15
- ld1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
-
- ld1 { v1.4s }, [x1], x15
- ld1 { v2.4s, v3.4s }, [x1], x15
- ld1 { v3.4s, v4.4s, v5.4s }, [x1], x15
- ld1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
-
- ld1 { v1.1d }, [x1], x15
- ld1 { v2.1d, v3.1d }, [x1], x15
- ld1 { v3.1d, v4.1d, v5.1d }, [x1], x15
- ld1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], x15
-
- ld1 { v1.2d }, [x1], x15
- ld1 { v2.2d, v3.2d }, [x1], x15
- ld1 { v3.2d, v4.2d, v5.2d }, [x1], x15
- ld1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
-
- st1 { v1.8b }, [x1], x15
- st1 { v2.8b, v3.8b }, [x1], x15
- st1 { v3.8b, v4.8b, v5.8b }, [x1], x15
- st1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
-
- st1 { v1.16b }, [x1], x15
- st1 { v2.16b, v3.16b }, [x1], x15
- st1 { v3.16b, v4.16b, v5.16b }, [x1], x15
- st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
-
- st1 { v1.4h }, [x1], x15
- st1 { v2.4h, v3.4h }, [x1], x15
- st1 { v3.4h, v4.4h, v5.4h }, [x1], x15
- st1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
-
- st1 { v1.8h }, [x1], x15
- st1 { v2.8h, v3.8h }, [x1], x15
- st1 { v3.8h, v4.8h, v5.8h }, [x1], x15
- st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
-
- st1 { v1.2s }, [x1], x15
- st1 { v2.2s, v3.2s }, [x1], x15
- st1 { v3.2s, v4.2s, v5.2s }, [x1], x15
- st1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
-
- st1 { v1.4s }, [x1], x15
- st1 { v2.4s, v3.4s }, [x1], x15
- st1 { v3.4s, v4.4s, v5.4s }, [x1], x15
- st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
-
- st1 { v1.1d }, [x1], x15
- st1 { v2.1d, v3.1d }, [x1], x15
- st1 { v3.1d, v4.1d, v5.1d }, [x1], x15
- st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], x15
-
- st1 { v1.2d }, [x1], x15
- st1 { v2.2d, v3.2d }, [x1], x15
- st1 { v3.2d, v4.2d, v5.2d }, [x1], x15
- st1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
-
- ld1 { v1.8b }, [x1], #8
- ld1 { v2.8b, v3.8b }, [x1], #16
- ld1 { v3.8b, v4.8b, v5.8b }, [x1], #24
- ld1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
-
- ld1 { v1.16b }, [x1], #16
- ld1 { v2.16b, v3.16b }, [x1], #32
- ld1 { v3.16b, v4.16b, v5.16b }, [x1], #48
- ld1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
-
- ld1 { v1.4h }, [x1], #8
- ld1 { v2.4h, v3.4h }, [x1], #16
- ld1 { v3.4h, v4.4h, v5.4h }, [x1], #24
- ld1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
-
- ld1 { v1.8h }, [x1], #16
- ld1 { v2.8h, v3.8h }, [x1], #32
- ld1 { v3.8h, v4.8h, v5.8h }, [x1], #48
- ld1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
-
- ld1 { v1.2s }, [x1], #8
- ld1 { v2.2s, v3.2s }, [x1], #16
- ld1 { v3.2s, v4.2s, v5.2s }, [x1], #24
- ld1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
-
- ld1 { v1.4s }, [x1], #16
- ld1 { v2.4s, v3.4s }, [x1], #32
- ld1 { v3.4s, v4.4s, v5.4s }, [x1], #48
- ld1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
-
- ld1 { v1.1d }, [x1], #8
- ld1 { v2.1d, v3.1d }, [x1], #16
- ld1 { v3.1d, v4.1d, v5.1d }, [x1], #24
- ld1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], #32
-
- ld1 { v1.2d }, [x1], #16
- ld1 { v2.2d, v3.2d }, [x1], #32
- ld1 { v3.2d, v4.2d, v5.2d }, [x1], #48
- ld1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
-
- st1 { v1.8b }, [x1], #8
- st1 { v2.8b, v3.8b }, [x1], #16
- st1 { v3.8b, v4.8b, v5.8b }, [x1], #24
- st1 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
-
- st1 { v1.16b }, [x1], #16
- st1 { v2.16b, v3.16b }, [x1], #32
- st1 { v3.16b, v4.16b, v5.16b }, [x1], #48
- st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
-
- st1 { v1.4h }, [x1], #8
- st1 { v2.4h, v3.4h }, [x1], #16
- st1 { v3.4h, v4.4h, v5.4h }, [x1], #24
- st1 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
-
- st1 { v1.8h }, [x1], #16
- st1 { v2.8h, v3.8h }, [x1], #32
- st1 { v3.8h, v4.8h, v5.8h }, [x1], #48
- st1 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
-
- st1 { v1.2s }, [x1], #8
- st1 { v2.2s, v3.2s }, [x1], #16
- st1 { v3.2s, v4.2s, v5.2s }, [x1], #24
- st1 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
-
- st1 { v1.4s }, [x1], #16
- st1 { v2.4s, v3.4s }, [x1], #32
- st1 { v3.4s, v4.4s, v5.4s }, [x1], #48
- st1 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
-
- st1 { v1.1d }, [x1], #8
- st1 { v2.1d, v3.1d }, [x1], #16
- st1 { v3.1d, v4.1d, v5.1d }, [x1], #24
- st1 { v7.1d, v8.1d, v9.1d, v10.1d }, [x1], #32
-
- st1 { v1.2d }, [x1], #16
- st1 { v2.2d, v3.2d }, [x1], #32
- st1 { v3.2d, v4.2d, v5.2d }, [x1], #48
- st1 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
-
- ld2 { v2.8b, v3.8b }, [x1], x15
- ld2 { v2.16b, v3.16b }, [x1], x15
- ld2 { v2.4h, v3.4h }, [x1], x15
- ld2 { v2.8h, v3.8h }, [x1], x15
- ld2 { v2.2s, v3.2s }, [x1], x15
- ld2 { v2.4s, v3.4s }, [x1], x15
- ld2 { v2.2d, v3.2d }, [x1], x15
-
- st2 { v2.8b, v3.8b }, [x1], x15
- st2 { v2.16b, v3.16b }, [x1], x15
- st2 { v2.4h, v3.4h }, [x1], x15
- st2 { v2.8h, v3.8h }, [x1], x15
- st2 { v2.2s, v3.2s }, [x1], x15
- st2 { v2.4s, v3.4s }, [x1], x15
- st2 { v2.2d, v3.2d }, [x1], x15
-
- ld2 { v2.8b, v3.8b }, [x1], #16
- ld2 { v2.16b, v3.16b }, [x1], #32
- ld2 { v2.4h, v3.4h }, [x1], #16
- ld2 { v2.8h, v3.8h }, [x1], #32
- ld2 { v2.2s, v3.2s }, [x1], #16
- ld2 { v2.4s, v3.4s }, [x1], #32
- ld2 { v2.2d, v3.2d }, [x1], #32
-
- st2 { v2.8b, v3.8b }, [x1], #16
- st2 { v2.16b, v3.16b }, [x1], #32
- st2 { v2.4h, v3.4h }, [x1], #16
- st2 { v2.8h, v3.8h }, [x1], #32
- st2 { v2.2s, v3.2s }, [x1], #16
- st2 { v2.4s, v3.4s }, [x1], #32
- st2 { v2.2d, v3.2d }, [x1], #32
-
- ld3 { v3.8b, v4.8b, v5.8b }, [x1], x15
- ld3 { v3.16b, v4.16b, v5.16b }, [x1], x15
- ld3 { v3.4h, v4.4h, v5.4h }, [x1], x15
- ld3 { v3.8h, v4.8h, v5.8h }, [x1], x15
- ld3 { v3.2s, v4.2s, v5.2s }, [x1], x15
- ld3 { v3.4s, v4.4s, v5.4s }, [x1], x15
- ld3 { v3.2d, v4.2d, v5.2d }, [x1], x15
-
- st3 { v3.8b, v4.8b, v5.8b }, [x1], x15
- st3 { v3.16b, v4.16b, v5.16b }, [x1], x15
- st3 { v3.4h, v4.4h, v5.4h }, [x1], x15
- st3 { v3.8h, v4.8h, v5.8h }, [x1], x15
- st3 { v3.2s, v4.2s, v5.2s }, [x1], x15
- st3 { v3.4s, v4.4s, v5.4s }, [x1], x15
- st3 { v3.2d, v4.2d, v5.2d }, [x1], x15
-
-	ld3	{ v3.8b, v4.8b, v5.8b }, [x1], #24
- ld3 { v3.16b, v4.16b, v5.16b }, [x1], #48
- ld3 { v3.4h, v4.4h, v5.4h }, [x1], #24
- ld3 { v3.8h, v4.8h, v5.8h }, [x1], #48
- ld3 { v3.2s, v4.2s, v5.2s }, [x1], #24
- ld3 { v3.4s, v4.4s, v5.4s }, [x1], #48
- ld3 { v3.2d, v4.2d, v5.2d }, [x1], #48
-
- st3 { v3.8b, v4.8b, v5.8b }, [x1], #24
- st3 { v3.16b, v4.16b, v5.16b }, [x1], #48
- st3 { v3.4h, v4.4h, v5.4h }, [x1], #24
- st3 { v3.8h, v4.8h, v5.8h }, [x1], #48
- st3 { v3.2s, v4.2s, v5.2s }, [x1], #24
- st3 { v3.4s, v4.4s, v5.4s }, [x1], #48
- st3 { v3.2d, v4.2d, v5.2d }, [x1], #48
-
- ld4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
- ld4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
- ld4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
- ld4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
- ld4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
- ld4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
- ld4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
-
- st4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], x15
- st4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], x15
- st4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], x15
- st4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], x15
- st4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], x15
- st4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], x15
- st4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], x15
-
- ld4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
- ld4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
- ld4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
- ld4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
- ld4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
- ld4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
- ld4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
-
- st4 { v4.8b, v5.8b, v6.8b, v7.8b }, [x1], #32
- st4 { v4.16b, v5.16b, v6.16b, v7.16b }, [x1], #64
- st4 { v7.4h, v8.4h, v9.4h, v10.4h }, [x1], #32
- st4 { v7.8h, v8.8h, v9.8h, v10.8h }, [x1], #64
- st4 { v7.2s, v8.2s, v9.2s, v10.2s }, [x1], #32
- st4 { v7.4s, v8.4s, v9.4s, v10.4s }, [x1], #64
- st4 { v7.2d, v8.2d, v9.2d, v10.2d }, [x1], #64
-
- ld1r { v12.8b }, [x2]
- ld1r { v12.8b }, [x2], x3
- ld1r { v12.16b }, [x2]
- ld1r { v12.16b }, [x2], x3
- ld1r { v12.4h }, [x2]
- ld1r { v12.4h }, [x2], x3
- ld1r { v12.8h }, [x2]
- ld1r { v12.8h }, [x2], x3
- ld1r { v12.2s }, [x2]
- ld1r { v12.2s }, [x2], x3
- ld1r { v12.4s }, [x2]
- ld1r { v12.4s }, [x2], x3
- ld1r { v12.1d }, [x2]
- ld1r { v12.1d }, [x2], x3
- ld1r { v12.2d }, [x2]
- ld1r { v12.2d }, [x2], x3
-
- ld1r { v12.8b }, [x2], #1
- ld1r { v12.16b }, [x2], #1
- ld1r { v12.4h }, [x2], #2
- ld1r { v12.8h }, [x2], #2
- ld1r { v12.2s }, [x2], #4
- ld1r { v12.4s }, [x2], #4
- ld1r { v12.1d }, [x2], #8
- ld1r { v12.2d }, [x2], #8
-
-	ld2r	{ v3.8b, v4.8b }, [x2]
- ld2r { v3.8b, v4.8b }, [x2], x3
- ld2r { v3.16b, v4.16b }, [x2]
- ld2r { v3.16b, v4.16b }, [x2], x3
- ld2r { v3.4h, v4.4h }, [x2]
- ld2r { v3.4h, v4.4h }, [x2], x3
- ld2r { v3.8h, v4.8h }, [x2]
- ld2r { v3.8h, v4.8h }, [x2], x3
- ld2r { v3.2s, v4.2s }, [x2]
- ld2r { v3.2s, v4.2s }, [x2], x3
- ld2r { v3.4s, v4.4s }, [x2]
- ld2r { v3.4s, v4.4s }, [x2], x3
- ld2r { v3.1d, v4.1d }, [x2]
- ld2r { v3.1d, v4.1d }, [x2], x3
- ld2r { v3.2d, v4.2d }, [x2]
- ld2r { v3.2d, v4.2d }, [x2], x3
-
- ld2r { v3.8b, v4.8b }, [x2], #2
- ld2r { v3.16b, v4.16b }, [x2], #2
- ld2r { v3.4h, v4.4h }, [x2], #4
- ld2r { v3.8h, v4.8h }, [x2], #4
- ld2r { v3.2s, v4.2s }, [x2], #8
- ld2r { v3.4s, v4.4s }, [x2], #8
- ld2r { v3.1d, v4.1d }, [x2], #16
- ld2r { v3.2d, v4.2d }, [x2], #16
-
- ld3r { v2.8b, v3.8b, v4.8b }, [x2]
- ld3r { v2.8b, v3.8b, v4.8b }, [x2], x3
- ld3r { v2.16b, v3.16b, v4.16b }, [x2]
- ld3r { v2.16b, v3.16b, v4.16b }, [x2], x3
- ld3r { v2.4h, v3.4h, v4.4h }, [x2]
- ld3r { v2.4h, v3.4h, v4.4h }, [x2], x3
- ld3r { v2.8h, v3.8h, v4.8h }, [x2]
- ld3r { v2.8h, v3.8h, v4.8h }, [x2], x3
- ld3r { v2.2s, v3.2s, v4.2s }, [x2]
- ld3r { v2.2s, v3.2s, v4.2s }, [x2], x3
- ld3r { v2.4s, v3.4s, v4.4s }, [x2]
- ld3r { v2.4s, v3.4s, v4.4s }, [x2], x3
- ld3r { v2.1d, v3.1d, v4.1d }, [x2]
- ld3r { v2.1d, v3.1d, v4.1d }, [x2], x3
- ld3r { v2.2d, v3.2d, v4.2d }, [x2]
- ld3r { v2.2d, v3.2d, v4.2d }, [x2], x3
-
- ld3r { v2.8b, v3.8b, v4.8b }, [x2], #3
- ld3r { v2.16b, v3.16b, v4.16b }, [x2], #3
- ld3r { v2.4h, v3.4h, v4.4h }, [x2], #6
- ld3r { v2.8h, v3.8h, v4.8h }, [x2], #6
- ld3r { v2.2s, v3.2s, v4.2s }, [x2], #12
- ld3r { v2.4s, v3.4s, v4.4s }, [x2], #12
- ld3r { v2.1d, v3.1d, v4.1d }, [x2], #24
- ld3r { v2.2d, v3.2d, v4.2d }, [x2], #24
-
- ld4r { v2.8b, v3.8b, v4.8b, v5.8b }, [x2]
- ld4r { v2.8b, v3.8b, v4.8b, v5.8b }, [x2], x3
- ld4r { v2.16b, v3.16b, v4.16b, v5.16b }, [x2]
- ld4r { v2.16b, v3.16b, v4.16b, v5.16b }, [x2], x3
- ld4r { v2.4h, v3.4h, v4.4h, v5.4h }, [x2]
- ld4r { v2.4h, v3.4h, v4.4h, v5.4h }, [x2], x3
- ld4r { v2.8h, v3.8h, v4.8h, v5.8h }, [x2]
- ld4r { v2.8h, v3.8h, v4.8h, v5.8h }, [x2], x3
- ld4r { v2.2s, v3.2s, v4.2s, v5.2s }, [x2]
- ld4r { v2.2s, v3.2s, v4.2s, v5.2s }, [x2], x3
- ld4r { v2.4s, v3.4s, v4.4s, v5.4s }, [x2]
- ld4r { v2.4s, v3.4s, v4.4s, v5.4s }, [x2], x3
- ld4r { v2.1d, v3.1d, v4.1d, v5.1d }, [x2]
- ld4r { v2.1d, v3.1d, v4.1d, v5.1d }, [x2], x3
- ld4r { v2.2d, v3.2d, v4.2d, v5.2d }, [x2]
- ld4r { v2.2d, v3.2d, v4.2d, v5.2d }, [x2], x3
-
- ld4r { v2.8b, v3.8b, v4.8b, v5.8b }, [x2], #4
- ld4r { v2.16b, v3.16b, v4.16b, v5.16b }, [x2], #4
- ld4r { v2.4h, v3.4h, v4.4h, v5.4h }, [x2], #8
- ld4r { v2.8h, v3.8h, v4.8h, v5.8h }, [x2], #8
- ld4r { v2.2s, v3.2s, v4.2s, v5.2s }, [x2], #16
- ld4r { v2.4s, v3.4s, v4.4s, v5.4s }, [x2], #16
- ld4r { v2.1d, v3.1d, v4.1d, v5.1d }, [x2], #32
- ld4r { v2.2d, v3.2d, v4.2d, v5.2d }, [x2], #32
-
- ld1 { v6.b }[13], [x3]
- ld1 { v6.h }[2], [x3]
- ld1 { v6.s }[2], [x3]
- ld1 { v6.d }[1], [x3]
- ld1 { v6.b }[13], [x3], x5
- ld1 { v6.h }[2], [x3], x5
- ld1 { v6.s }[2], [x3], x5
- ld1 { v6.d }[1], [x3], x5
- ld1 { v6.b }[13], [x3], #1
- ld1 { v6.h }[2], [x3], #2
- ld1 { v6.s }[2], [x3], #4
- ld1 { v6.d }[1], [x3], #8
-
- ld2 { v5.b, v6.b }[13], [x3]
- ld2 { v5.h, v6.h }[2], [x3]
- ld2 { v5.s, v6.s }[2], [x3]
- ld2 { v5.d, v6.d }[1], [x3]
- ld2 { v5.b, v6.b }[13], [x3], x5
- ld2 { v5.h, v6.h }[2], [x3], x5
- ld2 { v5.s, v6.s }[2], [x3], x5
- ld2 { v5.d, v6.d }[1], [x3], x5
- ld2 { v5.b, v6.b }[13], [x3], #2
- ld2 { v5.h, v6.h }[2], [x3], #4
- ld2 { v5.s, v6.s }[2], [x3], #8
- ld2 { v5.d, v6.d }[1], [x3], #16
-
- ld3 { v7.b, v8.b, v9.b }[13], [x3]
- ld3 { v7.h, v8.h, v9.h }[2], [x3]
- ld3 { v7.s, v8.s, v9.s }[2], [x3]
- ld3 { v7.d, v8.d, v9.d }[1], [x3]
- ld3 { v7.b, v8.b, v9.b }[13], [x3], x5
- ld3 { v7.h, v8.h, v9.h }[2], [x3], x5
- ld3 { v7.s, v8.s, v9.s }[2], [x3], x5
- ld3 { v7.d, v8.d, v9.d }[1], [x3], x5
- ld3 { v7.b, v8.b, v9.b }[13], [x3], #3
- ld3 { v7.h, v8.h, v9.h }[2], [x3], #6
- ld3 { v7.s, v8.s, v9.s }[2], [x3], #12
- ld3 { v7.d, v8.d, v9.d }[1], [x3], #24
-
- ld4 { v7.b, v8.b, v9.b, v10.b }[13], [x3]
- ld4 { v7.h, v8.h, v9.h, v10.h }[2], [x3]
- ld4 { v7.s, v8.s, v9.s, v10.s }[2], [x3]
- ld4 { v7.d, v8.d, v9.d, v10.d }[1], [x3]
- ld4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], x5
- ld4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], x5
- ld4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], x5
- ld4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], x5
- ld4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], #4
- ld4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], #8
- ld4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], #16
- ld4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], #32
-
- st1 { v6.b }[13], [x3]
- st1 { v6.h }[2], [x3]
- st1 { v6.s }[2], [x3]
- st1 { v6.d }[1], [x3]
- st1 { v6.b }[13], [x3], x5
- st1 { v6.h }[2], [x3], x5
- st1 { v6.s }[2], [x3], x5
- st1 { v6.d }[1], [x3], x5
- st1 { v6.b }[13], [x3], #1
- st1 { v6.h }[2], [x3], #2
- st1 { v6.s }[2], [x3], #4
- st1 { v6.d }[1], [x3], #8
-
- st2 { v5.b, v6.b }[13], [x3]
- st2 { v5.h, v6.h }[2], [x3]
- st2 { v5.s, v6.s }[2], [x3]
- st2 { v5.d, v6.d }[1], [x3]
- st2 { v5.b, v6.b }[13], [x3], x5
- st2 { v5.h, v6.h }[2], [x3], x5
- st2 { v5.s, v6.s }[2], [x3], x5
- st2 { v5.d, v6.d }[1], [x3], x5
- st2 { v5.b, v6.b }[13], [x3], #2
- st2 { v5.h, v6.h }[2], [x3], #4
- st2 { v5.s, v6.s }[2], [x3], #8
- st2 { v5.d, v6.d }[1], [x3], #16
-
- st3 { v7.b, v8.b, v9.b }[13], [x3]
- st3 { v7.h, v8.h, v9.h }[2], [x3]
- st3 { v7.s, v8.s, v9.s }[2], [x3]
- st3 { v7.d, v8.d, v9.d }[1], [x3]
- st3 { v7.b, v8.b, v9.b }[13], [x3], x5
- st3 { v7.h, v8.h, v9.h }[2], [x3], x5
- st3 { v7.s, v8.s, v9.s }[2], [x3], x5
- st3 { v7.d, v8.d, v9.d }[1], [x3], x5
- st3 { v7.b, v8.b, v9.b }[13], [x3], #3
- st3 { v7.h, v8.h, v9.h }[2], [x3], #6
- st3 { v7.s, v8.s, v9.s }[2], [x3], #12
- st3 { v7.d, v8.d, v9.d }[1], [x3], #24
-
- st4 { v7.b, v8.b, v9.b, v10.b }[13], [x3]
- st4 { v7.h, v8.h, v9.h, v10.h }[2], [x3]
- st4 { v7.s, v8.s, v9.s, v10.s }[2], [x3]
- st4 { v7.d, v8.d, v9.d, v10.d }[1], [x3]
- st4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], x5
- st4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], x5
- st4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], x5
- st4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], x5
- st4 { v7.b, v8.b, v9.b, v10.b }[13], [x3], #4
- st4 { v7.h, v8.h, v9.h, v10.h }[2], [x3], #8
- st4 { v7.s, v8.s, v9.s, v10.s }[2], [x3], #16
- st4 { v7.d, v8.d, v9.d, v10.d }[1], [x3], #32
-
-; CHECK: ld1.8b { v1 }, [x1] ; encoding: [0x21,0x70,0x40,0x0c]
-; CHECK: ld1.8b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x40,0x0c]
-; CHECK: ld1.8b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x40,0x0c]
-; CHECK: ld1.8b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x40,0x0c]
-; CHECK: ld1.16b { v1 }, [x1] ; encoding: [0x21,0x70,0x40,0x4c]
-; CHECK: ld1.16b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x40,0x4c]
-; CHECK: ld1.16b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x40,0x4c]
-; CHECK: ld1.16b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x40,0x4c]
-; CHECK: ld1.4h { v1 }, [x1] ; encoding: [0x21,0x74,0x40,0x0c]
-; CHECK: ld1.4h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x40,0x0c]
-; CHECK: ld1.4h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x40,0x0c]
-; CHECK: ld1.4h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x40,0x0c]
-; CHECK: ld1.8h { v1 }, [x1] ; encoding: [0x21,0x74,0x40,0x4c]
-; CHECK: ld1.8h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x40,0x4c]
-; CHECK: ld1.8h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x40,0x4c]
-; CHECK: ld1.8h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x40,0x4c]
-; CHECK: ld1.2s { v1 }, [x1] ; encoding: [0x21,0x78,0x40,0x0c]
-; CHECK: ld1.2s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x40,0x0c]
-; CHECK: ld1.2s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x40,0x0c]
-; CHECK: ld1.2s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x40,0x0c]
-; CHECK: ld1.4s { v1 }, [x1] ; encoding: [0x21,0x78,0x40,0x4c]
-; CHECK: ld1.4s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x40,0x4c]
-; CHECK: ld1.4s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x40,0x4c]
-; CHECK: ld1.4s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x40,0x4c]
-; CHECK: ld1.1d { v1 }, [x1] ; encoding: [0x21,0x7c,0x40,0x0c]
-; CHECK: ld1.1d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x40,0x0c]
-; CHECK: ld1.1d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x40,0x0c]
-; CHECK: ld1.1d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x40,0x0c]
-; CHECK: ld1.2d { v1 }, [x1] ; encoding: [0x21,0x7c,0x40,0x4c]
-; CHECK: ld1.2d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x40,0x4c]
-; CHECK: ld1.2d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x40,0x4c]
-; CHECK: ld1.2d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x40,0x4c]
-; CHECK: st1.8b { v1 }, [x1] ; encoding: [0x21,0x70,0x00,0x0c]
-; CHECK: st1.8b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x00,0x0c]
-; CHECK: st1.8b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x00,0x0c]
-; CHECK: st1.8b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x00,0x0c]
-; CHECK: st1.16b { v1 }, [x1] ; encoding: [0x21,0x70,0x00,0x4c]
-; CHECK: st1.16b { v2, v3 }, [x1] ; encoding: [0x22,0xa0,0x00,0x4c]
-; CHECK: st1.16b { v3, v4, v5 }, [x1] ; encoding: [0x23,0x60,0x00,0x4c]
-; CHECK: st1.16b { v4, v5, v6, v7 }, [x1] ; encoding: [0x24,0x20,0x00,0x4c]
-; CHECK: st1.4h { v1 }, [x1] ; encoding: [0x21,0x74,0x00,0x0c]
-; CHECK: st1.4h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x00,0x0c]
-; CHECK: st1.4h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x00,0x0c]
-; CHECK: st1.4h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x00,0x0c]
-; CHECK: st1.8h { v1 }, [x1] ; encoding: [0x21,0x74,0x00,0x4c]
-; CHECK: st1.8h { v2, v3 }, [x1] ; encoding: [0x22,0xa4,0x00,0x4c]
-; CHECK: st1.8h { v3, v4, v5 }, [x1] ; encoding: [0x23,0x64,0x00,0x4c]
-; CHECK: st1.8h { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x24,0x00,0x4c]
-; CHECK: st1.2s { v1 }, [x1] ; encoding: [0x21,0x78,0x00,0x0c]
-; CHECK: st1.2s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x00,0x0c]
-; CHECK: st1.2s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x00,0x0c]
-; CHECK: st1.2s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x00,0x0c]
-; CHECK: st1.4s { v1 }, [x1] ; encoding: [0x21,0x78,0x00,0x4c]
-; CHECK: st1.4s { v2, v3 }, [x1] ; encoding: [0x22,0xa8,0x00,0x4c]
-; CHECK: st1.4s { v3, v4, v5 }, [x1] ; encoding: [0x23,0x68,0x00,0x4c]
-; CHECK: st1.4s { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x28,0x00,0x4c]
-; CHECK: st1.1d { v1 }, [x1] ; encoding: [0x21,0x7c,0x00,0x0c]
-; CHECK: st1.1d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x00,0x0c]
-; CHECK: st1.1d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x00,0x0c]
-; CHECK: st1.1d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x00,0x0c]
-; CHECK: st1.2d { v1 }, [x1] ; encoding: [0x21,0x7c,0x00,0x4c]
-; CHECK: st1.2d { v2, v3 }, [x1] ; encoding: [0x22,0xac,0x00,0x4c]
-; CHECK: st1.2d { v3, v4, v5 }, [x1] ; encoding: [0x23,0x6c,0x00,0x4c]
-; CHECK: st1.2d { v7, v8, v9, v10 }, [x1] ; encoding: [0x27,0x2c,0x00,0x4c]
-; CHECK: ld2.8b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x40,0x0c]
-; CHECK: ld2.16b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x40,0x4c]
-; CHECK: ld2.4h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x40,0x0c]
-; CHECK: ld2.8h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x40,0x4c]
-; CHECK: ld2.2s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x40,0x0c]
-; CHECK: ld2.4s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x40,0x4c]
-; CHECK: ld2.2d { v3, v4 }, [x19] ; encoding: [0x63,0x8e,0x40,0x4c]
-; CHECK: st2.8b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x00,0x0c]
-; CHECK: st2.16b { v3, v4 }, [x19] ; encoding: [0x63,0x82,0x00,0x4c]
-; CHECK: st2.4h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x00,0x0c]
-; CHECK: st2.8h { v3, v4 }, [x19] ; encoding: [0x63,0x86,0x00,0x4c]
-; CHECK: st2.2s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x00,0x0c]
-; CHECK: st2.4s { v3, v4 }, [x19] ; encoding: [0x63,0x8a,0x00,0x4c]
-; CHECK: st2.2d { v3, v4 }, [x19] ; encoding: [0x63,0x8e,0x00,0x4c]
-; CHECK: ld3.8b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x40,0x0c]
-; CHECK: ld3.16b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x40,0x4c]
-; CHECK: ld3.4h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x40,0x0c]
-; CHECK: ld3.8h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x40,0x4c]
-; CHECK: ld3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x40,0x0c]
-; CHECK: ld3.4s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x40,0x4c]
-; CHECK: ld3.2d { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4e,0x40,0x4c]
-; CHECK: st3.8b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x00,0x0c]
-; CHECK: st3.16b { v2, v3, v4 }, [x19] ; encoding: [0x62,0x42,0x00,0x4c]
-; CHECK: st3.4h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x00,0x0c]
-; CHECK: st3.8h { v2, v3, v4 }, [x19] ; encoding: [0x62,0x46,0x00,0x4c]
-; CHECK: st3.2s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x00,0x0c]
-; CHECK: st3.4s { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4a,0x00,0x4c]
-; CHECK: st3.2d { v2, v3, v4 }, [x19] ; encoding: [0x62,0x4e,0x00,0x4c]
-; CHECK: ld4.8b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x40,0x0c]
-; CHECK: ld4.16b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x40,0x4c]
-; CHECK: ld4.4h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x40,0x0c]
-; CHECK: ld4.8h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x40,0x4c]
-; CHECK: ld4.2s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x40,0x0c]
-; CHECK: ld4.4s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x40,0x4c]
-; CHECK: ld4.2d { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0e,0x40,0x4c]
-; CHECK: st4.8b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x00,0x0c]
-; CHECK: st4.16b { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x02,0x00,0x4c]
-; CHECK: st4.4h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x00,0x0c]
-; CHECK: st4.8h { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x06,0x00,0x4c]
-; CHECK: st4.2s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x00,0x0c]
-; CHECK: st4.4s { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0a,0x00,0x4c]
-; CHECK: st4.2d { v2, v3, v4, v5 }, [x19] ; encoding: [0x62,0x0e,0x00,0x4c]
-; CHECK: ld1.8b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0xcf,0x0c]
-; CHECK: ld1.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0xcf,0x0c]
-; CHECK: ld1.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0xcf,0x0c]
-; CHECK: ld1.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0xcf,0x0c]
-; CHECK: ld1.16b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0xcf,0x4c]
-; CHECK: ld1.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0xcf,0x4c]
-; CHECK: ld1.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0xcf,0x4c]
-; CHECK: ld1.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0xcf,0x4c]
-; CHECK: ld1.4h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0xcf,0x0c]
-; CHECK: ld1.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0xcf,0x0c]
-; CHECK: ld1.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0xcf,0x0c]
-; CHECK: ld1.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0xcf,0x0c]
-; CHECK: ld1.8h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0xcf,0x4c]
-; CHECK: ld1.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0xcf,0x4c]
-; CHECK: ld1.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0xcf,0x4c]
-; CHECK: ld1.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0xcf,0x4c]
-; CHECK: ld1.2s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0xcf,0x0c]
-; CHECK: ld1.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0xcf,0x0c]
-; CHECK: ld1.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0xcf,0x0c]
-; CHECK: ld1.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0xcf,0x0c]
-; CHECK: ld1.4s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0xcf,0x4c]
-; CHECK: ld1.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0xcf,0x4c]
-; CHECK: ld1.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0xcf,0x4c]
-; CHECK: ld1.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0xcf,0x4c]
-; CHECK: ld1.1d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0xcf,0x0c]
-; CHECK: ld1.1d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0xcf,0x0c]
-; CHECK: ld1.1d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0xcf,0x0c]
-; CHECK: ld1.1d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0xcf,0x0c]
-; CHECK: ld1.2d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0xcf,0x4c]
-; CHECK: ld1.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0xcf,0x4c]
-; CHECK: ld1.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0xcf,0x4c]
-; CHECK: ld1.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0xcf,0x4c]
-; CHECK: st1.8b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0x8f,0x0c]
-; CHECK: st1.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0x8f,0x0c]
-; CHECK: st1.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0x8f,0x0c]
-; CHECK: st1.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0x8f,0x0c]
-; CHECK: st1.16b { v1 }, [x1], x15 ; encoding: [0x21,0x70,0x8f,0x4c]
-; CHECK: st1.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa0,0x8f,0x4c]
-; CHECK: st1.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x60,0x8f,0x4c]
-; CHECK: st1.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x20,0x8f,0x4c]
-; CHECK: st1.4h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0x8f,0x0c]
-; CHECK: st1.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0x8f,0x0c]
-; CHECK: st1.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0x8f,0x0c]
-; CHECK: st1.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0x8f,0x0c]
-; CHECK: st1.8h { v1 }, [x1], x15 ; encoding: [0x21,0x74,0x8f,0x4c]
-; CHECK: st1.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa4,0x8f,0x4c]
-; CHECK: st1.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x64,0x8f,0x4c]
-; CHECK: st1.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x24,0x8f,0x4c]
-; CHECK: st1.2s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0x8f,0x0c]
-; CHECK: st1.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0x8f,0x0c]
-; CHECK: st1.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0x8f,0x0c]
-; CHECK: st1.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0x8f,0x0c]
-; CHECK: st1.4s { v1 }, [x1], x15 ; encoding: [0x21,0x78,0x8f,0x4c]
-; CHECK: st1.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0xa8,0x8f,0x4c]
-; CHECK: st1.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x68,0x8f,0x4c]
-; CHECK: st1.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x28,0x8f,0x4c]
-; CHECK: st1.1d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0x8f,0x0c]
-; CHECK: st1.1d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0x8f,0x0c]
-; CHECK: st1.1d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0x8f,0x0c]
-; CHECK: st1.1d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0x8f,0x0c]
-; CHECK: st1.2d { v1 }, [x1], x15 ; encoding: [0x21,0x7c,0x8f,0x4c]
-; CHECK: st1.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0xac,0x8f,0x4c]
-; CHECK: st1.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x6c,0x8f,0x4c]
-; CHECK: st1.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x2c,0x8f,0x4c]
-; CHECK: ld1.8b { v1 }, [x1], #8 ; encoding: [0x21,0x70,0xdf,0x0c]
-; CHECK: ld1.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa0,0xdf,0x0c]
-; CHECK: ld1.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x60,0xdf,0x0c]
-; CHECK: ld1.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x20,0xdf,0x0c]
-; CHECK: ld1.16b { v1 }, [x1], #16 ; encoding: [0x21,0x70,0xdf,0x4c]
-; CHECK: ld1.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa0,0xdf,0x4c]
-; CHECK: ld1.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x60,0xdf,0x4c]
-; CHECK: ld1.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x20,0xdf,0x4c]
-; CHECK: ld1.4h { v1 }, [x1], #8 ; encoding: [0x21,0x74,0xdf,0x0c]
-; CHECK: ld1.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa4,0xdf,0x0c]
-; CHECK: ld1.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x64,0xdf,0x0c]
-; CHECK: ld1.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x24,0xdf,0x0c]
-; CHECK: ld1.8h { v1 }, [x1], #16 ; encoding: [0x21,0x74,0xdf,0x4c]
-; CHECK: ld1.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa4,0xdf,0x4c]
-; CHECK: ld1.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x64,0xdf,0x4c]
-; CHECK: ld1.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x24,0xdf,0x4c]
-; CHECK: ld1.2s { v1 }, [x1], #8 ; encoding: [0x21,0x78,0xdf,0x0c]
-; CHECK: ld1.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa8,0xdf,0x0c]
-; CHECK: ld1.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x68,0xdf,0x0c]
-; CHECK: ld1.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x28,0xdf,0x0c]
-; CHECK: ld1.4s { v1 }, [x1], #16 ; encoding: [0x21,0x78,0xdf,0x4c]
-; CHECK: ld1.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa8,0xdf,0x4c]
-; CHECK: ld1.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x68,0xdf,0x4c]
-; CHECK: ld1.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x28,0xdf,0x4c]
-; CHECK: ld1.1d { v1 }, [x1], #8 ; encoding: [0x21,0x7c,0xdf,0x0c]
-; CHECK: ld1.1d { v2, v3 }, [x1], #16 ; encoding: [0x22,0xac,0xdf,0x0c]
-; CHECK: ld1.1d { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x6c,0xdf,0x0c]
-; CHECK: ld1.1d { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x2c,0xdf,0x0c]
-; CHECK: ld1.2d { v1 }, [x1], #16 ; encoding: [0x21,0x7c,0xdf,0x4c]
-; CHECK: ld1.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0xac,0xdf,0x4c]
-; CHECK: ld1.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x6c,0xdf,0x4c]
-; CHECK: ld1.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x2c,0xdf,0x4c]
-; CHECK: st1.8b { v1 }, [x1], #8 ; encoding: [0x21,0x70,0x9f,0x0c]
-; CHECK: st1.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa0,0x9f,0x0c]
-; CHECK: st1.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x60,0x9f,0x0c]
-; CHECK: st1.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x20,0x9f,0x0c]
-; CHECK: st1.16b { v1 }, [x1], #16 ; encoding: [0x21,0x70,0x9f,0x4c]
-; CHECK: st1.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa0,0x9f,0x4c]
-; CHECK: st1.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x60,0x9f,0x4c]
-; CHECK: st1.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x20,0x9f,0x4c]
-; CHECK: st1.4h { v1 }, [x1], #8 ; encoding: [0x21,0x74,0x9f,0x0c]
-; CHECK: st1.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa4,0x9f,0x0c]
-; CHECK: st1.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x64,0x9f,0x0c]
-; CHECK: st1.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x24,0x9f,0x0c]
-; CHECK: st1.8h { v1 }, [x1], #16 ; encoding: [0x21,0x74,0x9f,0x4c]
-; CHECK: st1.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa4,0x9f,0x4c]
-; CHECK: st1.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x64,0x9f,0x4c]
-; CHECK: st1.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x24,0x9f,0x4c]
-; CHECK: st1.2s { v1 }, [x1], #8 ; encoding: [0x21,0x78,0x9f,0x0c]
-; CHECK: st1.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0xa8,0x9f,0x0c]
-; CHECK: st1.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x68,0x9f,0x0c]
-; CHECK: st1.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x28,0x9f,0x0c]
-; CHECK: st1.4s { v1 }, [x1], #16 ; encoding: [0x21,0x78,0x9f,0x4c]
-; CHECK: st1.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0xa8,0x9f,0x4c]
-; CHECK: st1.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x68,0x9f,0x4c]
-; CHECK: st1.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x28,0x9f,0x4c]
-; CHECK: st1.1d { v1 }, [x1], #8 ; encoding: [0x21,0x7c,0x9f,0x0c]
-; CHECK: st1.1d { v2, v3 }, [x1], #16 ; encoding: [0x22,0xac,0x9f,0x0c]
-; CHECK: st1.1d { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x6c,0x9f,0x0c]
-; CHECK: st1.1d { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x2c,0x9f,0x0c]
-; CHECK: st1.2d { v1 }, [x1], #16 ; encoding: [0x21,0x7c,0x9f,0x4c]
-; CHECK: st1.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0xac,0x9f,0x4c]
-; CHECK: st1.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x6c,0x9f,0x4c]
-; CHECK: st1.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x2c,0x9f,0x4c]
-; CHECK: ld2.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0xcf,0x0c]
-; CHECK: ld2.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0xcf,0x4c]
-; CHECK: ld2.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0xcf,0x0c]
-; CHECK: ld2.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0xcf,0x4c]
-; CHECK: ld2.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0xcf,0x0c]
-; CHECK: ld2.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0xcf,0x4c]
-; CHECK: ld2.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0x8c,0xcf,0x4c]
-; CHECK: st2.8b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0x8f,0x0c]
-; CHECK: st2.16b { v2, v3 }, [x1], x15 ; encoding: [0x22,0x80,0x8f,0x4c]
-; CHECK: st2.4h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0x8f,0x0c]
-; CHECK: st2.8h { v2, v3 }, [x1], x15 ; encoding: [0x22,0x84,0x8f,0x4c]
-; CHECK: st2.2s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0x8f,0x0c]
-; CHECK: st2.4s { v2, v3 }, [x1], x15 ; encoding: [0x22,0x88,0x8f,0x4c]
-; CHECK: st2.2d { v2, v3 }, [x1], x15 ; encoding: [0x22,0x8c,0x8f,0x4c]
-; CHECK: ld2.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0x80,0xdf,0x0c]
-; CHECK: ld2.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0x80,0xdf,0x4c]
-; CHECK: ld2.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0x84,0xdf,0x0c]
-; CHECK: ld2.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0x84,0xdf,0x4c]
-; CHECK: ld2.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0x88,0xdf,0x0c]
-; CHECK: ld2.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0x88,0xdf,0x4c]
-; CHECK: ld2.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0x8c,0xdf,0x4c]
-; CHECK: st2.8b { v2, v3 }, [x1], #16 ; encoding: [0x22,0x80,0x9f,0x0c]
-; CHECK: st2.16b { v2, v3 }, [x1], #32 ; encoding: [0x22,0x80,0x9f,0x4c]
-; CHECK: st2.4h { v2, v3 }, [x1], #16 ; encoding: [0x22,0x84,0x9f,0x0c]
-; CHECK: st2.8h { v2, v3 }, [x1], #32 ; encoding: [0x22,0x84,0x9f,0x4c]
-; CHECK: st2.2s { v2, v3 }, [x1], #16 ; encoding: [0x22,0x88,0x9f,0x0c]
-; CHECK: st2.4s { v2, v3 }, [x1], #32 ; encoding: [0x22,0x88,0x9f,0x4c]
-; CHECK: st2.2d { v2, v3 }, [x1], #32 ; encoding: [0x22,0x8c,0x9f,0x4c]
-; CHECK: ld3.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0xcf,0x0c]
-; CHECK: ld3.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0xcf,0x4c]
-; CHECK: ld3.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0xcf,0x0c]
-; CHECK: ld3.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0xcf,0x4c]
-; CHECK: ld3.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0xcf,0x0c]
-; CHECK: ld3.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0xcf,0x4c]
-; CHECK: ld3.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x4c,0xcf,0x4c]
-; CHECK: st3.8b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0x8f,0x0c]
-; CHECK: st3.16b { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x40,0x8f,0x4c]
-; CHECK: st3.4h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0x8f,0x0c]
-; CHECK: st3.8h { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x44,0x8f,0x4c]
-; CHECK: st3.2s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0x8f,0x0c]
-; CHECK: st3.4s { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x48,0x8f,0x4c]
-; CHECK: st3.2d { v3, v4, v5 }, [x1], x15 ; encoding: [0x23,0x4c,0x8f,0x4c]
-; CHECK: ld3.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x40,0xdf,0x0c]
-; CHECK: ld3.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x40,0xdf,0x4c]
-; CHECK: ld3.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x44,0xdf,0x0c]
-; CHECK: ld3.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x44,0xdf,0x4c]
-; CHECK: ld3.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x48,0xdf,0x0c]
-; CHECK: ld3.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x48,0xdf,0x4c]
-; CHECK: ld3.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x4c,0xdf,0x4c]
-; CHECK: st3.8b { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x40,0x9f,0x0c]
-; CHECK: st3.16b { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x40,0x9f,0x4c]
-; CHECK: st3.4h { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x44,0x9f,0x0c]
-; CHECK: st3.8h { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x44,0x9f,0x4c]
-; CHECK: st3.2s { v3, v4, v5 }, [x1], #24 ; encoding: [0x23,0x48,0x9f,0x0c]
-; CHECK: st3.4s { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x48,0x9f,0x4c]
-; CHECK: st3.2d { v3, v4, v5 }, [x1], #48 ; encoding: [0x23,0x4c,0x9f,0x4c]
-; CHECK: ld4.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0xcf,0x0c]
-; CHECK: ld4.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0xcf,0x4c]
-; CHECK: ld4.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0xcf,0x0c]
-; CHECK: ld4.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0xcf,0x4c]
-; CHECK: ld4.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0xcf,0x0c]
-; CHECK: ld4.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0xcf,0x4c]
-; CHECK: ld4.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x0c,0xcf,0x4c]
-; CHECK: st4.8b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0x8f,0x0c]
-; CHECK: st4.16b { v4, v5, v6, v7 }, [x1], x15 ; encoding: [0x24,0x00,0x8f,0x4c]
-; CHECK: st4.4h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0x8f,0x0c]
-; CHECK: st4.8h { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x04,0x8f,0x4c]
-; CHECK: st4.2s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0x8f,0x0c]
-; CHECK: st4.4s { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x08,0x8f,0x4c]
-; CHECK: st4.2d { v7, v8, v9, v10 }, [x1], x15 ; encoding: [0x27,0x0c,0x8f,0x4c]
-; CHECK: ld4.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x00,0xdf,0x0c]
-; CHECK: ld4.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x00,0xdf,0x4c]
-; CHECK: ld4.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x04,0xdf,0x0c]
-; CHECK: ld4.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x04,0xdf,0x4c]
-; CHECK: ld4.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x08,0xdf,0x0c]
-; CHECK: ld4.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x08,0xdf,0x4c]
-; CHECK: ld4.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x0c,0xdf,0x4c]
-; CHECK: st4.8b { v4, v5, v6, v7 }, [x1], #32 ; encoding: [0x24,0x00,0x9f,0x0c]
-; CHECK: st4.16b { v4, v5, v6, v7 }, [x1], #64 ; encoding: [0x24,0x00,0x9f,0x4c]
-; CHECK: st4.4h { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x04,0x9f,0x0c]
-; CHECK: st4.8h { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x04,0x9f,0x4c]
-; CHECK: st4.2s { v7, v8, v9, v10 }, [x1], #32 ; encoding: [0x27,0x08,0x9f,0x0c]
-; CHECK: st4.4s { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x08,0x9f,0x4c]
-; CHECK: st4.2d { v7, v8, v9, v10 }, [x1], #64 ; encoding: [0x27,0x0c,0x9f,0x4c]
-; CHECK: ld1r.8b { v12 }, [x2] ; encoding: [0x4c,0xc0,0x40,0x0d]
-; CHECK: ld1r.8b { v12 }, [x2], x3 ; encoding: [0x4c,0xc0,0xc3,0x0d]
-; CHECK: ld1r.16b { v12 }, [x2] ; encoding: [0x4c,0xc0,0x40,0x4d]
-; CHECK: ld1r.16b { v12 }, [x2], x3 ; encoding: [0x4c,0xc0,0xc3,0x4d]
-; CHECK: ld1r.4h { v12 }, [x2] ; encoding: [0x4c,0xc4,0x40,0x0d]
-; CHECK: ld1r.4h { v12 }, [x2], x3 ; encoding: [0x4c,0xc4,0xc3,0x0d]
-; CHECK: ld1r.8h { v12 }, [x2] ; encoding: [0x4c,0xc4,0x40,0x4d]
-; CHECK: ld1r.8h { v12 }, [x2], x3 ; encoding: [0x4c,0xc4,0xc3,0x4d]
-; CHECK: ld1r.2s { v12 }, [x2] ; encoding: [0x4c,0xc8,0x40,0x0d]
-; CHECK: ld1r.2s { v12 }, [x2], x3 ; encoding: [0x4c,0xc8,0xc3,0x0d]
-; CHECK: ld1r.4s { v12 }, [x2] ; encoding: [0x4c,0xc8,0x40,0x4d]
-; CHECK: ld1r.4s { v12 }, [x2], x3 ; encoding: [0x4c,0xc8,0xc3,0x4d]
-; CHECK: ld1r.1d { v12 }, [x2] ; encoding: [0x4c,0xcc,0x40,0x0d]
-; CHECK: ld1r.1d { v12 }, [x2], x3 ; encoding: [0x4c,0xcc,0xc3,0x0d]
-; CHECK: ld1r.2d { v12 }, [x2] ; encoding: [0x4c,0xcc,0x40,0x4d]
-; CHECK: ld1r.2d { v12 }, [x2], x3 ; encoding: [0x4c,0xcc,0xc3,0x4d]
-; CHECK: ld1r.8b { v12 }, [x2], #1 ; encoding: [0x4c,0xc0,0xdf,0x0d]
-; CHECK: ld1r.16b { v12 }, [x2], #1 ; encoding: [0x4c,0xc0,0xdf,0x4d]
-; CHECK: ld1r.4h { v12 }, [x2], #2 ; encoding: [0x4c,0xc4,0xdf,0x0d]
-; CHECK: ld1r.8h { v12 }, [x2], #2 ; encoding: [0x4c,0xc4,0xdf,0x4d]
-; CHECK: ld1r.2s { v12 }, [x2], #4 ; encoding: [0x4c,0xc8,0xdf,0x0d]
-; CHECK: ld1r.4s { v12 }, [x2], #4 ; encoding: [0x4c,0xc8,0xdf,0x4d]
-; CHECK: ld1r.1d { v12 }, [x2], #8 ; encoding: [0x4c,0xcc,0xdf,0x0d]
-; CHECK: ld1r.2d { v12 }, [x2], #8 ; encoding: [0x4c,0xcc,0xdf,0x4d]
-; CHECK: ld2r.8b { v3, v4 }, [x2] ; encoding: [0x43,0xc0,0x60,0x0d]
-; CHECK: ld2r.8b { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc0,0xe3,0x0d]
-; CHECK: ld2r.16b { v3, v4 }, [x2] ; encoding: [0x43,0xc0,0x60,0x4d]
-; CHECK: ld2r.16b { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc0,0xe3,0x4d]
-; CHECK: ld2r.4h { v3, v4 }, [x2] ; encoding: [0x43,0xc4,0x60,0x0d]
-; CHECK: ld2r.4h { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc4,0xe3,0x0d]
-; CHECK: ld2r.8h { v3, v4 }, [x2] ; encoding: [0x43,0xc4,0x60,0x4d]
-; CHECK: ld2r.8h { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc4,0xe3,0x4d]
-; CHECK: ld2r.2s { v3, v4 }, [x2] ; encoding: [0x43,0xc8,0x60,0x0d]
-; CHECK: ld2r.2s { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc8,0xe3,0x0d]
-; CHECK: ld2r.4s { v3, v4 }, [x2] ; encoding: [0x43,0xc8,0x60,0x4d]
-; CHECK: ld2r.4s { v3, v4 }, [x2], x3 ; encoding: [0x43,0xc8,0xe3,0x4d]
-; CHECK: ld2r.1d { v3, v4 }, [x2] ; encoding: [0x43,0xcc,0x60,0x0d]
-; CHECK: ld2r.1d { v3, v4 }, [x2], x3 ; encoding: [0x43,0xcc,0xe3,0x0d]
-; CHECK: ld2r.2d { v3, v4 }, [x2] ; encoding: [0x43,0xcc,0x60,0x4d]
-; CHECK: ld2r.2d { v3, v4 }, [x2], x3 ; encoding: [0x43,0xcc,0xe3,0x4d]
-; CHECK: ld2r.8b { v3, v4 }, [x2], #2 ; encoding: [0x43,0xc0,0xff,0x0d]
-; CHECK: ld2r.16b { v3, v4 }, [x2], #2 ; encoding: [0x43,0xc0,0xff,0x4d]
-; CHECK: ld2r.4h { v3, v4 }, [x2], #4 ; encoding: [0x43,0xc4,0xff,0x0d]
-; CHECK: ld2r.8h { v3, v4 }, [x2], #4 ; encoding: [0x43,0xc4,0xff,0x4d]
-; CHECK: ld2r.2s { v3, v4 }, [x2], #8 ; encoding: [0x43,0xc8,0xff,0x0d]
-; CHECK: ld2r.4s { v3, v4 }, [x2], #8 ; encoding: [0x43,0xc8,0xff,0x4d]
-; CHECK: ld2r.1d { v3, v4 }, [x2], #16 ; encoding: [0x43,0xcc,0xff,0x0d]
-; CHECK: ld2r.2d { v3, v4 }, [x2], #16 ; encoding: [0x43,0xcc,0xff,0x4d]
-; CHECK: ld3r.8b { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe0,0x40,0x0d]
-; CHECK: ld3r.8b { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe0,0xc3,0x0d]
-; CHECK: ld3r.16b { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe0,0x40,0x4d]
-; CHECK: ld3r.16b { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe0,0xc3,0x4d]
-; CHECK: ld3r.4h { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe4,0x40,0x0d]
-; CHECK: ld3r.4h { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe4,0xc3,0x0d]
-; CHECK: ld3r.8h { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe4,0x40,0x4d]
-; CHECK: ld3r.8h { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe4,0xc3,0x4d]
-; CHECK: ld3r.2s { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe8,0x40,0x0d]
-; CHECK: ld3r.2s { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe8,0xc3,0x0d]
-; CHECK: ld3r.4s { v2, v3, v4 }, [x2] ; encoding: [0x42,0xe8,0x40,0x4d]
-; CHECK: ld3r.4s { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xe8,0xc3,0x4d]
-; CHECK: ld3r.1d { v2, v3, v4 }, [x2] ; encoding: [0x42,0xec,0x40,0x0d]
-; CHECK: ld3r.1d { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xec,0xc3,0x0d]
-; CHECK: ld3r.2d { v2, v3, v4 }, [x2] ; encoding: [0x42,0xec,0x40,0x4d]
-; CHECK: ld3r.2d { v2, v3, v4 }, [x2], x3 ; encoding: [0x42,0xec,0xc3,0x4d]
-; CHECK: ld3r.8b { v2, v3, v4 }, [x2], #3 ; encoding: [0x42,0xe0,0xdf,0x0d]
-; CHECK: ld3r.16b { v2, v3, v4 }, [x2], #3 ; encoding: [0x42,0xe0,0xdf,0x4d]
-; CHECK: ld3r.4h { v2, v3, v4 }, [x2], #6 ; encoding: [0x42,0xe4,0xdf,0x0d]
-; CHECK: ld3r.8h { v2, v3, v4 }, [x2], #6 ; encoding: [0x42,0xe4,0xdf,0x4d]
-; CHECK: ld3r.2s { v2, v3, v4 }, [x2], #12 ; encoding: [0x42,0xe8,0xdf,0x0d]
-; CHECK: ld3r.4s { v2, v3, v4 }, [x2], #12 ; encoding: [0x42,0xe8,0xdf,0x4d]
-; CHECK: ld3r.1d { v2, v3, v4 }, [x2], #24 ; encoding: [0x42,0xec,0xdf,0x0d]
-; CHECK: ld3r.2d { v2, v3, v4 }, [x2], #24 ; encoding: [0x42,0xec,0xdf,0x4d]
-; CHECK: ld4r.8b { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe0,0x60,0x0d]
-; CHECK: ld4r.8b { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe0,0xe3,0x0d]
-; CHECK: ld4r.16b { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe0,0x60,0x4d]
-; CHECK: ld4r.16b { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe0,0xe3,0x4d]
-; CHECK: ld4r.4h { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe4,0x60,0x0d]
-; CHECK: ld4r.4h { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe4,0xe3,0x0d]
-; CHECK: ld4r.8h { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe4,0x60,0x4d]
-; CHECK: ld4r.8h { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe4,0xe3,0x4d]
-; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe8,0x60,0x0d]
-; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe8,0xe3,0x0d]
-; CHECK: ld4r.4s { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xe8,0x60,0x4d]
-; CHECK: ld4r.4s { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xe8,0xe3,0x4d]
-; CHECK: ld4r.1d { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xec,0x60,0x0d]
-; CHECK: ld4r.1d { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xec,0xe3,0x0d]
-; CHECK: ld4r.2d { v2, v3, v4, v5 }, [x2] ; encoding: [0x42,0xec,0x60,0x4d]
-; CHECK: ld4r.2d { v2, v3, v4, v5 }, [x2], x3 ; encoding: [0x42,0xec,0xe3,0x4d]
-; CHECK: ld4r.8b { v2, v3, v4, v5 }, [x2], #4 ; encoding: [0x42,0xe0,0xff,0x0d]
-; CHECK: ld4r.16b { v2, v3, v4, v5 }, [x2], #4 ; encoding: [0x42,0xe0,0xff,0x4d]
-; CHECK: ld4r.4h { v2, v3, v4, v5 }, [x2], #8 ; encoding: [0x42,0xe4,0xff,0x0d]
-; CHECK: ld4r.8h { v2, v3, v4, v5 }, [x2], #8 ; encoding: [0x42,0xe4,0xff,0x4d]
-; CHECK: ld4r.2s { v2, v3, v4, v5 }, [x2], #16 ; encoding: [0x42,0xe8,0xff,0x0d]
-; CHECK: ld4r.4s { v2, v3, v4, v5 }, [x2], #16 ; encoding: [0x42,0xe8,0xff,0x4d]
-; CHECK: ld4r.1d { v2, v3, v4, v5 }, [x2], #32 ; encoding: [0x42,0xec,0xff,0x0d]
-; CHECK: ld4r.2d { v2, v3, v4, v5 }, [x2], #32 ; encoding: [0x42,0xec,0xff,0x4d]
-; CHECK: ld1.b { v6 }[13], [x3] ; encoding: [0x66,0x14,0x40,0x4d]
-; CHECK: ld1.h { v6 }[2], [x3] ; encoding: [0x66,0x50,0x40,0x0d]
-; CHECK: ld1.s { v6 }[2], [x3] ; encoding: [0x66,0x80,0x40,0x4d]
-; CHECK: ld1.d { v6 }[1], [x3] ; encoding: [0x66,0x84,0x40,0x4d]
-; CHECK: ld1.b { v6 }[13], [x3], x5 ; encoding: [0x66,0x14,0xc5,0x4d]
-; CHECK: ld1.h { v6 }[2], [x3], x5 ; encoding: [0x66,0x50,0xc5,0x0d]
-; CHECK: ld1.s { v6 }[2], [x3], x5 ; encoding: [0x66,0x80,0xc5,0x4d]
-; CHECK: ld1.d { v6 }[1], [x3], x5 ; encoding: [0x66,0x84,0xc5,0x4d]
-; CHECK: ld1.b { v6 }[13], [x3], #1 ; encoding: [0x66,0x14,0xdf,0x4d]
-; CHECK: ld1.h { v6 }[2], [x3], #2 ; encoding: [0x66,0x50,0xdf,0x0d]
-; CHECK: ld1.s { v6 }[2], [x3], #4 ; encoding: [0x66,0x80,0xdf,0x4d]
-; CHECK: ld1.d { v6 }[1], [x3], #8 ; encoding: [0x66,0x84,0xdf,0x4d]
-; CHECK: ld2.b { v5, v6 }[13], [x3] ; encoding: [0x65,0x14,0x60,0x4d]
-; CHECK: ld2.h { v5, v6 }[2], [x3] ; encoding: [0x65,0x50,0x60,0x0d]
-; CHECK: ld2.s { v5, v6 }[2], [x3] ; encoding: [0x65,0x80,0x60,0x4d]
-; CHECK: ld2.d { v5, v6 }[1], [x3] ; encoding: [0x65,0x84,0x60,0x4d]
-; CHECK: ld2.b { v5, v6 }[13], [x3], x5 ; encoding: [0x65,0x14,0xe5,0x4d]
-; CHECK: ld2.h { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x50,0xe5,0x0d]
-; CHECK: ld2.s { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x80,0xe5,0x4d]
-; CHECK: ld2.d { v5, v6 }[1], [x3], x5 ; encoding: [0x65,0x84,0xe5,0x4d]
-; CHECK: ld2.b { v5, v6 }[13], [x3], #2 ; encoding: [0x65,0x14,0xff,0x4d]
-; CHECK: ld2.h { v5, v6 }[2], [x3], #4 ; encoding: [0x65,0x50,0xff,0x0d]
-; CHECK: ld2.s { v5, v6 }[2], [x3], #8 ; encoding: [0x65,0x80,0xff,0x4d]
-; CHECK: ld2.d { v5, v6 }[1], [x3], #16 ; encoding: [0x65,0x84,0xff,0x4d]
-; CHECK: ld3.b { v7, v8, v9 }[13], [x3] ; encoding: [0x67,0x34,0x40,0x4d]
-; CHECK: ld3.h { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0x70,0x40,0x0d]
-; CHECK: ld3.s { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0xa0,0x40,0x4d]
-; CHECK: ld3.d { v7, v8, v9 }[1], [x3] ; encoding: [0x67,0xa4,0x40,0x4d]
-; CHECK: ld3.b { v7, v8, v9 }[13], [x3], x5 ; encoding: [0x67,0x34,0xc5,0x4d]
-; CHECK: ld3.h { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0x70,0xc5,0x0d]
-; CHECK: ld3.s { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0xa0,0xc5,0x4d]
-; CHECK: ld3.d { v7, v8, v9 }[1], [x3], x5 ; encoding: [0x67,0xa4,0xc5,0x4d]
-; CHECK: ld3.b { v7, v8, v9 }[13], [x3], #3 ; encoding: [0x67,0x34,0xdf,0x4d]
-; CHECK: ld3.h { v7, v8, v9 }[2], [x3], #6 ; encoding: [0x67,0x70,0xdf,0x0d]
-; CHECK: ld3.s { v7, v8, v9 }[2], [x3], #12 ; encoding: [0x67,0xa0,0xdf,0x4d]
-; CHECK: ld3.d { v7, v8, v9 }[1], [x3], #24 ; encoding: [0x67,0xa4,0xdf,0x4d]
-; CHECK: ld4.b { v7, v8, v9, v10 }[13], [x3] ; encoding: [0x67,0x34,0x60,0x4d]
-; CHECK: ld4.h { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0x70,0x60,0x0d]
-; CHECK: ld4.s { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0xa0,0x60,0x4d]
-; CHECK: ld4.d { v7, v8, v9, v10 }[1], [x3] ; encoding: [0x67,0xa4,0x60,0x4d]
-; CHECK: ld4.b { v7, v8, v9, v10 }[13], [x3], x5 ; encoding: [0x67,0x34,0xe5,0x4d]
-; CHECK: ld4.h { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0x70,0xe5,0x0d]
-; CHECK: ld4.s { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0xa0,0xe5,0x4d]
-; CHECK: ld4.d { v7, v8, v9, v10 }[1], [x3], x5 ; encoding: [0x67,0xa4,0xe5,0x4d]
-; CHECK: ld4.b { v7, v8, v9, v10 }[13], [x3], #4 ; encoding: [0x67,0x34,0xff,0x4d]
-; CHECK: ld4.h { v7, v8, v9, v10 }[2], [x3], #8 ; encoding: [0x67,0x70,0xff,0x0d]
-; CHECK: ld4.s { v7, v8, v9, v10 }[2], [x3], #16 ; encoding: [0x67,0xa0,0xff,0x4d]
-; CHECK: ld4.d { v7, v8, v9, v10 }[1], [x3], #32 ; encoding: [0x67,0xa4,0xff,0x4d]
-; CHECK: st1.b { v6 }[13], [x3] ; encoding: [0x66,0x14,0x00,0x4d]
-; CHECK: st1.h { v6 }[2], [x3] ; encoding: [0x66,0x50,0x00,0x0d]
-; CHECK: st1.s { v6 }[2], [x3] ; encoding: [0x66,0x80,0x00,0x4d]
-; CHECK: st1.d { v6 }[1], [x3] ; encoding: [0x66,0x84,0x00,0x4d]
-; CHECK: st1.b { v6 }[13], [x3], x5 ; encoding: [0x66,0x14,0x85,0x4d]
-; CHECK: st1.h { v6 }[2], [x3], x5 ; encoding: [0x66,0x50,0x85,0x0d]
-; CHECK: st1.s { v6 }[2], [x3], x5 ; encoding: [0x66,0x80,0x85,0x4d]
-; CHECK: st1.d { v6 }[1], [x3], x5 ; encoding: [0x66,0x84,0x85,0x4d]
-; CHECK: st1.b { v6 }[13], [x3], #1 ; encoding: [0x66,0x14,0x9f,0x4d]
-; CHECK: st1.h { v6 }[2], [x3], #2 ; encoding: [0x66,0x50,0x9f,0x0d]
-; CHECK: st1.s { v6 }[2], [x3], #4 ; encoding: [0x66,0x80,0x9f,0x4d]
-; CHECK: st1.d { v6 }[1], [x3], #8 ; encoding: [0x66,0x84,0x9f,0x4d]
-; CHECK: st2.b { v5, v6 }[13], [x3] ; encoding: [0x65,0x14,0x20,0x4d]
-; CHECK: st2.h { v5, v6 }[2], [x3] ; encoding: [0x65,0x50,0x20,0x0d]
-; CHECK: st2.s { v5, v6 }[2], [x3] ; encoding: [0x65,0x80,0x20,0x4d]
-; CHECK: st2.d { v5, v6 }[1], [x3] ; encoding: [0x65,0x84,0x20,0x4d]
-; CHECK: st2.b { v5, v6 }[13], [x3], x5 ; encoding: [0x65,0x14,0xa5,0x4d]
-; CHECK: st2.h { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x50,0xa5,0x0d]
-; CHECK: st2.s { v5, v6 }[2], [x3], x5 ; encoding: [0x65,0x80,0xa5,0x4d]
-; CHECK: st2.d { v5, v6 }[1], [x3], x5 ; encoding: [0x65,0x84,0xa5,0x4d]
-; CHECK: st2.b { v5, v6 }[13], [x3], #2 ; encoding: [0x65,0x14,0xbf,0x4d]
-; CHECK: st2.h { v5, v6 }[2], [x3], #4 ; encoding: [0x65,0x50,0xbf,0x0d]
-; CHECK: st2.s { v5, v6 }[2], [x3], #8 ; encoding: [0x65,0x80,0xbf,0x4d]
-; CHECK: st2.d { v5, v6 }[1], [x3], #16 ; encoding: [0x65,0x84,0xbf,0x4d]
-; CHECK: st3.b { v7, v8, v9 }[13], [x3] ; encoding: [0x67,0x34,0x00,0x4d]
-; CHECK: st3.h { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0x70,0x00,0x0d]
-; CHECK: st3.s { v7, v8, v9 }[2], [x3] ; encoding: [0x67,0xa0,0x00,0x4d]
-; CHECK: st3.d { v7, v8, v9 }[1], [x3] ; encoding: [0x67,0xa4,0x00,0x4d]
-; CHECK: st3.b { v7, v8, v9 }[13], [x3], x5 ; encoding: [0x67,0x34,0x85,0x4d]
-; CHECK: st3.h { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0x70,0x85,0x0d]
-; CHECK: st3.s { v7, v8, v9 }[2], [x3], x5 ; encoding: [0x67,0xa0,0x85,0x4d]
-; CHECK: st3.d { v7, v8, v9 }[1], [x3], x5 ; encoding: [0x67,0xa4,0x85,0x4d]
-; CHECK: st3.b { v7, v8, v9 }[13], [x3], #3 ; encoding: [0x67,0x34,0x9f,0x4d]
-; CHECK: st3.h { v7, v8, v9 }[2], [x3], #6 ; encoding: [0x67,0x70,0x9f,0x0d]
-; CHECK: st3.s { v7, v8, v9 }[2], [x3], #12 ; encoding: [0x67,0xa0,0x9f,0x4d]
-; CHECK: st3.d { v7, v8, v9 }[1], [x3], #24 ; encoding: [0x67,0xa4,0x9f,0x4d]
-; CHECK: st4.b { v7, v8, v9, v10 }[13], [x3] ; encoding: [0x67,0x34,0x20,0x4d]
-; CHECK: st4.h { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0x70,0x20,0x0d]
-; CHECK: st4.s { v7, v8, v9, v10 }[2], [x3] ; encoding: [0x67,0xa0,0x20,0x4d]
-; CHECK: st4.d { v7, v8, v9, v10 }[1], [x3] ; encoding: [0x67,0xa4,0x20,0x4d]
-; CHECK: st4.b { v7, v8, v9, v10 }[13], [x3], x5 ; encoding: [0x67,0x34,0xa5,0x4d]
-; CHECK: st4.h { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0x70,0xa5,0x0d]
-; CHECK: st4.s { v7, v8, v9, v10 }[2], [x3], x5 ; encoding: [0x67,0xa0,0xa5,0x4d]
-; CHECK: st4.d { v7, v8, v9, v10 }[1], [x3], x5 ; encoding: [0x67,0xa4,0xa5,0x4d]
-; CHECK: st4.b { v7, v8, v9, v10 }[13], [x3], #4 ; encoding: [0x67,0x34,0xbf,0x4d]
-; CHECK: st4.h { v7, v8, v9, v10 }[2], [x3], #8 ; encoding: [0x67,0x70,0xbf,0x0d]
-; CHECK: st4.s { v7, v8, v9, v10 }[2], [x3], #16 ; encoding: [0x67,0xa0,0xbf,0x4d]
-; CHECK: st4.d { v7, v8, v9, v10 }[1], [x3], #32 ; encoding: [0x67,0xa4,0xbf,0x4d]
diff --git a/test/MC/ARM64/system-encoding.s b/test/MC/ARM64/system-encoding.s
deleted file mode 100644
index 9f0d3c4..0000000
--- a/test/MC/ARM64/system-encoding.s
+++ /dev/null
@@ -1,679 +0,0 @@
-; RUN: not llvm-mc -triple arm64-apple-darwin -show-encoding < %s 2> %t | FileCheck %s
-; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
-
-foo:
-
-;-----------------------------------------------------------------------------
-; Simple encodings (instructions w/ no operands)
-;-----------------------------------------------------------------------------
-
- nop
- sev
- sevl
- wfe
- wfi
- yield
-
-; CHECK: nop ; encoding: [0x1f,0x20,0x03,0xd5]
-; CHECK: sev ; encoding: [0x9f,0x20,0x03,0xd5]
-; CHECK: sevl ; encoding: [0xbf,0x20,0x03,0xd5]
-; CHECK: wfe ; encoding: [0x5f,0x20,0x03,0xd5]
-; CHECK: wfi ; encoding: [0x7f,0x20,0x03,0xd5]
-; CHECK: yield ; encoding: [0x3f,0x20,0x03,0xd5]
-
-;-----------------------------------------------------------------------------
-; Single-immediate operand instructions
-;-----------------------------------------------------------------------------
-
- clrex #10
-; CHECK: clrex #10 ; encoding: [0x5f,0x3a,0x03,0xd5]
- isb #15
- isb sy
-; CHECK: isb ; encoding: [0xdf,0x3f,0x03,0xd5]
-; CHECK: isb ; encoding: [0xdf,0x3f,0x03,0xd5]
- dmb #3
- dmb osh
-; CHECK: dmb osh ; encoding: [0xbf,0x33,0x03,0xd5]
-; CHECK: dmb osh ; encoding: [0xbf,0x33,0x03,0xd5]
- dsb #7
- dsb nsh
-; CHECK: dsb nsh ; encoding: [0x9f,0x37,0x03,0xd5]
-; CHECK: dsb nsh ; encoding: [0x9f,0x37,0x03,0xd5]
-
-;-----------------------------------------------------------------------------
-; Generic system instructions
-;-----------------------------------------------------------------------------
- sys #2, c0, c5, #7
-; CHECK: encoding: [0xff,0x05,0x0a,0xd5]
- sys #7, C6, c10, #7, x7
-; CHECK: encoding: [0xe7,0x6a,0x0f,0xd5]
- sysl x20, #6, c3, C15, #7
-; CHECK: encoding: [0xf4,0x3f,0x2e,0xd5]
-
-; Check for error on invalid 'C' operand value.
- sys #2, c16, c5, #7
-; CHECK-ERRORS: invalid operand for instruction
-
-;-----------------------------------------------------------------------------
-; MSR/MRS instructions
-;-----------------------------------------------------------------------------
- msr ACTLR_EL1, x3
- msr ACTLR_EL2, x3
- msr ACTLR_EL3, x3
- msr ADFSR_EL1, x3
- msr ADFSR_EL2, x3
- msr ADFSR_EL3, x3
- msr AIDR_EL1, x3
- msr AIFSR_EL1, x3
- msr AIFSR_EL2, x3
- msr AIFSR_EL3, x3
- msr AMAIR_EL1, x3
- msr AMAIR_EL2, x3
- msr AMAIR_EL3, x3
- msr CCSIDR_EL1, x3
- msr CLIDR_EL1, x3
- msr CNTFRQ_EL0, x3
- msr CNTHCTL_EL2, x3
- msr CNTHP_CTL_EL2, x3
- msr CNTHP_CVAL_EL2, x3
- msr CNTHP_TVAL_EL2, x3
- msr CNTKCTL_EL1, x3
- msr CNTPCT_EL0, x3
- msr CNTP_CTL_EL0, x3
- msr CNTP_CVAL_EL0, x3
- msr CNTP_TVAL_EL0, x3
- msr CNTVCT_EL0, x3
- msr CNTVOFF_EL2, x3
- msr CNTV_CTL_EL0, x3
- msr CNTV_CVAL_EL0, x3
- msr CNTV_TVAL_EL0, x3
- msr CONTEXTIDR_EL1, x3
- msr CPACR_EL1, x3
- msr CPTR_EL2, x3
- msr CPTR_EL3, x3
- msr CSSELR_EL1, x3
- msr CTR_EL0, x3
- msr CURRENT_EL, x3
- msr DACR32_EL2, x3
- msr DCZID_EL0, x3
- msr ECOIDR_EL1, x3
- msr ESR_EL1, x3
- msr ESR_EL2, x3
- msr ESR_EL3, x3
- msr FAR_EL1, x3
- msr FAR_EL2, x3
- msr FAR_EL3, x3
- msr FPEXC32_EL2, x3
- msr HACR_EL2, x3
- msr HCR_EL2, x3
- msr HPFAR_EL2, x3
- msr HSTR_EL2, x3
- msr ID_AA64DFR0_EL1, x3
- msr ID_AA64DFR1_EL1, x3
- msr ID_AA64ISAR0_EL1, x3
- msr ID_AA64ISAR1_EL1, x3
- msr ID_AA64MMFR0_EL1, x3
- msr ID_AA64MMFR1_EL1, x3
- msr ID_AA64PFR0_EL1, x3
- msr ID_AA64PFR1_EL1, x3
- msr IFSR32_EL2, x3
- msr ISR_EL1, x3
- msr MAIR_EL1, x3
- msr MAIR_EL2, x3
- msr MAIR_EL3, x3
- msr MDCR_EL2, x3
- msr MDCR_EL3, x3
- msr MIDR_EL1, x3
- msr MPIDR_EL1, x3
- msr MVFR0_EL1, x3
- msr MVFR1_EL1, x3
- msr PAR_EL1, x3
- msr RVBAR_EL1, x3
- msr RVBAR_EL2, x3
- msr RVBAR_EL3, x3
- msr SCR_EL3, x3
- msr SCTLR_EL1, x3
- msr SCTLR_EL2, x3
- msr SCTLR_EL3, x3
- msr SDER32_EL3, x3
- msr TCR_EL1, x3
- msr TCR_EL2, x3
- msr TCR_EL3, x3
- msr TEECR32_EL1, x3
- msr TEEHBR32_EL1, x3
- msr TPIDRRO_EL0, x3
- msr TPIDR_EL0, x3
- msr TPIDR_EL1, x3
- msr TPIDR_EL2, x3
- msr TPIDR_EL3, x3
- msr TTBR0_EL1, x3
- msr TTBR0_EL2, x3
- msr TTBR0_EL3, x3
- msr TTBR1_EL1, x3
- msr VBAR_EL1, x3
- msr VBAR_EL2, x3
- msr VBAR_EL3, x3
- msr VMPIDR_EL2, x3
- msr VPIDR_EL2, x3
- msr VTCR_EL2, x3
- msr VTTBR_EL2, x3
- msr SPSel, x3
- msr S2_2_C4_C6_4, x1
-; CHECK: msr ACTLR_EL1, x3 ; encoding: [0x23,0x10,0x18,0xd5]
-; CHECK: msr ACTLR_EL2, x3 ; encoding: [0x23,0x10,0x1c,0xd5]
-; CHECK: msr ACTLR_EL3, x3 ; encoding: [0x23,0x10,0x1e,0xd5]
-; CHECK: msr AFSR0_EL1, x3 ; encoding: [0x03,0x51,0x18,0xd5]
-; CHECK: msr ADFSR_EL2, x3 ; encoding: [0x03,0x51,0x1c,0xd5]
-; CHECK: msr ADFSR_EL3, x3 ; encoding: [0x03,0x51,0x1e,0xd5]
-; CHECK: msr AIDR_EL1, x3 ; encoding: [0xe3,0x00,0x19,0xd5]
-; CHECK: msr AFSR1_EL1, x3 ; encoding: [0x23,0x51,0x18,0xd5]
-; CHECK: msr AIFSR_EL2, x3 ; encoding: [0x23,0x51,0x1c,0xd5]
-; CHECK: msr AIFSR_EL3, x3 ; encoding: [0x23,0x51,0x1e,0xd5]
-; CHECK: msr AMAIR_EL1, x3 ; encoding: [0x03,0xa3,0x18,0xd5]
-; CHECK: msr AMAIR_EL2, x3 ; encoding: [0x03,0xa3,0x1c,0xd5]
-; CHECK: msr AMAIR_EL3, x3 ; encoding: [0x03,0xa3,0x1e,0xd5]
-; CHECK: msr CCSIDR_EL1, x3 ; encoding: [0x03,0x00,0x19,0xd5]
-; CHECK: msr CLIDR_EL1, x3 ; encoding: [0x23,0x00,0x19,0xd5]
-; CHECK: msr CNTFRQ_EL0, x3 ; encoding: [0x03,0xe0,0x1b,0xd5]
-; CHECK: msr CNTHCTL_EL2, x3 ; encoding: [0x03,0xe1,0x1c,0xd5]
-; CHECK: msr CNTHP_CTL_EL2, x3 ; encoding: [0x23,0xe2,0x1c,0xd5]
-; CHECK: msr CNTHP_CVAL_EL2, x3 ; encoding: [0x43,0xe2,0x1c,0xd5]
-; CHECK: msr CNTHP_TVAL_EL2, x3 ; encoding: [0x03,0xe2,0x1c,0xd5]
-; CHECK: msr CNTKCTL_EL1, x3 ; encoding: [0x03,0xe1,0x18,0xd5]
-; CHECK: msr CNTPCT_EL0, x3 ; encoding: [0x23,0xe0,0x1b,0xd5]
-; CHECK: msr CNTP_CTL_EL0, x3 ; encoding: [0x23,0xe2,0x1b,0xd5]
-; CHECK: msr CNTP_CVAL_EL0, x3 ; encoding: [0x43,0xe2,0x1b,0xd5]
-; CHECK: msr CNTP_TVAL_EL0, x3 ; encoding: [0x03,0xe2,0x1b,0xd5]
-; CHECK: msr CNTVCT_EL0, x3 ; encoding: [0x43,0xe0,0x1b,0xd5]
-; CHECK: msr CNTVOFF_EL2, x3 ; encoding: [0x63,0xe0,0x1c,0xd5]
-; CHECK: msr CNTV_CTL_EL0, x3 ; encoding: [0x23,0xe3,0x1b,0xd5]
-; CHECK: msr CNTV_CVAL_EL0, x3 ; encoding: [0x43,0xe3,0x1b,0xd5]
-; CHECK: msr CNTV_TVAL_EL0, x3 ; encoding: [0x03,0xe3,0x1b,0xd5]
-; CHECK: msr CONTEXTIDR_EL1, x3 ; encoding: [0x23,0xd0,0x18,0xd5]
-; CHECK: msr CPACR_EL1, x3 ; encoding: [0x43,0x10,0x18,0xd5]
-; CHECK: msr CPTR_EL2, x3 ; encoding: [0x43,0x11,0x1c,0xd5]
-; CHECK: msr CPTR_EL3, x3 ; encoding: [0x43,0x11,0x1e,0xd5]
-; CHECK: msr CSSELR_EL1, x3 ; encoding: [0x03,0x00,0x1a,0xd5]
-; CHECK: msr CTR_EL0, x3 ; encoding: [0x23,0x00,0x1b,0xd5]
-; CHECK: msr CurrentEL, x3 ; encoding: [0x43,0x42,0x18,0xd5]
-; CHECK: msr DACR32_EL2, x3 ; encoding: [0x03,0x30,0x1c,0xd5]
-; CHECK: msr DCZID_EL0, x3 ; encoding: [0xe3,0x00,0x1b,0xd5]
-; CHECK: msr REVIDR_EL1, x3 ; encoding: [0xc3,0x00,0x18,0xd5]
-; CHECK: msr ESR_EL1, x3 ; encoding: [0x03,0x52,0x18,0xd5]
-; CHECK: msr ESR_EL2, x3 ; encoding: [0x03,0x52,0x1c,0xd5]
-; CHECK: msr ESR_EL3, x3 ; encoding: [0x03,0x52,0x1e,0xd5]
-; CHECK: msr FAR_EL1, x3 ; encoding: [0x03,0x60,0x18,0xd5]
-; CHECK: msr FAR_EL2, x3 ; encoding: [0x03,0x60,0x1c,0xd5]
-; CHECK: msr FAR_EL3, x3 ; encoding: [0x03,0x60,0x1e,0xd5]
-; CHECK: msr FPEXC32_EL2, x3 ; encoding: [0x03,0x53,0x1c,0xd5]
-; CHECK: msr HACR_EL2, x3 ; encoding: [0xe3,0x11,0x1c,0xd5]
-; CHECK: msr HCR_EL2, x3 ; encoding: [0x03,0x11,0x1c,0xd5]
-; CHECK: msr HPFAR_EL2, x3 ; encoding: [0x83,0x60,0x1c,0xd5]
-; CHECK: msr HSTR_EL2, x3 ; encoding: [0x63,0x11,0x1c,0xd5]
-; CHECK: msr ID_AA64DFR0_EL1, x3 ; encoding: [0x03,0x05,0x18,0xd5]
-; CHECK: msr ID_AA64DFR1_EL1, x3 ; encoding: [0x23,0x05,0x18,0xd5]
-; CHECK: msr ID_AA64ISAR0_EL1, x3 ; encoding: [0x03,0x06,0x18,0xd5]
-; CHECK: msr ID_AA64ISAR1_EL1, x3 ; encoding: [0x23,0x06,0x18,0xd5]
-; CHECK: msr ID_AA64MMFR0_EL1, x3 ; encoding: [0x03,0x07,0x18,0xd5]
-; CHECK: msr ID_AA64MMFR1_EL1, x3 ; encoding: [0x23,0x07,0x18,0xd5]
-; CHECK: msr ID_AA64PFR0_EL1, x3 ; encoding: [0x03,0x04,0x18,0xd5]
-; CHECK: msr ID_AA64PFR1_EL1, x3 ; encoding: [0x23,0x04,0x18,0xd5]
-; CHECK: msr IFSR32_EL2, x3 ; encoding: [0x23,0x50,0x1c,0xd5]
-; CHECK: msr ISR_EL1, x3 ; encoding: [0x03,0xc1,0x18,0xd5]
-; CHECK: msr MAIR_EL1, x3 ; encoding: [0x03,0xa2,0x18,0xd5]
-; CHECK: msr MAIR_EL2, x3 ; encoding: [0x03,0xa2,0x1c,0xd5]
-; CHECK: msr MAIR_EL3, x3 ; encoding: [0x03,0xa2,0x1e,0xd5]
-; CHECK: msr MDCR_EL2, x3 ; encoding: [0x23,0x11,0x1c,0xd5]
-; CHECK: msr MDCR_EL3, x3 ; encoding: [0x23,0x13,0x1e,0xd5]
-; CHECK: msr MIDR_EL1, x3 ; encoding: [0x03,0x00,0x18,0xd5]
-; CHECK: msr MPIDR_EL1, x3 ; encoding: [0xa3,0x00,0x18,0xd5]
-; CHECK: msr MVFR0_EL1, x3 ; encoding: [0x03,0x03,0x18,0xd5]
-; CHECK: msr MVFR1_EL1, x3 ; encoding: [0x23,0x03,0x18,0xd5]
-; CHECK: msr PAR_EL1, x3 ; encoding: [0x03,0x74,0x18,0xd5]
-; CHECK: msr RVBAR_EL1, x3 ; encoding: [0x23,0xc0,0x18,0xd5]
-; CHECK: msr RVBAR_EL2, x3 ; encoding: [0x23,0xc0,0x1c,0xd5]
-; CHECK: msr RVBAR_EL3, x3 ; encoding: [0x23,0xc0,0x1e,0xd5]
-; CHECK: msr SCR_EL3, x3 ; encoding: [0x03,0x11,0x1e,0xd5]
-; CHECK: msr SCTLR_EL1, x3 ; encoding: [0x03,0x10,0x18,0xd5]
-; CHECK: msr SCTLR_EL2, x3 ; encoding: [0x03,0x10,0x1c,0xd5]
-; CHECK: msr SCTLR_EL3, x3 ; encoding: [0x03,0x10,0x1e,0xd5]
-; CHECK: msr SDER32_EL3, x3 ; encoding: [0x23,0x11,0x1e,0xd5]
-; CHECK: msr TCR_EL1, x3 ; encoding: [0x43,0x20,0x18,0xd5]
-; CHECK: msr TCR_EL2, x3 ; encoding: [0x43,0x20,0x1c,0xd5]
-; CHECK: msr TCR_EL3, x3 ; encoding: [0x43,0x20,0x1e,0xd5]
-; CHECK: msr TEECR32_EL1, x3 ; encoding: [0x03,0x00,0x12,0xd5]
-; CHECK: msr TEEHBR32_EL1, x3 ; encoding: [0x03,0x10,0x12,0xd5]
-; CHECK: msr TPIDRRO_EL0, x3 ; encoding: [0x63,0xd0,0x1b,0xd5]
-; CHECK: msr TPIDR_EL0, x3 ; encoding: [0x43,0xd0,0x1b,0xd5]
-; CHECK: msr TPIDR_EL1, x3 ; encoding: [0x83,0xd0,0x18,0xd5]
-; CHECK: msr TPIDR_EL2, x3 ; encoding: [0x43,0xd0,0x1c,0xd5]
-; CHECK: msr TPIDR_EL3, x3 ; encoding: [0x43,0xd0,0x1e,0xd5]
-; CHECK: msr TTBR0_EL1, x3 ; encoding: [0x03,0x20,0x18,0xd5]
-; CHECK: msr TTBR0_EL2, x3 ; encoding: [0x03,0x20,0x1c,0xd5]
-; CHECK: msr TTBR0_EL3, x3 ; encoding: [0x03,0x20,0x1e,0xd5]
-; CHECK: msr TTBR1_EL1, x3 ; encoding: [0x23,0x20,0x18,0xd5]
-; CHECK: msr VBAR_EL1, x3 ; encoding: [0x03,0xc0,0x18,0xd5]
-; CHECK: msr VBAR_EL2, x3 ; encoding: [0x03,0xc0,0x1c,0xd5]
-; CHECK: msr VBAR_EL3, x3 ; encoding: [0x03,0xc0,0x1e,0xd5]
-; CHECK: msr VMPIDR_EL2, x3 ; encoding: [0xa3,0x00,0x1c,0xd5]
-; CHECK: msr VPIDR_EL2, x3 ; encoding: [0x03,0x00,0x1c,0xd5]
-; CHECK: msr VTCR_EL2, x3 ; encoding: [0x43,0x21,0x1c,0xd5]
-; CHECK: msr VTTBR_EL2, x3 ; encoding: [0x03,0x21,0x1c,0xd5]
-; CHECK: msr SPSel, x3 ; encoding: [0x03,0x42,0x18,0xd5]
-; CHECK: msr S2_2_C4_C6_4, x1 ; encoding: [0x81,0x46,0x12,0xd5]
-
- mrs x3, ACTLR_EL1
- mrs x3, ACTLR_EL2
- mrs x3, ACTLR_EL3
- mrs x3, ADFSR_EL1
- mrs x3, ADFSR_EL2
- mrs x3, ADFSR_EL3
- mrs x3, AIDR_EL1
- mrs x3, AIFSR_EL1
- mrs x3, AIFSR_EL2
- mrs x3, AIFSR_EL3
- mrs x3, AMAIR_EL1
- mrs x3, AMAIR_EL2
- mrs x3, AMAIR_EL3
- mrs x3, CCSIDR_EL1
- mrs x3, CLIDR_EL1
- mrs x3, CNTFRQ_EL0
- mrs x3, CNTHCTL_EL2
- mrs x3, CNTHP_CTL_EL2
- mrs x3, CNTHP_CVAL_EL2
- mrs x3, CNTHP_TVAL_EL2
- mrs x3, CNTKCTL_EL1
- mrs x3, CNTPCT_EL0
- mrs x3, CNTP_CTL_EL0
- mrs x3, CNTP_CVAL_EL0
- mrs x3, CNTP_TVAL_EL0
- mrs x3, CNTVCT_EL0
- mrs x3, CNTVOFF_EL2
- mrs x3, CNTV_CTL_EL0
- mrs x3, CNTV_CVAL_EL0
- mrs x3, CNTV_TVAL_EL0
- mrs x3, CONTEXTIDR_EL1
- mrs x3, CPACR_EL1
- mrs x3, CPTR_EL2
- mrs x3, CPTR_EL3
- mrs x3, CSSELR_EL1
- mrs x3, CTR_EL0
- mrs x3, CURRENT_EL
- mrs x3, DACR32_EL2
- mrs x3, DCZID_EL0
- mrs x3, ECOIDR_EL1
- mrs x3, ESR_EL1
- mrs x3, ESR_EL2
- mrs x3, ESR_EL3
- mrs x3, FAR_EL1
- mrs x3, FAR_EL2
- mrs x3, FAR_EL3
- mrs x3, FPEXC32_EL2
- mrs x3, HACR_EL2
- mrs x3, HCR_EL2
- mrs x3, HPFAR_EL2
- mrs x3, HSTR_EL2
- mrs x3, ID_AA64DFR0_EL1
- mrs x3, ID_AA64DFR1_EL1
- mrs x3, ID_AA64ISAR0_EL1
- mrs x3, ID_AA64ISAR1_EL1
- mrs x3, ID_AA64MMFR0_EL1
- mrs x3, ID_AA64MMFR1_EL1
- mrs x3, ID_AA64PFR0_EL1
- mrs x3, ID_AA64PFR1_EL1
- mrs x3, IFSR32_EL2
- mrs x3, ISR_EL1
- mrs x3, MAIR_EL1
- mrs x3, MAIR_EL2
- mrs x3, MAIR_EL3
- mrs x3, MDCR_EL2
- mrs x3, MDCR_EL3
- mrs x3, MIDR_EL1
- mrs x3, MPIDR_EL1
- mrs x3, MVFR0_EL1
- mrs x3, MVFR1_EL1
- mrs x3, PAR_EL1
- mrs x3, RVBAR_EL1
- mrs x3, RVBAR_EL2
- mrs x3, RVBAR_EL3
- mrs x3, SCR_EL3
- mrs x3, SCTLR_EL1
- mrs x3, SCTLR_EL2
- mrs x3, SCTLR_EL3
- mrs x3, SDER32_EL3
- mrs x3, TCR_EL1
- mrs x3, TCR_EL2
- mrs x3, TCR_EL3
- mrs x3, TEECR32_EL1
- mrs x3, TEEHBR32_EL1
- mrs x3, TPIDRRO_EL0
- mrs x3, TPIDR_EL0
- mrs x3, TPIDR_EL1
- mrs x3, TPIDR_EL2
- mrs x3, TPIDR_EL3
- mrs x3, TTBR0_EL1
- mrs x3, TTBR0_EL2
- mrs x3, TTBR0_EL3
- mrs x3, TTBR1_EL1
- mrs x3, VBAR_EL1
- mrs x3, VBAR_EL2
- mrs x3, VBAR_EL3
- mrs x3, VMPIDR_EL2
- mrs x3, VPIDR_EL2
- mrs x3, VTCR_EL2
- mrs x3, VTTBR_EL2
-
- mrs x3, MDCCSR_EL0
- mrs x3, MDCCINT_EL1
- mrs x3, DBGDTR_EL0
- mrs x3, DBGDTRRX_EL0
- mrs x3, DBGDTRTX_EL0
- mrs x3, DBGVCR32_EL2
- mrs x3, OSDTRRX_EL1
- mrs x3, MDSCR_EL1
- mrs x3, OSDTRTX_EL1
- mrs x3, OSECCR_EL11
- mrs x3, DBGBVR0_EL1
- mrs x3, DBGBVR1_EL1
- mrs x3, DBGBVR2_EL1
- mrs x3, DBGBVR3_EL1
- mrs x3, DBGBVR4_EL1
- mrs x3, DBGBVR5_EL1
- mrs x3, DBGBVR6_EL1
- mrs x3, DBGBVR7_EL1
- mrs x3, DBGBVR8_EL1
- mrs x3, DBGBVR9_EL1
- mrs x3, DBGBVR10_EL1
- mrs x3, DBGBVR11_EL1
- mrs x3, DBGBVR12_EL1
- mrs x3, DBGBVR13_EL1
- mrs x3, DBGBVR14_EL1
- mrs x3, DBGBVR15_EL1
- mrs x3, DBGBCR0_EL1
- mrs x3, DBGBCR1_EL1
- mrs x3, DBGBCR2_EL1
- mrs x3, DBGBCR3_EL1
- mrs x3, DBGBCR4_EL1
- mrs x3, DBGBCR5_EL1
- mrs x3, DBGBCR6_EL1
- mrs x3, DBGBCR7_EL1
- mrs x3, DBGBCR8_EL1
- mrs x3, DBGBCR9_EL1
- mrs x3, DBGBCR10_EL1
- mrs x3, DBGBCR11_EL1
- mrs x3, DBGBCR12_EL1
- mrs x3, DBGBCR13_EL1
- mrs x3, DBGBCR14_EL1
- mrs x3, DBGBCR15_EL1
- mrs x3, DBGWVR0_EL1
- mrs x3, DBGWVR1_EL1
- mrs x3, DBGWVR2_EL1
- mrs x3, DBGWVR3_EL1
- mrs x3, DBGWVR4_EL1
- mrs x3, DBGWVR5_EL1
- mrs x3, DBGWVR6_EL1
- mrs x3, DBGWVR7_EL1
- mrs x3, DBGWVR8_EL1
- mrs x3, DBGWVR9_EL1
- mrs x3, DBGWVR10_EL1
- mrs x3, DBGWVR11_EL1
- mrs x3, DBGWVR12_EL1
- mrs x3, DBGWVR13_EL1
- mrs x3, DBGWVR14_EL1
- mrs x3, DBGWVR15_EL1
- mrs x3, DBGWCR0_EL1
- mrs x3, DBGWCR1_EL1
- mrs x3, DBGWCR2_EL1
- mrs x3, DBGWCR3_EL1
- mrs x3, DBGWCR4_EL1
- mrs x3, DBGWCR5_EL1
- mrs x3, DBGWCR6_EL1
- mrs x3, DBGWCR7_EL1
- mrs x3, DBGWCR8_EL1
- mrs x3, DBGWCR9_EL1
- mrs x3, DBGWCR10_EL1
- mrs x3, DBGWCR11_EL1
- mrs x3, DBGWCR12_EL1
- mrs x3, DBGWCR13_EL1
- mrs x3, DBGWCR14_EL1
- mrs x3, DBGWCR15_EL1
- mrs x3, MDRAR_EL1
- mrs x3, OSLAR_EL1
- mrs x3, OSLSR_EL1
- mrs x3, OSDLR_EL1
- mrs x3, DBGPRCR_EL1
- mrs x3, DBGCLAIMSET_EL1
- mrs x3, DBGCLAIMCLR_EL1
- mrs x3, DBGAUTHSTATUS_EL1
- mrs x3, DBGDEVID2
- mrs x3, DBGDEVID1
- mrs x3, DBGDEVID0
- mrs x1, S2_2_C4_C6_4
- mrs x3, s2_3_c2_c1_4
- mrs x3, S2_3_c2_c1_4
-
-; CHECK: mrs x3, ACTLR_EL1 ; encoding: [0x23,0x10,0x38,0xd5]
-; CHECK: mrs x3, ACTLR_EL2 ; encoding: [0x23,0x10,0x3c,0xd5]
-; CHECK: mrs x3, ACTLR_EL3 ; encoding: [0x23,0x10,0x3e,0xd5]
-; CHECK: mrs x3, AFSR0_EL1 ; encoding: [0x03,0x51,0x38,0xd5]
-; CHECK: mrs x3, ADFSR_EL2 ; encoding: [0x03,0x51,0x3c,0xd5]
-; CHECK: mrs x3, ADFSR_EL3 ; encoding: [0x03,0x51,0x3e,0xd5]
-; CHECK: mrs x3, AIDR_EL1 ; encoding: [0xe3,0x00,0x39,0xd5]
-; CHECK: mrs x3, AFSR1_EL1 ; encoding: [0x23,0x51,0x38,0xd5]
-; CHECK: mrs x3, AIFSR_EL2 ; encoding: [0x23,0x51,0x3c,0xd5]
-; CHECK: mrs x3, AIFSR_EL3 ; encoding: [0x23,0x51,0x3e,0xd5]
-; CHECK: mrs x3, AMAIR_EL1 ; encoding: [0x03,0xa3,0x38,0xd5]
-; CHECK: mrs x3, AMAIR_EL2 ; encoding: [0x03,0xa3,0x3c,0xd5]
-; CHECK: mrs x3, AMAIR_EL3 ; encoding: [0x03,0xa3,0x3e,0xd5]
-; CHECK: mrs x3, CCSIDR_EL1 ; encoding: [0x03,0x00,0x39,0xd5]
-; CHECK: mrs x3, CLIDR_EL1 ; encoding: [0x23,0x00,0x39,0xd5]
-; CHECK: mrs x3, CNTFRQ_EL0 ; encoding: [0x03,0xe0,0x3b,0xd5]
-; CHECK: mrs x3, CNTHCTL_EL2 ; encoding: [0x03,0xe1,0x3c,0xd5]
-; CHECK: mrs x3, CNTHP_CTL_EL2 ; encoding: [0x23,0xe2,0x3c,0xd5]
-; CHECK: mrs x3, CNTHP_CVAL_EL2 ; encoding: [0x43,0xe2,0x3c,0xd5]
-; CHECK: mrs x3, CNTHP_TVAL_EL2 ; encoding: [0x03,0xe2,0x3c,0xd5]
-; CHECK: mrs x3, CNTKCTL_EL1 ; encoding: [0x03,0xe1,0x38,0xd5]
-; CHECK: mrs x3, CNTPCT_EL0 ; encoding: [0x23,0xe0,0x3b,0xd5]
-; CHECK: mrs x3, CNTP_CTL_EL0 ; encoding: [0x23,0xe2,0x3b,0xd5]
-; CHECK: mrs x3, CNTP_CVAL_EL0 ; encoding: [0x43,0xe2,0x3b,0xd5]
-; CHECK: mrs x3, CNTP_TVAL_EL0 ; encoding: [0x03,0xe2,0x3b,0xd5]
-; CHECK: mrs x3, CNTVCT_EL0 ; encoding: [0x43,0xe0,0x3b,0xd5]
-; CHECK: mrs x3, CNTVOFF_EL2 ; encoding: [0x63,0xe0,0x3c,0xd5]
-; CHECK: mrs x3, CNTV_CTL_EL0 ; encoding: [0x23,0xe3,0x3b,0xd5]
-; CHECK: mrs x3, CNTV_CVAL_EL0 ; encoding: [0x43,0xe3,0x3b,0xd5]
-; CHECK: mrs x3, CNTV_TVAL_EL0 ; encoding: [0x03,0xe3,0x3b,0xd5]
-; CHECK: mrs x3, CONTEXTIDR_EL1 ; encoding: [0x23,0xd0,0x38,0xd5]
-; CHECK: mrs x3, CPACR_EL1 ; encoding: [0x43,0x10,0x38,0xd5]
-; CHECK: mrs x3, CPTR_EL2 ; encoding: [0x43,0x11,0x3c,0xd5]
-; CHECK: mrs x3, CPTR_EL3 ; encoding: [0x43,0x11,0x3e,0xd5]
-; CHECK: mrs x3, CSSELR_EL1 ; encoding: [0x03,0x00,0x3a,0xd5]
-; CHECK: mrs x3, CTR_EL0 ; encoding: [0x23,0x00,0x3b,0xd5]
-; CHECK: mrs x3, CurrentEL ; encoding: [0x43,0x42,0x38,0xd5]
-; CHECK: mrs x3, DACR32_EL2 ; encoding: [0x03,0x30,0x3c,0xd5]
-; CHECK: mrs x3, DCZID_EL0 ; encoding: [0xe3,0x00,0x3b,0xd5]
-; CHECK: mrs x3, REVIDR_EL1 ; encoding: [0xc3,0x00,0x38,0xd5]
-; CHECK: mrs x3, ESR_EL1 ; encoding: [0x03,0x52,0x38,0xd5]
-; CHECK: mrs x3, ESR_EL2 ; encoding: [0x03,0x52,0x3c,0xd5]
-; CHECK: mrs x3, ESR_EL3 ; encoding: [0x03,0x52,0x3e,0xd5]
-; CHECK: mrs x3, FAR_EL1 ; encoding: [0x03,0x60,0x38,0xd5]
-; CHECK: mrs x3, FAR_EL2 ; encoding: [0x03,0x60,0x3c,0xd5]
-; CHECK: mrs x3, FAR_EL3 ; encoding: [0x03,0x60,0x3e,0xd5]
-; CHECK: mrs x3, FPEXC32_EL2 ; encoding: [0x03,0x53,0x3c,0xd5]
-; CHECK: mrs x3, HACR_EL2 ; encoding: [0xe3,0x11,0x3c,0xd5]
-; CHECK: mrs x3, HCR_EL2 ; encoding: [0x03,0x11,0x3c,0xd5]
-; CHECK: mrs x3, HPFAR_EL2 ; encoding: [0x83,0x60,0x3c,0xd5]
-; CHECK: mrs x3, HSTR_EL2 ; encoding: [0x63,0x11,0x3c,0xd5]
-; CHECK: mrs x3, ID_AA64DFR0_EL1 ; encoding: [0x03,0x05,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64DFR1_EL1 ; encoding: [0x23,0x05,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64ISAR0_EL1 ; encoding: [0x03,0x06,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64ISAR1_EL1 ; encoding: [0x23,0x06,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64MMFR0_EL1 ; encoding: [0x03,0x07,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64MMFR1_EL1 ; encoding: [0x23,0x07,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64PFR0_EL1 ; encoding: [0x03,0x04,0x38,0xd5]
-; CHECK: mrs x3, ID_AA64PFR1_EL1 ; encoding: [0x23,0x04,0x38,0xd5]
-; CHECK: mrs x3, IFSR32_EL2 ; encoding: [0x23,0x50,0x3c,0xd5]
-; CHECK: mrs x3, ISR_EL1 ; encoding: [0x03,0xc1,0x38,0xd5]
-; CHECK: mrs x3, MAIR_EL1 ; encoding: [0x03,0xa2,0x38,0xd5]
-; CHECK: mrs x3, MAIR_EL2 ; encoding: [0x03,0xa2,0x3c,0xd5]
-; CHECK: mrs x3, MAIR_EL3 ; encoding: [0x03,0xa2,0x3e,0xd5]
-; CHECK: mrs x3, MDCR_EL2 ; encoding: [0x23,0x11,0x3c,0xd5]
-; CHECK: mrs x3, MDCR_EL3 ; encoding: [0x23,0x13,0x3e,0xd5]
-; CHECK: mrs x3, MIDR_EL1 ; encoding: [0x03,0x00,0x38,0xd5]
-; CHECK: mrs x3, MPIDR_EL1 ; encoding: [0xa3,0x00,0x38,0xd5]
-; CHECK: mrs x3, MVFR0_EL1 ; encoding: [0x03,0x03,0x38,0xd5]
-; CHECK: mrs x3, MVFR1_EL1 ; encoding: [0x23,0x03,0x38,0xd5]
-; CHECK: mrs x3, PAR_EL1 ; encoding: [0x03,0x74,0x38,0xd5]
-; CHECK: mrs x3, RVBAR_EL1 ; encoding: [0x23,0xc0,0x38,0xd5]
-; CHECK: mrs x3, RVBAR_EL2 ; encoding: [0x23,0xc0,0x3c,0xd5]
-; CHECK: mrs x3, RVBAR_EL3 ; encoding: [0x23,0xc0,0x3e,0xd5]
-; CHECK: mrs x3, SCR_EL3 ; encoding: [0x03,0x11,0x3e,0xd5]
-; CHECK: mrs x3, SCTLR_EL1 ; encoding: [0x03,0x10,0x38,0xd5]
-; CHECK: mrs x3, SCTLR_EL2 ; encoding: [0x03,0x10,0x3c,0xd5]
-; CHECK: mrs x3, SCTLR_EL3 ; encoding: [0x03,0x10,0x3e,0xd5]
-; CHECK: mrs x3, SDER32_EL3 ; encoding: [0x23,0x11,0x3e,0xd5]
-; CHECK: mrs x3, TCR_EL1 ; encoding: [0x43,0x20,0x38,0xd5]
-; CHECK: mrs x3, TCR_EL2 ; encoding: [0x43,0x20,0x3c,0xd5]
-; CHECK: mrs x3, TCR_EL3 ; encoding: [0x43,0x20,0x3e,0xd5]
-; CHECK: mrs x3, TEECR32_EL1 ; encoding: [0x03,0x00,0x32,0xd5]
-; CHECK: mrs x3, TEEHBR32_EL1 ; encoding: [0x03,0x10,0x32,0xd5]
-; CHECK: mrs x3, TPIDRRO_EL0 ; encoding: [0x63,0xd0,0x3b,0xd5]
-; CHECK: mrs x3, TPIDR_EL0 ; encoding: [0x43,0xd0,0x3b,0xd5]
-; CHECK: mrs x3, TPIDR_EL1 ; encoding: [0x83,0xd0,0x38,0xd5]
-; CHECK: mrs x3, TPIDR_EL2 ; encoding: [0x43,0xd0,0x3c,0xd5]
-; CHECK: mrs x3, TPIDR_EL3 ; encoding: [0x43,0xd0,0x3e,0xd5]
-; CHECK: mrs x3, TTBR0_EL1 ; encoding: [0x03,0x20,0x38,0xd5]
-; CHECK: mrs x3, TTBR0_EL2 ; encoding: [0x03,0x20,0x3c,0xd5]
-; CHECK: mrs x3, TTBR0_EL3 ; encoding: [0x03,0x20,0x3e,0xd5]
-; CHECK: mrs x3, TTBR1_EL1 ; encoding: [0x23,0x20,0x38,0xd5]
-; CHECK: mrs x3, VBAR_EL1 ; encoding: [0x03,0xc0,0x38,0xd5]
-; CHECK: mrs x3, VBAR_EL2 ; encoding: [0x03,0xc0,0x3c,0xd5]
-; CHECK: mrs x3, VBAR_EL3 ; encoding: [0x03,0xc0,0x3e,0xd5]
-; CHECK: mrs x3, VMPIDR_EL2 ; encoding: [0xa3,0x00,0x3c,0xd5]
-; CHECK: mrs x3, VPIDR_EL2 ; encoding: [0x03,0x00,0x3c,0xd5]
-; CHECK: mrs x3, VTCR_EL2 ; encoding: [0x43,0x21,0x3c,0xd5]
-; CHECK: mrs x3, VTTBR_EL2 ; encoding: [0x03,0x21,0x3c,0xd5]
-; CHECK: mrs x3, MDCCSR_EL0 ; encoding: [0x03,0x01,0x33,0xd5]
-; CHECK: mrs x3, MDCCINT_EL1 ; encoding: [0x03,0x02,0x30,0xd5]
-; CHECK: mrs x3, DBGDTR_EL0 ; encoding: [0x03,0x04,0x33,0xd5]
-; CHECK: mrs x3, DBGDTRRX_EL0 ; encoding: [0x03,0x05,0x33,0xd5]
-; CHECK: mrs x3, DBGDTRRX_EL0 ; encoding: [0x03,0x05,0x33,0xd5]
-; CHECK: mrs x3, DBGVCR32_EL2 ; encoding: [0x03,0x07,0x34,0xd5]
-; CHECK: mrs x3, OSDTRRX_EL1 ; encoding: [0x43,0x00,0x30,0xd5]
-; CHECK: mrs x3, MDSCR_EL1 ; encoding: [0x43,0x02,0x30,0xd5]
-; CHECK: mrs x3, OSDTRTX_EL1 ; encoding: [0x43,0x03,0x30,0xd5]
-; CHECK: mrs x3, OSECCR_EL11 ; encoding: [0x43,0x06,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR0_EL1 ; encoding: [0x83,0x00,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR1_EL1 ; encoding: [0x83,0x01,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR2_EL1 ; encoding: [0x83,0x02,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR3_EL1 ; encoding: [0x83,0x03,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR4_EL1 ; encoding: [0x83,0x04,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR5_EL1 ; encoding: [0x83,0x05,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR6_EL1 ; encoding: [0x83,0x06,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR7_EL1 ; encoding: [0x83,0x07,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR8_EL1 ; encoding: [0x83,0x08,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR9_EL1 ; encoding: [0x83,0x09,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR10_EL1 ; encoding: [0x83,0x0a,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR11_EL1 ; encoding: [0x83,0x0b,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR12_EL1 ; encoding: [0x83,0x0c,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR13_EL1 ; encoding: [0x83,0x0d,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR14_EL1 ; encoding: [0x83,0x0e,0x30,0xd5]
-; CHECK: mrs x3, DBGBVR15_EL1 ; encoding: [0x83,0x0f,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR0_EL1 ; encoding: [0xa3,0x00,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR1_EL1 ; encoding: [0xa3,0x01,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR2_EL1 ; encoding: [0xa3,0x02,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR3_EL1 ; encoding: [0xa3,0x03,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR4_EL1 ; encoding: [0xa3,0x04,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR5_EL1 ; encoding: [0xa3,0x05,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR6_EL1 ; encoding: [0xa3,0x06,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR7_EL1 ; encoding: [0xa3,0x07,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR8_EL1 ; encoding: [0xa3,0x08,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR9_EL1 ; encoding: [0xa3,0x09,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR10_EL1 ; encoding: [0xa3,0x0a,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR11_EL1 ; encoding: [0xa3,0x0b,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR12_EL1 ; encoding: [0xa3,0x0c,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR13_EL1 ; encoding: [0xa3,0x0d,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR14_EL1 ; encoding: [0xa3,0x0e,0x30,0xd5]
-; CHECK: mrs x3, DBGBCR15_EL1 ; encoding: [0xa3,0x0f,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR0_EL1 ; encoding: [0xc3,0x00,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR1_EL1 ; encoding: [0xc3,0x01,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR2_EL1 ; encoding: [0xc3,0x02,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR3_EL1 ; encoding: [0xc3,0x03,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR4_EL1 ; encoding: [0xc3,0x04,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR5_EL1 ; encoding: [0xc3,0x05,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR6_EL1 ; encoding: [0xc3,0x06,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR7_EL1 ; encoding: [0xc3,0x07,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR8_EL1 ; encoding: [0xc3,0x08,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR9_EL1 ; encoding: [0xc3,0x09,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR10_EL1 ; encoding: [0xc3,0x0a,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR11_EL1 ; encoding: [0xc3,0x0b,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR12_EL1 ; encoding: [0xc3,0x0c,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR13_EL1 ; encoding: [0xc3,0x0d,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR14_EL1 ; encoding: [0xc3,0x0e,0x30,0xd5]
-; CHECK: mrs x3, DBGWVR15_EL1 ; encoding: [0xc3,0x0f,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR0_EL1 ; encoding: [0xe3,0x00,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR1_EL1 ; encoding: [0xe3,0x01,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR2_EL1 ; encoding: [0xe3,0x02,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR3_EL1 ; encoding: [0xe3,0x03,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR4_EL1 ; encoding: [0xe3,0x04,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR5_EL1 ; encoding: [0xe3,0x05,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR6_EL1 ; encoding: [0xe3,0x06,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR7_EL1 ; encoding: [0xe3,0x07,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR8_EL1 ; encoding: [0xe3,0x08,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR9_EL1 ; encoding: [0xe3,0x09,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR10_EL1 ; encoding: [0xe3,0x0a,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR11_EL1 ; encoding: [0xe3,0x0b,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR12_EL1 ; encoding: [0xe3,0x0c,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR13_EL1 ; encoding: [0xe3,0x0d,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR14_EL1 ; encoding: [0xe3,0x0e,0x30,0xd5]
-; CHECK: mrs x3, DBGWCR15_EL1 ; encoding: [0xe3,0x0f,0x30,0xd5]
-; CHECK: mrs x3, MDRAR_EL1 ; encoding: [0x03,0x10,0x30,0xd5]
-; CHECK: mrs x3, OSLAR_EL1 ; encoding: [0x83,0x10,0x30,0xd5]
-; CHECK: mrs x3, OSLSR_EL1 ; encoding: [0x83,0x11,0x30,0xd5]
-; CHECK: mrs x3, OSDLR_EL1 ; encoding: [0x83,0x13,0x30,0xd5]
-; CHECK: mrs x3, DBGPRCR_EL1 ; encoding: [0x83,0x14,0x30,0xd5]
-; CHECK: mrs x3, DBGCLAIMSET_EL1 ; encoding: [0xc3,0x78,0x30,0xd5]
-; CHECK: mrs x3, DBGCLAIMCLR_EL1 ; encoding: [0xc3,0x79,0x30,0xd5]
-; CHECK: mrs x3, DBGAUTHSTATUS_EL1 ; encoding: [0xc3,0x7e,0x30,0xd5]
-; CHECK: mrs x3, DBGDEVID2 ; encoding: [0xe3,0x70,0x30,0xd5]
-; CHECK: mrs x3, DBGDEVID1 ; encoding: [0xe3,0x71,0x30,0xd5]
-; CHECK: mrs x3, DBGDEVID0 ; encoding: [0xe3,0x72,0x30,0xd5]
-; CHECK: mrs x1, S2_2_C4_C6_4 ; encoding: [0x81,0x46,0x32,0xd5]
-; CHECK: mrs x3, S2_3_C2_C1_4 ; encoding: [0x83,0x21,0x33,0xd5]
-; CHECK: mrs x3, S2_3_C2_C1_4 ; encoding: [0x83,0x21,0x33,0xd5]
-
- msr RMR_EL3, x0
- msr RMR_EL2, x0
- msr RMR_EL1, x0
- msr CPM_IOACC_CTL_EL3, x0
-
-; CHECK: msr RMR_EL3, x0 ; encoding: [0x40,0xc0,0x1e,0xd5]
-; CHECK: msr RMR_EL2, x0 ; encoding: [0x40,0xc0,0x1a,0xd5]
-; CHECK: msr RMR_EL1, x0 ; encoding: [0x40,0xc0,0x19,0xd5]
-; CHECK: msr CPM_IOACC_CTL_EL3, x0 ; encoding: [0x00,0xf2,0x1f,0xd5]
-
- mrs x0, ID_PFR0_EL1
- mrs x0, ID_PFR1_EL1
- mrs x0, ID_DFR0_EL1
- mrs x0, ID_AFR0_EL1
- mrs x0, ID_ISAR0_EL1
- mrs x0, ID_ISAR1_EL1
- mrs x0, ID_ISAR2_EL1
- mrs x0, ID_ISAR3_EL1
- mrs x0, ID_ISAR4_EL1
- mrs x0, ID_ISAR5_EL1
- mrs x0, AFSR1_EL1
- mrs x0, AFSR0_EL1
- mrs x0, REVIDR_EL1
-; CHECK: mrs x0, ID_PFR0_EL1 ; encoding: [0x00,0x01,0x38,0xd5]
-; CHECK: mrs x0, ID_PFR1_EL1 ; encoding: [0x20,0x01,0x38,0xd5]
-; CHECK: mrs x0, ID_DFR0_EL1 ; encoding: [0x40,0x01,0x38,0xd5]
-; CHECK: mrs x0, ID_AFR0_EL1 ; encoding: [0x60,0x01,0x38,0xd5]
-; CHECK: mrs x0, ID_ISAR0_EL1 ; encoding: [0x00,0x02,0x38,0xd5]
-; CHECK: mrs x0, ID_ISAR1_EL1 ; encoding: [0x20,0x02,0x38,0xd5]
-; CHECK: mrs x0, ID_ISAR2_EL1 ; encoding: [0x40,0x02,0x38,0xd5]
-; CHECK: mrs x0, ID_ISAR3_EL1 ; encoding: [0x60,0x02,0x38,0xd5]
-; CHECK: mrs x0, ID_ISAR4_EL1 ; encoding: [0x80,0x02,0x38,0xd5]
-; CHECK: mrs x0, ID_ISAR5_EL1 ; encoding: [0xa0,0x02,0x38,0xd5]
-; CHECK: mrs x0, AFSR1_EL1 ; encoding: [0x20,0x51,0x38,0xd5]
-; CHECK: mrs x0, AFSR0_EL1 ; encoding: [0x00,0x51,0x38,0xd5]
-; CHECK: mrs x0, REVIDR_EL1 ; encoding: [0xc0,0x00,0x38,0xd5]
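The generic sysreg form exercised above deserves a gloss (an informal reading inferred from the encodings, not stated by the test): S<op0>_<op1>_C<n>_C<m>_<op2> names a system register by its raw encoding fields, which the assembler packs straight into the MSR/MRS word, so no symbolic name is required. Decoding one case from the expectations:

    msr S2_2_C4_C6_4, x1        ; encoding: [0x81,0x46,0x12,0xd5]
    ; word 0xd5124681 unpacks to op0=2, op1=2, CRn=4, CRm=6, op2=4, Rt=x1

The lowercase s2_3_c2_c1_4 inputs show the same form is accepted case-insensitively.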
diff --git a/test/MC/ARM64/tls-modifiers-darwin.s b/test/MC/ARM64/tls-modifiers-darwin.s
deleted file mode 100644
index 6478d26..0000000
--- a/test/MC/ARM64/tls-modifiers-darwin.s
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-mc -triple=arm64-apple-ios7.0 %s -o - | FileCheck %s
-; RUN: llvm-mc -triple=arm64-apple-ios7.0 -filetype=obj %s -o - | llvm-objdump -r - | FileCheck %s --check-prefix=CHECK-OBJ
-
- adrp x2, _var@TLVPPAGE
- ldr x0, [x15, _var@TLVPPAGEOFF]
- add lr, x0, _var@TLVPPAGEOFF
-; CHECK: adrp x2, _var@TLVPPAGE
-; CHECK: ldr x0, [x15, _var@TLVPPAGEOFF]
-; CHECK: add lr, x0, _var@TLVPPAGEOFF
-
-; CHECK-OBJ: 8 ARM64_RELOC_TLVP_LOAD_PAGEOFF12 _var
-; CHECK-OBJ: 4 ARM64_RELOC_TLVP_LOAD_PAGEOFF12 _var
-; CHECK-OBJ: 0 ARM64_RELOC_TLVP_LOAD_PAGE21 _var
diff --git a/test/MC/ARM64/tls-relocs.s b/test/MC/ARM64/tls-relocs.s
deleted file mode 100644
index 7e8b754..0000000
--- a/test/MC/ARM64/tls-relocs.s
+++ /dev/null
@@ -1,320 +0,0 @@
-// RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s
-// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \
-// RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s
-
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS initial-exec forms
-////////////////////////////////////////////////////////////////////////////////
-
- movz x15, #:gottprel_g1:var
-// CHECK: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw
-
-// CHECK-ELF: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM:[^ ]+]]
-
-
- movk x13, #:gottprel_g0_nc:var
-// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw
-
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
-
- adrp x11, :gottprel:var
- ldr x10, [x0, #:gottprel_lo12:var]
- ldr x9, :gottprel:var
-// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_adrp_imm21
-// CHECK: ldr x10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xf9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8
-// CHECK: ldr x9, :gottprel:var // encoding: [0bAAA01001,A,A,0x58]
-// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_imm19
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]]
-
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS local-exec forms
-////////////////////////////////////////////////////////////////////////////////
-
- movz x3, #:tprel_g2:var
- movn x4, #:tprel_g2:var
-// CHECK: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw
-// CHECK: movn x4, #:tprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
-
-
- movz x5, #:tprel_g1:var
- movn x6, #:tprel_g1:var
- movz w7, #:tprel_g1:var
-// CHECK: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw
-// CHECK: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw
-// CHECK: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
-
-
- movk x9, #:tprel_g1_nc:var
- movk w10, #:tprel_g1_nc:var
-// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw
-// CHECK: movk w10, #:tprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
-
-
- movz x11, #:tprel_g0:var
- movn x12, #:tprel_g0:var
- movz w13, #:tprel_g0:var
-// CHECK: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw
-// CHECK: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw
-// CHECK: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
-
-
- movk x15, #:tprel_g0_nc:var
- movk w16, #:tprel_g0_nc:var
-// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw
-// CHECK: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
-
-
- add x21, x22, #:tprel_lo12:var
-// CHECK: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
-
-
- add x25, x26, #:tprel_lo12_nc:var
-// CHECK: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
-
-
- ldrb w29, [x30, #:tprel_lo12:var]
- ldrsb x29, [x28, #:tprel_lo12_nc:var]
-// CHECK: ldrb w29, [lr, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1
-// CHECK: ldrsb fp, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
-
-
- strh w27, [x26, #:tprel_lo12:var]
- ldrsh x25, [x24, #:tprel_lo12_nc:var]
-// CHECK: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2
-// CHECK: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
-
-
- ldr w23, [x22, #:tprel_lo12:var]
- ldrsw x21, [x20, #:tprel_lo12_nc:var]
-// CHECK: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4
-// CHECK: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
-
- ldr x19, [x18, #:tprel_lo12:var]
- str x17, [x16, #:tprel_lo12_nc:var]
-// CHECK: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8
-// CHECK: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
-
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS local-dynamic forms
-////////////////////////////////////////////////////////////////////////////////
-
- movz x3, #:dtprel_g2:var
- movn x4, #:dtprel_g2:var
-// CHECK: movz x3, #:dtprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw
-// CHECK: movn x4, #:dtprel_g2:var // encoding: [0bAAA00100,A,0b110AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
-
-
- movz x5, #:dtprel_g1:var
- movn x6, #:dtprel_g1:var
- movz w7, #:dtprel_g1:var
-// CHECK: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw
-// CHECK: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw
-// CHECK: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
-
-
- movk x9, #:dtprel_g1_nc:var
- movk w10, #:dtprel_g1_nc:var
-// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw
-// CHECK: movk w10, #:dtprel_g1_nc:var // encoding: [0bAAA01010,A,0b101AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
-
-
- movz x11, #:dtprel_g0:var
- movn x12, #:dtprel_g0:var
- movz w13, #:dtprel_g0:var
-// CHECK: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw
-// CHECK: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw
-// CHECK: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
-
-
- movk x15, #:dtprel_g0_nc:var
- movk w16, #:dtprel_g0_nc:var
-// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw
-// CHECK: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
-
-
- add x21, x22, #:dtprel_lo12:var
-// CHECK: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
-
-
- add x25, x26, #:dtprel_lo12_nc:var
-// CHECK: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
-
-
- ldrb w29, [x30, #:dtprel_lo12:var]
- ldrsb x29, [x28, #:dtprel_lo12_nc:var]
-// CHECK: ldrb w29, [lr, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1
-// CHECK: ldrsb fp, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale1
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
-
-
- strh w27, [x26, #:dtprel_lo12:var]
- ldrsh x25, [x24, #:dtprel_lo12_nc:var]
-// CHECK: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2
-// CHECK: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale2
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
-
-
- ldr w23, [x22, #:dtprel_lo12:var]
- ldrsw x21, [x20, #:dtprel_lo12_nc:var]
-// CHECK: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4
-// CHECK: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale4
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
-
- ldr x19, [x18, #:dtprel_lo12:var]
- str x17, [x16, #:dtprel_lo12_nc:var]
-// CHECK: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8
-// CHECK: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_ldst_imm12_scale8
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS descriptor forms
-////////////////////////////////////////////////////////////////////////////////
-
- adrp x8, :tlsdesc:var
- ldr x7, [x6, #:tlsdesc_lo12:var]
- add x5, x4, #:tlsdesc_lo12:var
- .tlsdesccall var
- blr x3
-
-// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A']
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21
-// CHECK: ldr x7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xf9]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_ldst_imm12_scale8
-// CHECK: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91]
-// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_arm64_add_imm12
-// CHECK: .tlsdesccall var // encoding: []
-// CHECK-NEXT: // fixup A - offset: 0, value: var, kind: fixup_arm64_tlsdesc_call
-// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6]
-
-
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
-// CHECK-ELF-NEXT: {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_CALL [[VARSYM]]
-
- // Make sure the symbol for 'var' has type STT_TLS:
-
-// CHECK-ELF: Symbols [
-// CHECK-ELF: Symbol {
-// CHECK-ELF: Name: var (6)
-// CHECK-ELF-NEXT: Value:
-// CHECK-ELF-NEXT: Size:
-// CHECK-ELF-NEXT: Binding: Global
-// CHECK-ELF-NEXT: Type: TLS
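A note on the notation used throughout the deleted test above: in -show-encoding output, letter bits such as the A runs in

    movz x15, #:gottprel_g1:var    // encoding: [0bAAA01111,A,0b101AAAAA,0x92]
    // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw

mark bit positions the assembler cannot resolve at parse time; they are filled in by the fixup named on the following line, and only the fixed opcode bits print as 0 or 1.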
diff --git a/test/MC/AsmParser/cfi-invalid-startproc.s b/test/MC/AsmParser/cfi-invalid-startproc.s
new file mode 100644
index 0000000..57ded13
--- /dev/null
+++ b/test/MC/AsmParser/cfi-invalid-startproc.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple=x86_64-apple-macosx10.8 -filetype=obj -o %t %s 2>&1 | FileCheck %s
+# Check that .cfi_startproc is rejected unless it appears after the
+# beginning of a procedure; otherwise it would reference an invalid
+# symbol when emitting the relocation.
+# <rdar://problem/15939159>
+
+# CHECK: No symbol to start a frame
+.text
+.cfi_startproc
+.globl _someFunction
+_someFunction:
+.cfi_def_cfa_offset 16
+.cfi_offset %rbp, -16
+.cfi_def_cfa_register rbp
+ ret
+.cfi_endproc
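For contrast, a minimal sketch of the ordering the directive expects (not part of the test; it reuses the hypothetical _someFunction label): the symbol must be defined before the frame opens, so the relocation emitted for the frame has something to reference.

    .text
    .globl _someFunction
    _someFunction:
    .cfi_startproc
    ret
    .cfi_endproc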
diff --git a/test/MC/AsmParser/directive_seh.s b/test/MC/AsmParser/directive_seh.s
index 98fc606..f6eb970 100644
--- a/test/MC/AsmParser/directive_seh.s
+++ b/test/MC/AsmParser/directive_seh.s
@@ -3,10 +3,10 @@
# CHECK: .seh_proc func
# CHECK: .seh_pushframe @code
# CHECK: .seh_stackalloc 24
-# CHECK: .seh_savereg 6, 16
-# CHECK: .seh_savexmm 8, 0
-# CHECK: .seh_pushreg 3
-# CHECK: .seh_setframe 3, 0
+# CHECK: .seh_savereg %rbp, 16
+# CHECK: .seh_savexmm %r8, 0
+# CHECK: .seh_pushreg %rbx
+# CHECK: .seh_setframe %rbx, 0
# CHECK: .seh_endprologue
# CHECK: .seh_handler __C_specific_handler, @except
# CHECK-NOT: .section{{.*}}.xdata
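The updated expectations show the SEH directives printing symbolic register names where they previously printed bare register numbers; assuming the usual x86-64 DWARF numbering (3 = %rbx, 6 = %rbp, 8 = %r8), each substitution lines up with the number it replaces, e.g.

    .seh_savereg %rbp, 16    # formerly printed as .seh_savereg 6, 16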
diff --git a/test/MC/AsmParser/invalid-input-assertion.s b/test/MC/AsmParser/invalid-input-assertion.s
new file mode 100644
index 0000000..2557f6e
--- /dev/null
+++ b/test/MC/AsmParser/invalid-input-assertion.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -triple i686-linux -o /dev/null %s
+
+ .macro macro parameter=0
+ .if \parameter
+ .else
+ .endm
+
+ macro 1
+
diff --git a/test/MC/AsmParser/macros-darwin-vararg.s b/test/MC/AsmParser/macros-darwin-vararg.s
new file mode 100644
index 0000000..a650c08
--- /dev/null
+++ b/test/MC/AsmParser/macros-darwin-vararg.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -triple i386-apple-darwin10 %s 2>&1 | FileCheck %s
+
+// CHECK: error: vararg is not a valid parameter qualifier for 'arg' in macro 'abc'
+// CHECK: .macro abc arg:vararg
+
+.macro abc arg:vararg
+ \arg
+.endm
diff --git a/test/MC/AsmParser/vararg-default-value.s b/test/MC/AsmParser/vararg-default-value.s
new file mode 100644
index 0000000..77cd1e8
--- /dev/null
+++ b/test/MC/AsmParser/vararg-default-value.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple x86_64-linux-gnu %s | FileCheck %s
+.macro abc arg:vararg=nop
+ \arg
+.endm
+
+.macro abcd arg0=%eax arg1:vararg=%ebx
+ movl \arg0, \arg1
+.endm
+
+.text
+
+// CHECK: nop
+ abc
+// CHECK: movl %eax, %ebx
+ abcd ,
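The final invocation is the subtle case: `abcd ,` passes two empty arguments, so both parameters fall back to their defaults and the macro expands to the movl the preceding CHECK line requires.

    abcd ,                  // empty args take the defaults %eax and %ebx
    // expands to: movl %eax, %ebx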
diff --git a/test/MC/AsmParser/vararg.s b/test/MC/AsmParser/vararg.s
new file mode 100644
index 0000000..b27668e
--- /dev/null
+++ b/test/MC/AsmParser/vararg.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple x86_64-linux-gnu %s | FileCheck %s
+.macro ifcc arg:vararg
+.if cc
+ \arg
+.endif
+.endm
+
+.macro ifcc2 arg0 arg1:vararg
+.if cc
+ movl \arg0, \arg1
+.endif
+.endm
+
+.macro ifcc3 arg0, arg1:vararg
+.if cc
+ movl \arg0, \arg1
+.endif
+.endm
+
+.text
+
+// CHECK: movl %esp, %ebp
+// CHECK: subl $0, %esp
+// CHECK: movl %eax, %ebx
+// CHECK: movl %ecx, %ebx
+// CHECK: movl %ecx, %eax
+// CHECK: movl %eax, %ecx
+.set cc,1
+ ifcc movl %esp, %ebp
+ subl $0, %esp
+
+ ifcc2 %eax %ebx
+ ifcc2 %ecx, %ebx
+ ifcc3 %ecx %eax
+ ifcc3 %eax, %ecx
+
+// CHECK-NOT: movl
+// CHECK: subl $1, %esp
+.set cc,0
+ ifcc movl %esp, %ebp
+ subl $1, %esp
diff --git a/test/MC/COFF/alias.s b/test/MC/COFF/alias.s
index f6f6d46..dc4f65a 100644
--- a/test/MC/COFF/alias.s
+++ b/test/MC/COFF/alias.s
@@ -68,7 +68,7 @@ weak_aliased_to_external = external2
// CHECK-NEXT: Section: .text (1)
// CHECK-NEXT: BaseType: Null (0x0)
// CHECK-NEXT: ComplexType: Null (0x0)
-// CHECK-NEXT: StorageClass: Static (0x3)
+// CHECK-NEXT: StorageClass: External (0x2)
// CHECK-NEXT: AuxSymbolCount: 0
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
diff --git a/test/MC/COFF/comm.ll b/test/MC/COFF/comm.ll
index 74da557..6fe122e 100644
--- a/test/MC/COFF/comm.ll
+++ b/test/MC/COFF/comm.ll
@@ -9,5 +9,5 @@
; CHECK: .lcomm _a,1
; CHECK: .lcomm _b,8,8
; .comm uses log2 alignment
-; CHECK: .comm _c,1,0
-; CHECK: .comm _d,8,3
+; CHECK: .comm _c,1
+; CHECK: .comm _d,8
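A worked reading of the log2 note above (informal): the third operand of .comm in the test input is a power-of-two exponent, so

    .comm _c,1,0    ; size 1, alignment 2^0 = 1 byte
    .comm _d,8,3    ; size 8, alignment 2^3 = 8 bytes

and the rewritten expectations drop the operand entirely because the COFF output no longer prints an alignment with .comm.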
diff --git a/test/MC/COFF/comm.s b/test/MC/COFF/comm.s
index 21ae5d2..37db75f 100644
--- a/test/MC/COFF/comm.s
+++ b/test/MC/COFF/comm.s
@@ -1,7 +1,7 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -t | FileCheck %s
.lcomm _a,4,4
-.comm _b, 4, 2
+.comm _b, 4
// CHECK: Symbol {
@@ -17,7 +17,7 @@
// CHECK: Symbol {
// CHECK: Name: _b
// CHECK-NEXT: Value: 4
-// CHECK-NEXT: Section: .bss
+// CHECK-NEXT: Section: (0)
// CHECK-NEXT: BaseType: Null
// CHECK-NEXT: ComplexType: Null
// CHECK-NEXT: StorageClass: External
diff --git a/test/MC/COFF/directive-section-characteristics.ll b/test/MC/COFF/directive-section-characteristics.ll
new file mode 100644
index 0000000..ca8102a
--- /dev/null
+++ b/test/MC/COFF/directive-section-characteristics.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple i686-windows -filetype obj -o - %s | llvm-readobj -sections \
+; RUN: | FileCheck %s
+
+define dllexport void @function() {
+entry:
+ ret void
+}
+
+; CHECK: Section {
+; CHECK: Name: .drectve
+; CHECK: Characteristics [
+; CHECK: IMAGE_SCN_ALIGN_1BYTES
+; CHECK: IMAGE_SCN_LNK_INFO
+; CHECK: IMAGE_SCN_LNK_REMOVE
+; CHECK: ]
+; CHECK: }
+
diff --git a/test/MC/COFF/file.s b/test/MC/COFF/file.s
new file mode 100644
index 0000000..132e82b
--- /dev/null
+++ b/test/MC/COFF/file.s
@@ -0,0 +1,47 @@
+// RUN: llvm-mc -triple i686-windows -filetype obj %s -o - | llvm-objdump -t - \
+// RUN: | FileCheck %s
+
+// RUN: llvm-mc -triple i686-windows -filetype obj %s -o - \
+// RUN: | llvm-readobj -symbols | FileCheck %s -check-prefix CHECK-SCN
+
+ .file "null-padded.asm"
+// CHECK: (nx 1) {{0x[0-9]+}} .file
+// CHECK-NEXT: AUX null-padded.asm{{$}}
+
+ .file "eighteen-chars.asm"
+
+// CHECK: (nx 1) {{0x[0-9]+}} .file
+// CHECK-NEXT: AUX eighteen-chars.asm{{$}}
+
+ .file "multiple-auxiliary-entries.asm"
+
+// CHECK: (nx 2) {{0x[0-9]+}} .file
+// CHECK-NEXT: AUX multiple-auxiliary-entries.asm{{$}}
+
+// CHECK-SCN: Symbols [
+// CHECK-SCN: Symbol {
+// CHECK-SCN: Name: .file
+// CHECK-SCN: Section: (65534)
+// CHECK-SCN: StorageClass: File
+// CHECK-SCN: AuxFileRecord {
+// CHECK-SCN: FileName: null-padded.asm
+// CHECK-SCN: }
+// CHECK-SCN: }
+// CHECK-SCN: Symbol {
+// CHECK-SCN: Name: .file
+// CHECK-SCN: Section: (65534)
+// CHECK-SCN: StorageClass: File
+// CHECK-SCN: AuxFileRecord {
+// CHECK-SCN: FileName: eighteen-chars.asm
+// CHECK-SCN: }
+// CHECK-SCN: }
+// CHECK-SCN: Symbol {
+// CHECK-SCN: Name: .file
+// CHECK-SCN: Section: (65534)
+// CHECK-SCN: StorageClass: File
+// CHECK-SCN: AuxFileRecord {
+// CHECK-SCN: FileName: multiple-auxiliary-entries.asm
+// CHECK-SCN: }
+// CHECK-SCN: }
+// CHECK-SCN: ]
+
diff --git a/test/MC/COFF/global_ctors_dtors.ll b/test/MC/COFF/global_ctors_dtors.ll
index 2a25219..046e93a 100644
--- a/test/MC/COFF/global_ctors_dtors.ll
+++ b/test/MC/COFF/global_ctors_dtors.ll
@@ -9,8 +9,13 @@
@.str2 = private unnamed_addr constant [12 x i8] c"destructing\00", align 1
@.str3 = private unnamed_addr constant [5 x i8] c"main\00", align 1
-@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @a_global_ctor }]
-@llvm.global_dtors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @a_global_dtor }]
+%ini = type { i32, void ()*, i8* }
+
+@llvm.global_ctors = appending global [2 x %ini] [
+  %ini { i32 65535, void ()* @a_global_ctor, i8* null },
+  %ini { i32 65535, void ()* @b_global_ctor, i8* bitcast (i32* @b to i8*) }
+]
+@llvm.global_dtors = appending global [1 x %ini] [%ini { i32 65535, void ()* @a_global_dtor, i8* null }]
declare i32 @puts(i8*)
@@ -19,6 +24,13 @@ define void @a_global_ctor() nounwind {
ret void
}
+@b = global i32 zeroinitializer
+
+define void @b_global_ctor() nounwind {
+ store i32 42, i32* @b
+ ret void
+}
+
define void @a_global_dtor() nounwind {
%1 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str2, i32 0, i32 0))
ret void
@@ -29,11 +41,15 @@ define i32 @main() nounwind {
ret i32 0
}
-; WIN32: .section .CRT$XCU,"r"
+; WIN32: .section .CRT$XCU,"rd"
; WIN32: a_global_ctor
-; WIN32: .section .CRT$XTX,"r"
+; WIN32: .section .CRT$XCU,"rd",associative .bss,{{_?}}b
+; WIN32: b_global_ctor
+; WIN32: .section .CRT$XTX,"rd"
; WIN32: a_global_dtor
-; MINGW32: .section .ctors,"w"
+; MINGW32: .section .ctors,"wd"
; MINGW32: a_global_ctor
-; MINGW32: .section .dtors,"w"
+; MINGW32: .section .ctors,"wd",associative .bss,{{_?}}b
+; MINGW32: b_global_ctor
+; MINGW32: .section .dtors,"wd"
; MINGW32: a_global_dtor
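An informal sketch of what the widened initializer checks (a reading of the expectations above, not wording from the patch): each entry in @llvm.global_ctors now carries a third, associated-data field.

    %ini { i32 65535, void ()* @a_global_ctor, i8* null }
    ; null key: the ctor is always kept
    %ini { i32 65535, void ()* @b_global_ctor, i8* bitcast (i32* @b to i8*) }
    ; key @b: the ctor's section is emitted associative with @b's section
    ; (.CRT$XCU,"rd",associative .bss,b above) and is discarded along with it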
diff --git a/test/MC/COFF/initialised-data.ll b/test/MC/COFF/initialised-data.ll
new file mode 100644
index 0000000..c428469
--- /dev/null
+++ b/test/MC/COFF/initialised-data.ll
@@ -0,0 +1,7 @@
+; RUN: llc -mtriple i686-windows %s -o - | FileCheck %s
+; RUN: llc -mtriple x86_64-windows %s -o - | FileCheck %s
+
+@data = dllexport constant [5 x i8] c"data\00", align 1
+
+; CHECK: .section .rdata,"rd"
+
diff --git a/test/MC/COFF/invalid-def.s b/test/MC/COFF/invalid-def.s
new file mode 100644
index 0000000..42821c2
--- /dev/null
+++ b/test/MC/COFF/invalid-def.s
@@ -0,0 +1,5 @@
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+
+ .def first
+ .def second
+
diff --git a/test/MC/COFF/invalid-endef.s b/test/MC/COFF/invalid-endef.s
new file mode 100644
index 0000000..c6fd8f5
--- /dev/null
+++ b/test/MC/COFF/invalid-endef.s
@@ -0,0 +1,4 @@
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+
+ .endef
+
diff --git a/test/MC/COFF/invalid-scl-range.s b/test/MC/COFF/invalid-scl-range.s
new file mode 100644
index 0000000..5722505
--- /dev/null
+++ b/test/MC/COFF/invalid-scl-range.s
@@ -0,0 +1,6 @@
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+
+ .def storage_class_range
+ .scl 1337
+ .endef
+
diff --git a/test/MC/COFF/invalid-scl.s b/test/MC/COFF/invalid-scl.s
new file mode 100644
index 0000000..8565a5a
--- /dev/null
+++ b/test/MC/COFF/invalid-scl.s
@@ -0,0 +1,4 @@
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+
+ .scl 1337
+
diff --git a/test/MC/COFF/invalid-type-range.s b/test/MC/COFF/invalid-type-range.s
new file mode 100644
index 0000000..92874cc
--- /dev/null
+++ b/test/MC/COFF/invalid-type-range.s
@@ -0,0 +1,6 @@
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+
+ .def invalid_type_range
+ .type 65536
+ .endef
+
diff --git a/test/MC/COFF/invalid-type.s b/test/MC/COFF/invalid-type.s
new file mode 100644
index 0000000..a1e131e
--- /dev/null
+++ b/test/MC/COFF/invalid-type.s
@@ -0,0 +1,4 @@
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+
+ .type 65536
+
diff --git a/test/MC/COFF/offset.s b/test/MC/COFF/offset.s
new file mode 100644
index 0000000..d0d3710
--- /dev/null
+++ b/test/MC/COFF/offset.s
@@ -0,0 +1,19 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -t -r | FileCheck %s
+
+ .data
+ .globl test1_foo
+test1_foo:
+ .long 42
+
+ .globl test1_zed
+test1_zed = test1_foo + 1
+
+// CHECK: Symbol {
+// CHECK: Name: test1_zed
+// CHECK-NEXT: Value: 1
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: BaseType: Null
+// CHECK-NEXT: ComplexType: Null
+// CHECK-NEXT: StorageClass: External
+// CHECK-NEXT: AuxSymbolCount: 0
+// CHECK-NEXT: }
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
index ccada37..71ccec3 100644
--- a/test/MC/COFF/symbol-alias.s
+++ b/test/MC/COFF/symbol-alias.s
@@ -51,7 +51,7 @@ _bar_alias = _bar
// CHECK-NEXT: Value: [[FOO_VALUE]]
// CHECK-NEXT: Section: [[FOO_SECTION_NUMBER]]
// CHECK-NEXT: BaseType: [[FOO_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType: [[FOO_COMPLEX_TYPE]]
+// CHECK-NEXT: ComplexType: Null (0x0)
// CHECK-NEXT: StorageClass: [[FOO_STORAGE_CLASS]]
// CHECK-NEXT: AuxSymbolCount: [[FOO_NUMBER_OF_AUX_SYMBOLS]]
diff --git a/test/MC/COFF/weak-symbol.ll b/test/MC/COFF/weak-symbol.ll
index c06692e..fd78307 100644
--- a/test/MC/COFF/weak-symbol.ll
+++ b/test/MC/COFF/weak-symbol.ll
@@ -28,20 +28,20 @@ define weak void @f() section ".sect" {
}
; Weak global
-; X86: .section .data,"r",discard,_a
+; X86: .section .data,"rd",discard,_a
; X86: .globl _a
; X86: .zero 12
;
-; X64: .section .data,"r",discard,a
+; X64: .section .data,"rd",discard,a
; X64: .globl a
; X64: .zero 12
@a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
-; X86: .section .tls$,"w",discard,_b
+; X86: .section .tls$,"wd",discard,_b
; X86: .globl _b
; X86: .long 0
;
-; X64: .section .tls$,"w",discard,b
+; X64: .section .tls$,"wd",discard,b
; X64: .globl b
; X64: .long 0
diff --git a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
index 799ecdf..1860bf6 100644
--- a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
+++ b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=aarch64 -mattr=fp-armv8 -disassemble -show-encoding < %s | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=fp-armv8 -disassemble -show-encoding < %s | FileCheck %s
# The "Rm" bits are ignored, but the canonical representation has them filled
# with 0s. This is what we should produce even if the input bit-pattern had
diff --git a/test/MC/Disassembler/AArch64/arm64-advsimd.txt b/test/MC/Disassembler/AArch64/arm64-advsimd.txt
new file mode 100644
index 0000000..cceee67
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-advsimd.txt
@@ -0,0 +1,2283 @@
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 --disassemble < %s | FileCheck %s
+
+0x00 0xb8 0x20 0x0e
+0x00 0xb8 0x20 0x4e
+0x00 0xb8 0x60 0x0e
+0x00 0xb8 0x60 0x4e
+0x00 0xb8 0xa0 0x0e
+0x00 0xb8 0xa0 0x4e
+
+# CHECK: abs.8b v0, v0
+# CHECK: abs.16b v0, v0
+# CHECK: abs.4h v0, v0
+# CHECK: abs.8h v0, v0
+# CHECK: abs.2s v0, v0
+# CHECK: abs.4s v0, v0
+
+0x00 0x84 0x20 0x0e
+0x00 0x84 0x20 0x4e
+0x00 0x84 0x60 0x0e
+0x00 0x84 0x60 0x4e
+0x00 0x84 0xa0 0x0e
+0x00 0x84 0xa0 0x4e
+0x00 0x84 0xe0 0x4e
+
+# CHECK: add.8b v0, v0, v0
+# CHECK: add.16b v0, v0, v0
+# CHECK: add.4h v0, v0, v0
+# CHECK: add.8h v0, v0, v0
+# CHECK: add.2s v0, v0, v0
+# CHECK: add.4s v0, v0, v0
+# CHECK: add.2d v0, v0, v0
+
+0x41 0x84 0xe3 0x5e
+
+# CHECK: add d1, d2, d3
+
+0x00 0x40 0x20 0x0e
+0x00 0x40 0x20 0x4e
+0x00 0x40 0x60 0x0e
+0x00 0x40 0x60 0x4e
+0x00 0x40 0xa0 0x0e
+0x00 0x40 0xa0 0x4e
+
+# CHECK: addhn.8b v0, v0, v0
+# CHECK: addhn2.16b v0, v0, v0
+# CHECK: addhn.4h v0, v0, v0
+# CHECK: addhn2.8h v0, v0, v0
+# CHECK: addhn.2s v0, v0, v0
+# CHECK: addhn2.4s v0, v0, v0
+
+0x00 0xbc 0x20 0x0e
+0x00 0xbc 0x20 0x4e
+0x00 0xbc 0x60 0x0e
+0x00 0xbc 0x60 0x4e
+0x00 0xbc 0xa0 0x0e
+0x00 0xbc 0xa0 0x4e
+0x00 0xbc 0xe0 0x4e
+
+# CHECK: addp.8b v0, v0, v0
+# CHECK: addp.16b v0, v0, v0
+# CHECK: addp.4h v0, v0, v0
+# CHECK: addp.8h v0, v0, v0
+# CHECK: addp.2s v0, v0, v0
+# CHECK: addp.4s v0, v0, v0
+# CHECK: addp.2d v0, v0, v0
+
+0x00 0xb8 0xf1 0x5e
+
+# CHECK: addp.2d d0, v0
+
+0x00 0xb8 0x31 0x0e
+0x00 0xb8 0x31 0x4e
+0x00 0xb8 0x71 0x0e
+0x00 0xb8 0x71 0x4e
+0x00 0xb8 0xb1 0x4e
+
+# CHECK: addv.8b b0, v0
+# CHECK: addv.16b b0, v0
+# CHECK: addv.4h h0, v0
+# CHECK: addv.8h h0, v0
+# CHECK: addv.4s s0, v0
+
+
+# INS/DUP
+0x60 0x0c 0x08 0x4e
+0x60 0x0c 0x04 0x4e
+0x60 0x0c 0x04 0x0e
+0x60 0x0c 0x02 0x4e
+0x60 0x0c 0x02 0x0e
+0x60 0x0c 0x01 0x4e
+0x60 0x0c 0x01 0x0e
+
+# CHECK: dup.2d v0, x3
+# CHECK: dup.4s v0, w3
+# CHECK: dup.2s v0, w3
+# CHECK: dup.8h v0, w3
+# CHECK: dup.4h v0, w3
+# CHECK: dup.16b v0, w3
+# CHECK: dup.8b v0, w3
+
+0x60 0x04 0x18 0x4e
+0x60 0x04 0x0c 0x0e
+0x60 0x04 0x0c 0x4e
+0x60 0x04 0x06 0x0e
+0x60 0x04 0x06 0x4e
+0x60 0x04 0x03 0x0e
+0x60 0x04 0x03 0x4e
+
+# CHECK: dup.2d v0, v3[1]
+# CHECK: dup.2s v0, v3[1]
+# CHECK: dup.4s v0, v3[1]
+# CHECK: dup.4h v0, v3[1]
+# CHECK: dup.8h v0, v3[1]
+# CHECK: dup.8b v0, v3[1]
+# CHECK: dup.16b v0, v3[1]
+
+
+0x43 0x2c 0x14 0x4e
+0x43 0x2c 0x14 0x4e
+0x43 0x3c 0x14 0x0e
+0x43 0x3c 0x14 0x0e
+0x43 0x3c 0x18 0x4e
+0x43 0x3c 0x18 0x4e
+
+# CHECK: smov.s x3, v2[2]
+# CHECK: smov.s x3, v2[2]
+# CHECK: mov.s w3, v2[2]
+# CHECK: mov.s w3, v2[2]
+# CHECK: mov.d x3, v2[1]
+# CHECK: mov.d x3, v2[1]
+
+0xa2 0x1c 0x18 0x4e
+0xa2 0x1c 0x0c 0x4e
+0xa2 0x1c 0x06 0x4e
+0xa2 0x1c 0x03 0x4e
+
+0xa2 0x1c 0x18 0x4e
+0xa2 0x1c 0x0c 0x4e
+0xa2 0x1c 0x06 0x4e
+0xa2 0x1c 0x03 0x4e
+
+# CHECK: ins.d v2[1], x5
+# CHECK: ins.s v2[1], w5
+# CHECK: ins.h v2[1], w5
+# CHECK: ins.b v2[1], w5
+
+# CHECK: ins.d v2[1], x5
+# CHECK: ins.s v2[1], w5
+# CHECK: ins.h v2[1], w5
+# CHECK: ins.b v2[1], w5
+
+0xe2 0x45 0x18 0x6e
+0xe2 0x25 0x0c 0x6e
+0xe2 0x15 0x06 0x6e
+0xe2 0x0d 0x03 0x6e
+
+0xe2 0x05 0x18 0x6e
+0xe2 0x45 0x1c 0x6e
+0xe2 0x35 0x1e 0x6e
+0xe2 0x2d 0x15 0x6e
+
+# CHECK: ins.d v2[1], v15[1]
+# CHECK: ins.s v2[1], v15[1]
+# CHECK: ins.h v2[1], v15[1]
+# CHECK: ins.b v2[1], v15[1]
+
+# CHECK: ins.d v2[1], v15[0]
+# CHECK: ins.s v2[3], v15[2]
+# CHECK: ins.h v2[7], v15[3]
+# CHECK: ins.b v2[10], v15[5]
+
+0x00 0x1c 0x20 0x0e
+0x00 0x1c 0x20 0x4e
+
+# CHECK: and.8b v0, v0, v0
+# CHECK: and.16b v0, v0, v0
+
+0x00 0x1c 0x60 0x0e
+
+# CHECK: bic.8b v0, v0, v0
+
+0x00 0x8c 0x20 0x2e
+0x00 0x3c 0x20 0x0e
+0x00 0x34 0x20 0x0e
+0x00 0x34 0x20 0x2e
+0x00 0x3c 0x20 0x2e
+0x00 0x8c 0x20 0x0e
+0x00 0xd4 0xa0 0x2e
+0x00 0xec 0x20 0x2e
+0x00 0xec 0xa0 0x2e
+0x00 0xd4 0x20 0x2e
+0x00 0xd4 0x20 0x0e
+0x00 0xe4 0x20 0x0e
+0x00 0xe4 0x20 0x2e
+0x00 0xe4 0xa0 0x2e
+0x00 0xfc 0x20 0x2e
+0x00 0xc4 0x20 0x2e
+0x00 0xc4 0x20 0x0e
+0x00 0xf4 0x20 0x2e
+0x00 0xf4 0x20 0x0e
+0x00 0xc4 0xa0 0x2e
+0x00 0xc4 0xa0 0x0e
+0x00 0xf4 0xa0 0x2e
+0x00 0xf4 0xa0 0x0e
+0x00 0xcc 0x20 0x0e
+0x00 0xcc 0xa0 0x0e
+0x00 0xdc 0x20 0x0e
+0x00 0xdc 0x20 0x2e
+0x00 0xfc 0x20 0x0e
+0x00 0xfc 0xa0 0x0e
+0x00 0xd4 0xa0 0x0e
+0x00 0x94 0x20 0x0e
+0x00 0x94 0x20 0x2e
+0x00 0x9c 0x20 0x0e
+0x00 0x9c 0x20 0x2e
+0x00 0x7c 0x20 0x0e
+0x00 0x74 0x20 0x0e
+0x00 0x04 0x20 0x0e
+0x00 0x24 0x20 0x0e
+0x00 0xa4 0x20 0x0e
+0x00 0x64 0x20 0x0e
+0x00 0xac 0x20 0x0e
+0x00 0x6c 0x20 0x0e
+0x00 0x0c 0x20 0x0e
+0x00 0xb4 0x60 0x0e
+0x00 0xb4 0x60 0x2e
+0x00 0x5c 0x20 0x0e
+0x00 0x4c 0x20 0x0e
+0x00 0x2c 0x20 0x0e
+0x00 0x14 0x20 0x0e
+0x00 0x54 0x20 0x0e
+0x00 0x44 0x20 0x0e
+0x00 0x84 0x20 0x2e
+0x00 0x7c 0x20 0x2e
+0x00 0x74 0x20 0x2e
+0x00 0x04 0x20 0x2e
+0x00 0x24 0x20 0x2e
+0x00 0xa4 0x20 0x2e
+0x00 0x64 0x20 0x2e
+0x00 0xac 0x20 0x2e
+0x00 0x6c 0x20 0x2e
+0x00 0x0c 0x20 0x2e
+0x00 0x5c 0x20 0x2e
+0x00 0x4c 0x20 0x2e
+0x00 0x2c 0x20 0x2e
+0x00 0x14 0x20 0x2e
+0x00 0x54 0x20 0x2e
+0x00 0x44 0x20 0x2e
+
+# CHECK: cmeq.8b v0, v0, v0
+# CHECK: cmge.8b v0, v0, v0
+# CHECK: cmgt.8b v0, v0, v0
+# CHECK: cmhi.8b v0, v0, v0
+# CHECK: cmhs.8b v0, v0, v0
+# CHECK: cmtst.8b v0, v0, v0
+# CHECK: fabd.2s v0, v0, v0
+# CHECK: facge.2s v0, v0, v0
+# CHECK: facgt.2s v0, v0, v0
+# CHECK: faddp.2s v0, v0, v0
+# CHECK: fadd.2s v0, v0, v0
+# CHECK: fcmeq.2s v0, v0, v0
+# CHECK: fcmge.2s v0, v0, v0
+# CHECK: fcmgt.2s v0, v0, v0
+# CHECK: fdiv.2s v0, v0, v0
+# CHECK: fmaxnmp.2s v0, v0, v0
+# CHECK: fmaxnm.2s v0, v0, v0
+# CHECK: fmaxp.2s v0, v0, v0
+# CHECK: fmax.2s v0, v0, v0
+# CHECK: fminnmp.2s v0, v0, v0
+# CHECK: fminnm.2s v0, v0, v0
+# CHECK: fminp.2s v0, v0, v0
+# CHECK: fmin.2s v0, v0, v0
+# CHECK: fmla.2s v0, v0, v0
+# CHECK: fmls.2s v0, v0, v0
+# CHECK: fmulx.2s v0, v0, v0
+# CHECK: fmul.2s v0, v0, v0
+# CHECK: frecps.2s v0, v0, v0
+# CHECK: frsqrts.2s v0, v0, v0
+# CHECK: fsub.2s v0, v0, v0
+# CHECK: mla.8b v0, v0, v0
+# CHECK: mls.8b v0, v0, v0
+# CHECK: mul.8b v0, v0, v0
+# CHECK: pmul.8b v0, v0, v0
+# CHECK: saba.8b v0, v0, v0
+# CHECK: sabd.8b v0, v0, v0
+# CHECK: shadd.8b v0, v0, v0
+# CHECK: shsub.8b v0, v0, v0
+# CHECK: smaxp.8b v0, v0, v0
+# CHECK: smax.8b v0, v0, v0
+# CHECK: sminp.8b v0, v0, v0
+# CHECK: smin.8b v0, v0, v0
+# CHECK: sqadd.8b v0, v0, v0
+# CHECK: sqdmulh.4h v0, v0, v0
+# CHECK: sqrdmulh.4h v0, v0, v0
+# CHECK: sqrshl.8b v0, v0, v0
+# CHECK: sqshl.8b v0, v0, v0
+# CHECK: sqsub.8b v0, v0, v0
+# CHECK: srhadd.8b v0, v0, v0
+# CHECK: srshl.8b v0, v0, v0
+# CHECK: sshl.8b v0, v0, v0
+# CHECK: sub.8b v0, v0, v0
+# CHECK: uaba.8b v0, v0, v0
+# CHECK: uabd.8b v0, v0, v0
+# CHECK: uhadd.8b v0, v0, v0
+# CHECK: uhsub.8b v0, v0, v0
+# CHECK: umaxp.8b v0, v0, v0
+# CHECK: umax.8b v0, v0, v0
+# CHECK: uminp.8b v0, v0, v0
+# CHECK: umin.8b v0, v0, v0
+# CHECK: uqadd.8b v0, v0, v0
+# CHECK: uqrshl.8b v0, v0, v0
+# CHECK: uqshl.8b v0, v0, v0
+# CHECK: uqsub.8b v0, v0, v0
+# CHECK: urhadd.8b v0, v0, v0
+# CHECK: urshl.8b v0, v0, v0
+# CHECK: ushl.8b v0, v0, v0
+
+0x00 0x1c 0xe0 0x2e
+0x00 0x1c 0xa0 0x2e
+0x00 0x1c 0x60 0x2e
+0x00 0x1c 0x20 0x2e
+0x00 0x1c 0xe0 0x0e
+0x00 0x1c 0xa1 0x0e
+
+# CHECK: bif.8b v0, v0, v0
+# CHECK: bit.8b v0, v0, v0
+# CHECK: bsl.8b v0, v0, v0
+# CHECK: eor.8b v0, v0, v0
+# CHECK: orn.8b v0, v0, v0
+# CHECK: orr.8b v0, v0, v1
+
+0x00 0x68 0x20 0x0e
+0x00 0x68 0x20 0x4e
+0x00 0x68 0x60 0x0e
+0x00 0x68 0x60 0x4e
+0x00 0x68 0xa0 0x0e
+0x00 0x68 0xa0 0x4e
+
+# CHECK: sadalp.4h v0, v0
+# CHECK: sadalp.8h v0, v0
+# CHECK: sadalp.2s v0, v0
+# CHECK: sadalp.4s v0, v0
+# CHECK: sadalp.1d v0, v0
+# CHECK: sadalp.2d v0, v0
+
+0x00 0x48 0x20 0x0e
+0x00 0x48 0x20 0x2e
+0x00 0x58 0x20 0x0e
+0x00 0xf8 0xa0 0x0e
+0x00 0xc8 0x21 0x0e
+0x00 0xc8 0x21 0x2e
+0x00 0xb8 0x21 0x0e
+0x00 0xb8 0x21 0x2e
+0x00 0xa8 0x21 0x0e
+0x00 0xa8 0x21 0x2e
+0x00 0xa8 0xa1 0x0e
+0x00 0xa8 0xa1 0x2e
+0x00 0xb8 0xa1 0x0e
+0x00 0xb8 0xa1 0x2e
+0x00 0xf8 0xa0 0x2e
+0x00 0xd8 0xa1 0x0e
+0x00 0xd8 0xa1 0x2e
+0x00 0xf8 0xa1 0x2e
+0x00 0xb8 0x20 0x2e
+0x00 0x58 0x20 0x2e
+0x00 0x58 0x60 0x2e
+0x00 0x18 0x20 0x0e
+0x00 0x08 0x20 0x2e
+0x00 0x08 0x20 0x0e
+0x00 0x68 0x20 0x0e
+0x00 0x28 0x20 0x0e
+0x00 0xd8 0x21 0x0e
+0x00 0x38 0x21 0x2e
+0x00 0x78 0x20 0x0e
+0x00 0x78 0x20 0x2e
+0x00 0x48 0x21 0x0e
+0x00 0x28 0x21 0x2e
+0x00 0x38 0x20 0x0e
+0x00 0x68 0x20 0x2e
+0x00 0x28 0x20 0x2e
+0x00 0xd8 0x21 0x2e
+0x00 0x48 0x21 0x2e
+0x00 0xc8 0xa1 0x0e
+0x00 0xc8 0xa1 0x2e
+0x00 0x38 0x20 0x2e
+0x00 0x28 0x21 0x0e
+0x00 0x48 0x20 0x0e
+0x00 0x48 0x20 0x2e
+0x00 0x58 0x20 0x0e
+0x00 0xf8 0xa0 0x0e
+0x00 0xc8 0x21 0x0e
+0x00 0xc8 0x21 0x2e
+0x00 0xb8 0x21 0x0e
+0x00 0xb8 0x21 0x2e
+0x00 0xa8 0x21 0x0e
+0x00 0xa8 0x21 0x2e
+0x00 0xa8 0xa1 0x0e
+0x00 0xa8 0xa1 0x2e
+0x00 0xb8 0xa1 0x0e
+0x00 0xb8 0xa1 0x2e
+0x00 0xf8 0xa0 0x2e
+0x00 0xd8 0xa1 0x0e
+0x00 0xd8 0xa1 0x2e
+0x00 0xf8 0xa1 0x2e
+0x00 0xb8 0x20 0x2e
+0x00 0x58 0x20 0x2e
+0x00 0x58 0x60 0x2e
+0x00 0x18 0x20 0x0e
+0x00 0x08 0x20 0x2e
+0x00 0x08 0x20 0x0e
+0x00 0x68 0x20 0x0e
+0x00 0x28 0x20 0x0e
+0x00 0xd8 0x21 0x0e
+0x00 0x38 0x21 0x2e
+0x00 0x78 0x20 0x0e
+0x00 0x78 0x20 0x2e
+0x00 0x48 0x21 0x0e
+0x00 0x28 0x21 0x2e
+0x00 0x38 0x20 0x0e
+0x00 0x68 0x20 0x2e
+0x00 0x28 0x20 0x2e
+0x00 0xd8 0x21 0x2e
+0x00 0x48 0x21 0x2e
+0x00 0xc8 0xa1 0x0e
+0x00 0xc8 0xa1 0x2e
+0x00 0x38 0x20 0x2e
+0x00 0x28 0x21 0x0e
+
+# CHECK: cls.8b v0, v0
+# CHECK: clz.8b v0, v0
+# CHECK: cnt.8b v0, v0
+# CHECK: fabs.2s v0, v0
+# CHECK: fcvtas.2s v0, v0
+# CHECK: fcvtau.2s v0, v0
+# CHECK: fcvtms.2s v0, v0
+# CHECK: fcvtmu.2s v0, v0
+# CHECK: fcvtns.2s v0, v0
+# CHECK: fcvtnu.2s v0, v0
+# CHECK: fcvtps.2s v0, v0
+# CHECK: fcvtpu.2s v0, v0
+# CHECK: fcvtzs.2s v0, v0
+# CHECK: fcvtzu.2s v0, v0
+# CHECK: fneg.2s v0, v0
+# CHECK: frecpe.2s v0, v0
+# CHECK: frsqrte.2s v0, v0
+# CHECK: fsqrt.2s v0, v0
+# CHECK: neg.8b v0, v0
+# CHECK: mvn.8b v0, v0
+# CHECK: rbit.8b v0, v0
+# CHECK: rev16.8b v0, v0
+# CHECK: rev32.8b v0, v0
+# CHECK: rev64.8b v0, v0
+# CHECK: sadalp.4h v0, v0
+# CHECK: saddlp.4h v0, v0
+# CHECK: scvtf.2s v0, v0
+# CHECK: shll.8h v0, v0, #8
+# CHECK: sqabs.8b v0, v0
+# CHECK: sqneg.8b v0, v0
+# CHECK: sqxtn.8b v0, v0
+# CHECK: sqxtun.8b v0, v0
+# CHECK: suqadd.8b v0, v0
+# CHECK: uadalp.4h v0, v0
+# CHECK: uaddlp.4h v0, v0
+# CHECK: ucvtf.2s v0, v0
+# CHECK: uqxtn.8b v0, v0
+# CHECK: urecpe.2s v0, v0
+# CHECK: ursqrte.2s v0, v0
+# CHECK: usqadd.8b v0, v0
+# CHECK: xtn.8b v0, v0
+
+0x00 0x98 0x20 0x0e
+0x00 0x98 0x20 0x4e
+0x00 0x98 0x60 0x0e
+0x00 0x98 0x60 0x4e
+0x00 0x98 0xa0 0x0e
+0x00 0x98 0xa0 0x4e
+0x00 0x98 0xe0 0x4e
+
+# CHECK: cmeq.8b v0, v0, #0
+# CHECK: cmeq.16b v0, v0, #0
+# CHECK: cmeq.4h v0, v0, #0
+# CHECK: cmeq.8h v0, v0, #0
+# CHECK: cmeq.2s v0, v0, #0
+# CHECK: cmeq.4s v0, v0, #0
+# CHECK: cmeq.2d v0, v0, #0
+
+0x00 0x88 0x20 0x2e
+0x00 0x88 0x20 0x0e
+0x00 0x98 0x20 0x2e
+0x00 0xa8 0x20 0x0e
+0x00 0xd8 0xa0 0x0e
+0x00 0xc8 0xa0 0x2e
+0x00 0xc8 0xa0 0x0e
+0x00 0xd8 0xa0 0x2e
+0x00 0xe8 0xa0 0x0e
+
+# CHECK: cmge.8b v0, v0, #0
+# CHECK: cmgt.8b v0, v0, #0
+# CHECK: cmle.8b v0, v0, #0
+# CHECK: cmlt.8b v0, v0, #0
+# CHECK: fcmeq.2s v0, v0, #0
+# CHECK: fcmge.2s v0, v0, #0
+# CHECK: fcmgt.2s v0, v0, #0
+# CHECK: fcmle.2s v0, v0, #0
+# CHECK: fcmlt.2s v0, v0, #0
+
+0x00 0x78 0x21 0x0e
+0x00 0x78 0x21 0x4e
+0x00 0x78 0x61 0x0e
+0x00 0x78 0x61 0x4e
+0x00 0x68 0x21 0x0e
+0x00 0x68 0x21 0x4e
+0x00 0x68 0x61 0x0e
+0x00 0x68 0x61 0x4e
+0x00 0x68 0x61 0x2e
+0x00 0x68 0x61 0x6e
+
+# CHECK: fcvtl v0.4s, v0.4h
+# CHECK: fcvtl2 v0.4s, v0.8h
+# CHECK: fcvtl v0.2d, v0.2s
+# CHECK: fcvtl2 v0.2d, v0.4s
+# CHECK: fcvtn v0.4h, v0.4s
+# CHECK: fcvtn2 v0.8h, v0.4s
+# CHECK: fcvtn v0.2s, v0.2d
+# CHECK: fcvtn2 v0.4s, v0.2d
+# CHECK: fcvtxn v0.2s, v0.2d
+# CHECK: fcvtxn2 v0.4s, v0.2d
+
+#===-------------------------------------------------------------------------===
+# AdvSIMD modified immediate instructions
+#===-------------------------------------------------------------------------===
+
+0x20 0x14 0x00 0x2f
+0x20 0x34 0x00 0x2f
+0x20 0x54 0x00 0x2f
+0x20 0x74 0x00 0x2f
+
+# CHECK: bic.2s v0, #0x1
+# CHECK: bic.2s v0, #0x1, lsl #8
+# CHECK: bic.2s v0, #0x1, lsl #16
+# CHECK: bic.2s v0, #0x1, lsl #24
+
+0x20 0x94 0x00 0x2f
+0x20 0x94 0x00 0x2f
+0x20 0xb4 0x00 0x2f
+
+# CHECK: bic.4h v0, #0x1
+# CHECK: bic.4h v0, #0x1
+# FIXME: bic.4h v0, #0x1, lsl #8
+# "bic.4h" should be selected over "fcvtnu.2s v0, v1, #0"
+
+0x20 0x14 0x00 0x6f
+0x20 0x34 0x00 0x6f
+0x20 0x54 0x00 0x6f
+0x20 0x74 0x00 0x6f
+
+# CHECK: bic.4s v0, #0x1
+# CHECK: bic.4s v0, #0x1, lsl #8
+# CHECK: bic.4s v0, #0x1, lsl #16
+# CHECK: bic.4s v0, #0x1, lsl #24
+
+0x20 0x94 0x00 0x6f
+0x20 0xb4 0x00 0x6f
+
+# CHECK: bic.8h v0, #0x1
+# FIXME: bic.8h v0, #0x1, lsl #8
+# "bic.8h" should be selected over "fcvtnu.4s v0, v1, #0"
+
+0x00 0xf4 0x02 0x6f
+
+# CHECK: fmov.2d v0, #0.12500000
+
+0x00 0xf4 0x02 0x0f
+0x00 0xf4 0x02 0x4f
+
+# CHECK: fmov.2s v0, #0.12500000
+# CHECK: fmov.4s v0, #0.12500000
+
+0x20 0x14 0x00 0x0f
+0x20 0x34 0x00 0x0f
+0x20 0x54 0x00 0x0f
+0x20 0x74 0x00 0x0f
+
+# CHECK: orr.2s v0, #0x1
+# CHECK: orr.2s v0, #0x1, lsl #8
+# CHECK: orr.2s v0, #0x1, lsl #16
+# CHECK: orr.2s v0, #0x1, lsl #24
+
+0x20 0x94 0x00 0x0f
+0x20 0xb4 0x00 0x0f
+
+# CHECK: orr.4h v0, #0x1
+# FIXME: orr.4h v0, #0x1, lsl #8
+# "orr.4h" should be selected over "fcvtns.2s v0, v1, #0"
+
+0x20 0x14 0x00 0x4f
+0x20 0x34 0x00 0x4f
+0x20 0x54 0x00 0x4f
+0x20 0x74 0x00 0x4f
+
+# CHECK: orr.4s v0, #0x1
+# CHECK: orr.4s v0, #0x1, lsl #8
+# CHECK: orr.4s v0, #0x1, lsl #16
+# CHECK: orr.4s v0, #0x1, lsl #24
+
+0x20 0x94 0x00 0x4f
+0x20 0xb4 0x00 0x4f
+
+# CHECK: orr.8h v0, #0x1
+# CHECK: orr.8h v0, #0x1, lsl #8
+
+0x21 0x70 0x40 0x0c
+0x42 0xa0 0x40 0x4c
+0x64 0x64 0x40 0x0c
+0x87 0x24 0x40 0x4c
+0x0c 0xa8 0x40 0x0c
+0x0a 0x68 0x40 0x4c
+0x2d 0xac 0x40 0x0c
+0x4f 0x7c 0x40 0x4c
+0xe0 0x03 0x40 0x0d
+
+# CHECK: ld1.8b { v1 }, [x1]
+# CHECK: ld1.16b { v2, v3 }, [x2]
+# CHECK: ld1.4h { v4, v5, v6 }, [x3]
+# CHECK: ld1.8h { v7, v8, v9, v10 }, [x4]
+# CHECK: ld1.2s { v12, v13 }, [x0]
+# CHECK: ld1.4s { v10, v11, v12 }, [x0]
+# CHECK: ld1.1d { v13, v14 }, [x1]
+# CHECK: ld1.2d { v15 }, [x2]
+# CHECK: ld1.b { v0 }[0], [sp]
+
+0x41 0x70 0xdf 0x0c
+0x41 0xa0 0xdf 0x0c
+0x41 0x60 0xdf 0x0c
+0x41 0x20 0xdf 0x0c
+0x42 0x70 0xdf 0x4c
+0x42 0xa0 0xdf 0x4c
+0x42 0x60 0xdf 0x4c
+0x42 0x20 0xdf 0x4c
+0x64 0x74 0xdf 0x0c
+0x64 0xa4 0xdf 0x0c
+0x64 0x64 0xdf 0x0c
+0x64 0x24 0xdf 0x0c
+0x87 0x74 0xdf 0x4c
+0x87 0xa4 0xdf 0x4c
+0x87 0x64 0xdf 0x4c
+0x87 0x24 0xdf 0x4c
+0x0c 0x78 0xdf 0x0c
+0x0c 0xa8 0xdf 0x0c
+0x0c 0x68 0xdf 0x0c
+0x0c 0x28 0xdf 0x0c
+0x0a 0x78 0xdf 0x4c
+0x0a 0xa8 0xdf 0x4c
+0x0a 0x68 0xdf 0x4c
+0x0a 0x28 0xdf 0x4c
+0x2d 0x7c 0xdf 0x0c
+0x2d 0xac 0xdf 0x0c
+0x2d 0x6c 0xdf 0x0c
+0x2d 0x2c 0xdf 0x0c
+0x4f 0x7c 0xdf 0x4c
+0x4f 0xac 0xdf 0x4c
+0x4f 0x6c 0xdf 0x4c
+0x4f 0x2c 0xdf 0x4c
+
+# CHECK: ld1.8b { v1 }, [x2], #8
+# CHECK: ld1.8b { v1, v2 }, [x2], #16
+# CHECK: ld1.8b { v1, v2, v3 }, [x2], #24
+# CHECK: ld1.8b { v1, v2, v3, v4 }, [x2], #32
+# CHECK: ld1.16b { v2 }, [x2], #16
+# CHECK: ld1.16b { v2, v3 }, [x2], #32
+# CHECK: ld1.16b { v2, v3, v4 }, [x2], #48
+# CHECK: ld1.16b { v2, v3, v4, v5 }, [x2], #64
+# CHECK: ld1.4h { v4 }, [x3], #8
+# CHECK: ld1.4h { v4, v5 }, [x3], #16
+# CHECK: ld1.4h { v4, v5, v6 }, [x3], #24
+# CHECK: ld1.4h { v4, v5, v6, v7 }, [x3], #32
+# CHECK: ld1.8h { v7 }, [x4], #16
+# CHECK: ld1.8h { v7, v8 }, [x4], #32
+# CHECK: ld1.8h { v7, v8, v9 }, [x4], #48
+# CHECK: ld1.8h { v7, v8, v9, v10 }, [x4], #64
+# CHECK: ld1.2s { v12 }, [x0], #8
+# CHECK: ld1.2s { v12, v13 }, [x0], #16
+# CHECK: ld1.2s { v12, v13, v14 }, [x0], #24
+# CHECK: ld1.2s { v12, v13, v14, v15 }, [x0], #32
+# CHECK: ld1.4s { v10 }, [x0], #16
+# CHECK: ld1.4s { v10, v11 }, [x0], #32
+# CHECK: ld1.4s { v10, v11, v12 }, [x0], #48
+# CHECK: ld1.4s { v10, v11, v12, v13 }, [x0], #64
+# CHECK: ld1.1d { v13 }, [x1], #8
+# CHECK: ld1.1d { v13, v14 }, [x1], #16
+# CHECK: ld1.1d { v13, v14, v15 }, [x1], #24
+# CHECK: ld1.1d { v13, v14, v15, v16 }, [x1], #32
+# CHECK: ld1.2d { v15 }, [x2], #16
+# CHECK: ld1.2d { v15, v16 }, [x2], #32
+# CHECK: ld1.2d { v15, v16, v17 }, [x2], #48
+# CHECK: ld1.2d { v15, v16, v17, v18 }, [x2], #64
+
+0x21 0x70 0x00 0x0c
+0x42 0xa0 0x00 0x4c
+0x64 0x64 0x00 0x0c
+0x87 0x24 0x00 0x4c
+0x0c 0xa8 0x00 0x0c
+0x0a 0x68 0x00 0x4c
+0x2d 0xac 0x00 0x0c
+0x4f 0x7c 0x00 0x4c
+
+# CHECK: st1.8b { v1 }, [x1]
+# CHECK: st1.16b { v2, v3 }, [x2]
+# CHECK: st1.4h { v4, v5, v6 }, [x3]
+# CHECK: st1.8h { v7, v8, v9, v10 }, [x4]
+# CHECK: st1.2s { v12, v13 }, [x0]
+# CHECK: st1.4s { v10, v11, v12 }, [x0]
+# CHECK: st1.1d { v13, v14 }, [x1]
+# CHECK: st1.2d { v15 }, [x2]
+
+0x61 0x08 0x40 0x0d
+0x82 0x84 0x40 0x4d
+0xa3 0x58 0x40 0x0d
+0xc4 0x80 0x40 0x4d
+
+# CHECK: ld1.b { v1 }[2], [x3]
+# CHECK: ld1.d { v2 }[1], [x4]
+# CHECK: ld1.h { v3 }[3], [x5]
+# CHECK: ld1.s { v4 }[2], [x6]
+
+0x61 0x08 0xdf 0x0d
+0x82 0x84 0xdf 0x4d
+0xa3 0x58 0xdf 0x0d
+0xc4 0x80 0xdf 0x4d
+
+# CHECK: ld1.b { v1 }[2], [x3], #1
+# CHECK: ld1.d { v2 }[1], [x4], #8
+# CHECK: ld1.h { v3 }[3], [x5], #2
+# CHECK: ld1.s { v4 }[2], [x6], #4
+
+0x61 0x08 0x00 0x0d
+0x82 0x84 0x00 0x4d
+0xa3 0x58 0x00 0x0d
+0xc4 0x80 0x00 0x4d
+
+# CHECK: st1.b { v1 }[2], [x3]
+# CHECK: st1.d { v2 }[1], [x4]
+# CHECK: st1.h { v3 }[3], [x5]
+# CHECK: st1.s { v4 }[2], [x6]
+
+0x61 0x08 0x9f 0x0d
+0x82 0x84 0x9f 0x4d
+0xa3 0x58 0x9f 0x0d
+0xc4 0x80 0x9f 0x4d
+
+# CHECK: st1.b { v1 }[2], [x3], #1
+# CHECK: st1.d { v2 }[1], [x4], #8
+# CHECK: st1.h { v3 }[3], [x5], #2
+# CHECK: st1.s { v4 }[2], [x6], #4
+
+0x61 0x08 0xc4 0x0d
+0x82 0x84 0xc5 0x4d
+0xa3 0x58 0xc6 0x0d
+0xc4 0x80 0xc7 0x4d
+
+# CHECK: ld1.b { v1 }[2], [x3], x4
+# CHECK: ld1.d { v2 }[1], [x4], x5
+# CHECK: ld1.h { v3 }[3], [x5], x6
+# CHECK: ld1.s { v4 }[2], [x6], x7
+
+0x61 0x08 0x84 0x0d
+0x82 0x84 0x85 0x4d
+0xa3 0x58 0x86 0x0d
+0xc4 0x80 0x87 0x4d
+
+# CHECK: st1.b { v1 }[2], [x3], x4
+# CHECK: st1.d { v2 }[1], [x4], x5
+# CHECK: st1.h { v3 }[3], [x5], x6
+# CHECK: st1.s { v4 }[2], [x6], x7
+
+0x41 0x70 0xc3 0x0c
+0x42 0xa0 0xc4 0x4c
+0x64 0x64 0xc5 0x0c
+0x87 0x24 0xc6 0x4c
+0x0c 0xa8 0xc7 0x0c
+0x0a 0x68 0xc8 0x4c
+0x2d 0xac 0xc9 0x0c
+0x4f 0x7c 0xca 0x4c
+
+# CHECK: ld1.8b { v1 }, [x2], x3
+# CHECK: ld1.16b { v2, v3 }, [x2], x4
+# CHECK: ld1.4h { v4, v5, v6 }, [x3], x5
+# CHECK: ld1.8h { v7, v8, v9, v10 }, [x4], x6
+# CHECK: ld1.2s { v12, v13 }, [x0], x7
+# CHECK: ld1.4s { v10, v11, v12 }, [x0], x8
+# CHECK: ld1.1d { v13, v14 }, [x1], x9
+# CHECK: ld1.2d { v15 }, [x2], x10
+
+0x41 0x70 0x83 0x0c
+0x42 0xa0 0x84 0x4c
+0x64 0x64 0x85 0x0c
+0x87 0x24 0x86 0x4c
+0x0c 0xa8 0x87 0x0c
+0x0a 0x68 0x88 0x4c
+0x2d 0xac 0x89 0x0c
+0x4f 0x7c 0x8a 0x4c
+
+# CHECK: st1.8b { v1 }, [x2], x3
+# CHECK: st1.16b { v2, v3 }, [x2], x4
+# CHECK: st1.4h { v4, v5, v6 }, [x3], x5
+# CHECK: st1.8h { v7, v8, v9, v10 }, [x4], x6
+# CHECK: st1.2s { v12, v13 }, [x0], x7
+# CHECK: st1.4s { v10, v11, v12 }, [x0], x8
+# CHECK: st1.1d { v13, v14 }, [x1], x9
+# CHECK: st1.2d { v15 }, [x2], x10
+
+0x41 0x70 0x9f 0x0c
+0x41 0xa0 0x9f 0x0c
+0x41 0x60 0x9f 0x0c
+0x41 0x20 0x9f 0x0c
+0x42 0x70 0x9f 0x4c
+0x42 0xa0 0x9f 0x4c
+0x42 0x60 0x9f 0x4c
+0x42 0x20 0x9f 0x4c
+0x64 0x74 0x9f 0x0c
+0x64 0xa4 0x9f 0x0c
+0x64 0x64 0x9f 0x0c
+0x64 0x24 0x9f 0x0c
+0x87 0x74 0x9f 0x4c
+0x87 0xa4 0x9f 0x4c
+0x87 0x64 0x9f 0x4c
+0x87 0x24 0x9f 0x4c
+0x0c 0x78 0x9f 0x0c
+0x0c 0xa8 0x9f 0x0c
+0x0c 0x68 0x9f 0x0c
+0x0c 0x28 0x9f 0x0c
+0x0a 0x78 0x9f 0x4c
+0x0a 0xa8 0x9f 0x4c
+0x0a 0x68 0x9f 0x4c
+0x0a 0x28 0x9f 0x4c
+0x2d 0x7c 0x9f 0x0c
+0x2d 0xac 0x9f 0x0c
+0x2d 0x6c 0x9f 0x0c
+0x2d 0x2c 0x9f 0x0c
+0x4f 0x7c 0x9f 0x4c
+0x4f 0xac 0x9f 0x4c
+0x4f 0x6c 0x9f 0x4c
+0x4f 0x2c 0x9f 0x4c
+
+# CHECK: st1.8b { v1 }, [x2], #8
+# CHECK: st1.8b { v1, v2 }, [x2], #16
+# CHECK: st1.8b { v1, v2, v3 }, [x2], #24
+# CHECK: st1.8b { v1, v2, v3, v4 }, [x2], #32
+# CHECK: st1.16b { v2 }, [x2], #16
+# CHECK: st1.16b { v2, v3 }, [x2], #32
+# CHECK: st1.16b { v2, v3, v4 }, [x2], #48
+# CHECK: st1.16b { v2, v3, v4, v5 }, [x2], #64
+# CHECK: st1.4h { v4 }, [x3], #8
+# CHECK: st1.4h { v4, v5 }, [x3], #16
+# CHECK: st1.4h { v4, v5, v6 }, [x3], #24
+# CHECK: st1.4h { v4, v5, v6, v7 }, [x3], #32
+# CHECK: st1.8h { v7 }, [x4], #16
+# CHECK: st1.8h { v7, v8 }, [x4], #32
+# CHECK: st1.8h { v7, v8, v9 }, [x4], #48
+# CHECK: st1.8h { v7, v8, v9, v10 }, [x4], #64
+# CHECK: st1.2s { v12 }, [x0], #8
+# CHECK: st1.2s { v12, v13 }, [x0], #16
+# CHECK: st1.2s { v12, v13, v14 }, [x0], #24
+# CHECK: st1.2s { v12, v13, v14, v15 }, [x0], #32
+# CHECK: st1.4s { v10 }, [x0], #16
+# CHECK: st1.4s { v10, v11 }, [x0], #32
+# CHECK: st1.4s { v10, v11, v12 }, [x0], #48
+# CHECK: st1.4s { v10, v11, v12, v13 }, [x0], #64
+# CHECK: st1.1d { v13 }, [x1], #8
+# CHECK: st1.1d { v13, v14 }, [x1], #16
+# CHECK: st1.1d { v13, v14, v15 }, [x1], #24
+# CHECK: st1.1d { v13, v14, v15, v16 }, [x1], #32
+# CHECK: st1.2d { v15 }, [x2], #16
+# CHECK: st1.2d { v15, v16 }, [x2], #32
+# CHECK: st1.2d { v15, v16, v17 }, [x2], #48
+# CHECK: st1.2d { v15, v16, v17, v18 }, [x2], #64
+
+0x21 0xc0 0x40 0x0d
+0x21 0xc0 0xc2 0x0d
+0x64 0xc4 0x40 0x0d
+0x64 0xc4 0xc5 0x0d
+0xa9 0xc8 0x40 0x0d
+0xa9 0xc8 0xc6 0x0d
+0xec 0xcc 0x40 0x0d
+0xec 0xcc 0xc8 0x0d
+
+# CHECK: ld1r.8b { v1 }, [x1]
+# CHECK: ld1r.8b { v1 }, [x1], x2
+# CHECK: ld1r.4h { v4 }, [x3]
+# CHECK: ld1r.4h { v4 }, [x3], x5
+# CHECK: ld1r.2s { v9 }, [x5]
+# CHECK: ld1r.2s { v9 }, [x5], x6
+# CHECK: ld1r.1d { v12 }, [x7]
+# CHECK: ld1r.1d { v12 }, [x7], x8
+
+0x21 0xc0 0xdf 0x0d
+0x21 0xc4 0xdf 0x0d
+0x21 0xc8 0xdf 0x0d
+0x21 0xcc 0xdf 0x0d
+
+# CHECK: ld1r.8b { v1 }, [x1], #1
+# CHECK: ld1r.4h { v1 }, [x1], #2
+# CHECK: ld1r.2s { v1 }, [x1], #4
+# CHECK: ld1r.1d { v1 }, [x1], #8
+
+0x45 0x80 0x40 0x4c
+0x0a 0x88 0x40 0x0c
+
+# CHECK: ld2.16b { v5, v6 }, [x2]
+# CHECK: ld2.2s { v10, v11 }, [x0]
+
+0x45 0x80 0x00 0x4c
+0x0a 0x88 0x00 0x0c
+
+# CHECK: st2.16b { v5, v6 }, [x2]
+# CHECK: st2.2s { v10, v11 }, [x0]
+
+0x61 0x08 0x20 0x0d
+0x82 0x84 0x20 0x4d
+0xc3 0x50 0x20 0x0d
+0xe4 0x90 0x20 0x4d
+
+# CHECK: st2.b { v1, v2 }[2], [x3]
+# CHECK: st2.d { v2, v3 }[1], [x4]
+# CHECK: st2.h { v3, v4 }[2], [x6]
+# CHECK: st2.s { v4, v5 }[3], [x7]
+
+0x61 0x08 0xbf 0x0d
+0x82 0x84 0xbf 0x4d
+0xa3 0x58 0xbf 0x0d
+0xc4 0x80 0xbf 0x4d
+
+# CHECK: st2.b { v1, v2 }[2], [x3], #2
+# CHECK: st2.d { v2, v3 }[1], [x4], #16
+# CHECK: st2.h { v3, v4 }[3], [x5], #4
+# CHECK: st2.s { v4, v5 }[2], [x6], #8
+
+0x61 0x08 0x60 0x0d
+0x82 0x84 0x60 0x4d
+0xc3 0x50 0x60 0x0d
+0xe4 0x90 0x60 0x4d
+
+# CHECK: ld2.b { v1, v2 }[2], [x3]
+# CHECK: ld2.d { v2, v3 }[1], [x4]
+# CHECK: ld2.h { v3, v4 }[2], [x6]
+# CHECK: ld2.s { v4, v5 }[3], [x7]
+
+0x61 0x08 0xff 0x0d
+0x82 0x84 0xff 0x4d
+0xa3 0x58 0xff 0x0d
+0xc4 0x80 0xff 0x4d
+
+# CHECK: ld2.b { v1, v2 }[2], [x3], #2
+# CHECK: ld2.d { v2, v3 }[1], [x4], #16
+# CHECK: ld2.h { v3, v4 }[3], [x5], #4
+# CHECK: ld2.s { v4, v5 }[2], [x6], #8
+
+0x61 0x08 0xe4 0x0d
+0x82 0x84 0xe6 0x4d
+0xa3 0x58 0xe8 0x0d
+0xc4 0x80 0xea 0x4d
+
+# CHECK: ld2.b { v1, v2 }[2], [x3], x4
+# CHECK: ld2.d { v2, v3 }[1], [x4], x6
+# CHECK: ld2.h { v3, v4 }[3], [x5], x8
+# CHECK: ld2.s { v4, v5 }[2], [x6], x10
+
+0x61 0x08 0xa4 0x0d
+0x82 0x84 0xa6 0x4d
+0xa3 0x58 0xa8 0x0d
+0xc4 0x80 0xaa 0x4d
+
+# CHECK: st2.b { v1, v2 }[2], [x3], x4
+# CHECK: st2.d { v2, v3 }[1], [x4], x6
+# CHECK: st2.h { v3, v4 }[3], [x5], x8
+# CHECK: st2.s { v4, v5 }[2], [x6], x10
+
+0x64 0x84 0xc5 0x0c
+0x0c 0x88 0xc7 0x0c
+
+# CHECK: ld2.4h { v4, v5 }, [x3], x5
+# CHECK: ld2.2s { v12, v13 }, [x0], x7
+
+0x00 0x80 0xdf 0x0c
+0x00 0x80 0xdf 0x4c
+0x00 0x84 0xdf 0x0c
+0x00 0x84 0xdf 0x4c
+0x00 0x88 0xdf 0x0c
+0x00 0x88 0xdf 0x4c
+0x00 0x8c 0xdf 0x4c
+
+# CHECK: ld2.8b { v0, v1 }, [x0], #16
+# CHECK: ld2.16b { v0, v1 }, [x0], #32
+# CHECK: ld2.4h { v0, v1 }, [x0], #16
+# CHECK: ld2.8h { v0, v1 }, [x0], #32
+# CHECK: ld2.2s { v0, v1 }, [x0], #16
+# CHECK: ld2.4s { v0, v1 }, [x0], #32
+# CHECK: ld2.2d { v0, v1 }, [x0], #32
+
+0x64 0x84 0x85 0x0c
+0x0c 0x88 0x87 0x0c
+
+# CHECK: st2.4h { v4, v5 }, [x3], x5
+# CHECK: st2.2s { v12, v13 }, [x0], x7
+
+0x00 0x80 0x9f 0x0c
+0x00 0x80 0x9f 0x4c
+0x00 0x84 0x9f 0x0c
+0x00 0x84 0x9f 0x4c
+0x00 0x88 0x9f 0x0c
+0x00 0x88 0x9f 0x4c
+0x00 0x8c 0x9f 0x4c
+
+# CHECK: st2.8b { v0, v1 }, [x0], #16
+# CHECK: st2.16b { v0, v1 }, [x0], #32
+# CHECK: st2.4h { v0, v1 }, [x0], #16
+# CHECK: st2.8h { v0, v1 }, [x0], #32
+# CHECK: st2.2s { v0, v1 }, [x0], #16
+# CHECK: st2.4s { v0, v1 }, [x0], #32
+# CHECK: st2.2d { v0, v1 }, [x0], #32
+
+0x21 0xc0 0x60 0x0d
+0x21 0xc0 0xe2 0x0d
+0x21 0xc0 0x60 0x4d
+0x21 0xc0 0xe2 0x4d
+0x21 0xc4 0x60 0x0d
+0x21 0xc4 0xe2 0x0d
+0x21 0xc4 0x60 0x4d
+0x21 0xc4 0xe2 0x4d
+0x21 0xc8 0x60 0x0d
+0x21 0xc8 0xe2 0x0d
+0x21 0xcc 0x60 0x4d
+0x21 0xcc 0xe2 0x4d
+0x21 0xcc 0x60 0x0d
+0x21 0xcc 0xe2 0x0d
+
+# CHECK: ld2r.8b { v1, v2 }, [x1]
+# CHECK: ld2r.8b { v1, v2 }, [x1], x2
+# CHECK: ld2r.16b { v1, v2 }, [x1]
+# CHECK: ld2r.16b { v1, v2 }, [x1], x2
+# CHECK: ld2r.4h { v1, v2 }, [x1]
+# CHECK: ld2r.4h { v1, v2 }, [x1], x2
+# CHECK: ld2r.8h { v1, v2 }, [x1]
+# CHECK: ld2r.8h { v1, v2 }, [x1], x2
+# CHECK: ld2r.2s { v1, v2 }, [x1]
+# CHECK: ld2r.2s { v1, v2 }, [x1], x2
+# CHECK: ld2r.2d { v1, v2 }, [x1]
+# CHECK: ld2r.2d { v1, v2 }, [x1], x2
+# CHECK: ld2r.1d { v1, v2 }, [x1]
+# CHECK: ld2r.1d { v1, v2 }, [x1], x2
+
+0x21 0xc0 0xff 0x0d
+0x21 0xc0 0xff 0x4d
+0x21 0xc4 0xff 0x0d
+0x21 0xc4 0xff 0x4d
+0x21 0xc8 0xff 0x0d
+0x21 0xcc 0xff 0x4d
+0x21 0xcc 0xff 0x0d
+
+# CHECK: ld2r.8b { v1, v2 }, [x1], #2
+# CHECK: ld2r.16b { v1, v2 }, [x1], #2
+# CHECK: ld2r.4h { v1, v2 }, [x1], #4
+# CHECK: ld2r.8h { v1, v2 }, [x1], #4
+# CHECK: ld2r.2s { v1, v2 }, [x1], #8
+# CHECK: ld2r.2d { v1, v2 }, [x1], #16
+# CHECK: ld2r.1d { v1, v2 }, [x1], #16
+
+0x21 0x40 0x40 0x0c
+0x45 0x40 0x40 0x4c
+0x0a 0x48 0x40 0x0c
+
+# CHECK: ld3.8b { v1, v2, v3 }, [x1]
+# CHECK: ld3.16b { v5, v6, v7 }, [x2]
+# CHECK: ld3.2s { v10, v11, v12 }, [x0]
+
+0x21 0x40 0x00 0x0c
+0x45 0x40 0x00 0x4c
+0x0a 0x48 0x00 0x0c
+
+# CHECK: st3.8b { v1, v2, v3 }, [x1]
+# CHECK: st3.16b { v5, v6, v7 }, [x2]
+# CHECK: st3.2s { v10, v11, v12 }, [x0]
+
+0x61 0x28 0xc4 0x0d
+0x82 0xa4 0xc5 0x4d
+0xa3 0x78 0xc6 0x0d
+0xc4 0xa0 0xc7 0x4d
+
+# CHECK: ld3.b { v1, v2, v3 }[2], [x3], x4
+# CHECK: ld3.d { v2, v3, v4 }[1], [x4], x5
+# CHECK: ld3.h { v3, v4, v5 }[3], [x5], x6
+# CHECK: ld3.s { v4, v5, v6 }[2], [x6], x7
+
+0x61 0x28 0x84 0x0d
+0x82 0xa4 0x85 0x4d
+0xa3 0x78 0x86 0x0d
+0xc4 0xa0 0x87 0x4d
+
+# CHECK: st3.b { v1, v2, v3 }[2], [x3], x4
+# CHECK: st3.d { v2, v3, v4 }[1], [x4], x5
+# CHECK: st3.h { v3, v4, v5 }[3], [x5], x6
+# CHECK: st3.s { v4, v5, v6 }[2], [x6], x7
+
+0x61 0x28 0x9f 0x0d
+0x82 0xa4 0x9f 0x4d
+0xa3 0x78 0x9f 0x0d
+0xc4 0xa0 0x9f 0x4d
+
+# CHECK: st3.b { v1, v2, v3 }[2], [x3], #3
+# CHECK: st3.d { v2, v3, v4 }[1], [x4], #24
+# CHECK: st3.h { v3, v4, v5 }[3], [x5], #6
+# CHECK: st3.s { v4, v5, v6 }[2], [x6], #12
+
+0x41 0x40 0xc3 0x0c
+0x42 0x40 0xc4 0x4c
+0x64 0x44 0xc5 0x0c
+0x87 0x44 0xc6 0x4c
+0x0c 0x48 0xc7 0x0c
+0x0a 0x48 0xc8 0x4c
+0x4f 0x4c 0xca 0x4c
+
+# CHECK: ld3.8b { v1, v2, v3 }, [x2], x3
+# CHECK: ld3.16b { v2, v3, v4 }, [x2], x4
+# CHECK: ld3.4h { v4, v5, v6 }, [x3], x5
+# CHECK: ld3.8h { v7, v8, v9 }, [x4], x6
+# CHECK: ld3.2s { v12, v13, v14 }, [x0], x7
+# CHECK: ld3.4s { v10, v11, v12 }, [x0], x8
+# CHECK: ld3.2d { v15, v16, v17 }, [x2], x10
+
+0x00 0x40 0xdf 0x0c
+0x00 0x40 0xdf 0x4c
+0x00 0x44 0xdf 0x0c
+0x00 0x44 0xdf 0x4c
+0x00 0x48 0xdf 0x0c
+0x00 0x48 0xdf 0x4c
+0x00 0x4c 0xdf 0x4c
+
+# CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
+# CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
+# CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
+# CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
+# CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
+# CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
+# CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
+
+0x41 0x40 0x83 0x0c
+0x42 0x40 0x84 0x4c
+0x64 0x44 0x85 0x0c
+0x87 0x44 0x86 0x4c
+0x0c 0x48 0x87 0x0c
+0x0a 0x48 0x88 0x4c
+0x4f 0x4c 0x8a 0x4c
+
+# CHECK: st3.8b { v1, v2, v3 }, [x2], x3
+# CHECK: st3.16b { v2, v3, v4 }, [x2], x4
+# CHECK: st3.4h { v4, v5, v6 }, [x3], x5
+# CHECK: st3.8h { v7, v8, v9 }, [x4], x6
+# CHECK: st3.2s { v12, v13, v14 }, [x0], x7
+# CHECK: st3.4s { v10, v11, v12 }, [x0], x8
+# CHECK: st3.2d { v15, v16, v17 }, [x2], x10
+
+0x00 0x40 0x9f 0x0c
+0x00 0x40 0x9f 0x4c
+0x00 0x44 0x9f 0x0c
+0x00 0x44 0x9f 0x4c
+0x00 0x48 0x9f 0x0c
+0x00 0x48 0x9f 0x4c
+0x00 0x4c 0x9f 0x4c
+
+# CHECK: st3.8b { v0, v1, v2 }, [x0], #24
+# CHECK: st3.16b { v0, v1, v2 }, [x0], #48
+# CHECK: st3.4h { v0, v1, v2 }, [x0], #24
+# CHECK: st3.8h { v0, v1, v2 }, [x0], #48
+# CHECK: st3.2s { v0, v1, v2 }, [x0], #24
+# CHECK: st3.4s { v0, v1, v2 }, [x0], #48
+# CHECK: st3.2d { v0, v1, v2 }, [x0], #48
+
+0x61 0x28 0x40 0x0d
+0x82 0xa4 0x40 0x4d
+0xc3 0x70 0x40 0x0d
+0xe4 0xb0 0x40 0x4d
+
+# CHECK: ld3.b { v1, v2, v3 }[2], [x3]
+# CHECK: ld3.d { v2, v3, v4 }[1], [x4]
+# CHECK: ld3.h { v3, v4, v5 }[2], [x6]
+# CHECK: ld3.s { v4, v5, v6 }[3], [x7]
+
+0x61 0x28 0xdf 0x0d
+0x82 0xa4 0xdf 0x4d
+0xa3 0x78 0xdf 0x0d
+0xc4 0xa0 0xdf 0x4d
+
+# CHECK: ld3.b { v1, v2, v3 }[2], [x3], #3
+# CHECK: ld3.d { v2, v3, v4 }[1], [x4], #24
+# CHECK: ld3.h { v3, v4, v5 }[3], [x5], #6
+# CHECK: ld3.s { v4, v5, v6 }[2], [x6], #12
+
+0x61 0x28 0x00 0x0d
+0x82 0xa4 0x00 0x4d
+0xc3 0x70 0x00 0x0d
+0xe4 0xb0 0x00 0x4d
+
+# CHECK: st3.b { v1, v2, v3 }[2], [x3]
+# CHECK: st3.d { v2, v3, v4 }[1], [x4]
+# CHECK: st3.h { v3, v4, v5 }[2], [x6]
+# CHECK: st3.s { v4, v5, v6 }[3], [x7]
+
+0x21 0xe0 0x40 0x0d
+0x21 0xe0 0xc2 0x0d
+0x21 0xe0 0x40 0x4d
+0x21 0xe0 0xc2 0x4d
+0x21 0xe4 0x40 0x0d
+0x21 0xe4 0xc2 0x0d
+0x21 0xe4 0x40 0x4d
+0x21 0xe4 0xc2 0x4d
+0x21 0xe8 0x40 0x0d
+0x21 0xe8 0xc2 0x0d
+0x21 0xec 0x40 0x4d
+0x21 0xec 0xc2 0x4d
+0x21 0xec 0x40 0x0d
+0x21 0xec 0xc2 0x0d
+
+# CHECK: ld3r.8b { v1, v2, v3 }, [x1]
+# CHECK: ld3r.8b { v1, v2, v3 }, [x1], x2
+# CHECK: ld3r.16b { v1, v2, v3 }, [x1]
+# CHECK: ld3r.16b { v1, v2, v3 }, [x1], x2
+# CHECK: ld3r.4h { v1, v2, v3 }, [x1]
+# CHECK: ld3r.4h { v1, v2, v3 }, [x1], x2
+# CHECK: ld3r.8h { v1, v2, v3 }, [x1]
+# CHECK: ld3r.8h { v1, v2, v3 }, [x1], x2
+# CHECK: ld3r.2s { v1, v2, v3 }, [x1]
+# CHECK: ld3r.2s { v1, v2, v3 }, [x1], x2
+# CHECK: ld3r.2d { v1, v2, v3 }, [x1]
+# CHECK: ld3r.2d { v1, v2, v3 }, [x1], x2
+# CHECK: ld3r.1d { v1, v2, v3 }, [x1]
+# CHECK: ld3r.1d { v1, v2, v3 }, [x1], x2
+
+0x21 0xe0 0xdf 0x0d
+0x21 0xe0 0xdf 0x4d
+0x21 0xe4 0xdf 0x0d
+0x21 0xe4 0xdf 0x4d
+0x21 0xe8 0xdf 0x0d
+0x21 0xec 0xdf 0x4d
+0x21 0xec 0xdf 0x0d
+
+# CHECK: ld3r.8b { v1, v2, v3 }, [x1], #3
+# CHECK: ld3r.16b { v1, v2, v3 }, [x1], #3
+# CHECK: ld3r.4h { v1, v2, v3 }, [x1], #6
+# CHECK: ld3r.8h { v1, v2, v3 }, [x1], #6
+# CHECK: ld3r.2s { v1, v2, v3 }, [x1], #12
+# CHECK: ld3r.2d { v1, v2, v3 }, [x1], #24
+# CHECK: ld3r.1d { v1, v2, v3 }, [x1], #24
+
+0x21 0x00 0x40 0x0c
+0x45 0x00 0x40 0x4c
+0x0a 0x08 0x40 0x0c
+
+# CHECK: ld4.8b { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4.16b { v5, v6, v7, v8 }, [x2]
+# CHECK: ld4.2s { v10, v11, v12, v13 }, [x0]
+
+0x21 0x00 0x00 0x0c
+0x45 0x00 0x00 0x4c
+0x0a 0x08 0x00 0x0c
+
+# CHECK: st4.8b { v1, v2, v3, v4 }, [x1]
+# CHECK: st4.16b { v5, v6, v7, v8 }, [x2]
+# CHECK: st4.2s { v10, v11, v12, v13 }, [x0]
+
+0x61 0x28 0xe4 0x0d
+0x82 0xa4 0xe5 0x4d
+0xa3 0x78 0xe6 0x0d
+0xc4 0xa0 0xe7 0x4d
+
+# CHECK: ld4.b { v1, v2, v3, v4 }[2], [x3], x4
+# CHECK: ld4.d { v2, v3, v4, v5 }[1], [x4], x5
+# CHECK: ld4.h { v3, v4, v5, v6 }[3], [x5], x6
+# CHECK: ld4.s { v4, v5, v6, v7 }[2], [x6], x7
+
+0x61 0x28 0xff 0x0d
+0x82 0xa4 0xff 0x4d
+0xa3 0x78 0xff 0x0d
+0xc4 0xa0 0xff 0x4d
+
+# CHECK: ld4.b { v1, v2, v3, v4 }[2], [x3], #4
+# CHECK: ld4.d { v2, v3, v4, v5 }[1], [x4], #32
+# CHECK: ld4.h { v3, v4, v5, v6 }[3], [x5], #8
+# CHECK: ld4.s { v4, v5, v6, v7 }[2], [x6], #16
+
+0x61 0x28 0xa4 0x0d
+0x82 0xa4 0xa5 0x4d
+0xa3 0x78 0xa6 0x0d
+0xc4 0xa0 0xa7 0x4d
+
+# CHECK: st4.b { v1, v2, v3, v4 }[2], [x3], x4
+# CHECK: st4.d { v2, v3, v4, v5 }[1], [x4], x5
+# CHECK: st4.h { v3, v4, v5, v6 }[3], [x5], x6
+# CHECK: st4.s { v4, v5, v6, v7 }[2], [x6], x7
+
+0x61 0x28 0xbf 0x0d
+0x82 0xa4 0xbf 0x4d
+0xa3 0x78 0xbf 0x0d
+0xc4 0xa0 0xbf 0x4d
+
+# CHECK: st4.b { v1, v2, v3, v4 }[2], [x3], #4
+# CHECK: st4.d { v2, v3, v4, v5 }[1], [x4], #32
+# CHECK: st4.h { v3, v4, v5, v6 }[3], [x5], #8
+# CHECK: st4.s { v4, v5, v6, v7 }[2], [x6], #16
+
+0x41 0x00 0xc3 0x0c
+0x42 0x00 0xc4 0x4c
+0x64 0x04 0xc5 0x0c
+0x87 0x04 0xc6 0x4c
+0x0c 0x08 0xc7 0x0c
+0x0a 0x08 0xc8 0x4c
+0x4f 0x0c 0xca 0x4c
+
+# CHECK: ld4.8b { v1, v2, v3, v4 }, [x2], x3
+# CHECK: ld4.16b { v2, v3, v4, v5 }, [x2], x4
+# CHECK: ld4.4h { v4, v5, v6, v7 }, [x3], x5
+# CHECK: ld4.8h { v7, v8, v9, v10 }, [x4], x6
+# CHECK: ld4.2s { v12, v13, v14, v15 }, [x0], x7
+# CHECK: ld4.4s { v10, v11, v12, v13 }, [x0], x8
+# CHECK: ld4.2d { v15, v16, v17, v18 }, [x2], x10
+
+0x00 0x00 0xdf 0x0c
+0x00 0x00 0xdf 0x4c
+0x00 0x04 0xdf 0x0c
+0x00 0x04 0xdf 0x4c
+0x00 0x08 0xdf 0x0c
+0x00 0x08 0xdf 0x4c
+0x00 0x0c 0xdf 0x4c
+
+# CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
+# CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
+# CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
+# CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
+# CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
+# CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
+# CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
+
+0x00 0x00 0x9f 0x0c
+0x00 0x00 0x9f 0x4c
+0x00 0x04 0x9f 0x0c
+0x00 0x04 0x9f 0x4c
+0x00 0x08 0x9f 0x0c
+0x00 0x08 0x9f 0x4c
+0x00 0x0c 0x9f 0x4c
+
+# CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
+# CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
+# CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
+# CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
+# CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
+# CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
+# CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
+
+0x41 0x00 0x83 0x0c
+0x42 0x00 0x84 0x4c
+0x64 0x04 0x85 0x0c
+0x87 0x04 0x86 0x4c
+0x0c 0x08 0x87 0x0c
+0x0a 0x08 0x88 0x4c
+0x4f 0x0c 0x8a 0x4c
+
+# CHECK: st4.8b { v1, v2, v3, v4 }, [x2], x3
+# CHECK: st4.16b { v2, v3, v4, v5 }, [x2], x4
+# CHECK: st4.4h { v4, v5, v6, v7 }, [x3], x5
+# CHECK: st4.8h { v7, v8, v9, v10 }, [x4], x6
+# CHECK: st4.2s { v12, v13, v14, v15 }, [x0], x7
+# CHECK: st4.4s { v10, v11, v12, v13 }, [x0], x8
+# CHECK: st4.2d { v15, v16, v17, v18 }, [x2], x10
+
+0x61 0x28 0x60 0x0d
+0x82 0xa4 0x60 0x4d
+0xc3 0x70 0x60 0x0d
+0xe4 0xb0 0x60 0x4d
+
+# CHECK: ld4.b { v1, v2, v3, v4 }[2], [x3]
+# CHECK: ld4.d { v2, v3, v4, v5 }[1], [x4]
+# CHECK: ld4.h { v3, v4, v5, v6 }[2], [x6]
+# CHECK: ld4.s { v4, v5, v6, v7 }[3], [x7]
+
+0x61 0x28 0x20 0x0d
+0x82 0xa4 0x20 0x4d
+0xc3 0x70 0x20 0x0d
+0xe4 0xb0 0x20 0x4d
+
+# CHECK: st4.b { v1, v2, v3, v4 }[2], [x3]
+# CHECK: st4.d { v2, v3, v4, v5 }[1], [x4]
+# CHECK: st4.h { v3, v4, v5, v6 }[2], [x6]
+# CHECK: st4.s { v4, v5, v6, v7 }[3], [x7]
+
+0x21 0xe0 0x60 0x0d
+0x21 0xe0 0xe2 0x0d
+0x21 0xe0 0x60 0x4d
+0x21 0xe0 0xe2 0x4d
+0x21 0xe4 0x60 0x0d
+0x21 0xe4 0xe2 0x0d
+0x21 0xe4 0x60 0x4d
+0x21 0xe4 0xe2 0x4d
+0x21 0xe8 0x60 0x0d
+0x21 0xe8 0xe2 0x0d
+0x21 0xec 0x60 0x4d
+0x21 0xec 0xe2 0x4d
+0x21 0xec 0x60 0x0d
+0x21 0xec 0xe2 0x0d
+
+# CHECK: ld4r.8b { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.8b { v1, v2, v3, v4 }, [x1], x2
+# CHECK: ld4r.16b { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.16b { v1, v2, v3, v4 }, [x1], x2
+# CHECK: ld4r.4h { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.4h { v1, v2, v3, v4 }, [x1], x2
+# CHECK: ld4r.8h { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.8h { v1, v2, v3, v4 }, [x1], x2
+# CHECK: ld4r.2s { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.2s { v1, v2, v3, v4 }, [x1], x2
+# CHECK: ld4r.2d { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.2d { v1, v2, v3, v4 }, [x1], x2
+# CHECK: ld4r.1d { v1, v2, v3, v4 }, [x1]
+# CHECK: ld4r.1d { v1, v2, v3, v4 }, [x1], x2
+
+0x21 0xe0 0xff 0x0d
+0x21 0xe0 0xff 0x4d
+0x21 0xe4 0xff 0x0d
+0x21 0xe4 0xff 0x4d
+0x21 0xe8 0xff 0x0d
+0x21 0xec 0xff 0x4d
+0x21 0xec 0xff 0x0d
+
+# CHECK: ld4r.8b { v1, v2, v3, v4 }, [x1], #4
+# CHECK: ld4r.16b { v1, v2, v3, v4 }, [x1], #4
+# CHECK: ld4r.4h { v1, v2, v3, v4 }, [x1], #8
+# CHECK: ld4r.8h { v1, v2, v3, v4 }, [x1], #8
+# CHECK: ld4r.2s { v1, v2, v3, v4 }, [x1], #16
+# CHECK: ld4r.2d { v1, v2, v3, v4 }, [x1], #32
+# CHECK: ld4r.1d { v1, v2, v3, v4 }, [x1], #32
+
+0x20 0xe4 0x00 0x2f
+0x20 0xe4 0x00 0x6f
+0x20 0xe4 0x00 0x0f
+0x20 0xe4 0x00 0x4f
+
+# CHECK: movi d0, #0x000000000000ff
+# CHECK: movi.2d v0, #0x000000000000ff
+# CHECK: movi.8b v0, #0x1
+# CHECK: movi.16b v0, #0x1
+
+0x20 0x04 0x00 0x0f
+0x20 0x24 0x00 0x0f
+0x20 0x44 0x00 0x0f
+0x20 0x64 0x00 0x0f
+
+# CHECK: movi.2s v0, #0x1
+# CHECK: movi.2s v0, #0x1, lsl #8
+# CHECK: movi.2s v0, #0x1, lsl #16
+# CHECK: movi.2s v0, #0x1, lsl #24
+
+0x20 0x04 0x00 0x4f
+0x20 0x24 0x00 0x4f
+0x20 0x44 0x00 0x4f
+0x20 0x64 0x00 0x4f
+
+# CHECK: movi.4s v0, #0x1
+# CHECK: movi.4s v0, #0x1, lsl #8
+# CHECK: movi.4s v0, #0x1, lsl #16
+# CHECK: movi.4s v0, #0x1, lsl #24
+
+0x20 0x84 0x00 0x0f
+0x20 0xa4 0x00 0x0f
+
+# CHECK: movi.4h v0, #0x1
+# CHECK: movi.4h v0, #0x1, lsl #8
+
+0x20 0x84 0x00 0x4f
+0x20 0xa4 0x00 0x4f
+
+# CHECK: movi.8h v0, #0x1
+# CHECK: movi.8h v0, #0x1, lsl #8
+
+0x20 0x04 0x00 0x2f
+0x20 0x24 0x00 0x2f
+0x20 0x44 0x00 0x2f
+0x20 0x64 0x00 0x2f
+
+# CHECK: mvni.2s v0, #0x1
+# CHECK: mvni.2s v0, #0x1, lsl #8
+# CHECK: mvni.2s v0, #0x1, lsl #16
+# CHECK: mvni.2s v0, #0x1, lsl #24
+
+0x20 0x04 0x00 0x6f
+0x20 0x24 0x00 0x6f
+0x20 0x44 0x00 0x6f
+0x20 0x64 0x00 0x6f
+
+# CHECK: mvni.4s v0, #0x1
+# CHECK: mvni.4s v0, #0x1, lsl #8
+# CHECK: mvni.4s v0, #0x1, lsl #16
+# CHECK: mvni.4s v0, #0x1, lsl #24
+
+0x20 0x84 0x00 0x2f
+0x20 0xa4 0x00 0x2f
+
+# CHECK: mvni.4h v0, #0x1
+# CHECK: mvni.4h v0, #0x1, lsl #8
+
+0x20 0x84 0x00 0x6f
+0x20 0xa4 0x00 0x6f
+
+# CHECK: mvni.8h v0, #0x1
+# CHECK: mvni.8h v0, #0x1, lsl #8
+
+0x20 0xc4 0x00 0x2f
+0x20 0xd4 0x00 0x2f
+0x20 0xc4 0x00 0x6f
+0x20 0xd4 0x00 0x6f
+
+# CHECK: mvni.2s v0, #0x1, msl #8
+# CHECK: mvni.2s v0, #0x1, msl #16
+# CHECK: mvni.4s v0, #0x1, msl #8
+# CHECK: mvni.4s v0, #0x1, msl #16
+
+0x00 0x88 0x21 0x2e
+0x00 0x98 0x21 0x2e
+0x00 0x98 0xa1 0x2e
+0x00 0x98 0x21 0x0e
+0x00 0x88 0x21 0x0e
+0x00 0x88 0xa1 0x0e
+0x00 0x98 0xa1 0x0e
+
+# CHECK: frinta.2s v0, v0
+# CHECK: frintx.2s v0, v0
+# CHECK: frinti.2s v0, v0
+# CHECK: frintm.2s v0, v0
+# CHECK: frintn.2s v0, v0
+# CHECK: frintp.2s v0, v0
+# CHECK: frintz.2s v0, v0
+
+#===-------------------------------------------------------------------------===
+# AdvSIMD scalar x index instructions
+#===-------------------------------------------------------------------------===
+
+0x00 0x18 0xa0 0x5f
+0x00 0x18 0xc0 0x5f
+0x00 0x58 0xa0 0x5f
+0x00 0x58 0xc0 0x5f
+0x00 0x98 0xa0 0x7f
+0x00 0x98 0xc0 0x7f
+0x00 0x98 0xa0 0x5f
+0x00 0x98 0xc0 0x5f
+0x00 0x38 0x70 0x5f
+0x00 0x38 0xa0 0x5f
+0x00 0x78 0x70 0x5f
+0x00 0xc8 0x70 0x5f
+0x00 0xc8 0xa0 0x5f
+0x00 0xb8 0x70 0x5f
+0x00 0xb8 0xa0 0x5f
+0x00 0xd8 0x70 0x5f
+0x00 0xd8 0xa0 0x5f
+
+# CHECK: fmla.s s0, s0, v0[3]
+# CHECK: fmla.d d0, d0, v0[1]
+# CHECK: fmls.s s0, s0, v0[3]
+# CHECK: fmls.d d0, d0, v0[1]
+# CHECK: fmulx.s s0, s0, v0[3]
+# CHECK: fmulx.d d0, d0, v0[1]
+# CHECK: fmul.s s0, s0, v0[3]
+# CHECK: fmul.d d0, d0, v0[1]
+# CHECK: sqdmlal.h s0, h0, v0[7]
+# CHECK: sqdmlal.s d0, s0, v0[3]
+# CHECK: sqdmlsl.h s0, h0, v0[7]
+# CHECK: sqdmulh.h h0, h0, v0[7]
+# CHECK: sqdmulh.s s0, s0, v0[3]
+# CHECK: sqdmull.h s0, h0, v0[7]
+# CHECK: sqdmull.s d0, s0, v0[3]
+# CHECK: sqrdmulh.h h0, h0, v0[7]
+# CHECK: sqrdmulh.s s0, s0, v0[3]
+
+#===-------------------------------------------------------------------------===
+# AdvSIMD vector x index instructions
+#===-------------------------------------------------------------------------===
+
+ 0x00 0x10 0x80 0x0f
+ 0x00 0x10 0xa0 0x4f
+ 0x00 0x18 0xc0 0x4f
+ 0x00 0x50 0x80 0x0f
+ 0x00 0x50 0xa0 0x4f
+ 0x00 0x58 0xc0 0x4f
+ 0x00 0x90 0x80 0x2f
+ 0x00 0x90 0xa0 0x6f
+ 0x00 0x98 0xc0 0x6f
+ 0x00 0x90 0x80 0x0f
+ 0x00 0x90 0xa0 0x4f
+ 0x00 0x98 0xc0 0x4f
+ 0x00 0x00 0x40 0x2f
+ 0x00 0x00 0x50 0x6f
+ 0x00 0x08 0x80 0x2f
+ 0x00 0x08 0xa0 0x6f
+ 0x00 0x40 0x40 0x2f
+ 0x00 0x40 0x50 0x6f
+ 0x00 0x48 0x80 0x2f
+ 0x00 0x48 0xa0 0x6f
+ 0x00 0x80 0x40 0x0f
+ 0x00 0x80 0x50 0x4f
+ 0x00 0x88 0x80 0x0f
+ 0x00 0x88 0xa0 0x4f
+ 0x00 0x20 0x40 0x0f
+ 0x00 0x20 0x50 0x4f
+ 0x00 0x28 0x80 0x0f
+ 0x00 0x28 0xa0 0x4f
+ 0x00 0x60 0x40 0x0f
+ 0x00 0x60 0x50 0x4f
+ 0x00 0x68 0x80 0x0f
+ 0x00 0x68 0xa0 0x4f
+ 0x00 0xa0 0x40 0x0f
+ 0x00 0xa0 0x50 0x4f
+ 0x00 0xa8 0x80 0x0f
+ 0x00 0xa8 0xa0 0x4f
+ 0x00 0x30 0x40 0x0f
+ 0x00 0x30 0x50 0x4f
+ 0x00 0x38 0x80 0x0f
+ 0x00 0x38 0xa0 0x4f
+ 0x00 0x70 0x40 0x0f
+ 0x00 0x70 0x50 0x4f
+ 0x00 0x78 0x80 0x0f
+ 0x00 0x78 0xa0 0x4f
+ 0x00 0xc0 0x40 0x0f
+ 0x00 0xc0 0x50 0x4f
+ 0x00 0xc8 0x80 0x0f
+ 0x00 0xc8 0xa0 0x4f
+ 0x00 0xb0 0x40 0x0f
+ 0x00 0xb0 0x50 0x4f
+ 0x00 0xb8 0x80 0x0f
+ 0x00 0xb8 0xa0 0x4f
+ 0x00 0xd0 0x40 0x0f
+ 0x00 0xd0 0x50 0x4f
+ 0x00 0xd8 0x80 0x0f
+ 0x00 0xd8 0xa0 0x4f
+ 0x00 0x20 0x40 0x2f
+ 0x00 0x20 0x50 0x6f
+ 0x00 0x28 0x80 0x2f
+ 0x00 0x28 0xa0 0x6f
+ 0x00 0x60 0x40 0x2f
+ 0x00 0x60 0x50 0x6f
+ 0x00 0x68 0x80 0x2f
+ 0x00 0x68 0xa0 0x6f
+ 0x00 0xa0 0x40 0x2f
+ 0x00 0xa0 0x50 0x6f
+ 0x00 0xa8 0x80 0x2f
+ 0x00 0xa8 0xa0 0x6f
+
+# CHECK: fmla.2s v0, v0, v0[0]
+# CHECK: fmla.4s v0, v0, v0[1]
+# CHECK: fmla.2d v0, v0, v0[1]
+# CHECK: fmls.2s v0, v0, v0[0]
+# CHECK: fmls.4s v0, v0, v0[1]
+# CHECK: fmls.2d v0, v0, v0[1]
+# CHECK: fmulx.2s v0, v0, v0[0]
+# CHECK: fmulx.4s v0, v0, v0[1]
+# CHECK: fmulx.2d v0, v0, v0[1]
+# CHECK: fmul.2s v0, v0, v0[0]
+# CHECK: fmul.4s v0, v0, v0[1]
+# CHECK: fmul.2d v0, v0, v0[1]
+# CHECK: mla.4h v0, v0, v0[0]
+# CHECK: mla.8h v0, v0, v0[1]
+# CHECK: mla.2s v0, v0, v0[2]
+# CHECK: mla.4s v0, v0, v0[3]
+# CHECK: mls.4h v0, v0, v0[0]
+# CHECK: mls.8h v0, v0, v0[1]
+# CHECK: mls.2s v0, v0, v0[2]
+# CHECK: mls.4s v0, v0, v0[3]
+# CHECK: mul.4h v0, v0, v0[0]
+# CHECK: mul.8h v0, v0, v0[1]
+# CHECK: mul.2s v0, v0, v0[2]
+# CHECK: mul.4s v0, v0, v0[3]
+# CHECK: smlal.4s v0, v0, v0[0]
+# CHECK: smlal2.4s v0, v0, v0[1]
+# CHECK: smlal.2d v0, v0, v0[2]
+# CHECK: smlal2.2d v0, v0, v0[3]
+# CHECK: smlsl.4s v0, v0, v0[0]
+# CHECK: smlsl2.4s v0, v0, v0[1]
+# CHECK: smlsl.2d v0, v0, v0[2]
+# CHECK: smlsl2.2d v0, v0, v0[3]
+# CHECK: smull.4s v0, v0, v0[0]
+# CHECK: smull2.4s v0, v0, v0[1]
+# CHECK: smull.2d v0, v0, v0[2]
+# CHECK: smull2.2d v0, v0, v0[3]
+# CHECK: sqdmlal.4s v0, v0, v0[0]
+# CHECK: sqdmlal2.4s v0, v0, v0[1]
+# CHECK: sqdmlal.2d v0, v0, v0[2]
+# CHECK: sqdmlal2.2d v0, v0, v0[3]
+# CHECK: sqdmlsl.4s v0, v0, v0[0]
+# CHECK: sqdmlsl2.4s v0, v0, v0[1]
+# CHECK: sqdmlsl.2d v0, v0, v0[2]
+# CHECK: sqdmlsl2.2d v0, v0, v0[3]
+# CHECK: sqdmulh.4h v0, v0, v0[0]
+# CHECK: sqdmulh.8h v0, v0, v0[1]
+# CHECK: sqdmulh.2s v0, v0, v0[2]
+# CHECK: sqdmulh.4s v0, v0, v0[3]
+# CHECK: sqdmull.4s v0, v0, v0[0]
+# CHECK: sqdmull2.4s v0, v0, v0[1]
+# CHECK: sqdmull.2d v0, v0, v0[2]
+# CHECK: sqdmull2.2d v0, v0, v0[3]
+# CHECK: sqrdmulh.4h v0, v0, v0[0]
+# CHECK: sqrdmulh.8h v0, v0, v0[1]
+# CHECK: sqrdmulh.2s v0, v0, v0[2]
+# CHECK: sqrdmulh.4s v0, v0, v0[3]
+# CHECK: umlal.4s v0, v0, v0[0]
+# CHECK: umlal2.4s v0, v0, v0[1]
+# CHECK: umlal.2d v0, v0, v0[2]
+# CHECK: umlal2.2d v0, v0, v0[3]
+# CHECK: umlsl.4s v0, v0, v0[0]
+# CHECK: umlsl2.4s v0, v0, v0[1]
+# CHECK: umlsl.2d v0, v0, v0[2]
+# CHECK: umlsl2.2d v0, v0, v0[3]
+# CHECK: umull.4s v0, v0, v0[0]
+# CHECK: umull2.4s v0, v0, v0[1]
+# CHECK: umull.2d v0, v0, v0[2]
+# CHECK: umull2.2d v0, v0, v0[3]
+
+
+#===-------------------------------------------------------------------------===
+# AdvSIMD scalar + shift instructions
+#===-------------------------------------------------------------------------===
+
+ 0x00 0x54 0x41 0x5f
+ 0x00 0x54 0x41 0x7f
+ 0x00 0x9c 0x09 0x5f
+ 0x00 0x9c 0x12 0x5f
+ 0x00 0x9c 0x23 0x5f
+ 0x00 0x8c 0x09 0x7f
+ 0x00 0x8c 0x12 0x7f
+ 0x00 0x8c 0x23 0x7f
+ 0x00 0x64 0x09 0x7f
+ 0x00 0x64 0x12 0x7f
+ 0x00 0x64 0x23 0x7f
+ 0x00 0x64 0x44 0x7f
+ 0x00 0x74 0x09 0x5f
+ 0x00 0x74 0x12 0x5f
+ 0x00 0x74 0x23 0x5f
+ 0x00 0x74 0x44 0x5f
+ 0x00 0x94 0x09 0x5f
+ 0x00 0x94 0x12 0x5f
+ 0x00 0x94 0x23 0x5f
+ 0x00 0x84 0x09 0x7f
+ 0x00 0x84 0x12 0x7f
+ 0x00 0x84 0x23 0x7f
+ 0x00 0x44 0x41 0x7f
+ 0x00 0x24 0x41 0x5f
+ 0x00 0x34 0x41 0x5f
+ 0x00 0x04 0x41 0x5f
+ 0x00 0xe4 0x21 0x7f
+ 0x00 0xe4 0x42 0x7f
+ 0x00 0x9c 0x09 0x7f
+ 0x00 0x9c 0x12 0x7f
+ 0x00 0x9c 0x23 0x7f
+ 0x00 0x74 0x09 0x7f
+ 0x00 0x74 0x12 0x7f
+ 0x00 0x74 0x23 0x7f
+ 0x00 0x74 0x44 0x7f
+ 0x00 0x94 0x09 0x7f
+ 0x00 0x94 0x12 0x7f
+ 0x00 0x94 0x23 0x7f
+ 0x00 0x24 0x41 0x7f
+ 0x00 0x34 0x41 0x7f
+ 0x00 0x04 0x41 0x7f
+ 0x00 0x14 0x41 0x7f
+
+# CHECK: shl d0, d0, #1
+# CHECK: sli d0, d0, #1
+# CHECK: sqrshrn b0, h0, #7
+# CHECK: sqrshrn h0, s0, #14
+# CHECK: sqrshrn s0, d0, #29
+# CHECK: sqrshrun b0, h0, #7
+# CHECK: sqrshrun h0, s0, #14
+# CHECK: sqrshrun s0, d0, #29
+# CHECK: sqshlu b0, b0, #1
+# CHECK: sqshlu h0, h0, #2
+# CHECK: sqshlu s0, s0, #3
+# CHECK: sqshlu d0, d0, #4
+# CHECK: sqshl b0, b0, #1
+# CHECK: sqshl h0, h0, #2
+# CHECK: sqshl s0, s0, #3
+# CHECK: sqshl d0, d0, #4
+# CHECK: sqshrn b0, h0, #7
+# CHECK: sqshrn h0, s0, #14
+# CHECK: sqshrn s0, d0, #29
+# CHECK: sqshrun b0, h0, #7
+# CHECK: sqshrun h0, s0, #14
+# CHECK: sqshrun s0, d0, #29
+# CHECK: sri d0, d0, #63
+# CHECK: srshr d0, d0, #63
+# CHECK: srsra d0, d0, #63
+# CHECK: sshr d0, d0, #63
+# CHECK: ucvtf s0, s0, #31
+# CHECK: ucvtf d0, d0, #62
+# CHECK: uqrshrn b0, h0, #7
+# CHECK: uqrshrn h0, s0, #14
+# CHECK: uqrshrn s0, d0, #29
+# CHECK: uqshl b0, b0, #1
+# CHECK: uqshl h0, h0, #2
+# CHECK: uqshl s0, s0, #3
+# CHECK: uqshl d0, d0, #4
+# CHECK: uqshrn b0, h0, #7
+# CHECK: uqshrn h0, s0, #14
+# CHECK: uqshrn s0, d0, #29
+# CHECK: urshr d0, d0, #63
+# CHECK: ursra d0, d0, #63
+# CHECK: ushr d0, d0, #63
+# CHECK: usra d0, d0, #63
+
+#===-------------------------------------------------------------------------===
+# AdvSIMD vector + shift instructions
+#===-------------------------------------------------------------------------===
+
+ 0x00 0xfc 0x21 0x0f
+ 0x00 0xfc 0x22 0x4f
+ 0x00 0xfc 0x43 0x4f
+ 0x00 0xfc 0x21 0x2f
+ 0x00 0xfc 0x22 0x6f
+ 0x00 0xfc 0x43 0x6f
+ 0x00 0x8c 0x09 0x0f
+ 0x00 0x8c 0x0a 0x4f
+ 0x00 0x8c 0x13 0x0f
+ 0x00 0x8c 0x14 0x4f
+ 0x00 0x8c 0x25 0x0f
+ 0x00 0x8c 0x26 0x4f
+ 0x00 0xe4 0x21 0x0f
+ 0x00 0xe4 0x22 0x4f
+ 0x00 0xe4 0x43 0x4f
+ 0x00 0x54 0x09 0x0f
+ 0x00 0x54 0x0a 0x4f
+ 0x00 0x54 0x13 0x0f
+ 0x00 0x54 0x14 0x4f
+ 0x00 0x54 0x25 0x0f
+ 0x00 0x54 0x26 0x4f
+ 0x00 0x54 0x47 0x4f
+ 0x00 0x84 0x09 0x0f
+ 0x00 0x84 0x0a 0x4f
+ 0x00 0x84 0x13 0x0f
+ 0x00 0x84 0x14 0x4f
+ 0x00 0x84 0x25 0x0f
+ 0x00 0x84 0x26 0x4f
+ 0x00 0x54 0x09 0x2f
+ 0x00 0x54 0x0a 0x6f
+ 0x00 0x54 0x13 0x2f
+ 0x00 0x54 0x14 0x6f
+ 0x00 0x54 0x25 0x2f
+ 0x00 0x54 0x26 0x6f
+ 0x00 0x54 0x47 0x6f
+ 0x00 0x9c 0x09 0x0f
+ 0x00 0x9c 0x0a 0x4f
+ 0x00 0x9c 0x13 0x0f
+ 0x00 0x9c 0x14 0x4f
+ 0x00 0x9c 0x25 0x0f
+ 0x00 0x9c 0x26 0x4f
+ 0x00 0x8c 0x09 0x2f
+ 0x00 0x8c 0x0a 0x6f
+ 0x00 0x8c 0x13 0x2f
+ 0x00 0x8c 0x14 0x6f
+ 0x00 0x8c 0x25 0x2f
+ 0x00 0x8c 0x26 0x6f
+ 0x00 0x64 0x09 0x2f
+ 0x00 0x64 0x0a 0x6f
+ 0x00 0x64 0x13 0x2f
+ 0x00 0x64 0x14 0x6f
+ 0x00 0x64 0x25 0x2f
+ 0x00 0x64 0x26 0x6f
+ 0x00 0x64 0x47 0x6f
+ 0x00 0x74 0x09 0x0f
+ 0x00 0x74 0x0a 0x4f
+ 0x00 0x74 0x13 0x0f
+ 0x00 0x74 0x14 0x4f
+ 0x00 0x74 0x25 0x0f
+ 0x00 0x74 0x26 0x4f
+ 0x00 0x74 0x47 0x4f
+ 0x00 0x94 0x09 0x0f
+ 0x00 0x94 0x0a 0x4f
+ 0x00 0x94 0x13 0x0f
+ 0x00 0x94 0x14 0x4f
+ 0x00 0x94 0x25 0x0f
+ 0x00 0x94 0x26 0x4f
+ 0x00 0x84 0x09 0x2f
+ 0x00 0x84 0x0a 0x6f
+ 0x00 0x84 0x13 0x2f
+ 0x00 0x84 0x14 0x6f
+ 0x00 0x84 0x25 0x2f
+ 0x00 0x84 0x26 0x6f
+ 0x00 0x44 0x09 0x2f
+ 0x00 0x44 0x0a 0x6f
+ 0x00 0x44 0x13 0x2f
+ 0x00 0x44 0x14 0x6f
+ 0x00 0x44 0x25 0x2f
+ 0x00 0x44 0x26 0x6f
+ 0x00 0x44 0x47 0x6f
+ 0x00 0x24 0x09 0x0f
+ 0x00 0x24 0x0a 0x4f
+ 0x00 0x24 0x13 0x0f
+ 0x00 0x24 0x14 0x4f
+ 0x00 0x24 0x25 0x0f
+ 0x00 0x24 0x26 0x4f
+ 0x00 0x24 0x47 0x4f
+ 0x00 0x34 0x09 0x0f
+ 0x00 0x34 0x0a 0x4f
+ 0x00 0x34 0x13 0x0f
+ 0x00 0x34 0x14 0x4f
+ 0x00 0x34 0x25 0x0f
+ 0x00 0x34 0x26 0x4f
+ 0x00 0x34 0x47 0x4f
+ 0x00 0xa4 0x09 0x0f
+ 0x00 0xa4 0x0a 0x4f
+ 0x00 0xa4 0x13 0x0f
+ 0x00 0xa4 0x14 0x4f
+ 0x00 0xa4 0x25 0x0f
+ 0x00 0xa4 0x26 0x4f
+ 0x00 0x04 0x09 0x0f
+ 0x00 0x04 0x0a 0x4f
+ 0x00 0x04 0x13 0x0f
+ 0x00 0x04 0x14 0x4f
+ 0x00 0x04 0x25 0x0f
+ 0x00 0x04 0x26 0x4f
+ 0x00 0x04 0x47 0x4f
+ 0x00 0x14 0x09 0x0f
+ 0x00 0x14 0x0a 0x4f
+ 0x00 0x14 0x13 0x0f
+ 0x00 0x14 0x14 0x4f
+ 0x00 0x14 0x25 0x0f
+ 0x00 0x14 0x26 0x4f
+ 0x00 0x14 0x47 0x4f
+ 0x00 0x14 0x40 0x5f
+ 0x00 0xe4 0x21 0x2f
+ 0x00 0xe4 0x22 0x6f
+ 0x00 0xe4 0x43 0x6f
+ 0x00 0x9c 0x09 0x2f
+ 0x00 0x9c 0x0a 0x6f
+ 0x00 0x9c 0x13 0x2f
+ 0x00 0x9c 0x14 0x6f
+ 0x00 0x9c 0x25 0x2f
+ 0x00 0x9c 0x26 0x6f
+ 0x00 0x74 0x09 0x2f
+ 0x00 0x74 0x0a 0x6f
+ 0x00 0x74 0x13 0x2f
+ 0x00 0x74 0x14 0x6f
+ 0x00 0x74 0x25 0x2f
+ 0x00 0x74 0x26 0x6f
+ 0x00 0x74 0x47 0x6f
+ 0x00 0x94 0x09 0x2f
+ 0x00 0x94 0x0a 0x6f
+ 0x00 0x94 0x13 0x2f
+ 0x00 0x94 0x14 0x6f
+ 0x00 0x94 0x25 0x2f
+ 0x00 0x94 0x26 0x6f
+ 0x00 0x24 0x09 0x2f
+ 0x00 0x24 0x0a 0x6f
+ 0x00 0x24 0x13 0x2f
+ 0x00 0x24 0x14 0x6f
+ 0x00 0x24 0x25 0x2f
+ 0x00 0x24 0x26 0x6f
+ 0x00 0x24 0x47 0x6f
+ 0x00 0x34 0x09 0x2f
+ 0x00 0x34 0x0a 0x6f
+ 0x00 0x34 0x13 0x2f
+ 0x00 0x34 0x14 0x6f
+ 0x00 0x34 0x25 0x2f
+ 0x00 0x34 0x26 0x6f
+ 0x00 0x34 0x47 0x6f
+ 0x00 0xa4 0x09 0x2f
+ 0x00 0xa4 0x0a 0x6f
+ 0x00 0xa4 0x13 0x2f
+ 0x00 0xa4 0x14 0x6f
+ 0x00 0xa4 0x25 0x2f
+ 0x00 0xa4 0x26 0x6f
+ 0x00 0x04 0x09 0x2f
+ 0x00 0x04 0x0a 0x6f
+ 0x00 0x04 0x13 0x2f
+ 0x00 0x04 0x14 0x6f
+ 0x00 0x04 0x25 0x2f
+ 0x00 0x04 0x26 0x6f
+ 0x00 0x04 0x47 0x6f
+ 0x00 0x14 0x09 0x2f
+ 0x00 0x14 0x0a 0x6f
+ 0x00 0x14 0x13 0x2f
+ 0x00 0x14 0x14 0x6f
+ 0x00 0x14 0x25 0x2f
+ 0x00 0x14 0x26 0x6f
+ 0x00 0x14 0x47 0x6f
+
+# CHECK: fcvtzs.2s v0, v0, #31
+# CHECK: fcvtzs.4s v0, v0, #30
+# CHECK: fcvtzs.2d v0, v0, #61
+# CHECK: fcvtzu.2s v0, v0, #31
+# CHECK: fcvtzu.4s v0, v0, #30
+# CHECK: fcvtzu.2d v0, v0, #61
+# CHECK: rshrn.8b v0, v0, #7
+# CHECK: rshrn2.16b v0, v0, #6
+# CHECK: rshrn.4h v0, v0, #13
+# CHECK: rshrn2.8h v0, v0, #12
+# CHECK: rshrn.2s v0, v0, #27
+# CHECK: rshrn2.4s v0, v0, #26
+# CHECK: scvtf.2s v0, v0, #31
+# CHECK: scvtf.4s v0, v0, #30
+# CHECK: scvtf.2d v0, v0, #61
+# CHECK: shl.8b v0, v0, #1
+# CHECK: shl.16b v0, v0, #2
+# CHECK: shl.4h v0, v0, #3
+# CHECK: shl.8h v0, v0, #4
+# CHECK: shl.2s v0, v0, #5
+# CHECK: shl.4s v0, v0, #6
+# CHECK: shl.2d v0, v0, #7
+# CHECK: shrn.8b v0, v0, #7
+# CHECK: shrn2.16b v0, v0, #6
+# CHECK: shrn.4h v0, v0, #13
+# CHECK: shrn2.8h v0, v0, #12
+# CHECK: shrn.2s v0, v0, #27
+# CHECK: shrn2.4s v0, v0, #26
+# CHECK: sli.8b v0, v0, #1
+# CHECK: sli.16b v0, v0, #2
+# CHECK: sli.4h v0, v0, #3
+# CHECK: sli.8h v0, v0, #4
+# CHECK: sli.2s v0, v0, #5
+# CHECK: sli.4s v0, v0, #6
+# CHECK: sli.2d v0, v0, #7
+# CHECK: sqrshrn.8b v0, v0, #7
+# CHECK: sqrshrn2.16b v0, v0, #6
+# CHECK: sqrshrn.4h v0, v0, #13
+# CHECK: sqrshrn2.8h v0, v0, #12
+# CHECK: sqrshrn.2s v0, v0, #27
+# CHECK: sqrshrn2.4s v0, v0, #26
+# CHECK: sqrshrun.8b v0, v0, #7
+# CHECK: sqrshrun2.16b v0, v0, #6
+# CHECK: sqrshrun.4h v0, v0, #13
+# CHECK: sqrshrun2.8h v0, v0, #12
+# CHECK: sqrshrun.2s v0, v0, #27
+# CHECK: sqrshrun2.4s v0, v0, #26
+# CHECK: sqshlu.8b v0, v0, #1
+# CHECK: sqshlu.16b v0, v0, #2
+# CHECK: sqshlu.4h v0, v0, #3
+# CHECK: sqshlu.8h v0, v0, #4
+# CHECK: sqshlu.2s v0, v0, #5
+# CHECK: sqshlu.4s v0, v0, #6
+# CHECK: sqshlu.2d v0, v0, #7
+# CHECK: sqshl.8b v0, v0, #1
+# CHECK: sqshl.16b v0, v0, #2
+# CHECK: sqshl.4h v0, v0, #3
+# CHECK: sqshl.8h v0, v0, #4
+# CHECK: sqshl.2s v0, v0, #5
+# CHECK: sqshl.4s v0, v0, #6
+# CHECK: sqshl.2d v0, v0, #7
+# CHECK: sqshrn.8b v0, v0, #7
+# CHECK: sqshrn2.16b v0, v0, #6
+# CHECK: sqshrn.4h v0, v0, #13
+# CHECK: sqshrn2.8h v0, v0, #12
+# CHECK: sqshrn.2s v0, v0, #27
+# CHECK: sqshrn2.4s v0, v0, #26
+# CHECK: sqshrun.8b v0, v0, #7
+# CHECK: sqshrun2.16b v0, v0, #6
+# CHECK: sqshrun.4h v0, v0, #13
+# CHECK: sqshrun2.8h v0, v0, #12
+# CHECK: sqshrun.2s v0, v0, #27
+# CHECK: sqshrun2.4s v0, v0, #26
+# CHECK: sri.8b v0, v0, #7
+# CHECK: sri.16b v0, v0, #6
+# CHECK: sri.4h v0, v0, #13
+# CHECK: sri.8h v0, v0, #12
+# CHECK: sri.2s v0, v0, #27
+# CHECK: sri.4s v0, v0, #26
+# CHECK: sri.2d v0, v0, #57
+# CHECK: srshr.8b v0, v0, #7
+# CHECK: srshr.16b v0, v0, #6
+# CHECK: srshr.4h v0, v0, #13
+# CHECK: srshr.8h v0, v0, #12
+# CHECK: srshr.2s v0, v0, #27
+# CHECK: srshr.4s v0, v0, #26
+# CHECK: srshr.2d v0, v0, #57
+# CHECK: srsra.8b v0, v0, #7
+# CHECK: srsra.16b v0, v0, #6
+# CHECK: srsra.4h v0, v0, #13
+# CHECK: srsra.8h v0, v0, #12
+# CHECK: srsra.2s v0, v0, #27
+# CHECK: srsra.4s v0, v0, #26
+# CHECK: srsra.2d v0, v0, #57
+# CHECK: sshll.8h v0, v0, #1
+# CHECK: sshll2.8h v0, v0, #2
+# CHECK: sshll.4s v0, v0, #3
+# CHECK: sshll2.4s v0, v0, #4
+# CHECK: sshll.2d v0, v0, #5
+# CHECK: sshll2.2d v0, v0, #6
+# CHECK: sshr.8b v0, v0, #7
+# CHECK: sshr.16b v0, v0, #6
+# CHECK: sshr.4h v0, v0, #13
+# CHECK: sshr.8h v0, v0, #12
+# CHECK: sshr.2s v0, v0, #27
+# CHECK: sshr.4s v0, v0, #26
+# CHECK: sshr.2d v0, v0, #57
+# CHECK: ssra.8b v0, v0, #7
+# CHECK: ssra.16b v0, v0, #6
+# CHECK: ssra.4h v0, v0, #13
+# CHECK: ssra.8h v0, v0, #12
+# CHECK: ssra.2s v0, v0, #27
+# CHECK: ssra.4s v0, v0, #26
+# CHECK: ssra.2d v0, v0, #57
+# CHECK: ssra d0, d0, #64
+# CHECK: ucvtf.2s v0, v0, #31
+# CHECK: ucvtf.4s v0, v0, #30
+# CHECK: ucvtf.2d v0, v0, #61
+# CHECK: uqrshrn.8b v0, v0, #7
+# CHECK: uqrshrn2.16b v0, v0, #6
+# CHECK: uqrshrn.4h v0, v0, #13
+# CHECK: uqrshrn2.8h v0, v0, #12
+# CHECK: uqrshrn.2s v0, v0, #27
+# CHECK: uqrshrn2.4s v0, v0, #26
+# CHECK: uqshl.8b v0, v0, #1
+# CHECK: uqshl.16b v0, v0, #2
+# CHECK: uqshl.4h v0, v0, #3
+# CHECK: uqshl.8h v0, v0, #4
+# CHECK: uqshl.2s v0, v0, #5
+# CHECK: uqshl.4s v0, v0, #6
+# CHECK: uqshl.2d v0, v0, #7
+# CHECK: uqshrn.8b v0, v0, #7
+# CHECK: uqshrn2.16b v0, v0, #6
+# CHECK: uqshrn.4h v0, v0, #13
+# CHECK: uqshrn2.8h v0, v0, #12
+# CHECK: uqshrn.2s v0, v0, #27
+# CHECK: uqshrn2.4s v0, v0, #26
+# CHECK: urshr.8b v0, v0, #7
+# CHECK: urshr.16b v0, v0, #6
+# CHECK: urshr.4h v0, v0, #13
+# CHECK: urshr.8h v0, v0, #12
+# CHECK: urshr.2s v0, v0, #27
+# CHECK: urshr.4s v0, v0, #26
+# CHECK: urshr.2d v0, v0, #57
+# CHECK: ursra.8b v0, v0, #7
+# CHECK: ursra.16b v0, v0, #6
+# CHECK: ursra.4h v0, v0, #13
+# CHECK: ursra.8h v0, v0, #12
+# CHECK: ursra.2s v0, v0, #27
+# CHECK: ursra.4s v0, v0, #26
+# CHECK: ursra.2d v0, v0, #57
+# CHECK: ushll.8h v0, v0, #1
+# CHECK: ushll2.8h v0, v0, #2
+# CHECK: ushll.4s v0, v0, #3
+# CHECK: ushll2.4s v0, v0, #4
+# CHECK: ushll.2d v0, v0, #5
+# CHECK: ushll2.2d v0, v0, #6
+# CHECK: ushr.8b v0, v0, #7
+# CHECK: ushr.16b v0, v0, #6
+# CHECK: ushr.4h v0, v0, #13
+# CHECK: ushr.8h v0, v0, #12
+# CHECK: ushr.2s v0, v0, #27
+# CHECK: ushr.4s v0, v0, #26
+# CHECK: ushr.2d v0, v0, #57
+# CHECK: usra.8b v0, v0, #7
+# CHECK: usra.16b v0, v0, #6
+# CHECK: usra.4h v0, v0, #13
+# CHECK: usra.8h v0, v0, #12
+# CHECK: usra.2s v0, v0, #27
+# CHECK: usra.4s v0, v0, #26
+# CHECK: usra.2d v0, v0, #57
+
+
+ 0x00 0xe0 0x20 0x0e
+ 0x00 0xe0 0x20 0x4e
+ 0x00 0xe0 0xe0 0x0e
+ 0x00 0xe0 0xe0 0x4e
+
+# CHECK: pmull.8h v0, v0, v0
+# CHECK: pmull2.8h v0, v0, v0
+# CHECK: pmull.1q v0, v0, v0
+# CHECK: pmull2.1q v0, v0, v0
+
+ 0x41 0xd8 0x70 0x7e
+ 0x83 0xd8 0x30 0x7e
+# CHECK: faddp.2d d1, v2
+# CHECK: faddp.2s s3, v4
+
+ 0x82 0x60 0x01 0x4e
+ 0x80 0x60 0x01 0x0e
+ 0xa2 0x00 0x01 0x4e
+ 0xa0 0x00 0x01 0x0e
+ 0xa2 0x40 0x01 0x4e
+ 0xa0 0x40 0x01 0x0e
+ 0xc2 0x20 0x01 0x4e
+ 0xc0 0x20 0x01 0x0e
+
+# CHECK: tbl.16b v2, { v4, v5, v6, v7 }, v1
+# CHECK: tbl.8b v0, { v4, v5, v6, v7 }, v1
+# CHECK: tbl.16b v2, { v5 }, v1
+# CHECK: tbl.8b v0, { v5 }, v1
+# CHECK: tbl.16b v2, { v5, v6, v7 }, v1
+# CHECK: tbl.8b v0, { v5, v6, v7 }, v1
+# CHECK: tbl.16b v2, { v6, v7 }, v1
+# CHECK: tbl.8b v0, { v6, v7 }, v1
+#
+ 0x82 0x70 0x01 0x4e
+ 0x80 0x70 0x01 0x0e
+ 0xa2 0x10 0x01 0x4e
+ 0xa0 0x10 0x01 0x0e
+ 0xa2 0x50 0x01 0x4e
+ 0xa0 0x50 0x01 0x0e
+ 0xc2 0x30 0x01 0x4e
+ 0xc0 0x30 0x01 0x0e
+
+# CHECK: tbx.16b v2, { v4, v5, v6, v7 }, v1
+# CHECK: tbx.8b v0, { v4, v5, v6, v7 }, v1
+# CHECK: tbx.16b v2, { v5 }, v1
+# CHECK: tbx.8b v0, { v5 }, v1
+# CHECK: tbx.16b v2, { v5, v6, v7 }, v1
+# CHECK: tbx.8b v0, { v5, v6, v7 }, v1
+# CHECK: tbx.16b v2, { v6, v7 }, v1
+# CHECK: tbx.8b v0, { v6, v7 }, v1
+#
+
+0x00 0x80 0x20 0x0e
+0x00 0x80 0x20 0x4e
+0x00 0x80 0xa0 0x0e
+0x00 0x80 0xa0 0x4e
+
+# CHECK: smlal.8h v0, v0, v0
+# CHECK: smlal2.8h v0, v0, v0
+# CHECK: smlal.2d v0, v0, v0
+# CHECK: smlal2.2d v0, v0, v0
+
+0x00 0x80 0x20 0x2e
+0x00 0x80 0x20 0x6e
+0x00 0x80 0xa0 0x2e
+0x00 0x80 0xa0 0x6e
+
+# CHECK: umlal.8h v0, v0, v0
+# CHECK: umlal2.8h v0, v0, v0
+# CHECK: umlal.2d v0, v0, v0
+# CHECK: umlal2.2d v0, v0, v0
+
+0x00 0x90 0x60 0x5e
+0x00 0x90 0xa0 0x5e
+0x00 0xb0 0x60 0x5e
+0x00 0xb0 0xa0 0x5e
+
+# CHECK: sqdmlal s0, h0, h0
+# CHECK: sqdmlal d0, s0, s0
+# CHECK: sqdmlsl s0, h0, h0
+# CHECK: sqdmlsl d0, s0, s0
+
+0xaa 0xc5 0xc7 0x4d
+0xaa 0xc9 0xc7 0x4d
+0xaa 0xc1 0xc7 0x4d
+
+# CHECK: ld1r.8h { v10 }, [x13], x7
+# CHECK: ld1r.4s { v10 }, [x13], x7
+# CHECK: ld1r.16b { v10 }, [x13], x7
+
+0x00 0xd0 0x60 0x5e
+0x00 0xd0 0xa0 0x5e
+# CHECK: sqdmull s0, h0, h0
+# CHECK: sqdmull d0, s0, s0
+
+0x00 0xd8 0xa1 0x7e
+0x00 0xd8 0xe1 0x7e
+
+# CHECK: frsqrte s0, s0
+# CHECK: frsqrte d0, d0
+
+0xca 0xcd 0xc7 0x4d
+0xea 0xc9 0xe7 0x4d
+0xea 0xe9 0xc7 0x4d
+0xea 0xe9 0xe7 0x4d
+# CHECK: ld1r.2d { v10 }, [x14], x7
+# CHECK: ld2r.4s { v10, v11 }, [x15], x7
+# CHECK: ld3r.4s { v10, v11, v12 }, [x15], x7
+# CHECK: ld4r.4s { v10, v11, v12, v13 }, [x15], x7
+
+#===-------------------------------------------------------------------------===
+# AdvSIMD scalar three same
+#===-------------------------------------------------------------------------===
+0x62 0xdc 0x21 0x5e
+# CHECK: fmulx s2, s3, s1
+0x62 0xdc 0x61 0x5e
+# CHECK: fmulx d2, d3, d1
+
+
+# rdar://12511369
+0xe8 0x6b 0xdf 0x4c
+# CHECK: ld1.4s { v8, v9, v10 }, [sp], #48
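
Annotation (not part of the patch): the #48 writeback above is not a free operand. Post-indexed structure loads with an immediate always step the base by <number of registers> * <register size>, so a three-register ld1.4s advances by 3 * 16 bytes. A minimal sketch of that arithmetic (Python, illustration only; the helper name is mine):

    def ld1_post_imm(nregs, q):
        # immediate writeback = register count * (16 bytes if Q form, else 8)
        return nregs * (16 if q else 8)

    print(ld1_post_imm(3, 1))  # 48 -> ld1.4s { v8, v9, v10 }, [sp], #48
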
diff --git a/test/MC/Disassembler/AArch64/arm64-arithmetic.txt b/test/MC/Disassembler/AArch64/arm64-arithmetic.txt
new file mode 100644
index 0000000..bd870ed
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-arithmetic.txt
@@ -0,0 +1,526 @@
+# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
+
+#==---------------------------------------------------------------------------==
+# Add/Subtract with carry/borrow
+#==---------------------------------------------------------------------------==
+
+0x41 0x00 0x03 0x1a
+0x41 0x00 0x03 0x9a
+0x85 0x00 0x03 0x3a
+0x85 0x00 0x03 0xba
+
+# CHECK: adc w1, w2, w3
+# CHECK: adc x1, x2, x3
+# CHECK: adcs w5, w4, w3
+# CHECK: adcs x5, x4, x3
+
+0x41 0x00 0x03 0x5a
+0x41 0x00 0x03 0xda
+0x41 0x00 0x03 0x7a
+0x41 0x00 0x03 0xfa
+
+# CHECK: sbc w1, w2, w3
+# CHECK: sbc x1, x2, x3
+# CHECK: sbcs w1, w2, w3
+# CHECK: sbcs x1, x2, x3
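
Annotation (not part of the patch), for cross-checking the hex against the CHECK lines: in the add/sub-with-carry class, bit 31 is sf (w vs x), bit 30 selects adc vs sbc, bit 29 is the flag-setting S bit, and Rm/Rn/Rd sit in the usual 5-bit slots. A decoding sketch, assuming the little-endian byte order these .txt tests use (helper names are mine):

    def word(s):  # "0x41 0x00 0x03 0x1a" -> 0x1a030041
        b = [int(t, 16) for t in s.split()]
        return b[0] | b[1] << 8 | b[2] << 16 | b[3] << 24

    def carry_fields(insn):
        sf, op, s = insn >> 31, (insn >> 30) & 1, (insn >> 29) & 1
        rm, rn, rd = (insn >> 16) & 0x1f, (insn >> 5) & 0x1f, insn & 0x1f
        return sf, op, s, rd, rn, rm

    print(carry_fields(word("0x41 0x00 0x03 0x1a")))
    # (0, 0, 0, 1, 2, 3) -> adc w1, w2, w3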
+
+#==---------------------------------------------------------------------------==
+# Add/Subtract with (optionally shifted) immediate
+#==---------------------------------------------------------------------------==
+
+0x83 0x00 0x10 0x11
+0x83 0x00 0x10 0x91
+
+# CHECK: add w3, w4, #1024
+# CHECK: add x3, x4, #1024
+
+0x83 0x00 0x50 0x11
+0x83 0x00 0x40 0x11
+0x83 0x00 0x50 0x91
+0x83 0x00 0x40 0x91
+0xff 0x83 0x00 0x91
+
+# CHECK: add w3, w4, #1024, lsl #12
+# CHECK: add w3, w4, #0, lsl #12
+# CHECK: add x3, x4, #1024, lsl #12
+# CHECK: add x3, x4, #0, lsl #12
+# CHECK: add sp, sp, #32
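
Annotation (not part of the patch): the optionally-shifted immediates above decode mechanically. Bit 22 is the shift flag and bits 21:10 hold the raw imm12, so the effective value is imm12 << (12 * sh). Sketch (Python, illustration only):

    def addsub_imm(insn):
        sh = (insn >> 22) & 1
        return ((insn >> 10) & 0xfff) << (12 * sh)

    print(hex(addsub_imm(0x11500083)))  # 0x400000 -> add w3, w4, #1024, lsl #12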
+
+0x83 0x00 0x10 0x31
+0x83 0x00 0x50 0x31
+0x83 0x00 0x10 0xb1
+0x83 0x00 0x50 0xb1
+0xff 0x83 0x00 0xb1
+
+# CHECK: adds w3, w4, #1024
+# CHECK: adds w3, w4, #1024, lsl #12
+# CHECK: adds x3, x4, #1024
+# CHECK: adds x3, x4, #1024, lsl #12
+# CHECK: cmn sp, #32
+
+0x83 0x00 0x10 0x51
+0x83 0x00 0x50 0x51
+0x83 0x00 0x10 0xd1
+0x83 0x00 0x50 0xd1
+0xff 0x83 0x00 0xd1
+
+# CHECK: sub w3, w4, #1024
+# CHECK: sub w3, w4, #1024, lsl #12
+# CHECK: sub x3, x4, #1024
+# CHECK: sub x3, x4, #1024, lsl #12
+# CHECK: sub sp, sp, #32
+
+0x83 0x00 0x10 0x71
+0x83 0x00 0x50 0x71
+0x83 0x00 0x10 0xf1
+0x83 0x00 0x50 0xf1
+0xff 0x83 0x00 0xf1
+
+# CHECK: subs w3, w4, #1024
+# CHECK: subs w3, w4, #1024, lsl #12
+# CHECK: subs x3, x4, #1024
+# CHECK: subs x3, x4, #1024, lsl #12
+# CHECK: cmp sp, #32
+
+#==---------------------------------------------------------------------------==
+# Add/Subtract register with (optional) shift
+#==---------------------------------------------------------------------------==
+
+0xac 0x01 0x0e 0x0b
+0xac 0x01 0x0e 0x8b
+0xac 0x31 0x0e 0x0b
+0xac 0x31 0x0e 0x8b
+0xac 0x29 0x4e 0x0b
+0xac 0x29 0x4e 0x8b
+0xac 0x1d 0x8e 0x0b
+0xac 0x9d 0x8e 0x8b
+
+# CHECK: add w12, w13, w14
+# CHECK: add x12, x13, x14
+# CHECK: add w12, w13, w14, lsl #12
+# CHECK: add x12, x13, x14, lsl #12
+# CHECK: add w12, w13, w14, lsr #10
+# CHECK: add x12, x13, x14, lsr #10
+# CHECK: add w12, w13, w14, asr #7
+# CHECK: add x12, x13, x14, asr #39
+
+0xac 0x01 0x0e 0x4b
+0xac 0x01 0x0e 0xcb
+0xac 0x31 0x0e 0x4b
+0xac 0x31 0x0e 0xcb
+0xac 0x29 0x4e 0x4b
+0xac 0x29 0x4e 0xcb
+0xac 0x1d 0x8e 0x4b
+0xac 0x9d 0x8e 0xcb
+
+# CHECK: sub w12, w13, w14
+# CHECK: sub x12, x13, x14
+# CHECK: sub w12, w13, w14, lsl #12
+# CHECK: sub x12, x13, x14, lsl #12
+# CHECK: sub w12, w13, w14, lsr #10
+# CHECK: sub x12, x13, x14, lsr #10
+# CHECK: sub w12, w13, w14, asr #7
+# CHECK: sub x12, x13, x14, asr #39
+
+0xac 0x01 0x0e 0x2b
+0xac 0x01 0x0e 0xab
+0xac 0x31 0x0e 0x2b
+0xac 0x31 0x0e 0xab
+0xac 0x29 0x4e 0x2b
+0xac 0x29 0x4e 0xab
+0xac 0x1d 0x8e 0x2b
+0xac 0x9d 0x8e 0xab
+
+# CHECK: adds w12, w13, w14
+# CHECK: adds x12, x13, x14
+# CHECK: adds w12, w13, w14, lsl #12
+# CHECK: adds x12, x13, x14, lsl #12
+# CHECK: adds w12, w13, w14, lsr #10
+# CHECK: adds x12, x13, x14, lsr #10
+# CHECK: adds w12, w13, w14, asr #7
+# CHECK: adds x12, x13, x14, asr #39
+
+0xac 0x01 0x0e 0x6b
+0xac 0x01 0x0e 0xeb
+0xac 0x31 0x0e 0x6b
+0xac 0x31 0x0e 0xeb
+0xac 0x29 0x4e 0x6b
+0xac 0x29 0x4e 0xeb
+0xac 0x1d 0x8e 0x6b
+0xac 0x9d 0x8e 0xeb
+
+# CHECK: subs w12, w13, w14
+# CHECK: subs x12, x13, x14
+# CHECK: subs w12, w13, w14, lsl #12
+# CHECK: subs x12, x13, x14, lsl #12
+# CHECK: subs w12, w13, w14, lsr #10
+# CHECK: subs x12, x13, x14, lsr #10
+# CHECK: subs w12, w13, w14, asr #7
+# CHECK: subs x12, x13, x14, asr #39
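
Annotation (not part of the patch): in all four blocks above the shift type comes from bits 23:22 (00 lsl, 01 lsr, 10 asr; 11 is reserved for add/sub) and the amount from imm6 at bits 15:10, which is how the 64-bit forms reach asr #39. Sketch (Python, illustration only):

    SHIFTS = {0: "lsl", 1: "lsr", 2: "asr"}  # 3 is reserved for add/sub

    def shift_of(insn):
        return SHIFTS[(insn >> 22) & 3], (insn >> 10) & 0x3f

    print(shift_of(0x8b8e9dac))  # ('asr', 39) -> add x12, x13, x14, asr #39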
+
+#==---------------------------------------------------------------------------==
+# Add/Subtract with (optional) extend
+#==---------------------------------------------------------------------------==
+
+0x41 0x00 0x23 0x0b
+0x41 0x20 0x23 0x0b
+0x41 0x40 0x23 0x0b
+0x41 0x60 0x23 0x0b
+0x41 0x80 0x23 0x0b
+0x41 0xa0 0x23 0x0b
+0x41 0xc0 0x23 0x0b
+0x41 0xe0 0x23 0x0b
+
+# CHECK: add w1, w2, w3, uxtb
+# CHECK: add w1, w2, w3, uxth
+# CHECK: add w1, w2, w3
+# CHECK: add w1, w2, w3, uxtx
+# CHECK: add w1, w2, w3, sxtb
+# CHECK: add w1, w2, w3, sxth
+# CHECK: add w1, w2, w3, sxtw
+# CHECK: add w1, w2, w3, sxtx
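
Annotation (not part of the patch): the extend option lives in bits 15:13 and the left-shift amount in imm3 (bits 12:10). The third CHECK above prints a plain "add w1, w2, w3" because option 0b010 (uxtw) with amount 0 is the canonical no-op extend for a 32-bit destination. Sketch (Python, illustration only):

    OPTIONS = ["uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx"]

    def extend_of(insn):
        return OPTIONS[(insn >> 13) & 7], (insn >> 10) & 7

    print(extend_of(0x0b23e041))  # ('sxtx', 0) -> add w1, w2, w3, sxtx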
+
+0x41 0x00 0x23 0x8b
+0x41 0x20 0x23 0x8b
+0x41 0x40 0x23 0x8b
+0x41 0x80 0x23 0x8b
+0x41 0xa0 0x23 0x8b
+0x41 0xc0 0x23 0x8b
+
+# CHECK: add x1, x2, w3, uxtb
+# CHECK: add x1, x2, w3, uxth
+# CHECK: add x1, x2, w3, uxtw
+# CHECK: add x1, x2, w3, sxtb
+# CHECK: add x1, x2, w3, sxth
+# CHECK: add x1, x2, w3, sxtw
+
+0xe1 0x43 0x23 0x0b
+0xe1 0x43 0x23 0x0b
+0x5f 0x60 0x23 0x8b
+0x5f 0x60 0x23 0x8b
+
+# CHECK: add w1, wsp, w3
+# CHECK: add w1, wsp, w3
+# CHECK: add sp, x2, x3
+# CHECK: add sp, x2, x3
+
+0x41 0x00 0x23 0x4b
+0x41 0x20 0x23 0x4b
+0x41 0x40 0x23 0x4b
+0x41 0x60 0x23 0x4b
+0x41 0x80 0x23 0x4b
+0x41 0xa0 0x23 0x4b
+0x41 0xc0 0x23 0x4b
+0x41 0xe0 0x23 0x4b
+
+# CHECK: sub w1, w2, w3, uxtb
+# CHECK: sub w1, w2, w3, uxth
+# CHECK: sub w1, w2, w3
+# CHECK: sub w1, w2, w3, uxtx
+# CHECK: sub w1, w2, w3, sxtb
+# CHECK: sub w1, w2, w3, sxth
+# CHECK: sub w1, w2, w3, sxtw
+# CHECK: sub w1, w2, w3, sxtx
+
+0x41 0x00 0x23 0xcb
+0x41 0x20 0x23 0xcb
+0x41 0x40 0x23 0xcb
+0x41 0x80 0x23 0xcb
+0x41 0xa0 0x23 0xcb
+0x41 0xc0 0x23 0xcb
+
+# CHECK: sub x1, x2, w3, uxtb
+# CHECK: sub x1, x2, w3, uxth
+# CHECK: sub x1, x2, w3, uxtw
+# CHECK: sub x1, x2, w3, sxtb
+# CHECK: sub x1, x2, w3, sxth
+# CHECK: sub x1, x2, w3, sxtw
+
+0xe1 0x43 0x23 0x4b
+0xe1 0x43 0x23 0x4b
+0x5f 0x60 0x23 0xcb
+0x5f 0x60 0x23 0xcb
+
+# CHECK: sub w1, wsp, w3
+# CHECK: sub w1, wsp, w3
+# CHECK: sub sp, x2, x3
+# CHECK: sub sp, x2, x3
+
+0x41 0x00 0x23 0x2b
+0x41 0x20 0x23 0x2b
+0x41 0x40 0x23 0x2b
+0x41 0x60 0x23 0x2b
+0x41 0x80 0x23 0x2b
+0x41 0xa0 0x23 0x2b
+0x41 0xc0 0x23 0x2b
+0x41 0xe0 0x23 0x2b
+
+# CHECK: adds w1, w2, w3, uxtb
+# CHECK: adds w1, w2, w3, uxth
+# CHECK: adds w1, w2, w3
+# CHECK: adds w1, w2, w3, uxtx
+# CHECK: adds w1, w2, w3, sxtb
+# CHECK: adds w1, w2, w3, sxth
+# CHECK: adds w1, w2, w3, sxtw
+# CHECK: adds w1, w2, w3, sxtx
+
+0x41 0x00 0x23 0xab
+0x41 0x20 0x23 0xab
+0x41 0x40 0x23 0xab
+0x41 0x80 0x23 0xab
+0x41 0xa0 0x23 0xab
+0x41 0xc0 0x23 0xab
+
+# CHECK: adds x1, x2, w3, uxtb
+# CHECK: adds x1, x2, w3, uxth
+# CHECK: adds x1, x2, w3, uxtw
+# CHECK: adds x1, x2, w3, sxtb
+# CHECK: adds x1, x2, w3, sxth
+# CHECK: adds x1, x2, w3, sxtw
+
+0xe1 0x43 0x23 0x2b
+0xe1 0x43 0x23 0x2b
+
+# CHECK: adds w1, wsp, w3
+# CHECK: adds w1, wsp, w3
+
+0x41 0x00 0x23 0x6b
+0x41 0x20 0x23 0x6b
+0x41 0x40 0x23 0x6b
+0x41 0x60 0x23 0x6b
+0x41 0x80 0x23 0x6b
+0x41 0xa0 0x23 0x6b
+0x41 0xc0 0x23 0x6b
+0x41 0xe0 0x23 0x6b
+
+# CHECK: subs w1, w2, w3, uxtb
+# CHECK: subs w1, w2, w3, uxth
+# CHECK: subs w1, w2, w3
+# CHECK: subs w1, w2, w3, uxtx
+# CHECK: subs w1, w2, w3, sxtb
+# CHECK: subs w1, w2, w3, sxth
+# CHECK: subs w1, w2, w3, sxtw
+# CHECK: subs w1, w2, w3, sxtx
+
+0x41 0x00 0x23 0xeb
+0x41 0x20 0x23 0xeb
+0x41 0x40 0x23 0xeb
+0x41 0x80 0x23 0xeb
+0x41 0xa0 0x23 0xeb
+0x41 0xc0 0x23 0xeb
+
+# CHECK: subs x1, x2, w3, uxtb
+# CHECK: subs x1, x2, w3, uxth
+# CHECK: subs x1, x2, w3, uxtw
+# CHECK: subs x1, x2, w3, sxtb
+# CHECK: subs x1, x2, w3, sxth
+# CHECK: subs x1, x2, w3, sxtw
+
+0xe1 0x43 0x23 0x6b
+0xe1 0x43 0x23 0x6b
+
+# CHECK: subs w1, wsp, w3
+# CHECK: subs w1, wsp, w3
+
+0x1f 0x41 0x28 0xeb
+0x3f 0x41 0x28 0x6b
+0xff 0x43 0x28 0x6b
+0xff 0x43 0x28 0xeb
+
+# CHECK: cmp x8, w8, uxtw
+# CHECK: cmp w9, w8, uxtw
+# CHECK: cmp wsp, w8
+# CHECK: cmp sp, w8
+
+0x3f 0x41 0x28 0x4b
+0xe1 0x43 0x28 0x4b
+0xff 0x43 0x28 0x4b
+0x3f 0x41 0x28 0xcb
+0xe1 0x43 0x28 0xcb
+0xff 0x43 0x28 0xcb
+0xe1 0x43 0x28 0x6b
+0xe1 0x43 0x28 0xeb
+
+# CHECK: sub wsp, w9, w8
+# CHECK: sub w1, wsp, w8
+# CHECK: sub wsp, wsp, w8
+# CHECK: sub sp, x9, w8
+# CHECK: sub x1, sp, w8
+# CHECK: sub sp, sp, w8
+# CHECK: subs w1, wsp, w8
+# CHECK: subs x1, sp, w8
+
+#==---------------------------------------------------------------------------==
+# Signed/Unsigned divide
+#==---------------------------------------------------------------------------==
+
+0x41 0x0c 0xc3 0x1a
+0x41 0x0c 0xc3 0x9a
+0x41 0x08 0xc3 0x1a
+0x41 0x08 0xc3 0x9a
+
+# CHECK: sdiv w1, w2, w3
+# CHECK: sdiv x1, x2, x3
+# CHECK: udiv w1, w2, w3
+# CHECK: udiv x1, x2, x3
+
+#==---------------------------------------------------------------------------==
+# Variable shifts
+#==---------------------------------------------------------------------------==
+
+ 0x41 0x28 0xc3 0x1a
+# CHECK: asr w1, w2, w3
+ 0x41 0x28 0xc3 0x9a
+# CHECK: asr x1, x2, x3
+ 0x41 0x20 0xc3 0x1a
+# CHECK: lsl w1, w2, w3
+ 0x41 0x20 0xc3 0x9a
+# CHECK: lsl x1, x2, x3
+ 0x41 0x24 0xc3 0x1a
+# CHECK: lsr w1, w2, w3
+ 0x41 0x24 0xc3 0x9a
+# CHECK: lsr x1, x2, x3
+ 0x41 0x2c 0xc3 0x1a
+# CHECK: ror w1, w2, w3
+ 0x41 0x2c 0xc3 0x9a
+# CHECK: ror x1, x2, x3
+
+#==---------------------------------------------------------------------------==
+# One operand instructions
+#==---------------------------------------------------------------------------==
+
+ 0x41 0x14 0xc0 0x5a
+# CHECK: cls w1, w2
+ 0x41 0x14 0xc0 0xda
+# CHECK: cls x1, x2
+ 0x41 0x10 0xc0 0x5a
+# CHECK: clz w1, w2
+ 0x41 0x10 0xc0 0xda
+# CHECK: clz x1, x2
+ 0x41 0x00 0xc0 0x5a
+# CHECK: rbit w1, w2
+ 0x41 0x00 0xc0 0xda
+# CHECK: rbit x1, x2
+ 0x41 0x08 0xc0 0x5a
+# CHECK: rev w1, w2
+ 0x41 0x0c 0xc0 0xda
+# CHECK: rev x1, x2
+ 0x41 0x04 0xc0 0x5a
+# CHECK: rev16 w1, w2
+ 0x41 0x04 0xc0 0xda
+# CHECK: rev16 x1, x2
+ 0x41 0x08 0xc0 0xda
+# CHECK: rev32 x1, x2
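
Annotation (not part of the patch): the rev family differs only in chunk size; rev16 byte-swaps each halfword, rev the whole register, and rev32 each 32-bit word of an x register. Sketch of the 32-bit swap (Python, illustration only):

    def rev32(v):  # the value transformation behind "rev w1, w2"
        return int.from_bytes((v & 0xffffffff).to_bytes(4, "little"), "big")

    print(hex(rev32(0x11223344)))  # 0x44332211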
+
+#==---------------------------------------------------------------------------==
+# 6.6.1 Multiply-add instructions
+#==---------------------------------------------------------------------------==
+
+0x41 0x10 0x03 0x1b
+0x41 0x10 0x03 0x9b
+0x41 0x90 0x03 0x1b
+0x41 0x90 0x03 0x9b
+0x41 0x10 0x23 0x9b
+0x41 0x90 0x23 0x9b
+0x41 0x10 0xa3 0x9b
+0x41 0x90 0xa3 0x9b
+
+# CHECK: madd w1, w2, w3, w4
+# CHECK: madd x1, x2, x3, x4
+# CHECK: msub w1, w2, w3, w4
+# CHECK: msub x1, x2, x3, x4
+# CHECK: smaddl x1, w2, w3, x4
+# CHECK: smsubl x1, w2, w3, x4
+# CHECK: umaddl x1, w2, w3, x4
+# CHECK: umsubl x1, w2, w3, x4
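
Annotation (not part of the patch): the widening forms here take 32-bit sources and a 64-bit accumulator; smaddl Xd, Wn, Wm, Xa computes Xa + sext(Wn) * sext(Wm), and umaddl uses zero-extension instead. Sketch (Python, illustration only; helper names are mine):

    def sext32(v):
        v &= 0xffffffff
        return v - (1 << 32) if v & 0x80000000 else v

    def smaddl(xa, wn, wm):
        return (xa + sext32(wn) * sext32(wm)) & (1 << 64) - 1

    print(smaddl(10, 0xffffffff, 3))  # 7, i.e. 10 + (-1) * 3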
+
+#==---------------------------------------------------------------------------==
+# Multiply-high instructions
+#==---------------------------------------------------------------------------==
+
+0x41 0x7c 0x43 0x9b
+0x41 0x7c 0xc3 0x9b
+
+# CHECK: smulh x1, x2, x3
+# CHECK: umulh x1, x2, x3
+
+#==---------------------------------------------------------------------------==
+# Move immediate instructions
+#==---------------------------------------------------------------------------==
+
+0x20 0x00 0x80 0x52
+0x20 0x00 0x80 0xd2
+0x20 0x00 0xa0 0x52
+0x20 0x00 0xa0 0xd2
+
+# CHECK: movz w0, #0x1
+# CHECK: movz x0, #0x1
+# CHECK: movz w0, #0x1, lsl #16
+# CHECK: movz x0, #0x1, lsl #16
+
+0x40 0x00 0x80 0x12
+0x40 0x00 0x80 0x92
+0x40 0x00 0xa0 0x12
+0x40 0x00 0xa0 0x92
+
+# CHECK: movn w0, #0x2
+# CHECK: movn x0, #0x2
+# CHECK: movn w0, #0x2, lsl #16
+# CHECK: movn x0, #0x2, lsl #16
+
+0x20 0x00 0x80 0x72
+0x20 0x00 0x80 0xf2
+0x20 0x00 0xa0 0x72
+0x20 0x00 0xa0 0xf2
+
+# CHECK: movk w0, #0x1
+# CHECK: movk x0, #0x1
+# CHECK: movk w0, #0x1, lsl #16
+# CHECK: movk x0, #0x1, lsl #16
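
Annotation (not part of the patch), on the semantics being exercised: movz zeroes the register before inserting the shifted imm16, movn inserts the bitwise NOT, and movk replaces one 16-bit lane while keeping the rest. Sketch (Python, illustration only):

    def movz(imm16, hw):
        return imm16 << (16 * hw)

    def movn(imm16, hw, bits=64):
        return ~movz(imm16, hw) & (1 << bits) - 1

    def movk(old, imm16, hw):  # keep all lanes except lane hw
        return (old & ~(0xffff << (16 * hw))) | (imm16 << (16 * hw))

    print(hex(movz(1, 1)))      # 0x10000    -> movz w0, #0x1, lsl #16
    print(hex(movn(2, 0, 32)))  # 0xfffffffd -> movn w0, #0x2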
+
+#==---------------------------------------------------------------------------==
+# Conditionally set flags instructions
+#==---------------------------------------------------------------------------==
+
+ 0x1f 0x00 0x00 0x31
+# CHECK: cmn w0, #0
+ 0x1f 0xfc 0x03 0xb1
+# CHECK: cmn x0, #255
+
+ 0x23 0x08 0x42 0x3a
+# CHECK: ccmn w1, #2, #3, eq
+ 0x23 0x08 0x42 0xba
+# CHECK: ccmn x1, #2, #3, eq
+ 0x23 0x08 0x42 0x7a
+# CHECK: ccmp w1, #2, #3, eq
+ 0x23 0x08 0x42 0xfa
+# CHECK: ccmp x1, #2, #3, eq
+
+ 0x23 0x00 0x42 0x3a
+# CHECK: ccmn w1, w2, #3, eq
+ 0x23 0x00 0x42 0xba
+# CHECK: ccmn x1, x2, #3, eq
+ 0x23 0x00 0x42 0x7a
+# CHECK: ccmp w1, w2, #3, eq
+ 0x23 0x00 0x42 0xfa
+# CHECK: ccmp x1, x2, #3, eq
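
Annotation (not part of the patch): conditional compares pack three operands besides Rn; nzcv (the flags written when the condition fails) sits in bits 3:0, the condition in bits 15:12, and imm5/Rm in bits 20:16. Sketch (Python, illustration only):

    CONDS = ["eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
             "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"]

    def ccmp_fields(insn):
        return (insn & 0xf, CONDS[(insn >> 12) & 0xf],
                (insn >> 16) & 0x1f, (insn >> 5) & 0x1f)

    print(ccmp_fields(0x3a420823))  # (3, 'eq', 2, 1) -> ccmn w1, #2, #3, eq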
+
+#==---------------------------------------------------------------------------==
+# Conditional select instructions
+#==---------------------------------------------------------------------------==
+
+ 0x41 0x00 0x83 0x1a
+# CHECK: csel w1, w2, w3, eq
+ 0x41 0x00 0x83 0x9a
+# CHECK: csel x1, x2, x3, eq
+ 0x41 0x04 0x83 0x1a
+# CHECK: csinc w1, w2, w3, eq
+ 0x41 0x04 0x83 0x9a
+# CHECK: csinc x1, x2, x3, eq
+ 0x41 0x00 0x83 0x5a
+# CHECK: csinv w1, w2, w3, eq
+ 0x41 0x00 0x83 0xda
+# CHECK: csinv x1, x2, x3, eq
+ 0x41 0x04 0x83 0x5a
+# CHECK: csneg w1, w2, w3, eq
+ 0x41 0x04 0x83 0xda
+# CHECK: csneg x1, x2, x3, eq
diff --git a/test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt
new file mode 100644
index 0000000..0e15af6
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-basic-a64-undefined.txt
@@ -0,0 +1,31 @@
+# These tests each spawn another process, so they're rather expensive; keep the list short.
+
+# LDR/STR: undefined if option field is 10x or 00x.
+# RUN: echo "0x00 0x08 0x20 0xf8" | llvm-mc -triple arm64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x88 0x20 0xf8" | llvm-mc -triple arm64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the add/sub (extended register) sheet, but with
+# invalid shift amount or "opt" field.
+# RUN: echo "0x00 0x10 0xa0 0x0b" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0x60 0x0b" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x14 0x20 0x0b" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+
+# MOVK with sf == 0 and hw<1> == 1 is unallocated.
+# RUN: echo "0x00 0x00 0xc0 0x72" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+
+# ADD/SUB (shifted register) are reserved if shift == '11', or if sf == '0' and imm6<5> == '1'.
+# RUN: echo "0x00 0x00 0xc0 0xeb" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x80 0x80 0x6b" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+
+# UBFM is undefined when sf == 0 and imms<5> or immr<5> is 1.
+# RUN: echo "0x00 0x80 0x00 0x53" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+
+# EXT on vectors of i8 must have imm<3> = 0.
+# RUN: echo "0x00 0x40 0x00 0x2e" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+
+# SCVTF on fixed point W-registers is undefined if scale<5> == 0.
+# Same with FCVTZS and FCVTZU.
+# RUN: echo "0x00 0x00 0x02 0x1e" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x00 0x18 0x1e" | llvm-mc -triple=arm64 -disassemble 2>&1 | FileCheck %s
+
+# CHECK: invalid instruction encoding
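
Annotation (not part of the patch): the first rule above amounts to a single-bit test. For register-offset LDR/STR the extend option sits in bits 15:13, and only values with option<1> set (uxtw, lsl/uxtx, sxtw, sxtx) are allocated. Sketch covering just that rule (Python, illustration only; the helper name is mine):

    def reg_offset_option_ok(insn):
        return ((insn >> 14) & 1) == 1  # option<1> must be set

    print(reg_offset_option_ok(0xf8200800))  # False -> invalid encoding
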
diff --git a/test/MC/Disassembler/AArch64/arm64-bitfield.txt b/test/MC/Disassembler/AArch64/arm64-bitfield.txt
new file mode 100644
index 0000000..d620cb3
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-bitfield.txt
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
+
+#==---------------------------------------------------------------------------==
+# 5.4.4 Bitfield Operations
+#==---------------------------------------------------------------------------==
+
+0x41 0x3c 0x01 0x33
+0x41 0x3c 0x41 0xb3
+0x41 0x3c 0x01 0x13
+0x41 0x3c 0x41 0x93
+0x41 0x3c 0x01 0x53
+0x41 0x3c 0x41 0xd3
+
+# CHECK: bfxil w1, w2, #1, #15
+# CHECK: bfxil x1, x2, #1, #15
+# CHECK: sbfx w1, w2, #1, #15
+# CHECK: sbfx x1, x2, #1, #15
+# CHECK: ubfx w1, w2, #1, #15
+# CHECK: ubfx x1, x2, #1, #15
+
+#==---------------------------------------------------------------------------==
+# 5.4.5 Extract (immediate)
+#==---------------------------------------------------------------------------==
+
+0x41 0x3c 0x83 0x13
+0x62 0x04 0xc4 0x93
+
+# CHECK: extr w1, w2, w3, #15
+# CHECK: extr x2, x3, x4, #1
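
Annotation (not part of the patch): extr concatenates Rn:Rm and extracts <width> bits starting at #lsb; with Rn == Rm it degenerates into the ror alias seen in the basic-a64 updates later in this commit. Sketch (Python, illustration only):

    def extr(rn, rm, lsb, width=32):
        mask = (1 << width) - 1
        return (((rn & mask) << width | (rm & mask)) >> lsb) & mask

    print(hex(extr(0x12345678, 0x9abcdef0, 15)))  # 0xacf13579
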
diff --git a/test/MC/Disassembler/AArch64/arm64-branch.txt b/test/MC/Disassembler/AArch64/arm64-branch.txt
new file mode 100644
index 0000000..6af1ad8
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-branch.txt
@@ -0,0 +1,75 @@
+# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
+
+#-----------------------------------------------------------------------------
+# Unconditional branch (register) instructions.
+#-----------------------------------------------------------------------------
+
+ 0xc0 0x03 0x5f 0xd6
+# CHECK: ret
+ 0x20 0x00 0x5f 0xd6
+# CHECK: ret x1
+ 0xe0 0x03 0xbf 0xd6
+# CHECK: drps
+ 0xe0 0x03 0x9f 0xd6
+# CHECK: eret
+ 0xa0 0x00 0x1f 0xd6
+# CHECK: br x5
+ 0x20 0x01 0x3f 0xd6
+# CHECK: blr x9
+ 0x0B 0x00 0x18 0x37
+# CHECK: tbnz w11, #3, #0
+
+#-----------------------------------------------------------------------------
+# Exception generation instructions.
+#-----------------------------------------------------------------------------
+
+ 0x20 0x00 0x20 0xd4
+# CHECK: brk #0x1
+ 0x41 0x00 0xa0 0xd4
+# CHECK: dcps1 #0x2
+ 0x62 0x00 0xa0 0xd4
+# CHECK: dcps2 #0x3
+ 0x83 0x00 0xa0 0xd4
+# CHECK: dcps3 #0x4
+ 0xa0 0x00 0x40 0xd4
+# CHECK: hlt #0x5
+ 0xc2 0x00 0x00 0xd4
+# CHECK: hvc #0x6
+ 0xe3 0x00 0x00 0xd4
+# CHECK: smc #0x7
+ 0x01 0x01 0x00 0xd4
+# CHECK: svc #0x8
+
+#-----------------------------------------------------------------------------
+# PC-relative branches (both positive and negative displacement)
+#-----------------------------------------------------------------------------
+
+ 0x07 0x00 0x00 0x14
+# CHECK: b #28
+ 0x06 0x00 0x00 0x94
+# CHECK: bl #24
+ 0xa1 0x00 0x00 0x54
+# CHECK: b.ne #20
+ 0x80 0x00 0x08 0x36
+# CHECK: tbz w0, #1, #16
+ 0xe1 0xff 0xf7 0x36
+# CHECK: tbz w1, #30, #-4
+ 0x60 0x00 0x08 0x37
+# CHECK: tbnz w0, #1, #12
+ 0x40 0x00 0x00 0xb4
+# CHECK: cbz x0, #8
+ 0x20 0x00 0x00 0xb5
+# CHECK: cbnz x0, #4
+ 0x1f 0x20 0x03 0xd5
+# CHECK: nop
+ 0xff 0xff 0xff 0x17
+# CHECK: b #-4
+ 0xc1 0xff 0xff 0x54
+# CHECK: b.ne #-8
+ 0xa0 0xff 0x0f 0x36
+# CHECK: tbz w0, #1, #-12
+ 0x80 0xff 0xff 0xb4
+# CHECK: cbz x0, #-16
+ 0x1f 0x20 0x03 0xd5
+# CHECK: nop
+
diff --git a/test/MC/Disassembler/AArch64/arm64-canonical-form.txt b/test/MC/Disassembler/AArch64/arm64-canonical-form.txt
new file mode 100644
index 0000000..1c94b13
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-canonical-form.txt
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon --disassemble < %s | FileCheck %s
+
+0x00 0x08 0x00 0xc8
+
+# CHECK: stxr w0, x0, [x0]
+
+0x00 0x00 0x40 0x9b
+
+# CHECK: smulh x0, x0, x0
+
+0x08 0x20 0x21 0x1e
+
+# CHECK: fcmp s0, #0.0
+
+0x1f 0x00 0x00 0x11
+
+# CHECK: mov wsp, w0
+
+0x00 0x7c 0x00 0x13
+
+# CHECK: asr w0, w0, #0
diff --git a/test/MC/Disassembler/AArch64/arm64-crc32.txt b/test/MC/Disassembler/AArch64/arm64-crc32.txt
new file mode 100644
index 0000000..51717ee
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-crc32.txt
@@ -0,0 +1,18 @@
+# RUN: llvm-mc -triple=arm64 -mattr=+crc -disassemble < %s | FileCheck %s
+
+# CHECK: crc32b w5, w7, w20
+# CHECK: crc32h w28, wzr, w30
+# CHECK: crc32w w0, w1, w2
+# CHECK: crc32x w7, w9, x20
+# CHECK: crc32cb w9, w5, w4
+# CHECK: crc32ch w13, w17, w25
+# CHECK: crc32cw wzr, w3, w5
+# CHECK: crc32cx w18, w16, xzr
+0xe5 0x40 0xd4 0x1a
+0xfc 0x47 0xde 0x1a
+0x20 0x48 0xc2 0x1a
+0x27 0x4d 0xd4 0x9a
+0xa9 0x50 0xc4 0x1a
+0x2d 0x56 0xd9 0x1a
+0x7f 0x58 0xc5 0x1a
+0x12 0x5e 0xdf 0x9a
diff --git a/test/MC/Disassembler/AArch64/arm64-crypto.txt b/test/MC/Disassembler/AArch64/arm64-crypto.txt
new file mode 100644
index 0000000..b905b92
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-crypto.txt
@@ -0,0 +1,47 @@
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 --disassemble < %s | FileCheck %s --check-prefix=CHECK-APPLE
+
+ 0x20 0x48 0x28 0x4e
+ 0x20 0x58 0x28 0x4e
+ 0x20 0x68 0x28 0x4e
+ 0x20 0x78 0x28 0x4e
+ 0x20 0x00 0x02 0x5e
+ 0x20 0x10 0x02 0x5e
+ 0x20 0x20 0x02 0x5e
+ 0x20 0x30 0x02 0x5e
+ 0x20 0x40 0x02 0x5e
+ 0x20 0x50 0x02 0x5e
+ 0x20 0x60 0x02 0x5e
+ 0x20 0x08 0x28 0x5e
+ 0x20 0x18 0x28 0x5e
+ 0x20 0x28 0x28 0x5e
+
+# CHECK: aese v0.16b, v1.16b
+# CHECK: aesd v0.16b, v1.16b
+# CHECK: aesmc v0.16b, v1.16b
+# CHECK: aesimc v0.16b, v1.16b
+# CHECK: sha1c q0, s1, v2.4s
+# CHECK: sha1p q0, s1, v2.4s
+# CHECK: sha1m q0, s1, v2.4s
+# CHECK: sha1su0 v0.4s, v1.4s, v2
+# CHECK: sha256h q0, q1, v2.4s
+# CHECK: sha256h2 q0, q1, v2.4s
+# CHECK: sha256su1 v0.4s, v1.4s, v2.4s
+# CHECK: sha1h s0, s1
+# CHECK: sha1su1 v0.4s, v1.4s
+# CHECK: sha256su0 v0.4s, v1.4s
+
+# CHECK-APPLE: aese.16b v0, v1
+# CHECK-APPLE: aesd.16b v0, v1
+# CHECK-APPLE: aesmc.16b v0, v1
+# CHECK-APPLE: aesimc.16b v0, v1
+# CHECK-APPLE: sha1c.4s q0, s1, v2
+# CHECK-APPLE: sha1p.4s q0, s1, v2
+# CHECK-APPLE: sha1m.4s q0, s1, v2
+# CHECK-APPLE: sha1su0.4s v0, v1, v2
+# CHECK-APPLE: sha256h.4s q0, q1, v2
+# CHECK-APPLE: sha256h2.4s q0, q1, v2
+# CHECK-APPLE: sha256su1.4s v0, v1, v2
+# CHECK-APPLE: sha1h s0, s1
+# CHECK-APPLE: sha1su1.4s v0, v1
+# CHECK-APPLE: sha256su0.4s v0, v1
diff --git a/test/MC/Disassembler/ARM64/invalid-logical.txt b/test/MC/Disassembler/AArch64/arm64-invalid-logical.txt
index 8a4ecb6..8a4ecb6 100644
--- a/test/MC/Disassembler/ARM64/invalid-logical.txt
+++ b/test/MC/Disassembler/AArch64/arm64-invalid-logical.txt
diff --git a/test/MC/Disassembler/AArch64/arm64-logical.txt b/test/MC/Disassembler/AArch64/arm64-logical.txt
new file mode 100644
index 0000000..e3cb3eb
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-logical.txt
@@ -0,0 +1,223 @@
+# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
+
+#==---------------------------------------------------------------------------==
+# 5.4.2 Logical (immediate)
+#==---------------------------------------------------------------------------==
+
+0x00 0x00 0x00 0x12
+0x00 0x00 0x40 0x92
+0x41 0x0c 0x00 0x12
+0x41 0x0c 0x40 0x92
+0xbf 0xec 0x7c 0x92
+0x00 0x00 0x00 0x72
+0x00 0x00 0x40 0xf2
+0x41 0x0c 0x00 0x72
+0x41 0x0c 0x40 0xf2
+0x5f 0x0c 0x40 0xf2
+
+# CHECK: and w0, w0, #0x1
+# CHECK: and x0, x0, #0x1
+# CHECK: and w1, w2, #0xf
+# CHECK: and x1, x2, #0xf
+# CHECK: and sp, x5, #0xfffffffffffffff0
+# CHECK: ands w0, w0, #0x1
+# CHECK: ands x0, x0, #0x1
+# CHECK: ands w1, w2, #0xf
+# CHECK: ands x1, x2, #0xf
+# CHECK: tst x2, #0xf
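
Annotation (not part of the patch): the immediates here are bitmask immediates, rebuilt from N (bit 22), immr (bits 21:16) and imms (bits 15:10) as a run of s+1 ones rotated right by r within an element, then replicated. That is why #0xfffffffffffffff0 is encodable while arbitrary constants are not. Sketch of the standard expansion (Python, illustration only):

    def ror(v, r, size):
        return ((v >> r) | (v << (size - r))) & (1 << size) - 1

    def decode_bitmask(n, immr, imms, width=64):
        length = ((n << 6) | (~imms & 0x3f)).bit_length() - 1
        size = 1 << length                   # element size, 2..64 bits
        s, r = imms & (size - 1), immr & (size - 1)
        elem = ror((1 << (s + 1)) - 1, r, size)
        mask = 0
        for i in range(0, width, size):      # replicate across the register
            mask |= elem << i
        return mask

    # N=1, immr=60, imms=59 is the operand of "and sp, x5, ..." above:
    print(hex(decode_bitmask(1, 60, 59)))    # 0xfffffffffffffff0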
+
+0x41 0x00 0x12 0x52
+0x41 0x00 0x71 0xd2
+0x5f 0x00 0x71 0xd2
+
+# CHECK: eor w1, w2, #0x4000
+# CHECK: eor x1, x2, #0x8000
+# CHECK: eor sp, x2, #0x8000
+
+0x41 0x00 0x12 0x32
+0x41 0x00 0x71 0xb2
+0x5f 0x00 0x71 0xb2
+
+# CHECK: orr w1, w2, #0x4000
+# CHECK: orr x1, x2, #0x8000
+# CHECK: orr sp, x2, #0x8000
+
+#==---------------------------------------------------------------------------==
+# 5.5.3 Logical (shifted register)
+#==---------------------------------------------------------------------------==
+
+0x41 0x00 0x03 0x0a
+0x41 0x00 0x03 0x8a
+0x41 0x08 0x03 0x0a
+0x41 0x08 0x03 0x8a
+0x41 0x08 0x43 0x0a
+0x41 0x08 0x43 0x8a
+0x41 0x08 0x83 0x0a
+0x41 0x08 0x83 0x8a
+0x41 0x08 0xc3 0x0a
+0x41 0x08 0xc3 0x8a
+
+# CHECK: and w1, w2, w3
+# CHECK: and x1, x2, x3
+# CHECK: and w1, w2, w3, lsl #2
+# CHECK: and x1, x2, x3, lsl #2
+# CHECK: and w1, w2, w3, lsr #2
+# CHECK: and x1, x2, x3, lsr #2
+# CHECK: and w1, w2, w3, asr #2
+# CHECK: and x1, x2, x3, asr #2
+# CHECK: and w1, w2, w3, ror #2
+# CHECK: and x1, x2, x3, ror #2
+
+0x41 0x00 0x03 0x6a
+0x41 0x00 0x03 0xea
+0x41 0x08 0x03 0x6a
+0x41 0x08 0x03 0xea
+0x41 0x08 0x43 0x6a
+0x41 0x08 0x43 0xea
+0x41 0x08 0x83 0x6a
+0x41 0x08 0x83 0xea
+0x41 0x08 0xc3 0x6a
+0x41 0x08 0xc3 0xea
+
+# CHECK: ands w1, w2, w3
+# CHECK: ands x1, x2, x3
+# CHECK: ands w1, w2, w3, lsl #2
+# CHECK: ands x1, x2, x3, lsl #2
+# CHECK: ands w1, w2, w3, lsr #2
+# CHECK: ands x1, x2, x3, lsr #2
+# CHECK: ands w1, w2, w3, asr #2
+# CHECK: ands x1, x2, x3, asr #2
+# CHECK: ands w1, w2, w3, ror #2
+# CHECK: ands x1, x2, x3, ror #2
+
+0x41 0x00 0x23 0x0a
+0x41 0x00 0x23 0x8a
+0x41 0x0c 0x23 0x0a
+0x41 0x0c 0x23 0x8a
+0x41 0x0c 0x63 0x0a
+0x41 0x0c 0x63 0x8a
+0x41 0x0c 0xa3 0x0a
+0x41 0x0c 0xa3 0x8a
+0x41 0x0c 0xe3 0x0a
+0x41 0x0c 0xe3 0x8a
+
+# CHECK: bic w1, w2, w3
+# CHECK: bic x1, x2, x3
+# CHECK: bic w1, w2, w3, lsl #3
+# CHECK: bic x1, x2, x3, lsl #3
+# CHECK: bic w1, w2, w3, lsr #3
+# CHECK: bic x1, x2, x3, lsr #3
+# CHECK: bic w1, w2, w3, asr #3
+# CHECK: bic x1, x2, x3, asr #3
+# CHECK: bic w1, w2, w3, ror #3
+# CHECK: bic x1, x2, x3, ror #3
+
+0x41 0x00 0x23 0x6a
+0x41 0x00 0x23 0xea
+0x41 0x0c 0x23 0x6a
+0x41 0x0c 0x23 0xea
+0x41 0x0c 0x63 0x6a
+0x41 0x0c 0x63 0xea
+0x41 0x0c 0xa3 0x6a
+0x41 0x0c 0xa3 0xea
+0x41 0x0c 0xe3 0x6a
+0x41 0x0c 0xe3 0xea
+
+# CHECK: bics w1, w2, w3
+# CHECK: bics x1, x2, x3
+# CHECK: bics w1, w2, w3, lsl #3
+# CHECK: bics x1, x2, x3, lsl #3
+# CHECK: bics w1, w2, w3, lsr #3
+# CHECK: bics x1, x2, x3, lsr #3
+# CHECK: bics w1, w2, w3, asr #3
+# CHECK: bics x1, x2, x3, asr #3
+# CHECK: bics w1, w2, w3, ror #3
+# CHECK: bics x1, x2, x3, ror #3
+
+0x41 0x00 0x23 0x4a
+0x41 0x00 0x23 0xca
+0x41 0x10 0x23 0x4a
+0x41 0x10 0x23 0xca
+0x41 0x10 0x63 0x4a
+0x41 0x10 0x63 0xca
+0x41 0x10 0xa3 0x4a
+0x41 0x10 0xa3 0xca
+0x41 0x10 0xe3 0x4a
+0x41 0x10 0xe3 0xca
+
+# CHECK: eon w1, w2, w3
+# CHECK: eon x1, x2, x3
+# CHECK: eon w1, w2, w3, lsl #4
+# CHECK: eon x1, x2, x3, lsl #4
+# CHECK: eon w1, w2, w3, lsr #4
+# CHECK: eon x1, x2, x3, lsr #4
+# CHECK: eon w1, w2, w3, asr #4
+# CHECK: eon x1, x2, x3, asr #4
+# CHECK: eon w1, w2, w3, ror #4
+# CHECK: eon x1, x2, x3, ror #4
+
+0x41 0x00 0x03 0x4a
+0x41 0x00 0x03 0xca
+0x41 0x14 0x03 0x4a
+0x41 0x14 0x03 0xca
+0x41 0x14 0x43 0x4a
+0x41 0x14 0x43 0xca
+0x41 0x14 0x83 0x4a
+0x41 0x14 0x83 0xca
+0x41 0x14 0xc3 0x4a
+0x41 0x14 0xc3 0xca
+
+# CHECK: eor w1, w2, w3
+# CHECK: eor x1, x2, x3
+# CHECK: eor w1, w2, w3, lsl #5
+# CHECK: eor x1, x2, x3, lsl #5
+# CHECK: eor w1, w2, w3, lsr #5
+# CHECK: eor x1, x2, x3, lsr #5
+# CHECK: eor w1, w2, w3, asr #5
+# CHECK: eor x1, x2, x3, asr #5
+# CHECK: eor w1, w2, w3, ror #5
+# CHECK: eor x1, x2, x3, ror #5
+
+0x41 0x00 0x03 0x2a
+0x41 0x00 0x03 0xaa
+0x41 0x18 0x03 0x2a
+0x41 0x18 0x03 0xaa
+0x41 0x18 0x43 0x2a
+0x41 0x18 0x43 0xaa
+0x41 0x18 0x83 0x2a
+0x41 0x18 0x83 0xaa
+0x41 0x18 0xc3 0x2a
+0x41 0x18 0xc3 0xaa
+
+# CHECK: orr w1, w2, w3
+# CHECK: orr x1, x2, x3
+# CHECK: orr w1, w2, w3, lsl #6
+# CHECK: orr x1, x2, x3, lsl #6
+# CHECK: orr w1, w2, w3, lsr #6
+# CHECK: orr x1, x2, x3, lsr #6
+# CHECK: orr w1, w2, w3, asr #6
+# CHECK: orr x1, x2, x3, asr #6
+# CHECK: orr w1, w2, w3, ror #6
+# CHECK: orr x1, x2, x3, ror #6
+
+0x41 0x00 0x23 0x2a
+0x41 0x00 0x23 0xaa
+0x41 0x1c 0x23 0x2a
+0x41 0x1c 0x23 0xaa
+0x41 0x1c 0x63 0x2a
+0x41 0x1c 0x63 0xaa
+0x41 0x1c 0xa3 0x2a
+0x41 0x1c 0xa3 0xaa
+0x41 0x1c 0xe3 0x2a
+0x41 0x1c 0xe3 0xaa
+
+# CHECK: orn w1, w2, w3
+# CHECK: orn x1, x2, x3
+# CHECK: orn w1, w2, w3, lsl #7
+# CHECK: orn x1, x2, x3, lsl #7
+# CHECK: orn w1, w2, w3, lsr #7
+# CHECK: orn x1, x2, x3, lsr #7
+# CHECK: orn w1, w2, w3, asr #7
+# CHECK: orn x1, x2, x3, asr #7
+# CHECK: orn w1, w2, w3, ror #7
+# CHECK: orn x1, x2, x3, ror #7
diff --git a/test/MC/Disassembler/AArch64/arm64-memory.txt b/test/MC/Disassembler/AArch64/arm64-memory.txt
new file mode 100644
index 0000000..54556a1
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-memory.txt
@@ -0,0 +1,564 @@
+# RUN: llvm-mc --disassemble -triple arm64-apple-darwin < %s | FileCheck %s
+
+#-----------------------------------------------------------------------------
+# Indexed loads
+#-----------------------------------------------------------------------------
+
+ 0x85 0x14 0x40 0xb9
+ 0x64 0x00 0x40 0xf9
+ 0xe2 0x13 0x40 0xf9
+ 0xe5 0x07 0x40 0x3d
+ 0xe6 0x07 0x40 0x7d
+ 0xe7 0x07 0x40 0xbd
+ 0xe8 0x07 0x40 0xfd
+ 0xe9 0x07 0xc0 0x3d
+ 0x64 0x00 0x40 0x39
+ 0x20 0x78 0xa0 0xb8
+ 0x85 0x50 0x40 0x39
+
+# CHECK: ldr w5, [x4, #20]
+# CHECK: ldr x4, [x3]
+# CHECK: ldr x2, [sp, #32]
+# CHECK: ldr b5, [sp, #1]
+# CHECK: ldr h6, [sp, #2]
+# CHECK: ldr s7, [sp, #4]
+# CHECK: ldr d8, [sp, #8]
+# CHECK: ldr q9, [sp, #16]
+# CHECK: ldrb w4, [x3]
+# CHECK: ldrsw x0, [x1, x0, lsl #2]
+# CHECK: ldrb w5, [x4, #20]
+# CHECK: ldrsb w9, [x3]
+# CHECK: ldrsb x2, [sp, #128]
+# CHECK: ldrh w2, [sp, #32]
+# CHECK: ldrsh w3, [sp, #32]
+# CHECK: ldrsh x5, [x9, #24]
+# CHECK: ldrsw x9, [sp, #512]
+# CHECK: prfm pldl3strm, [sp, #32]
+
+ 0x69 0x00 0xc0 0x39
+ 0xe2 0x03 0x82 0x39
+ 0xe2 0x43 0x40 0x79
+ 0xe3 0x43 0xc0 0x79
+ 0x25 0x31 0x80 0x79
+ 0xe9 0x03 0x82 0xb9
+ 0xe5 0x13 0x80 0xf9
+ 0x40 0x00 0x80 0xf9
+ 0x41 0x00 0x80 0xf9
+ 0x42 0x00 0x80 0xf9
+ 0x43 0x00 0x80 0xf9
+ 0x44 0x00 0x80 0xf9
+ 0x45 0x00 0x80 0xf9
+ 0x50 0x00 0x80 0xf9
+ 0x51 0x00 0x80 0xf9
+ 0x52 0x00 0x80 0xf9
+ 0x53 0x00 0x80 0xf9
+ 0x54 0x00 0x80 0xf9
+ 0x55 0x00 0x80 0xf9
+
+# CHECK: prfm pldl1keep, [x2]
+# CHECK: prfm pldl1strm, [x2]
+# CHECK: prfm pldl2keep, [x2]
+# CHECK: prfm pldl2strm, [x2]
+# CHECK: prfm pldl3keep, [x2]
+# CHECK: prfm pldl3strm, [x2]
+# CHECK: prfm pstl1keep, [x2]
+# CHECK: prfm pstl1strm, [x2]
+# CHECK: prfm pstl2keep, [x2]
+# CHECK: prfm pstl2strm, [x2]
+# CHECK: prfm pstl3keep, [x2]
+# CHECK: prfm pstl3strm, [x2]
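
Annotation (not part of the patch): the prfm operand is just the Rt field reinterpreted; bits 4:3 give the type (pld/pli/pst), bits 2:1 the cache level minus one, and bit 0 keep vs strm. Sketch (Python, illustration only):

    TYPES, POLICIES = {0: "pld", 1: "pli", 2: "pst"}, {0: "keep", 1: "strm"}

    def prfop(rt):
        return "%sl%d%s" % (TYPES[(rt >> 3) & 3],
                            ((rt >> 1) & 3) + 1, POLICIES[rt & 1])

    print(prfop(0x15))  # 'pstl3strm' -- the Rt field (0x55 & 0x1f) of the last line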
+
+#-----------------------------------------------------------------------------
+# Indexed stores
+#-----------------------------------------------------------------------------
+
+ 0x64 0x00 0x00 0xf9
+ 0xe2 0x13 0x00 0xf9
+ 0x85 0x14 0x00 0xb9
+ 0xe5 0x07 0x00 0x3d
+ 0xe6 0x07 0x00 0x7d
+ 0xe7 0x07 0x00 0xbd
+ 0xe8 0x07 0x00 0xfd
+ 0xe9 0x07 0x80 0x3d
+ 0x64 0x00 0x00 0x39
+ 0x85 0x50 0x00 0x39
+ 0xe2 0x43 0x00 0x79
+ 0x00 0xe8 0x20 0x38
+ 0x00 0x48 0x20 0x38
+
+# CHECK: str x4, [x3]
+# CHECK: str x2, [sp, #32]
+# CHECK: str w5, [x4, #20]
+# CHECK: str b5, [sp, #1]
+# CHECK: str h6, [sp, #2]
+# CHECK: str s7, [sp, #4]
+# CHECK: str d8, [sp, #8]
+# CHECK: str q9, [sp, #16]
+# CHECK: strb w4, [x3]
+# CHECK: strb w5, [x4, #20]
+# CHECK: strh w2, [sp, #32]
+# CHECK: strb w0, [x0, x0, sxtx]
+# CHECK: strb w0, [x0, w0, uxtw]
+
+#-----------------------------------------------------------------------------
+# Unscaled immediate loads and stores
+#-----------------------------------------------------------------------------
+
+ 0x62 0x00 0x40 0xb8
+ 0xe2 0x83 0x41 0xb8
+ 0x62 0x00 0x40 0xf8
+ 0xe2 0x83 0x41 0xf8
+ 0xe5 0x13 0x40 0x3c
+ 0xe6 0x23 0x40 0x7c
+ 0xe7 0x43 0x40 0xbc
+ 0xe8 0x83 0x40 0xfc
+ 0xe9 0x03 0xc1 0x3c
+ 0x69 0x00 0xc0 0x38
+ 0xe2 0x03 0x88 0x38
+ 0xe3 0x03 0xc2 0x78
+ 0x25 0x81 0x81 0x78
+ 0xe9 0x03 0x98 0xb8
+
+# CHECK: ldur w2, [x3]
+# CHECK: ldur w2, [sp, #24]
+# CHECK: ldur x2, [x3]
+# CHECK: ldur x2, [sp, #24]
+# CHECK: ldur b5, [sp, #1]
+# CHECK: ldur h6, [sp, #2]
+# CHECK: ldur s7, [sp, #4]
+# CHECK: ldur d8, [sp, #8]
+# CHECK: ldur q9, [sp, #16]
+# CHECK: ldursb w9, [x3]
+# CHECK: ldursb x2, [sp, #128]
+# CHECK: ldursh w3, [sp, #32]
+# CHECK: ldursh x5, [x9, #24]
+# CHECK: ldursw x9, [sp, #-128]
+
+ 0x64 0x00 0x00 0xb8
+ 0xe2 0x03 0x02 0xb8
+ 0x64 0x00 0x00 0xf8
+ 0xe2 0x03 0x02 0xf8
+ 0x85 0x40 0x01 0xb8
+ 0xe5 0x13 0x00 0x3c
+ 0xe6 0x23 0x00 0x7c
+ 0xe7 0x43 0x00 0xbc
+ 0xe8 0x83 0x00 0xfc
+ 0xe9 0x03 0x81 0x3c
+ 0x64 0x00 0x00 0x38
+ 0x85 0x40 0x01 0x38
+ 0xe2 0x03 0x02 0x78
+ 0xe5 0x03 0x82 0xf8
+
+# CHECK: stur w4, [x3]
+# CHECK: stur w2, [sp, #32]
+# CHECK: stur x4, [x3]
+# CHECK: stur x2, [sp, #32]
+# CHECK: stur w5, [x4, #20]
+# CHECK: stur b5, [sp, #1]
+# CHECK: stur h6, [sp, #2]
+# CHECK: stur s7, [sp, #4]
+# CHECK: stur d8, [sp, #8]
+# CHECK: stur q9, [sp, #16]
+# CHECK: sturb w4, [x3]
+# CHECK: sturb w5, [x4, #20]
+# CHECK: sturh w2, [sp, #32]
+# CHECK: prfum pldl3strm, [sp, #32]
+
+#-----------------------------------------------------------------------------
+# Unprivileged loads and stores
+#-----------------------------------------------------------------------------
+
+ 0x83 0x08 0x41 0xb8
+ 0x83 0x08 0x41 0xf8
+ 0x83 0x08 0x41 0x38
+ 0x69 0x08 0xc0 0x38
+ 0xe2 0x0b 0x88 0x38
+ 0x83 0x08 0x41 0x78
+ 0xe3 0x0b 0xc2 0x78
+ 0x25 0x89 0x81 0x78
+ 0xe9 0x0b 0x98 0xb8
+
+# CHECK: ldtr w3, [x4, #16]
+# CHECK: ldtr x3, [x4, #16]
+# CHECK: ldtrb w3, [x4, #16]
+# CHECK: ldtrsb w9, [x3]
+# CHECK: ldtrsb x2, [sp, #128]
+# CHECK: ldtrh w3, [x4, #16]
+# CHECK: ldtrsh w3, [sp, #32]
+# CHECK: ldtrsh x5, [x9, #24]
+# CHECK: ldtrsw x9, [sp, #-128]
+
+ 0x85 0x48 0x01 0xb8
+ 0x64 0x08 0x00 0xf8
+ 0xe2 0x0b 0x02 0xf8
+ 0x64 0x08 0x00 0x38
+ 0x85 0x48 0x01 0x38
+ 0xe2 0x0b 0x02 0x78
+
+# CHECK: sttr w5, [x4, #20]
+# CHECK: sttr x4, [x3]
+# CHECK: sttr x2, [sp, #32]
+# CHECK: sttrb w4, [x3]
+# CHECK: sttrb w5, [x4, #20]
+# CHECK: sttrh w2, [sp, #32]
+
+#-----------------------------------------------------------------------------
+# Pre-indexed loads and stores
+#-----------------------------------------------------------------------------
+
+ 0xfd 0x8c 0x40 0xf8
+ 0xfe 0x8c 0x40 0xf8
+ 0x05 0x1c 0x40 0x3c
+ 0x06 0x2c 0x40 0x7c
+ 0x07 0x4c 0x40 0xbc
+ 0x08 0x8c 0x40 0xfc
+ 0x09 0x0c 0xc1 0x3c
+
+# CHECK: ldr x29, [x7, #8]!
+# CHECK: ldr x30, [x7, #8]!
+# CHECK: ldr b5, [x0, #1]!
+# CHECK: ldr h6, [x0, #2]!
+# CHECK: ldr s7, [x0, #4]!
+# CHECK: ldr d8, [x0, #8]!
+# CHECK: ldr q9, [x0, #16]!
+
+ 0xfe 0x8c 0x1f 0xf8
+ 0xfd 0x8c 0x1f 0xf8
+ 0x05 0xfc 0x1f 0x3c
+ 0x06 0xec 0x1f 0x7c
+ 0x07 0xcc 0x1f 0xbc
+ 0x08 0x8c 0x1f 0xfc
+ 0x09 0x0c 0x9f 0x3c
+
+# CHECK: str x30, [x7, #-8]!
+# CHECK: str x29, [x7, #-8]!
+# CHECK: str b5, [x0, #-1]!
+# CHECK: str h6, [x0, #-2]!
+# CHECK: str s7, [x0, #-4]!
+# CHECK: str d8, [x0, #-8]!
+# CHECK: str q9, [x0, #-16]!
+
+#-----------------------------------------------------------------------------
+# post-indexed loads and stores
+#-----------------------------------------------------------------------------
+
+ 0xfe 0x84 0x1f 0xf8
+ 0xfd 0x84 0x1f 0xf8
+ 0x05 0xf4 0x1f 0x3c
+ 0x06 0xe4 0x1f 0x7c
+ 0x07 0xc4 0x1f 0xbc
+ 0x08 0x84 0x1f 0xfc
+ 0x09 0x04 0x9f 0x3c
+
+# CHECK: str x30, [x7], #-8
+# CHECK: str x29, [x7], #-8
+# CHECK: str b5, [x0], #-1
+# CHECK: str h6, [x0], #-2
+# CHECK: str s7, [x0], #-4
+# CHECK: str d8, [x0], #-8
+# CHECK: str q9, [x0], #-16
+
+ 0xfd 0x84 0x40 0xf8
+ 0xfe 0x84 0x40 0xf8
+ 0x05 0x14 0x40 0x3c
+ 0x06 0x24 0x40 0x7c
+ 0x07 0x44 0x40 0xbc
+ 0x08 0x84 0x40 0xfc
+ 0x09 0x04 0xc1 0x3c
+
+# CHECK: ldr x29, [x7], #8
+# CHECK: ldr x30, [x7], #8
+# CHECK: ldr b5, [x0], #1
+# CHECK: ldr h6, [x0], #2
+# CHECK: ldr s7, [x0], #4
+# CHECK: ldr d8, [x0], #8
+# CHECK: ldr q9, [x0], #16
+
+#-----------------------------------------------------------------------------
+# Load/Store pair (indexed offset)
+#-----------------------------------------------------------------------------
+
+ 0xe3 0x09 0x42 0x29
+ 0xe4 0x27 0x7f 0xa9
+ 0xc2 0x0d 0x42 0x69
+ 0xe2 0x0f 0x7e 0x69
+ 0x4a 0x04 0x48 0x2d
+ 0x4a 0x04 0x40 0x6d
+
+# CHECK: ldp w3, w2, [x15, #16]
+# CHECK: ldp x4, x9, [sp, #-16]
+# CHECK: ldpsw x2, x3, [x14, #16]
+# CHECK: ldpsw x2, x3, [sp, #-16]
+# CHECK: ldp s10, s1, [x2, #64]
+# CHECK: ldp d10, d1, [x2]
+
+ 0xe3 0x09 0x02 0x29
+ 0xe4 0x27 0x3f 0xa9
+ 0x4a 0x04 0x08 0x2d
+ 0x4a 0x04 0x00 0x6d
+
+# CHECK: stp w3, w2, [x15, #16]
+# CHECK: stp x4, x9, [sp, #-16]
+# CHECK: stp s10, s1, [x2, #64]
+# CHECK: stp d10, d1, [x2]
+
+#-----------------------------------------------------------------------------
+# Load/Store pair (pre-indexed)
+#-----------------------------------------------------------------------------
+
+ 0xe3 0x09 0xc2 0x29
+ 0xe4 0x27 0xff 0xa9
+ 0xc2 0x0d 0xc2 0x69
+ 0xe2 0x0f 0xfe 0x69
+ 0x4a 0x04 0xc8 0x2d
+ 0x4a 0x04 0xc1 0x6d
+
+# CHECK: ldp w3, w2, [x15, #16]!
+# CHECK: ldp x4, x9, [sp, #-16]!
+# CHECK: ldpsw x2, x3, [x14, #16]!
+# CHECK: ldpsw x2, x3, [sp, #-16]!
+# CHECK: ldp s10, s1, [x2, #64]!
+# CHECK: ldp d10, d1, [x2, #16]!
+
+ 0xe3 0x09 0x82 0x29
+ 0xe4 0x27 0xbf 0xa9
+ 0x4a 0x04 0x88 0x2d
+ 0x4a 0x04 0x81 0x6d
+
+# CHECK: stp w3, w2, [x15, #16]!
+# CHECK: stp x4, x9, [sp, #-16]!
+# CHECK: stp s10, s1, [x2, #64]!
+# CHECK: stp d10, d1, [x2, #16]!
+
+#-----------------------------------------------------------------------------
+# Load/Store pair (post-indexed)
+#-----------------------------------------------------------------------------
+
+ 0xe3 0x09 0xc2 0x28
+ 0xe4 0x27 0xff 0xa8
+ 0xc2 0x0d 0xc2 0x68
+ 0xe2 0x0f 0xfe 0x68
+ 0x4a 0x04 0xc8 0x2c
+ 0x4a 0x04 0xc1 0x6c
+
+# CHECK: ldp w3, w2, [x15], #16
+# CHECK: ldp x4, x9, [sp], #-16
+# CHECK: ldpsw x2, x3, [x14], #16
+# CHECK: ldpsw x2, x3, [sp], #-16
+# CHECK: ldp s10, s1, [x2], #64
+# CHECK: ldp d10, d1, [x2], #16
+
+ 0xe3 0x09 0x82 0x28
+ 0xe4 0x27 0xbf 0xa8
+ 0x4a 0x04 0x88 0x2c
+ 0x4a 0x04 0x81 0x6c
+
+# CHECK: stp w3, w2, [x15], #16
+# CHECK: stp x4, x9, [sp], #-16
+# CHECK: stp s10, s1, [x2], #64
+# CHECK: stp d10, d1, [x2], #16
+
+#-----------------------------------------------------------------------------
+# Load/Store pair (no-allocate)
+#-----------------------------------------------------------------------------
+
+ 0xe3 0x09 0x42 0x28
+ 0xe4 0x27 0x7f 0xa8
+ 0x4a 0x04 0x48 0x2c
+ 0x4a 0x04 0x40 0x6c
+
+# CHECK: ldnp w3, w2, [x15, #16]
+# CHECK: ldnp x4, x9, [sp, #-16]
+# CHECK: ldnp s10, s1, [x2, #64]
+# CHECK: ldnp d10, d1, [x2]
+
+ 0xe3 0x09 0x02 0x28
+ 0xe4 0x27 0x3f 0xa8
+ 0x4a 0x04 0x08 0x2c
+ 0x4a 0x04 0x00 0x6c
+
+# CHECK: stnp w3, w2, [x15, #16]
+# CHECK: stnp x4, x9, [sp, #-16]
+# CHECK: stnp s10, s1, [x2, #64]
+# CHECK: stnp d10, d1, [x2]
+
+#-----------------------------------------------------------------------------
+# Load/Store register offset
+#-----------------------------------------------------------------------------
+
+ 0x00 0x68 0x60 0xb8
+ 0x00 0x78 0x60 0xb8
+ 0x00 0x68 0x60 0xf8
+ 0x00 0x78 0x60 0xf8
+ 0x00 0xe8 0x60 0xf8
+
+# CHECK: ldr w0, [x0, x0]
+# CHECK: ldr w0, [x0, x0, lsl #2]
+# CHECK: ldr x0, [x0, x0]
+# CHECK: ldr x0, [x0, x0, lsl #3]
+# CHECK: ldr x0, [x0, x0, sxtx]
+
+ 0x21 0x68 0x62 0x3c
+ 0x21 0x78 0x62 0x3c
+ 0x21 0x68 0x62 0x7c
+ 0x21 0x78 0x62 0x7c
+ 0x21 0x68 0x62 0xbc
+ 0x21 0x78 0x62 0xbc
+ 0x21 0x68 0x62 0xfc
+ 0x21 0x78 0x62 0xfc
+ 0x21 0x68 0xe2 0x3c
+ 0x21 0x78 0xe2 0x3c
+
+# CHECK: ldr b1, [x1, x2]
+# CHECK: ldr b1, [x1, x2, lsl #0]
+# CHECK: ldr h1, [x1, x2]
+# CHECK: ldr h1, [x1, x2, lsl #1]
+# CHECK: ldr s1, [x1, x2]
+# CHECK: ldr s1, [x1, x2, lsl #2]
+# CHECK: ldr d1, [x1, x2]
+# CHECK: ldr d1, [x1, x2, lsl #3]
+# CHECK: ldr q1, [x1, x2]
+# CHECK: ldr q1, [x1, x2, lsl #4]
+
+ 0x00 0x48 0x20 0x7c
+ 0xe1 0x6b 0x23 0xfc
+ 0xe1 0x5b 0x23 0xfc
+ 0xe1 0x6b 0xa3 0x3c
+ 0xe1 0x5b 0xa3 0x3c
+
+# CHECK: str h0, [x0, w0, uxtw]
+# CHECK: str d1, [sp, x3]
+# CHECK: str d1, [sp, w3, uxtw #3]
+# CHECK: str q1, [sp, x3]
+# CHECK: str q1, [sp, w3, uxtw #4]
+
+#-----------------------------------------------------------------------------
+# Load/Store exclusive
+#-----------------------------------------------------------------------------
+
+ 0x26 0x7c 0x5f 0x08
+ 0x26 0x7c 0x5f 0x48
+ 0x27 0x0d 0x7f 0x88
+ 0x27 0x0d 0x7f 0xc8
+
+# CHECK: ldxrb w6, [x1]
+# CHECK: ldxrh w6, [x1]
+# CHECK: ldxp w7, w3, [x9]
+# CHECK: ldxp x7, x3, [x9]
+
+ 0x64 0x7c 0x01 0xc8
+ 0x64 0x7c 0x01 0x88
+ 0x64 0x7c 0x01 0x08
+ 0x64 0x7c 0x01 0x48
+ 0x22 0x18 0x21 0xc8
+ 0x22 0x18 0x21 0x88
+
+# CHECK: stxr w1, x4, [x3]
+# CHECK: stxr w1, w4, [x3]
+# CHECK: stxrb w1, w4, [x3]
+# CHECK: stxrh w1, w4, [x3]
+# CHECK: stxp w1, x2, x6, [x1]
+# CHECK: stxp w1, w2, w6, [x1]
+
+#-----------------------------------------------------------------------------
+# Load-acquire/Store-release non-exclusive
+#-----------------------------------------------------------------------------
+
+ 0xe4 0xff 0xdf 0x88
+ 0xe4 0xff 0xdf 0xc8
+ 0xe4 0xff 0xdf 0x08
+ 0xe4 0xff 0xdf 0x48
+
+# CHECK: ldar w4, [sp]
+# CHECK: ldar x4, [sp]
+# CHECK: ldarb w4, [sp]
+# CHECK: ldarh w4, [sp]
+
+ 0xc3 0xfc 0x9f 0x88
+ 0xc3 0xfc 0x9f 0xc8
+ 0xc3 0xfc 0x9f 0x08
+ 0xc3 0xfc 0x9f 0x48
+
+# CHECK: stlr w3, [x6]
+# CHECK: stlr x3, [x6]
+# CHECK: stlrb w3, [x6]
+# CHECK: stlrh w3, [x6]
+
+#-----------------------------------------------------------------------------
+# Load-acquire/Store-release exclusive
+#-----------------------------------------------------------------------------
+
+ 0x82 0xfc 0x5f 0x88
+ 0x82 0xfc 0x5f 0xc8
+ 0x82 0xfc 0x5f 0x08
+ 0x82 0xfc 0x5f 0x48
+ 0x22 0x98 0x7f 0x88
+ 0x22 0x98 0x7f 0xc8
+
+# CHECK: ldaxr w2, [x4]
+# CHECK: ldaxr x2, [x4]
+# CHECK: ldaxrb w2, [x4]
+# CHECK: ldaxrh w2, [x4]
+# CHECK: ldaxp w2, w6, [x1]
+# CHECK: ldaxp x2, x6, [x1]
+
+ 0x27 0xfc 0x08 0xc8
+ 0x27 0xfc 0x08 0x88
+ 0x27 0xfc 0x08 0x08
+ 0x27 0xfc 0x08 0x48
+ 0x22 0x98 0x21 0xc8
+ 0x22 0x98 0x21 0x88
+
+# CHECK: stlxr w8, x7, [x1]
+# CHECK: stlxr w8, w7, [x1]
+# CHECK: stlxrb w8, w7, [x1]
+# CHECK: stlxrh w8, w7, [x1]
+# CHECK: stlxp w1, x2, x6, [x1]
+# CHECK: stlxp w1, w2, w6, [x1]
+
+#-----------------------------------------------------------------------------
+# Load/Store with explicit LSL values
+#-----------------------------------------------------------------------------
+ 0x20 0x78 0xa0 0xb8
+ 0x20 0x78 0x60 0xf8
+ 0x20 0x78 0x20 0xf8
+ 0x20 0x78 0x60 0xb8
+ 0x20 0x78 0x20 0xb8
+ 0x20 0x78 0xe0 0x3c
+ 0x20 0x78 0xa0 0x3c
+ 0x20 0x78 0x60 0xfc
+ 0x20 0x78 0x20 0xfc
+ 0x20 0x78 0x60 0xbc
+ 0x20 0x78 0x20 0xbc
+ 0x20 0x78 0x60 0x7c
+ 0x20 0x78 0x60 0x3c
+ 0x20 0x78 0x60 0x38
+ 0x20 0x78 0x20 0x38
+ 0x20 0x78 0xe0 0x38
+ 0x20 0x78 0x60 0x78
+ 0x20 0x78 0x20 0x78
+ 0x20 0x78 0xe0 0x78
+ 0x20 0x78 0xa0 0x38
+ 0x20 0x78 0xa0 0x78
+
+# CHECK: ldrsw x0, [x1, x0, lsl #2]
+# CHECK: ldr x0, [x1, x0, lsl #3]
+# CHECK: str x0, [x1, x0, lsl #3]
+# CHECK: ldr w0, [x1, x0, lsl #2]
+# CHECK: str w0, [x1, x0, lsl #2]
+# CHECK: ldr q0, [x1, x0, lsl #4]
+# CHECK: str q0, [x1, x0, lsl #4]
+# CHECK: ldr d0, [x1, x0, lsl #3]
+# CHECK: str d0, [x1, x0, lsl #3]
+# CHECK: ldr s0, [x1, x0, lsl #2]
+# CHECK: str s0, [x1, x0, lsl #2]
+# CHECK: ldr h0, [x1, x0, lsl #1]
+# CHECK: ldr b0, [x1, x0, lsl #0]
+# CHECK: ldrb w0, [x1, x0, lsl #0]
+# CHECK: strb w0, [x1, x0, lsl #0]
+# CHECK: ldrsb w0, [x1, x0, lsl #0]
+# CHECK: ldrh w0, [x1, x0, lsl #1]
+# CHECK: strh w0, [x1, x0, lsl #1]
+# CHECK: ldrsh w0, [x1, x0, lsl #1]
+# CHECK: ldrsb x0, [x1, x0, lsl #0]
+# CHECK: ldrsh x0, [x1, x0, lsl #1]
diff --git a/test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt b/test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt
new file mode 100644
index 0000000..75cb95c
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-non-apple-fmov.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple arm64 -mattr=neon -disassemble < %s | FileCheck %s
+
+0x00 0x00 0xae 0x9e
+0x00 0x00 0xaf 0x9e
+
+# CHECK: fmov x0, v0.d[1]
+# CHECK: fmov v0.d[1], x0
diff --git a/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt b/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt
new file mode 100644
index 0000000..f139700
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt
@@ -0,0 +1,255 @@
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon --disassemble -output-asm-variant=1 < %s | FileCheck %s
+
+#-----------------------------------------------------------------------------
+# Floating-point arithmetic
+#-----------------------------------------------------------------------------
+
+0x41 0xc0 0x20 0x1e
+0x41 0xc0 0x60 0x1e
+
+# CHECK: fabs s1, s2
+# CHECK: fabs d1, d2
+
+0x41 0x28 0x23 0x1e
+0x41 0x28 0x63 0x1e
+
+# CHECK: fadd s1, s2, s3
+# CHECK: fadd d1, d2, d3
+
+0x41 0x18 0x23 0x1e
+0x41 0x18 0x63 0x1e
+
+# CHECK: fdiv s1, s2, s3
+# CHECK: fdiv d1, d2, d3
+
+0x41 0x10 0x03 0x1f
+0x41 0x10 0x43 0x1f
+
+# CHECK: fmadd s1, s2, s3, s4
+# CHECK: fmadd d1, d2, d3, d4
+
+0x41 0x48 0x23 0x1e
+0x41 0x48 0x63 0x1e
+0x41 0x68 0x23 0x1e
+0x41 0x68 0x63 0x1e
+
+# CHECK: fmax s1, s2, s3
+# CHECK: fmax d1, d2, d3
+# CHECK: fmaxnm s1, s2, s3
+# CHECK: fmaxnm d1, d2, d3
+
+0x41 0x58 0x23 0x1e
+0x41 0x58 0x63 0x1e
+0x41 0x78 0x23 0x1e
+0x41 0x78 0x63 0x1e
+
+# CHECK: fmin s1, s2, s3
+# CHECK: fmin d1, d2, d3
+# CHECK: fminnm s1, s2, s3
+# CHECK: fminnm d1, d2, d3
+
+0x41 0x90 0x03 0x1f
+0x41 0x90 0x43 0x1f
+
+# CHECK: fmsub s1, s2, s3, s4
+# CHECK: fmsub d1, d2, d3, d4
+
+0x41 0x08 0x23 0x1e
+0x41 0x08 0x63 0x1e
+
+# CHECK: fmul s1, s2, s3
+# CHECK: fmul d1, d2, d3
+
+0x41 0x40 0x21 0x1e
+0x41 0x40 0x61 0x1e
+
+# CHECK: fneg s1, s2
+# CHECK: fneg d1, d2
+
+0x41 0x10 0x23 0x1f
+0x41 0x10 0x63 0x1f
+
+# CHECK: fnmadd s1, s2, s3, s4
+# CHECK: fnmadd d1, d2, d3, d4
+
+0x41 0x90 0x23 0x1f
+0x41 0x90 0x63 0x1f
+
+# CHECK: fnmsub s1, s2, s3, s4
+# CHECK: fnmsub d1, d2, d3, d4
+
+0x41 0x88 0x23 0x1e
+0x41 0x88 0x63 0x1e
+
+# CHECK: fnmul s1, s2, s3
+# CHECK: fnmul d1, d2, d3
+
+0x41 0xc0 0x21 0x1e
+0x41 0xc0 0x61 0x1e
+
+# CHECK: fsqrt s1, s2
+# CHECK: fsqrt d1, d2
+
+0x41 0x38 0x23 0x1e
+0x41 0x38 0x63 0x1e
+
+# CHECK: fsub s1, s2, s3
+# CHECK: fsub d1, d2, d3
+
+#-----------------------------------------------------------------------------
+# Floating-point comparison
+#-----------------------------------------------------------------------------
+
+0x20 0x04 0x22 0x1e
+0x20 0x04 0x62 0x1e
+0x30 0x04 0x22 0x1e
+0x30 0x04 0x62 0x1e
+
+# CHECK: fccmp s1, s2, #0, eq
+# CHECK: fccmp d1, d2, #0, eq
+# CHECK: fccmpe s1, s2, #0, eq
+# CHECK: fccmpe d1, d2, #0, eq
+
+0x20 0x20 0x22 0x1e
+0x20 0x20 0x62 0x1e
+0x28 0x20 0x20 0x1e
+0x28 0x20 0x60 0x1e
+0x30 0x20 0x22 0x1e
+0x30 0x20 0x62 0x1e
+0x38 0x20 0x20 0x1e
+0x38 0x20 0x60 0x1e
+
+# CHECK: fcmp s1, s2
+# CHECK: fcmp d1, d2
+# CHECK: fcmp s1, #0.0
+# CHECK: fcmp d1, #0.0
+# CHECK: fcmpe s1, s2
+# CHECK: fcmpe d1, d2
+# CHECK: fcmpe s1, #0.0
+# CHECK: fcmpe d1, #0.0
+
+#-----------------------------------------------------------------------------
+# Floating-point conditional select
+#-----------------------------------------------------------------------------
+
+0x41 0x0c 0x23 0x1e
+0x41 0x0c 0x63 0x1e
+
+# CHECK: fcsel s1, s2, s3, eq
+# CHECK: fcsel d1, d2, d3, eq
+
+#-----------------------------------------------------------------------------
+# Floating-point convert
+#-----------------------------------------------------------------------------
+
+0x41 0xc0 0x63 0x1e
+0x41 0x40 0x62 0x1e
+0x41 0xc0 0xe2 0x1e
+0x41 0x40 0xe2 0x1e
+0x41 0xc0 0x22 0x1e
+0x41 0xc0 0x23 0x1e
+
+# CHECK: fcvt h1, d2
+# CHECK: fcvt s1, d2
+# CHECK: fcvt d1, h2
+# CHECK: fcvt s1, h2
+# CHECK: fcvt d1, s2
+# CHECK: fcvt h1, s2
+
+0x41 0x00 0x44 0x1e
+0x41 0x04 0x44 0x1e
+0x41 0x00 0x44 0x9e
+0x41 0x04 0x44 0x9e
+0x41 0x00 0x04 0x1e
+0x41 0x04 0x04 0x1e
+0x41 0x00 0x04 0x9e
+0x41 0x04 0x04 0x9e
+
+#-----------------------------------------------------------------------------
+# Floating-point move
+#-----------------------------------------------------------------------------
+
+0x41 0x00 0x27 0x1e
+0x41 0x00 0x26 0x1e
+0x41 0x00 0x67 0x9e
+0x41 0x00 0x66 0x9e
+
+# CHECK: fmov s1, w2
+# CHECK: fmov w1, s2
+# CHECK: fmov d1, x2
+# CHECK: fmov x1, d2
+
+0x01 0x10 0x28 0x1e
+0x01 0x10 0x68 0x1e
+0x01 0xf0 0x7b 0x1e
+0x01 0xf0 0x6b 0x1e
+
+# CHECK: fmov s1, #0.12500000
+# CHECK: fmov d1, #0.12500000
+# CHECK: fmov d1, #-0.48437500
+# CHECK: fmov d1, #0.48437500
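
Annotation (not part of the patch): the odd-looking FP constants come from fmov's 8-bit immediate, laid out as sign(1):exp(3):frac(4), which can only express +/-(16+frac)/16 * 2^e for e in -3..4. Sketch of the expansion (Python, illustration only):

    def vfp_imm8(imm8):
        sign = -1.0 if imm8 & 0x80 else 1.0
        e = ((imm8 >> 4) & 3) - 3 if imm8 & 0x40 else ((imm8 >> 4) & 3) + 1
        return sign * (16 + (imm8 & 0xf)) / 16.0 * 2.0 ** e

    print(vfp_imm8(0x40))  # 0.125     -> fmov s1, #0.12500000
    print(vfp_imm8(0xdf))  # -0.484375 -> fmov d1, #-0.48437500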
+
+0x41 0x40 0x20 0x1e
+0x41 0x40 0x60 0x1e
+
+# CHECK: fmov s1, s2
+# CHECK: fmov d1, d2
+
+#-----------------------------------------------------------------------------
+# Floating-point round to integral
+#-----------------------------------------------------------------------------
+
+0x41 0x40 0x26 0x1e
+0x41 0x40 0x66 0x1e
+
+# CHECK: frinta s1, s2
+# CHECK: frinta d1, d2
+
+0x41 0xc0 0x27 0x1e
+0x41 0xc0 0x67 0x1e
+
+# CHECK: frinti s1, s2
+# CHECK: frinti d1, d2
+
+0x41 0x40 0x25 0x1e
+0x41 0x40 0x65 0x1e
+
+# CHECK: frintm s1, s2
+# CHECK: frintm d1, d2
+
+0x41 0x40 0x24 0x1e
+0x41 0x40 0x64 0x1e
+
+# CHECK: frintn s1, s2
+# CHECK: frintn d1, d2
+
+0x41 0xc0 0x24 0x1e
+0x41 0xc0 0x64 0x1e
+
+# CHECK: frintp s1, s2
+# CHECK: frintp d1, d2
+
+0x41 0x40 0x27 0x1e
+0x41 0x40 0x67 0x1e
+
+# CHECK: frintx s1, s2
+# CHECK: frintx d1, d2
+
+0x41 0xc0 0x25 0x1e
+0x41 0xc0 0x65 0x1e
+
+# CHECK: frintz s1, s2
+# CHECK: frintz d1, d2
+
+ 0x00 0x3c 0xe0 0x7e
+ 0x00 0x8c 0xe0 0x5e
+
+# CHECK: cmhs d0, d0, d0
+# CHECK: cmtst d0, d0, d0
+
+0x00 0x00 0xaf 0x9e
+0x00 0x00 0xae 0x9e
+
+# CHECK: fmov.d v0[1], x0
+# CHECK: fmov.d x0, v0[1]
+
diff --git a/test/MC/Disassembler/AArch64/arm64-system.txt b/test/MC/Disassembler/AArch64/arm64-system.txt
new file mode 100644
index 0000000..9027a60
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/arm64-system.txt
@@ -0,0 +1,62 @@
+# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
+
+
+#-----------------------------------------------------------------------------
+# Hint encodings
+#-----------------------------------------------------------------------------
+
+ 0x1f 0x20 0x03 0xd5
+# CHECK: nop
+ 0x9f 0x20 0x03 0xd5
+# CHECK: sev
+ 0xbf 0x20 0x03 0xd5
+# CHECK: sevl
+ 0x5f 0x20 0x03 0xd5
+# CHECK: wfe
+ 0x7f 0x20 0x03 0xd5
+# CHECK: wfi
+ 0x3f 0x20 0x03 0xd5
+# CHECK: yield
+
+#-----------------------------------------------------------------------------
+# Single-immediate operand instructions
+#-----------------------------------------------------------------------------
+
+ 0x5f 0x3a 0x03 0xd5
+# CHECK: clrex #10
+ 0xdf 0x3f 0x03 0xd5
+# CHECK: isb{{$}}
+ 0xdf 0x31 0x03 0xd5
+# CHECK: isb #1
+ 0xbf 0x33 0x03 0xd5
+# CHECK: dmb osh
+ 0x9f 0x37 0x03 0xd5
+# CHECK: dsb nsh
+ 0x3f 0x76 0x08 0xd5
+# CHECK: dc ivac
+
+#-----------------------------------------------------------------------------
+# Generic system instructions
+#-----------------------------------------------------------------------------
+ 0xff 0x05 0x0a 0xd5
+ 0xe7 0x6a 0x0f 0xd5
+ 0xf4 0x3f 0x2e 0xd5
+ 0xbf 0x40 0x00 0xd5
+ 0x00 0xb0 0x18 0xd5
+ 0x00 0xb0 0x38 0xd5
+
+# CHECK: sys #2, c0, c5, #7
+# CHECK: sys #7, c6, c10, #7, x7
+# CHECK: sysl x20, #6, c3, c15, #7
+# CHECK: msr SPSEL, #0
+# CHECK: msr S3_0_C11_C0_0, x0
+# CHECK: mrs x0, S3_0_C11_C0_0
+
+ 0x40 0xc0 0x1e 0xd5
+ 0x40 0xc0 0x1c 0xd5
+ 0x40 0xc0 0x18 0xd5
+
+# CHECK: msr RMR_EL3, x0
+# CHECK: msr RMR_EL2, x0
+# CHECK: msr RMR_EL1, x0
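
Annotation (not part of the patch): system registers without a name-table entry print as S<op0>_<op1>_C<CRn>_C<CRm>_<op2>, all taken from bits 20:5 of the msr/mrs word (op0 is 2 plus the o0 bit). Sketch (Python, illustration only; the helper name is mine):

    def sysreg_name(insn):
        op0 = 2 + ((insn >> 19) & 1)
        op1, crn = (insn >> 16) & 7, (insn >> 12) & 0xf
        crm, op2 = (insn >> 8) & 0xf, (insn >> 5) & 7
        return "S%d_%d_C%d_C%d_%d" % (op0, op1, crn, crm, op2)

    print(sysreg_name(0xd518b000))  # 'S3_0_C11_C0_0', as checked above
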
+
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
index 40926b1..397a39e 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=+fp-armv8 -disassemble < %s | FileCheck %s
#------------------------------------------------------------------------------
# Add/sub (immediate)
@@ -187,7 +188,7 @@
# CHECK: sub w3, w5, w7
# CHECK: sub wzr, w3, w5
-# CHECK: sub w20, wzr, w4
+# CHECK: {{sub w20, wzr, w4|neg w20, w4}}
# CHECK: sub w4, w6, wzr
# CHECK: sub w11, w13, w15
# CHECK: sub w9, w3, wzr, lsl #10
@@ -214,7 +215,7 @@
# CHECK: sub x3, x5, x7
# CHECK: sub xzr, x3, x5
-# CHECK: sub x20, xzr, x4
+# CHECK: {{sub x20, xzr, x4|neg x20, x4}}
# CHECK: sub x4, x6, xzr
# CHECK: sub x11, x13, x15
# CHECK: sub x9, x3, xzr, lsl #10
@@ -241,7 +242,7 @@
# CHECK: subs w3, w5, w7
# CHECK: cmp w3, w5
-# CHECK: subs w20, wzr, w4
+# CHECK: {{subs w20, wzr, w4|negs w20, w4}}
# CHECK: subs w4, w6, wzr
# CHECK: subs w11, w13, w15
# CHECK: subs w9, w3, wzr, lsl #10
@@ -268,7 +269,7 @@
# CHECK: subs x3, x5, x7
# CHECK: cmp x3, x5
-# CHECK: subs x20, xzr, x4
+# CHECK: {{subs x20, xzr, x4|negs x20, x4}}
# CHECK: subs x4, x6, xzr
# CHECK: subs x11, x13, x15
# CHECK: subs x9, x3, xzr, lsl #10
@@ -393,18 +394,18 @@
0x9f 0xde 0x95 0xeb
0xdf 0xfe 0x97 0xeb
-# CHECK: sub w29, wzr, w30
-# CHECK: sub w30, wzr, wzr
-# CHECK: sub wzr, wzr, w0
-# CHECK: sub w28, wzr, w27
-# CHECK: sub w26, wzr, w25, lsl #29
-# CHECK: sub w24, wzr, w23, lsl #31
-# CHECK: sub w22, wzr, w21, lsr #0
-# CHECK: sub w20, wzr, w19, lsr #1
-# CHECK: sub w18, wzr, w17, lsr #31
-# CHECK: sub w16, wzr, w15, asr #0
-# CHECK: sub w14, wzr, w13, asr #12
-# CHECK: sub w12, wzr, w11, asr #31
+# CHECK: {{sub w29, wzr|neg w29}}, w30
+# CHECK: {{sub w30, wzr|neg w30}}, wzr
+# CHECK: {{sub wzr, wzr|neg wzr}}, w0
+# CHECK: {{sub w28, wzr|neg w28}}, w27
+# CHECK: {{sub w26, wzr|neg w26}}, w25, lsl #29
+# CHECK: {{sub w24, wzr|neg w24}}, w23, lsl #31
+# CHECK: {{sub w22, wzr|neg w22}}, w21, lsr #0
+# CHECK: {{sub w20, wzr|neg w20}}, w19, lsr #1
+# CHECK: {{sub w18, wzr|neg w18}}, w17, lsr #31
+# CHECK: {{sub w16, wzr|neg w16}}, w15, asr #0
+# CHECK: {{sub w14, wzr|neg w14}}, w13, asr #12
+# CHECK: {{sub w12, wzr|neg w12}}, w11, asr #31
0xfd 0x3 0x1e 0x4b
0xfe 0x3 0x1f 0x4b
0xff 0x3 0x0 0x4b
@@ -418,18 +419,18 @@
0xee 0x33 0x8d 0x4b
0xec 0x7f 0x8b 0x4b
-# CHECK: sub x29, xzr, x30
-# CHECK: sub x30, xzr, xzr
-# CHECK: sub xzr, xzr, x0
-# CHECK: sub x28, xzr, x27
-# CHECK: sub x26, xzr, x25, lsl #29
-# CHECK: sub x24, xzr, x23, lsl #31
-# CHECK: sub x22, xzr, x21, lsr #0
-# CHECK: sub x20, xzr, x19, lsr #1
-# CHECK: sub x18, xzr, x17, lsr #31
-# CHECK: sub x16, xzr, x15, asr #0
-# CHECK: sub x14, xzr, x13, asr #12
-# CHECK: sub x12, xzr, x11, asr #31
+# CHECK: {{sub x29, xzr|neg x29}}, x30
+# CHECK: {{sub x30, xzr|neg x30}}, xzr
+# CHECK: {{sub xzr, xzr|neg xzr}}, x0
+# CHECK: {{sub x28, xzr|neg x28}}, x27
+# CHECK: {{sub x26, xzr|neg x26}}, x25, lsl #29
+# CHECK: {{sub x24, xzr|neg x24}}, x23, lsl #31
+# CHECK: {{sub x22, xzr|neg x22}}, x21, lsr #0
+# CHECK: {{sub x20, xzr|neg x20}}, x19, lsr #1
+# CHECK: {{sub x18, xzr|neg x18}}, x17, lsr #31
+# CHECK: {{sub x16, xzr|neg x16}}, x15, asr #0
+# CHECK: {{sub x14, xzr|neg x14}}, x13, asr #12
+# CHECK: {{sub x12, xzr|neg x12}}, x11, asr #31
0xfd 0x3 0x1e 0xcb
0xfe 0x3 0x1f 0xcb
0xff 0x3 0x0 0xcb
@@ -443,18 +444,18 @@
0xee 0x33 0x8d 0xcb
0xec 0x7f 0x8b 0xcb
-# CHECK: subs w29, wzr, w30
-# CHECK: subs w30, wzr, wzr
+# CHECK: {{subs w29, wzr|negs w29}}, w30
+# CHECK: {{subs w30, wzr|negs w30}}, wzr
# CHECK: cmp wzr, w0
-# CHECK: subs w28, wzr, w27
-# CHECK: subs w26, wzr, w25, lsl #29
-# CHECK: subs w24, wzr, w23, lsl #31
-# CHECK: subs w22, wzr, w21, lsr #0
-# CHECK: subs w20, wzr, w19, lsr #1
-# CHECK: subs w18, wzr, w17, lsr #31
-# CHECK: subs w16, wzr, w15, asr #0
-# CHECK: subs w14, wzr, w13, asr #12
-# CHECK: subs w12, wzr, w11, asr #31
+# CHECK: {{subs w28, wzr|negs w28}}, w27
+# CHECK: {{subs w26, wzr|negs w26}}, w25, lsl #29
+# CHECK: {{subs w24, wzr|negs w24}}, w23, lsl #31
+# CHECK: {{subs w22, wzr|negs w22}}, w21, lsr #0
+# CHECK: {{subs w20, wzr|negs w20}}, w19, lsr #1
+# CHECK: {{subs w18, wzr|negs w18}}, w17, lsr #31
+# CHECK: {{subs w16, wzr|negs w16}}, w15, asr #0
+# CHECK: {{subs w14, wzr|negs w14}}, w13, asr #12
+# CHECK: {{subs w12, wzr|negs w12}}, w11, asr #31
0xfd 0x3 0x1e 0x6b
0xfe 0x3 0x1f 0x6b
0xff 0x3 0x0 0x6b
@@ -468,18 +469,18 @@
0xee 0x33 0x8d 0x6b
0xec 0x7f 0x8b 0x6b
-# CHECK: subs x29, xzr, x30
-# CHECK: subs x30, xzr, xzr
+# CHECK: {{subs x29, xzr|negs x29}}, x30
+# CHECK: {{subs x30, xzr|negs x30}}, xzr
# CHECK: cmp xzr, x0
-# CHECK: subs x28, xzr, x27
-# CHECK: subs x26, xzr, x25, lsl #29
-# CHECK: subs x24, xzr, x23, lsl #31
-# CHECK: subs x22, xzr, x21, lsr #0
-# CHECK: subs x20, xzr, x19, lsr #1
-# CHECK: subs x18, xzr, x17, lsr #31
-# CHECK: subs x16, xzr, x15, asr #0
-# CHECK: subs x14, xzr, x13, asr #12
-# CHECK: subs x12, xzr, x11, asr #31
+# CHECK: {{subs x28, xzr|negs x28}}, x27
+# CHECK: {{subs x26, xzr|negs x26}}, x25, lsl #29
+# CHECK: {{subs x24, xzr|negs x24}}, x23, lsl #31
+# CHECK: {{subs x22, xzr|negs x22}}, x21, lsr #0
+# CHECK: {{subs x20, xzr|negs x20}}, x19, lsr #1
+# CHECK: {{subs x18, xzr|negs x18}}, x17, lsr #31
+# CHECK: {{subs x16, xzr|negs x16}}, x15, asr #0
+# CHECK: {{subs x14, xzr|negs x14}}, x13, asr #12
+# CHECK: {{subs x12, xzr|negs x12}}, x11, asr #31
0xfd 0x3 0x1e 0xeb
0xfe 0x3 0x1f 0xeb
0xff 0x3 0x0 0xeb
@@ -940,21 +941,21 @@
0xe5 0x27 0x86 0xda
0x7 0x35 0x9f 0xda
-# CHECK: csinc w3, wzr, wzr, ne
-# CHECK: csinc x9, xzr, xzr, mi
-# CHECK: csinv w20, wzr, wzr, eq
-# CHECK: csinv x30, xzr, xzr, lt
+# CHECK: cset w3, eq
+# CHECK: cset x9, pl
+# CHECK: csetm w20, ne
+# CHECK: csetm x30, ge
0xe3 0x17 0x9f 0x1a
0xe9 0x47 0x9f 0x9a
0xf4 0x3 0x9f 0x5a
0xfe 0xb3 0x9f 0xda
-# CHECK: csinc w3, w5, w5, le
-# CHECK: csinc wzr, w4, w4, gt
-# CHECK: csinc w9, wzr, wzr, ge
-# CHECK: csinc x3, x5, x5, le
-# CHECK: csinc xzr, x4, x4, gt
-# CHECK: csinc x9, xzr, xzr, ge
+# CHECK: cinc w3, w5, gt
+# CHECK: cinc wzr, w4, le
+# CHECK: cset w9, lt
+# CHECK: cinc x3, x5, gt
+# CHECK: cinc xzr, x4, le
+# CHECK: cset x9, lt
0xa3 0xd4 0x85 0x1a
0x9f 0xc4 0x84 0x1a
0xe9 0xa7 0x9f 0x1a
@@ -962,12 +963,12 @@
0x9f 0xc4 0x84 0x9a
0xe9 0xa7 0x9f 0x9a
-# CHECK: csinv w3, w5, w5, le
-# CHECK: csinv wzr, w4, w4, gt
-# CHECK: csinv w9, wzr, wzr, ge
-# CHECK: csinv x3, x5, x5, le
-# CHECK: csinv xzr, x4, x4, gt
-# CHECK: csinv x9, xzr, xzr, ge
+# CHECK: cinv w3, w5, gt
+# CHECK: cinv wzr, w4, le
+# CHECK: csetm w9, lt
+# CHECK: cinv x3, x5, gt
+# CHECK: cinv xzr, x4, le
+# CHECK: csetm x9, lt
0xa3 0xd0 0x85 0x5a
0x9f 0xc0 0x84 0x5a
0xe9 0xa3 0x9f 0x5a
@@ -975,12 +976,12 @@
0x9f 0xc0 0x84 0xda
0xe9 0xa3 0x9f 0xda
-# CHECK: csneg w3, w5, w5, le
-# CHECK: csneg wzr, w4, w4, gt
-# CHECK: csneg w9, wzr, wzr, ge
-# CHECK: csneg x3, x5, x5, le
-# CHECK: csneg xzr, x4, x4, gt
-# CHECK: csneg x9, xzr, xzr, ge
+# CHECK: cneg w3, w5, gt
+# CHECK: cneg wzr, w4, le
+# CHECK: cneg w9, wzr, lt
+# CHECK: cneg x3, x5, gt
+# CHECK: cneg xzr, x4, le
+# CHECK: cneg x9, xzr, lt
0xa3 0xd4 0x85 0x5a
0x9f 0xc4 0x84 0x5a
0xe9 0xa7 0x9f 0x5a
@@ -1243,22 +1244,22 @@
#------------------------------------------------------------------------------
# CHECK: svc #0
-# CHECK: svc #65535
+# CHECK: svc #{{65535|0xffff}}
0x1 0x0 0x0 0xd4
0xe1 0xff 0x1f 0xd4
-# CHECK: hvc #1
-# CHECK: smc #12000
-# CHECK: brk #12
-# CHECK: hlt #123
+# CHECK: hvc #{{1|0x1}}
+# CHECK: smc #{{12000|0x2ee0}}
+# CHECK: brk #{{12|0xc}}
+# CHECK: hlt #{{123|0x7b}}
0x22 0x0 0x0 0xd4
0x3 0xdc 0x5 0xd4
0x80 0x1 0x20 0xd4
0x60 0xf 0x40 0xd4
-# CHECK: dcps1 #42
-# CHECK: dcps2 #9
-# CHECK: dcps3 #1000
+# CHECK: dcps1 #{{42|0x2a}}
+# CHECK: dcps2 #{{9|0x9}}
+# CHECK: dcps3 #{{1000|0x3e8}}
0x41 0x5 0xa0 0xd4
0x22 0x1 0xa0 0xd4
0x3 0x7d 0xa0 0xd4
@@ -1284,9 +1285,9 @@
0xa3 0x3c 0xc7 0x93
0xab 0xfd 0xd1 0x93
-# CHECK: extr x19, x23, x23, #24
-# CHECK: extr x29, xzr, xzr, #63
-# CHECK: extr w9, w13, w13, #31
+# CHECK: ror x19, x23, #24
+# CHECK: ror x29, xzr, #63
+# CHECK: ror w9, w13, #31
0xf3 0x62 0xd7 0x93
0xfd 0xff 0xdf 0x93
0xa9 0x7d 0x8d 0x13
@@ -2353,23 +2354,23 @@
0xec 0xff 0xbf 0x3d
# CHECK: prfm pldl1keep, [sp, #8]
-# CHECK: prfm pldl1strm, [x3, #0]
+# CHECK: prfm pldl1strm, [x3{{(, #0)?}}]
# CHECK: prfm pldl2keep, [x5, #16]
-# CHECK: prfm pldl2strm, [x2, #0]
-# CHECK: prfm pldl3keep, [x5, #0]
-# CHECK: prfm pldl3strm, [x6, #0]
+# CHECK: prfm pldl2strm, [x2{{(, #0)?}}]
+# CHECK: prfm pldl3keep, [x5{{(, #0)?}}]
+# CHECK: prfm pldl3strm, [x6{{(, #0)?}}]
# CHECK: prfm plil1keep, [sp, #8]
-# CHECK: prfm plil1strm, [x3, #0]
+# CHECK: prfm plil1strm, [x3{{(, #0)?}}]
# CHECK: prfm plil2keep, [x5, #16]
-# CHECK: prfm plil2strm, [x2, #0]
-# CHECK: prfm plil3keep, [x5, #0]
-# CHECK: prfm plil3strm, [x6, #0]
+# CHECK: prfm plil2strm, [x2{{(, #0)?}}]
+# CHECK: prfm plil3keep, [x5{{(, #0)?}}]
+# CHECK: prfm plil3strm, [x6{{(, #0)?}}]
# CHECK: prfm pstl1keep, [sp, #8]
-# CHECK: prfm pstl1strm, [x3, #0]
+# CHECK: prfm pstl1strm, [x3{{(, #0)?}}]
# CHECK: prfm pstl2keep, [x5, #16]
-# CHECK: prfm pstl2strm, [x2, #0]
-# CHECK: prfm pstl3keep, [x5, #0]
-# CHECK: prfm pstl3strm, [x6, #0]
+# CHECK: prfm pstl2strm, [x2{{(, #0)?}}]
+# CHECK: prfm pstl3keep, [x5{{(, #0)?}}]
+# CHECK: prfm pstl3strm, [x6{{(, #0)?}}]
0xe0 0x07 0x80 0xf9
0x61 0x00 0x80 0xf9
0xa2 0x08 0x80 0xf9
@@ -2722,15 +2723,15 @@
0xff 0xc7 0x0 0x52
0x30 0xc6 0x1 0x52
-# CHECK: ands wzr, w18, #0xcccccccc
+# CHECK: {{ands wzr,|tst}} w18, #0xcccccccc
# CHECK: ands w19, w20, #0x33333333
# CHECK: ands w21, w22, #0x99999999
0x5f 0xe6 0x2 0x72
0x93 0xe6 0x0 0x72
0xd5 0xe6 0x1 0x72
-# CHECK: ands wzr, w3, #0xaaaaaaaa
-# CHECK: ands wzr, wzr, #0x55555555
+# CHECK: {{ands wzr,|tst}} w3, #0xaaaaaaaa
+# CHECK: {{ands wzr,|tst}} wzr, #0x55555555
0x7f 0xf0 0x1 0x72
0xff 0xf3 0x0 0x72
@@ -2762,15 +2763,15 @@
0xff 0xc7 0x0 0xd2
0x30 0xc6 0x1 0xd2
-# CHECK: ands xzr, x18, #0xcccccccccccccccc
+# CHECK: {{ands xzr,|tst}} x18, #0xcccccccccccccccc
# CHECK: ands x19, x20, #0x3333333333333333
# CHECK: ands x21, x22, #0x9999999999999999
0x5f 0xe6 0x2 0xf2
0x93 0xe6 0x0 0xf2
0xd5 0xe6 0x1 0xf2
-# CHECK: ands xzr, x3, #0xaaaaaaaaaaaaaaaa
-# CHECK: ands xzr, xzr, #0x5555555555555555
+# CHECK: {{ands xzr,|tst}} x3, #0xaaaaaaaaaaaaaaaa
+# CHECK: {{ands xzr,|tst}} xzr, #0x5555555555555555
0x7f 0xf0 0x1 0xf2
0xff 0xf3 0x0 0xf2
@@ -2858,15 +2859,15 @@
# limitation in InstAlias. Lots of the "mov[nz]" instructions should
# be "mov".
-# CHECK: movz w1, #65535
+# CHECK: movz w1, #{{65535|0xffff}}
# CHECK: movz w2, #0, lsl #16
-# CHECK: movn w2, #1234
+# CHECK: movn w2, #{{1234|0x4d2}}
0xe1 0xff 0x9f 0x52
0x2 0x0 0xa0 0x52
0x42 0x9a 0x80 0x12
-# CHECK: movz x2, #1234, lsl #32
-# CHECK: movk xzr, #4321, lsl #48
+# CHECK: movz x2, #{{1234|0x4d2}}, lsl #32
+# CHECK: movk xzr, #{{4321|0x10e1}}, lsl #48
0x42 0x9a 0xc0 0xd2
0x3f 0x1c 0xe2 0xf2
@@ -2906,7 +2907,7 @@
#------------------------------------------------------------------------------
# CHECK: nop
-# CHECK: hint #127
+# CHECK: hint #{{127|0x7f}}
# CHECK: nop
# CHECK: yield
# CHECK: wfe
@@ -2998,9 +2999,9 @@
0xdf 0x3f 0x3 0xd5
0xdf 0x3c 0x3 0xd5
-# CHECK: msr spsel, #0
-# CHECK: msr daifset, #15
-# CHECK: msr daifclr, #12
+# CHECK: msr {{spsel|SPSEL}}, #0
+# CHECK: msr {{daifset|DAIFSET}}, #15
+# CHECK: msr {{daifclr|DAIFCLR}}, #12
0xbf 0x40 0x0 0xd5
0xdf 0x4f 0x3 0xd5
0xff 0x4c 0x3 0xd5
@@ -3014,21 +3015,21 @@
0xe9 0x59 0x2f 0xd5
0x41 0xff 0x28 0xd5
-# CHECK: sys #0, c7, c1, #0, xzr
-# CHECK: sys #0, c7, c5, #0, xzr
-# CHECK: sys #3, c7, c5, #1, x9
+# CHECK: {{sys #0, c7, c1, #0|ic ialluis}}
+# CHECK: {{sys #0, c7, c5, #0|ic iallu}}
+# CHECK: {{sys #3, c7, c5, #1|ic ivau}}, x9
0x1f 0x71 0x8 0xd5
0x1f 0x75 0x8 0xd5
0x29 0x75 0xb 0xd5
-# CHECK: sys #3, c7, c4, #1, x12
-# CHECK: sys #0, c7, c6, #1, xzr
-# CHECK: sys #0, c7, c6, #2, x2
-# CHECK: sys #3, c7, c10, #1, x9
-# CHECK: sys #0, c7, c10, #2, x10
-# CHECK: sys #3, c7, c11, #1, x0
-# CHECK: sys #3, c7, c14, #1, x3
-# CHECK: sys #0, c7, c14, #2, x30
+# CHECK: {{sys #3, c7, c4, #1|dc zva}}, x12
+# CHECK: {{sys #0, c7, c6, #1|dc ivac}}
+# CHECK: {{sys #0, c7, c6, #2|dc isw}}, x2
+# CHECK: {{sys #3, c7, c10, #1|dc cvac}}, x9
+# CHECK: {{sys #0, c7, c10, #2|dc csw}}, x10
+# CHECK: {{sys #3, c7, c11, #1|dc cvau}}, x0
+# CHECK: {{sys #3, c7, c14, #1|dc civac}}, x3
+# CHECK: {{sys #0, c7, c14, #2|dc cisw}}, x30
0x2c 0x74 0xb 0xd5
0x3f 0x76 0x8 0xd5
0x42 0x76 0x8 0xd5
@@ -3039,559 +3040,559 @@
0x5e 0x7e 0x8 0xd5
-# CHECK: msr teecr32_el1, x12
-# CHECK: msr osdtrrx_el1, x12
-# CHECK: msr mdccint_el1, x12
-# CHECK: msr mdscr_el1, x12
-# CHECK: msr osdtrtx_el1, x12
-# CHECK: msr dbgdtr_el0, x12
-# CHECK: msr dbgdtrtx_el0, x12
-# CHECK: msr oseccr_el1, x12
-# CHECK: msr dbgvcr32_el2, x12
-# CHECK: msr dbgbvr0_el1, x12
-# CHECK: msr dbgbvr1_el1, x12
-# CHECK: msr dbgbvr2_el1, x12
-# CHECK: msr dbgbvr3_el1, x12
-# CHECK: msr dbgbvr4_el1, x12
-# CHECK: msr dbgbvr5_el1, x12
-# CHECK: msr dbgbvr6_el1, x12
-# CHECK: msr dbgbvr7_el1, x12
-# CHECK: msr dbgbvr8_el1, x12
-# CHECK: msr dbgbvr9_el1, x12
-# CHECK: msr dbgbvr10_el1, x12
-# CHECK: msr dbgbvr11_el1, x12
-# CHECK: msr dbgbvr12_el1, x12
-# CHECK: msr dbgbvr13_el1, x12
-# CHECK: msr dbgbvr14_el1, x12
-# CHECK: msr dbgbvr15_el1, x12
-# CHECK: msr dbgbcr0_el1, x12
-# CHECK: msr dbgbcr1_el1, x12
-# CHECK: msr dbgbcr2_el1, x12
-# CHECK: msr dbgbcr3_el1, x12
-# CHECK: msr dbgbcr4_el1, x12
-# CHECK: msr dbgbcr5_el1, x12
-# CHECK: msr dbgbcr6_el1, x12
-# CHECK: msr dbgbcr7_el1, x12
-# CHECK: msr dbgbcr8_el1, x12
-# CHECK: msr dbgbcr9_el1, x12
-# CHECK: msr dbgbcr10_el1, x12
-# CHECK: msr dbgbcr11_el1, x12
-# CHECK: msr dbgbcr12_el1, x12
-# CHECK: msr dbgbcr13_el1, x12
-# CHECK: msr dbgbcr14_el1, x12
-# CHECK: msr dbgbcr15_el1, x12
-# CHECK: msr dbgwvr0_el1, x12
-# CHECK: msr dbgwvr1_el1, x12
-# CHECK: msr dbgwvr2_el1, x12
-# CHECK: msr dbgwvr3_el1, x12
-# CHECK: msr dbgwvr4_el1, x12
-# CHECK: msr dbgwvr5_el1, x12
-# CHECK: msr dbgwvr6_el1, x12
-# CHECK: msr dbgwvr7_el1, x12
-# CHECK: msr dbgwvr8_el1, x12
-# CHECK: msr dbgwvr9_el1, x12
-# CHECK: msr dbgwvr10_el1, x12
-# CHECK: msr dbgwvr11_el1, x12
-# CHECK: msr dbgwvr12_el1, x12
-# CHECK: msr dbgwvr13_el1, x12
-# CHECK: msr dbgwvr14_el1, x12
-# CHECK: msr dbgwvr15_el1, x12
-# CHECK: msr dbgwcr0_el1, x12
-# CHECK: msr dbgwcr1_el1, x12
-# CHECK: msr dbgwcr2_el1, x12
-# CHECK: msr dbgwcr3_el1, x12
-# CHECK: msr dbgwcr4_el1, x12
-# CHECK: msr dbgwcr5_el1, x12
-# CHECK: msr dbgwcr6_el1, x12
-# CHECK: msr dbgwcr7_el1, x12
-# CHECK: msr dbgwcr8_el1, x12
-# CHECK: msr dbgwcr9_el1, x12
-# CHECK: msr dbgwcr10_el1, x12
-# CHECK: msr dbgwcr11_el1, x12
-# CHECK: msr dbgwcr12_el1, x12
-# CHECK: msr dbgwcr13_el1, x12
-# CHECK: msr dbgwcr14_el1, x12
-# CHECK: msr dbgwcr15_el1, x12
-# CHECK: msr teehbr32_el1, x12
-# CHECK: msr oslar_el1, x12
-# CHECK: msr osdlr_el1, x12
-# CHECK: msr dbgprcr_el1, x12
-# CHECK: msr dbgclaimset_el1, x12
-# CHECK: msr dbgclaimclr_el1, x12
-# CHECK: msr csselr_el1, x12
-# CHECK: msr vpidr_el2, x12
-# CHECK: msr vmpidr_el2, x12
-# CHECK: msr sctlr_el1, x12
-# CHECK: msr sctlr_el2, x12
-# CHECK: msr sctlr_el3, x12
-# CHECK: msr actlr_el1, x12
-# CHECK: msr actlr_el2, x12
-# CHECK: msr actlr_el3, x12
-# CHECK: msr cpacr_el1, x12
-# CHECK: msr hcr_el2, x12
-# CHECK: msr scr_el3, x12
-# CHECK: msr mdcr_el2, x12
-# CHECK: msr sder32_el3, x12
-# CHECK: msr cptr_el2, x12
-# CHECK: msr cptr_el3, x12
-# CHECK: msr hstr_el2, x12
-# CHECK: msr hacr_el2, x12
-# CHECK: msr mdcr_el3, x12
-# CHECK: msr ttbr0_el1, x12
-# CHECK: msr ttbr0_el2, x12
-# CHECK: msr ttbr0_el3, x12
-# CHECK: msr ttbr1_el1, x12
-# CHECK: msr tcr_el1, x12
-# CHECK: msr tcr_el2, x12
-# CHECK: msr tcr_el3, x12
-# CHECK: msr vttbr_el2, x12
-# CHECK: msr vtcr_el2, x12
-# CHECK: msr dacr32_el2, x12
-# CHECK: msr spsr_el1, x12
-# CHECK: msr spsr_el2, x12
-# CHECK: msr spsr_el3, x12
-# CHECK: msr elr_el1, x12
-# CHECK: msr elr_el2, x12
-# CHECK: msr elr_el3, x12
-# CHECK: msr sp_el0, x12
-# CHECK: msr sp_el1, x12
-# CHECK: msr sp_el2, x12
-# CHECK: msr spsel, x12
-# CHECK: msr nzcv, x12
-# CHECK: msr daif, x12
-# CHECK: msr currentel, x12
-# CHECK: msr spsr_irq, x12
-# CHECK: msr spsr_abt, x12
-# CHECK: msr spsr_und, x12
-# CHECK: msr spsr_fiq, x12
-# CHECK: msr fpcr, x12
-# CHECK: msr fpsr, x12
-# CHECK: msr dspsr_el0, x12
-# CHECK: msr dlr_el0, x12
-# CHECK: msr ifsr32_el2, x12
-# CHECK: msr afsr0_el1, x12
-# CHECK: msr afsr0_el2, x12
-# CHECK: msr afsr0_el3, x12
-# CHECK: msr afsr1_el1, x12
-# CHECK: msr afsr1_el2, x12
-# CHECK: msr afsr1_el3, x12
-# CHECK: msr esr_el1, x12
-# CHECK: msr esr_el2, x12
-# CHECK: msr esr_el3, x12
-# CHECK: msr fpexc32_el2, x12
-# CHECK: msr far_el1, x12
-# CHECK: msr far_el2, x12
-# CHECK: msr far_el3, x12
-# CHECK: msr hpfar_el2, x12
-# CHECK: msr par_el1, x12
-# CHECK: msr pmcr_el0, x12
-# CHECK: msr pmcntenset_el0, x12
-# CHECK: msr pmcntenclr_el0, x12
-# CHECK: msr pmovsclr_el0, x12
-# CHECK: msr pmselr_el0, x12
-# CHECK: msr pmccntr_el0, x12
-# CHECK: msr pmxevtyper_el0, x12
-# CHECK: msr pmxevcntr_el0, x12
-# CHECK: msr pmuserenr_el0, x12
-# CHECK: msr pmintenset_el1, x12
-# CHECK: msr pmintenclr_el1, x12
-# CHECK: msr pmovsset_el0, x12
-# CHECK: msr mair_el1, x12
-# CHECK: msr mair_el2, x12
-# CHECK: msr mair_el3, x12
-# CHECK: msr amair_el1, x12
-# CHECK: msr amair_el2, x12
-# CHECK: msr amair_el3, x12
-# CHECK: msr vbar_el1, x12
-# CHECK: msr vbar_el2, x12
-# CHECK: msr vbar_el3, x12
-# CHECK: msr rmr_el1, x12
-# CHECK: msr rmr_el2, x12
-# CHECK: msr rmr_el3, x12
-# CHECK: msr tpidr_el0, x12
-# CHECK: msr tpidr_el2, x12
-# CHECK: msr tpidr_el3, x12
-# CHECK: msr tpidrro_el0, x12
-# CHECK: msr tpidr_el1, x12
-# CHECK: msr cntfrq_el0, x12
-# CHECK: msr cntvoff_el2, x12
-# CHECK: msr cntkctl_el1, x12
-# CHECK: msr cnthctl_el2, x12
-# CHECK: msr cntp_tval_el0, x12
-# CHECK: msr cnthp_tval_el2, x12
-# CHECK: msr cntps_tval_el1, x12
-# CHECK: msr cntp_ctl_el0, x12
-# CHECK: msr cnthp_ctl_el2, x12
-# CHECK: msr cntps_ctl_el1, x12
-# CHECK: msr cntp_cval_el0, x12
-# CHECK: msr cnthp_cval_el2, x12
-# CHECK: msr cntps_cval_el1, x12
-# CHECK: msr cntv_tval_el0, x12
-# CHECK: msr cntv_ctl_el0, x12
-# CHECK: msr cntv_cval_el0, x12
-# CHECK: msr pmevcntr0_el0, x12
-# CHECK: msr pmevcntr1_el0, x12
-# CHECK: msr pmevcntr2_el0, x12
-# CHECK: msr pmevcntr3_el0, x12
-# CHECK: msr pmevcntr4_el0, x12
-# CHECK: msr pmevcntr5_el0, x12
-# CHECK: msr pmevcntr6_el0, x12
-# CHECK: msr pmevcntr7_el0, x12
-# CHECK: msr pmevcntr8_el0, x12
-# CHECK: msr pmevcntr9_el0, x12
-# CHECK: msr pmevcntr10_el0, x12
-# CHECK: msr pmevcntr11_el0, x12
-# CHECK: msr pmevcntr12_el0, x12
-# CHECK: msr pmevcntr13_el0, x12
-# CHECK: msr pmevcntr14_el0, x12
-# CHECK: msr pmevcntr15_el0, x12
-# CHECK: msr pmevcntr16_el0, x12
-# CHECK: msr pmevcntr17_el0, x12
-# CHECK: msr pmevcntr18_el0, x12
-# CHECK: msr pmevcntr19_el0, x12
-# CHECK: msr pmevcntr20_el0, x12
-# CHECK: msr pmevcntr21_el0, x12
-# CHECK: msr pmevcntr22_el0, x12
-# CHECK: msr pmevcntr23_el0, x12
-# CHECK: msr pmevcntr24_el0, x12
-# CHECK: msr pmevcntr25_el0, x12
-# CHECK: msr pmevcntr26_el0, x12
-# CHECK: msr pmevcntr27_el0, x12
-# CHECK: msr pmevcntr28_el0, x12
-# CHECK: msr pmevcntr29_el0, x12
-# CHECK: msr pmevcntr30_el0, x12
-# CHECK: msr pmccfiltr_el0, x12
-# CHECK: msr pmevtyper0_el0, x12
-# CHECK: msr pmevtyper1_el0, x12
-# CHECK: msr pmevtyper2_el0, x12
-# CHECK: msr pmevtyper3_el0, x12
-# CHECK: msr pmevtyper4_el0, x12
-# CHECK: msr pmevtyper5_el0, x12
-# CHECK: msr pmevtyper6_el0, x12
-# CHECK: msr pmevtyper7_el0, x12
-# CHECK: msr pmevtyper8_el0, x12
-# CHECK: msr pmevtyper9_el0, x12
-# CHECK: msr pmevtyper10_el0, x12
-# CHECK: msr pmevtyper11_el0, x12
-# CHECK: msr pmevtyper12_el0, x12
-# CHECK: msr pmevtyper13_el0, x12
-# CHECK: msr pmevtyper14_el0, x12
-# CHECK: msr pmevtyper15_el0, x12
-# CHECK: msr pmevtyper16_el0, x12
-# CHECK: msr pmevtyper17_el0, x12
-# CHECK: msr pmevtyper18_el0, x12
-# CHECK: msr pmevtyper19_el0, x12
-# CHECK: msr pmevtyper20_el0, x12
-# CHECK: msr pmevtyper21_el0, x12
-# CHECK: msr pmevtyper22_el0, x12
-# CHECK: msr pmevtyper23_el0, x12
-# CHECK: msr pmevtyper24_el0, x12
-# CHECK: msr pmevtyper25_el0, x12
-# CHECK: msr pmevtyper26_el0, x12
-# CHECK: msr pmevtyper27_el0, x12
-# CHECK: msr pmevtyper28_el0, x12
-# CHECK: msr pmevtyper29_el0, x12
-# CHECK: msr pmevtyper30_el0, x12
-# CHECK: mrs x9, teecr32_el1
-# CHECK: mrs x9, osdtrrx_el1
-# CHECK: mrs x9, mdccsr_el0
-# CHECK: mrs x9, mdccint_el1
-# CHECK: mrs x9, mdscr_el1
-# CHECK: mrs x9, osdtrtx_el1
-# CHECK: mrs x9, dbgdtr_el0
-# CHECK: mrs x9, dbgdtrrx_el0
-# CHECK: mrs x9, oseccr_el1
-# CHECK: mrs x9, dbgvcr32_el2
-# CHECK: mrs x9, dbgbvr0_el1
-# CHECK: mrs x9, dbgbvr1_el1
-# CHECK: mrs x9, dbgbvr2_el1
-# CHECK: mrs x9, dbgbvr3_el1
-# CHECK: mrs x9, dbgbvr4_el1
-# CHECK: mrs x9, dbgbvr5_el1
-# CHECK: mrs x9, dbgbvr6_el1
-# CHECK: mrs x9, dbgbvr7_el1
-# CHECK: mrs x9, dbgbvr8_el1
-# CHECK: mrs x9, dbgbvr9_el1
-# CHECK: mrs x9, dbgbvr10_el1
-# CHECK: mrs x9, dbgbvr11_el1
-# CHECK: mrs x9, dbgbvr12_el1
-# CHECK: mrs x9, dbgbvr13_el1
-# CHECK: mrs x9, dbgbvr14_el1
-# CHECK: mrs x9, dbgbvr15_el1
-# CHECK: mrs x9, dbgbcr0_el1
-# CHECK: mrs x9, dbgbcr1_el1
-# CHECK: mrs x9, dbgbcr2_el1
-# CHECK: mrs x9, dbgbcr3_el1
-# CHECK: mrs x9, dbgbcr4_el1
-# CHECK: mrs x9, dbgbcr5_el1
-# CHECK: mrs x9, dbgbcr6_el1
-# CHECK: mrs x9, dbgbcr7_el1
-# CHECK: mrs x9, dbgbcr8_el1
-# CHECK: mrs x9, dbgbcr9_el1
-# CHECK: mrs x9, dbgbcr10_el1
-# CHECK: mrs x9, dbgbcr11_el1
-# CHECK: mrs x9, dbgbcr12_el1
-# CHECK: mrs x9, dbgbcr13_el1
-# CHECK: mrs x9, dbgbcr14_el1
-# CHECK: mrs x9, dbgbcr15_el1
-# CHECK: mrs x9, dbgwvr0_el1
-# CHECK: mrs x9, dbgwvr1_el1
-# CHECK: mrs x9, dbgwvr2_el1
-# CHECK: mrs x9, dbgwvr3_el1
-# CHECK: mrs x9, dbgwvr4_el1
-# CHECK: mrs x9, dbgwvr5_el1
-# CHECK: mrs x9, dbgwvr6_el1
-# CHECK: mrs x9, dbgwvr7_el1
-# CHECK: mrs x9, dbgwvr8_el1
-# CHECK: mrs x9, dbgwvr9_el1
-# CHECK: mrs x9, dbgwvr10_el1
-# CHECK: mrs x9, dbgwvr11_el1
-# CHECK: mrs x9, dbgwvr12_el1
-# CHECK: mrs x9, dbgwvr13_el1
-# CHECK: mrs x9, dbgwvr14_el1
-# CHECK: mrs x9, dbgwvr15_el1
-# CHECK: mrs x9, dbgwcr0_el1
-# CHECK: mrs x9, dbgwcr1_el1
-# CHECK: mrs x9, dbgwcr2_el1
-# CHECK: mrs x9, dbgwcr3_el1
-# CHECK: mrs x9, dbgwcr4_el1
-# CHECK: mrs x9, dbgwcr5_el1
-# CHECK: mrs x9, dbgwcr6_el1
-# CHECK: mrs x9, dbgwcr7_el1
-# CHECK: mrs x9, dbgwcr8_el1
-# CHECK: mrs x9, dbgwcr9_el1
-# CHECK: mrs x9, dbgwcr10_el1
-# CHECK: mrs x9, dbgwcr11_el1
-# CHECK: mrs x9, dbgwcr12_el1
-# CHECK: mrs x9, dbgwcr13_el1
-# CHECK: mrs x9, dbgwcr14_el1
-# CHECK: mrs x9, dbgwcr15_el1
-# CHECK: mrs x9, mdrar_el1
-# CHECK: mrs x9, teehbr32_el1
-# CHECK: mrs x9, oslsr_el1
-# CHECK: mrs x9, osdlr_el1
-# CHECK: mrs x9, dbgprcr_el1
-# CHECK: mrs x9, dbgclaimset_el1
-# CHECK: mrs x9, dbgclaimclr_el1
-# CHECK: mrs x9, dbgauthstatus_el1
-# CHECK: mrs x9, midr_el1
-# CHECK: mrs x9, ccsidr_el1
-# CHECK: mrs x9, csselr_el1
-# CHECK: mrs x9, vpidr_el2
-# CHECK: mrs x9, clidr_el1
-# CHECK: mrs x9, ctr_el0
-# CHECK: mrs x9, mpidr_el1
-# CHECK: mrs x9, vmpidr_el2
-# CHECK: mrs x9, revidr_el1
-# CHECK: mrs x9, aidr_el1
-# CHECK: mrs x9, dczid_el0
-# CHECK: mrs x9, id_pfr0_el1
-# CHECK: mrs x9, id_pfr1_el1
-# CHECK: mrs x9, id_dfr0_el1
-# CHECK: mrs x9, id_afr0_el1
-# CHECK: mrs x9, id_mmfr0_el1
-# CHECK: mrs x9, id_mmfr1_el1
-# CHECK: mrs x9, id_mmfr2_el1
-# CHECK: mrs x9, id_mmfr3_el1
-# CHECK: mrs x9, id_isar0_el1
-# CHECK: mrs x9, id_isar1_el1
-# CHECK: mrs x9, id_isar2_el1
-# CHECK: mrs x9, id_isar3_el1
-# CHECK: mrs x9, id_isar4_el1
-# CHECK: mrs x9, id_isar5_el1
-# CHECK: mrs x9, mvfr0_el1
-# CHECK: mrs x9, mvfr1_el1
-# CHECK: mrs x9, mvfr2_el1
-# CHECK: mrs x9, id_aa64pfr0_el1
-# CHECK: mrs x9, id_aa64pfr1_el1
-# CHECK: mrs x9, id_aa64dfr0_el1
-# CHECK: mrs x9, id_aa64dfr1_el1
-# CHECK: mrs x9, id_aa64afr0_el1
-# CHECK: mrs x9, id_aa64afr1_el1
-# CHECK: mrs x9, id_aa64isar0_el1
-# CHECK: mrs x9, id_aa64isar1_el1
-# CHECK: mrs x9, id_aa64mmfr0_el1
-# CHECK: mrs x9, id_aa64mmfr1_el1
-# CHECK: mrs x9, sctlr_el1
-# CHECK: mrs x9, sctlr_el2
-# CHECK: mrs x9, sctlr_el3
-# CHECK: mrs x9, actlr_el1
-# CHECK: mrs x9, actlr_el2
-# CHECK: mrs x9, actlr_el3
-# CHECK: mrs x9, cpacr_el1
-# CHECK: mrs x9, hcr_el2
-# CHECK: mrs x9, scr_el3
-# CHECK: mrs x9, mdcr_el2
-# CHECK: mrs x9, sder32_el3
-# CHECK: mrs x9, cptr_el2
-# CHECK: mrs x9, cptr_el3
-# CHECK: mrs x9, hstr_el2
-# CHECK: mrs x9, hacr_el2
-# CHECK: mrs x9, mdcr_el3
-# CHECK: mrs x9, ttbr0_el1
-# CHECK: mrs x9, ttbr0_el2
-# CHECK: mrs x9, ttbr0_el3
-# CHECK: mrs x9, ttbr1_el1
-# CHECK: mrs x9, tcr_el1
-# CHECK: mrs x9, tcr_el2
-# CHECK: mrs x9, tcr_el3
-# CHECK: mrs x9, vttbr_el2
-# CHECK: mrs x9, vtcr_el2
-# CHECK: mrs x9, dacr32_el2
-# CHECK: mrs x9, spsr_el1
-# CHECK: mrs x9, spsr_el2
-# CHECK: mrs x9, spsr_el3
-# CHECK: mrs x9, elr_el1
-# CHECK: mrs x9, elr_el2
-# CHECK: mrs x9, elr_el3
-# CHECK: mrs x9, sp_el0
-# CHECK: mrs x9, sp_el1
-# CHECK: mrs x9, sp_el2
-# CHECK: mrs x9, spsel
-# CHECK: mrs x9, nzcv
-# CHECK: mrs x9, daif
-# CHECK: mrs x9, currentel
-# CHECK: mrs x9, spsr_irq
-# CHECK: mrs x9, spsr_abt
-# CHECK: mrs x9, spsr_und
-# CHECK: mrs x9, spsr_fiq
-# CHECK: mrs x9, fpcr
-# CHECK: mrs x9, fpsr
-# CHECK: mrs x9, dspsr_el0
-# CHECK: mrs x9, dlr_el0
-# CHECK: mrs x9, ifsr32_el2
-# CHECK: mrs x9, afsr0_el1
-# CHECK: mrs x9, afsr0_el2
-# CHECK: mrs x9, afsr0_el3
-# CHECK: mrs x9, afsr1_el1
-# CHECK: mrs x9, afsr1_el2
-# CHECK: mrs x9, afsr1_el3
-# CHECK: mrs x9, esr_el1
-# CHECK: mrs x9, esr_el2
-# CHECK: mrs x9, esr_el3
-# CHECK: mrs x9, fpexc32_el2
-# CHECK: mrs x9, far_el1
-# CHECK: mrs x9, far_el2
-# CHECK: mrs x9, far_el3
-# CHECK: mrs x9, hpfar_el2
-# CHECK: mrs x9, par_el1
-# CHECK: mrs x9, pmcr_el0
-# CHECK: mrs x9, pmcntenset_el0
-# CHECK: mrs x9, pmcntenclr_el0
-# CHECK: mrs x9, pmovsclr_el0
-# CHECK: mrs x9, pmselr_el0
-# CHECK: mrs x9, pmceid0_el0
-# CHECK: mrs x9, pmceid1_el0
-# CHECK: mrs x9, pmccntr_el0
-# CHECK: mrs x9, pmxevtyper_el0
-# CHECK: mrs x9, pmxevcntr_el0
-# CHECK: mrs x9, pmuserenr_el0
-# CHECK: mrs x9, pmintenset_el1
-# CHECK: mrs x9, pmintenclr_el1
-# CHECK: mrs x9, pmovsset_el0
-# CHECK: mrs x9, mair_el1
-# CHECK: mrs x9, mair_el2
-# CHECK: mrs x9, mair_el3
-# CHECK: mrs x9, amair_el1
-# CHECK: mrs x9, amair_el2
-# CHECK: mrs x9, amair_el3
-# CHECK: mrs x9, vbar_el1
-# CHECK: mrs x9, vbar_el2
-# CHECK: mrs x9, vbar_el3
-# CHECK: mrs x9, rvbar_el1
-# CHECK: mrs x9, rvbar_el2
-# CHECK: mrs x9, rvbar_el3
-# CHECK: mrs x9, rmr_el1
-# CHECK: mrs x9, rmr_el2
-# CHECK: mrs x9, rmr_el3
-# CHECK: mrs x9, isr_el1
-# CHECK: mrs x9, contextidr_el1
-# CHECK: mrs x9, tpidr_el0
-# CHECK: mrs x9, tpidr_el2
-# CHECK: mrs x9, tpidr_el3
-# CHECK: mrs x9, tpidrro_el0
-# CHECK: mrs x9, tpidr_el1
-# CHECK: mrs x9, cntfrq_el0
-# CHECK: mrs x9, cntpct_el0
-# CHECK: mrs x9, cntvct_el0
-# CHECK: mrs x9, cntvoff_el2
-# CHECK: mrs x9, cntkctl_el1
-# CHECK: mrs x9, cnthctl_el2
-# CHECK: mrs x9, cntp_tval_el0
-# CHECK: mrs x9, cnthp_tval_el2
-# CHECK: mrs x9, cntps_tval_el1
-# CHECK: mrs x9, cntp_ctl_el0
-# CHECK: mrs x9, cnthp_ctl_el2
-# CHECK: mrs x9, cntps_ctl_el1
-# CHECK: mrs x9, cntp_cval_el0
-# CHECK: mrs x9, cnthp_cval_el2
-# CHECK: mrs x9, cntps_cval_el1
-# CHECK: mrs x9, cntv_tval_el0
-# CHECK: mrs x9, cntv_ctl_el0
-# CHECK: mrs x9, cntv_cval_el0
-# CHECK: mrs x9, pmevcntr0_el0
-# CHECK: mrs x9, pmevcntr1_el0
-# CHECK: mrs x9, pmevcntr2_el0
-# CHECK: mrs x9, pmevcntr3_el0
-# CHECK: mrs x9, pmevcntr4_el0
-# CHECK: mrs x9, pmevcntr5_el0
-# CHECK: mrs x9, pmevcntr6_el0
-# CHECK: mrs x9, pmevcntr7_el0
-# CHECK: mrs x9, pmevcntr8_el0
-# CHECK: mrs x9, pmevcntr9_el0
-# CHECK: mrs x9, pmevcntr10_el0
-# CHECK: mrs x9, pmevcntr11_el0
-# CHECK: mrs x9, pmevcntr12_el0
-# CHECK: mrs x9, pmevcntr13_el0
-# CHECK: mrs x9, pmevcntr14_el0
-# CHECK: mrs x9, pmevcntr15_el0
-# CHECK: mrs x9, pmevcntr16_el0
-# CHECK: mrs x9, pmevcntr17_el0
-# CHECK: mrs x9, pmevcntr18_el0
-# CHECK: mrs x9, pmevcntr19_el0
-# CHECK: mrs x9, pmevcntr20_el0
-# CHECK: mrs x9, pmevcntr21_el0
-# CHECK: mrs x9, pmevcntr22_el0
-# CHECK: mrs x9, pmevcntr23_el0
-# CHECK: mrs x9, pmevcntr24_el0
-# CHECK: mrs x9, pmevcntr25_el0
-# CHECK: mrs x9, pmevcntr26_el0
-# CHECK: mrs x9, pmevcntr27_el0
-# CHECK: mrs x9, pmevcntr28_el0
-# CHECK: mrs x9, pmevcntr29_el0
-# CHECK: mrs x9, pmevcntr30_el0
-# CHECK: mrs x9, pmccfiltr_el0
-# CHECK: mrs x9, pmevtyper0_el0
-# CHECK: mrs x9, pmevtyper1_el0
-# CHECK: mrs x9, pmevtyper2_el0
-# CHECK: mrs x9, pmevtyper3_el0
-# CHECK: mrs x9, pmevtyper4_el0
-# CHECK: mrs x9, pmevtyper5_el0
-# CHECK: mrs x9, pmevtyper6_el0
-# CHECK: mrs x9, pmevtyper7_el0
-# CHECK: mrs x9, pmevtyper8_el0
-# CHECK: mrs x9, pmevtyper9_el0
-# CHECK: mrs x9, pmevtyper10_el0
-# CHECK: mrs x9, pmevtyper11_el0
-# CHECK: mrs x9, pmevtyper12_el0
-# CHECK: mrs x9, pmevtyper13_el0
-# CHECK: mrs x9, pmevtyper14_el0
-# CHECK: mrs x9, pmevtyper15_el0
-# CHECK: mrs x9, pmevtyper16_el0
-# CHECK: mrs x9, pmevtyper17_el0
-# CHECK: mrs x9, pmevtyper18_el0
-# CHECK: mrs x9, pmevtyper19_el0
-# CHECK: mrs x9, pmevtyper20_el0
-# CHECK: mrs x9, pmevtyper21_el0
-# CHECK: mrs x9, pmevtyper22_el0
-# CHECK: mrs x9, pmevtyper23_el0
-# CHECK: mrs x9, pmevtyper24_el0
-# CHECK: mrs x9, pmevtyper25_el0
-# CHECK: mrs x9, pmevtyper26_el0
-# CHECK: mrs x9, pmevtyper27_el0
-# CHECK: mrs x9, pmevtyper28_el0
-# CHECK: mrs x9, pmevtyper29_el0
-# CHECK: mrs x9, pmevtyper30_el0
+# CHECK: msr {{teecr32_el1|TEECR32_EL1}}, x12
+# CHECK: msr {{osdtrrx_el1|OSDTRRX_EL1}}, x12
+# CHECK: msr {{mdccint_el1|MDCCINT_EL1}}, x12
+# CHECK: msr {{mdscr_el1|MDSCR_EL1}}, x12
+# CHECK: msr {{osdtrtx_el1|OSDTRTX_EL1}}, x12
+# CHECK: msr {{dbgdtr_el0|DBGDTR_EL0}}, x12
+# CHECK: msr {{dbgdtrtx_el0|DBGDTRTX_EL0}}, x12
+# CHECK: msr {{oseccr_el1|OSECCR_EL1}}, x12
+# CHECK: msr {{dbgvcr32_el2|DBGVCR32_EL2}}, x12
+# CHECK: msr {{dbgbvr0_el1|DBGBVR0_EL1}}, x12
+# CHECK: msr {{dbgbvr1_el1|DBGBVR1_EL1}}, x12
+# CHECK: msr {{dbgbvr2_el1|DBGBVR2_EL1}}, x12
+# CHECK: msr {{dbgbvr3_el1|DBGBVR3_EL1}}, x12
+# CHECK: msr {{dbgbvr4_el1|DBGBVR4_EL1}}, x12
+# CHECK: msr {{dbgbvr5_el1|DBGBVR5_EL1}}, x12
+# CHECK: msr {{dbgbvr6_el1|DBGBVR6_EL1}}, x12
+# CHECK: msr {{dbgbvr7_el1|DBGBVR7_EL1}}, x12
+# CHECK: msr {{dbgbvr8_el1|DBGBVR8_EL1}}, x12
+# CHECK: msr {{dbgbvr9_el1|DBGBVR9_EL1}}, x12
+# CHECK: msr {{dbgbvr10_el1|DBGBVR10_EL1}}, x12
+# CHECK: msr {{dbgbvr11_el1|DBGBVR11_EL1}}, x12
+# CHECK: msr {{dbgbvr12_el1|DBGBVR12_EL1}}, x12
+# CHECK: msr {{dbgbvr13_el1|DBGBVR13_EL1}}, x12
+# CHECK: msr {{dbgbvr14_el1|DBGBVR14_EL1}}, x12
+# CHECK: msr {{dbgbvr15_el1|DBGBVR15_EL1}}, x12
+# CHECK: msr {{dbgbcr0_el1|DBGBCR0_EL1}}, x12
+# CHECK: msr {{dbgbcr1_el1|DBGBCR1_EL1}}, x12
+# CHECK: msr {{dbgbcr2_el1|DBGBCR2_EL1}}, x12
+# CHECK: msr {{dbgbcr3_el1|DBGBCR3_EL1}}, x12
+# CHECK: msr {{dbgbcr4_el1|DBGBCR4_EL1}}, x12
+# CHECK: msr {{dbgbcr5_el1|DBGBCR5_EL1}}, x12
+# CHECK: msr {{dbgbcr6_el1|DBGBCR6_EL1}}, x12
+# CHECK: msr {{dbgbcr7_el1|DBGBCR7_EL1}}, x12
+# CHECK: msr {{dbgbcr8_el1|DBGBCR8_EL1}}, x12
+# CHECK: msr {{dbgbcr9_el1|DBGBCR9_EL1}}, x12
+# CHECK: msr {{dbgbcr10_el1|DBGBCR10_EL1}}, x12
+# CHECK: msr {{dbgbcr11_el1|DBGBCR11_EL1}}, x12
+# CHECK: msr {{dbgbcr12_el1|DBGBCR12_EL1}}, x12
+# CHECK: msr {{dbgbcr13_el1|DBGBCR13_EL1}}, x12
+# CHECK: msr {{dbgbcr14_el1|DBGBCR14_EL1}}, x12
+# CHECK: msr {{dbgbcr15_el1|DBGBCR15_EL1}}, x12
+# CHECK: msr {{dbgwvr0_el1|DBGWVR0_EL1}}, x12
+# CHECK: msr {{dbgwvr1_el1|DBGWVR1_EL1}}, x12
+# CHECK: msr {{dbgwvr2_el1|DBGWVR2_EL1}}, x12
+# CHECK: msr {{dbgwvr3_el1|DBGWVR3_EL1}}, x12
+# CHECK: msr {{dbgwvr4_el1|DBGWVR4_EL1}}, x12
+# CHECK: msr {{dbgwvr5_el1|DBGWVR5_EL1}}, x12
+# CHECK: msr {{dbgwvr6_el1|DBGWVR6_EL1}}, x12
+# CHECK: msr {{dbgwvr7_el1|DBGWVR7_EL1}}, x12
+# CHECK: msr {{dbgwvr8_el1|DBGWVR8_EL1}}, x12
+# CHECK: msr {{dbgwvr9_el1|DBGWVR9_EL1}}, x12
+# CHECK: msr {{dbgwvr10_el1|DBGWVR10_EL1}}, x12
+# CHECK: msr {{dbgwvr11_el1|DBGWVR11_EL1}}, x12
+# CHECK: msr {{dbgwvr12_el1|DBGWVR12_EL1}}, x12
+# CHECK: msr {{dbgwvr13_el1|DBGWVR13_EL1}}, x12
+# CHECK: msr {{dbgwvr14_el1|DBGWVR14_EL1}}, x12
+# CHECK: msr {{dbgwvr15_el1|DBGWVR15_EL1}}, x12
+# CHECK: msr {{dbgwcr0_el1|DBGWCR0_EL1}}, x12
+# CHECK: msr {{dbgwcr1_el1|DBGWCR1_EL1}}, x12
+# CHECK: msr {{dbgwcr2_el1|DBGWCR2_EL1}}, x12
+# CHECK: msr {{dbgwcr3_el1|DBGWCR3_EL1}}, x12
+# CHECK: msr {{dbgwcr4_el1|DBGWCR4_EL1}}, x12
+# CHECK: msr {{dbgwcr5_el1|DBGWCR5_EL1}}, x12
+# CHECK: msr {{dbgwcr6_el1|DBGWCR6_EL1}}, x12
+# CHECK: msr {{dbgwcr7_el1|DBGWCR7_EL1}}, x12
+# CHECK: msr {{dbgwcr8_el1|DBGWCR8_EL1}}, x12
+# CHECK: msr {{dbgwcr9_el1|DBGWCR9_EL1}}, x12
+# CHECK: msr {{dbgwcr10_el1|DBGWCR10_EL1}}, x12
+# CHECK: msr {{dbgwcr11_el1|DBGWCR11_EL1}}, x12
+# CHECK: msr {{dbgwcr12_el1|DBGWCR12_EL1}}, x12
+# CHECK: msr {{dbgwcr13_el1|DBGWCR13_EL1}}, x12
+# CHECK: msr {{dbgwcr14_el1|DBGWCR14_EL1}}, x12
+# CHECK: msr {{dbgwcr15_el1|DBGWCR15_EL1}}, x12
+# CHECK: msr {{teehbr32_el1|TEEHBR32_EL1}}, x12
+# CHECK: msr {{oslar_el1|OSLAR_EL1}}, x12
+# CHECK: msr {{osdlr_el1|OSDLR_EL1}}, x12
+# CHECK: msr {{dbgprcr_el1|DBGPRCR_EL1}}, x12
+# CHECK: msr {{dbgclaimset_el1|DBGCLAIMSET_EL1}}, x12
+# CHECK: msr {{dbgclaimclr_el1|DBGCLAIMCLR_EL1}}, x12
+# CHECK: msr {{csselr_el1|CSSELR_EL1}}, x12
+# CHECK: msr {{vpidr_el2|VPIDR_EL2}}, x12
+# CHECK: msr {{vmpidr_el2|VMPIDR_EL2}}, x12
+# CHECK: msr {{sctlr_el1|SCTLR_EL1}}, x12
+# CHECK: msr {{sctlr_el2|SCTLR_EL2}}, x12
+# CHECK: msr {{sctlr_el3|SCTLR_EL3}}, x12
+# CHECK: msr {{actlr_el1|ACTLR_EL1}}, x12
+# CHECK: msr {{actlr_el2|ACTLR_EL2}}, x12
+# CHECK: msr {{actlr_el3|ACTLR_EL3}}, x12
+# CHECK: msr {{cpacr_el1|CPACR_EL1}}, x12
+# CHECK: msr {{hcr_el2|HCR_EL2}}, x12
+# CHECK: msr {{scr_el3|SCR_EL3}}, x12
+# CHECK: msr {{mdcr_el2|MDCR_EL2}}, x12
+# CHECK: msr {{sder32_el3|SDER32_EL3}}, x12
+# CHECK: msr {{cptr_el2|CPTR_EL2}}, x12
+# CHECK: msr {{cptr_el3|CPTR_EL3}}, x12
+# CHECK: msr {{hstr_el2|HSTR_EL2}}, x12
+# CHECK: msr {{hacr_el2|HACR_EL2}}, x12
+# CHECK: msr {{mdcr_el3|MDCR_EL3}}, x12
+# CHECK: msr {{ttbr0_el1|TTBR0_EL1}}, x12
+# CHECK: msr {{ttbr0_el2|TTBR0_EL2}}, x12
+# CHECK: msr {{ttbr0_el3|TTBR0_EL3}}, x12
+# CHECK: msr {{ttbr1_el1|TTBR1_EL1}}, x12
+# CHECK: msr {{tcr_el1|TCR_EL1}}, x12
+# CHECK: msr {{tcr_el2|TCR_EL2}}, x12
+# CHECK: msr {{tcr_el3|TCR_EL3}}, x12
+# CHECK: msr {{vttbr_el2|VTTBR_EL2}}, x12
+# CHECK: msr {{vtcr_el2|VTCR_EL2}}, x12
+# CHECK: msr {{dacr32_el2|DACR32_EL2}}, x12
+# CHECK: msr {{spsr_el1|SPSR_EL1}}, x12
+# CHECK: msr {{spsr_el2|SPSR_EL2}}, x12
+# CHECK: msr {{spsr_el3|SPSR_EL3}}, x12
+# CHECK: msr {{elr_el1|ELR_EL1}}, x12
+# CHECK: msr {{elr_el2|ELR_EL2}}, x12
+# CHECK: msr {{elr_el3|ELR_EL3}}, x12
+# CHECK: msr {{sp_el0|SP_EL0}}, x12
+# CHECK: msr {{sp_el1|SP_EL1}}, x12
+# CHECK: msr {{sp_el2|SP_EL2}}, x12
+# CHECK: msr {{spsel|SPSEL}}, x12
+# CHECK: msr {{nzcv|NZCV}}, x12
+# CHECK: msr {{daif|DAIF}}, x12
+# CHECK: msr {{currentel|CURRENTEL}}, x12
+# CHECK: msr {{spsr_irq|SPSR_IRQ}}, x12
+# CHECK: msr {{spsr_abt|SPSR_ABT}}, x12
+# CHECK: msr {{spsr_und|SPSR_UND}}, x12
+# CHECK: msr {{spsr_fiq|SPSR_FIQ}}, x12
+# CHECK: msr {{fpcr|FPCR}}, x12
+# CHECK: msr {{fpsr|FPSR}}, x12
+# CHECK: msr {{dspsr_el0|DSPSR_EL0}}, x12
+# CHECK: msr {{dlr_el0|DLR_EL0}}, x12
+# CHECK: msr {{ifsr32_el2|IFSR32_EL2}}, x12
+# CHECK: msr {{afsr0_el1|AFSR0_EL1}}, x12
+# CHECK: msr {{afsr0_el2|AFSR0_EL2}}, x12
+# CHECK: msr {{afsr0_el3|AFSR0_EL3}}, x12
+# CHECK: msr {{afsr1_el1|AFSR1_EL1}}, x12
+# CHECK: msr {{afsr1_el2|AFSR1_EL2}}, x12
+# CHECK: msr {{afsr1_el3|AFSR1_EL3}}, x12
+# CHECK: msr {{esr_el1|ESR_EL1}}, x12
+# CHECK: msr {{esr_el2|ESR_EL2}}, x12
+# CHECK: msr {{esr_el3|ESR_EL3}}, x12
+# CHECK: msr {{fpexc32_el2|FPEXC32_EL2}}, x12
+# CHECK: msr {{far_el1|FAR_EL1}}, x12
+# CHECK: msr {{far_el2|FAR_EL2}}, x12
+# CHECK: msr {{far_el3|FAR_EL3}}, x12
+# CHECK: msr {{hpfar_el2|HPFAR_EL2}}, x12
+# CHECK: msr {{par_el1|PAR_EL1}}, x12
+# CHECK: msr {{pmcr_el0|PMCR_EL0}}, x12
+# CHECK: msr {{pmcntenset_el0|PMCNTENSET_EL0}}, x12
+# CHECK: msr {{pmcntenclr_el0|PMCNTENCLR_EL0}}, x12
+# CHECK: msr {{pmovsclr_el0|PMOVSCLR_EL0}}, x12
+# CHECK: msr {{pmselr_el0|PMSELR_EL0}}, x12
+# CHECK: msr {{pmccntr_el0|PMCCNTR_EL0}}, x12
+# CHECK: msr {{pmxevtyper_el0|PMXEVTYPER_EL0}}, x12
+# CHECK: msr {{pmxevcntr_el0|PMXEVCNTR_EL0}}, x12
+# CHECK: msr {{pmuserenr_el0|PMUSERENR_EL0}}, x12
+# CHECK: msr {{pmintenset_el1|PMINTENSET_EL1}}, x12
+# CHECK: msr {{pmintenclr_el1|PMINTENCLR_EL1}}, x12
+# CHECK: msr {{pmovsset_el0|PMOVSSET_EL0}}, x12
+# CHECK: msr {{mair_el1|MAIR_EL1}}, x12
+# CHECK: msr {{mair_el2|MAIR_EL2}}, x12
+# CHECK: msr {{mair_el3|MAIR_EL3}}, x12
+# CHECK: msr {{amair_el1|AMAIR_EL1}}, x12
+# CHECK: msr {{amair_el2|AMAIR_EL2}}, x12
+# CHECK: msr {{amair_el3|AMAIR_EL3}}, x12
+# CHECK: msr {{vbar_el1|VBAR_EL1}}, x12
+# CHECK: msr {{vbar_el2|VBAR_EL2}}, x12
+# CHECK: msr {{vbar_el3|VBAR_EL3}}, x12
+# CHECK: msr {{rmr_el1|RMR_EL1}}, x12
+# CHECK: msr {{rmr_el2|RMR_EL2}}, x12
+# CHECK: msr {{rmr_el3|RMR_EL3}}, x12
+# CHECK: msr {{tpidr_el0|TPIDR_EL0}}, x12
+# CHECK: msr {{tpidr_el2|TPIDR_EL2}}, x12
+# CHECK: msr {{tpidr_el3|TPIDR_EL3}}, x12
+# CHECK: msr {{tpidrro_el0|TPIDRRO_EL0}}, x12
+# CHECK: msr {{tpidr_el1|TPIDR_EL1}}, x12
+# CHECK: msr {{cntfrq_el0|CNTFRQ_EL0}}, x12
+# CHECK: msr {{cntvoff_el2|CNTVOFF_EL2}}, x12
+# CHECK: msr {{cntkctl_el1|CNTKCTL_EL1}}, x12
+# CHECK: msr {{cnthctl_el2|CNTHCTL_EL2}}, x12
+# CHECK: msr {{cntp_tval_el0|CNTP_TVAL_EL0}}, x12
+# CHECK: msr {{cnthp_tval_el2|CNTHP_TVAL_EL2}}, x12
+# CHECK: msr {{cntps_tval_el1|CNTPS_TVAL_EL1}}, x12
+# CHECK: msr {{cntp_ctl_el0|CNTP_CTL_EL0}}, x12
+# CHECK: msr {{cnthp_ctl_el2|CNTHP_CTL_EL2}}, x12
+# CHECK: msr {{cntps_ctl_el1|CNTPS_CTL_EL1}}, x12
+# CHECK: msr {{cntp_cval_el0|CNTP_CVAL_EL0}}, x12
+# CHECK: msr {{cnthp_cval_el2|CNTHP_CVAL_EL2}}, x12
+# CHECK: msr {{cntps_cval_el1|CNTPS_CVAL_EL1}}, x12
+# CHECK: msr {{cntv_tval_el0|CNTV_TVAL_EL0}}, x12
+# CHECK: msr {{cntv_ctl_el0|CNTV_CTL_EL0}}, x12
+# CHECK: msr {{cntv_cval_el0|CNTV_CVAL_EL0}}, x12
+# CHECK: msr {{pmevcntr0_el0|PMEVCNTR0_EL0}}, x12
+# CHECK: msr {{pmevcntr1_el0|PMEVCNTR1_EL0}}, x12
+# CHECK: msr {{pmevcntr2_el0|PMEVCNTR2_EL0}}, x12
+# CHECK: msr {{pmevcntr3_el0|PMEVCNTR3_EL0}}, x12
+# CHECK: msr {{pmevcntr4_el0|PMEVCNTR4_EL0}}, x12
+# CHECK: msr {{pmevcntr5_el0|PMEVCNTR5_EL0}}, x12
+# CHECK: msr {{pmevcntr6_el0|PMEVCNTR6_EL0}}, x12
+# CHECK: msr {{pmevcntr7_el0|PMEVCNTR7_EL0}}, x12
+# CHECK: msr {{pmevcntr8_el0|PMEVCNTR8_EL0}}, x12
+# CHECK: msr {{pmevcntr9_el0|PMEVCNTR9_EL0}}, x12
+# CHECK: msr {{pmevcntr10_el0|PMEVCNTR10_EL0}}, x12
+# CHECK: msr {{pmevcntr11_el0|PMEVCNTR11_EL0}}, x12
+# CHECK: msr {{pmevcntr12_el0|PMEVCNTR12_EL0}}, x12
+# CHECK: msr {{pmevcntr13_el0|PMEVCNTR13_EL0}}, x12
+# CHECK: msr {{pmevcntr14_el0|PMEVCNTR14_EL0}}, x12
+# CHECK: msr {{pmevcntr15_el0|PMEVCNTR15_EL0}}, x12
+# CHECK: msr {{pmevcntr16_el0|PMEVCNTR16_EL0}}, x12
+# CHECK: msr {{pmevcntr17_el0|PMEVCNTR17_EL0}}, x12
+# CHECK: msr {{pmevcntr18_el0|PMEVCNTR18_EL0}}, x12
+# CHECK: msr {{pmevcntr19_el0|PMEVCNTR19_EL0}}, x12
+# CHECK: msr {{pmevcntr20_el0|PMEVCNTR20_EL0}}, x12
+# CHECK: msr {{pmevcntr21_el0|PMEVCNTR21_EL0}}, x12
+# CHECK: msr {{pmevcntr22_el0|PMEVCNTR22_EL0}}, x12
+# CHECK: msr {{pmevcntr23_el0|PMEVCNTR23_EL0}}, x12
+# CHECK: msr {{pmevcntr24_el0|PMEVCNTR24_EL0}}, x12
+# CHECK: msr {{pmevcntr25_el0|PMEVCNTR25_EL0}}, x12
+# CHECK: msr {{pmevcntr26_el0|PMEVCNTR26_EL0}}, x12
+# CHECK: msr {{pmevcntr27_el0|PMEVCNTR27_EL0}}, x12
+# CHECK: msr {{pmevcntr28_el0|PMEVCNTR28_EL0}}, x12
+# CHECK: msr {{pmevcntr29_el0|PMEVCNTR29_EL0}}, x12
+# CHECK: msr {{pmevcntr30_el0|PMEVCNTR30_EL0}}, x12
+# CHECK: msr {{pmccfiltr_el0|PMCCFILTR_EL0}}, x12
+# CHECK: msr {{pmevtyper0_el0|PMEVTYPER0_EL0}}, x12
+# CHECK: msr {{pmevtyper1_el0|PMEVTYPER1_EL0}}, x12
+# CHECK: msr {{pmevtyper2_el0|PMEVTYPER2_EL0}}, x12
+# CHECK: msr {{pmevtyper3_el0|PMEVTYPER3_EL0}}, x12
+# CHECK: msr {{pmevtyper4_el0|PMEVTYPER4_EL0}}, x12
+# CHECK: msr {{pmevtyper5_el0|PMEVTYPER5_EL0}}, x12
+# CHECK: msr {{pmevtyper6_el0|PMEVTYPER6_EL0}}, x12
+# CHECK: msr {{pmevtyper7_el0|PMEVTYPER7_EL0}}, x12
+# CHECK: msr {{pmevtyper8_el0|PMEVTYPER8_EL0}}, x12
+# CHECK: msr {{pmevtyper9_el0|PMEVTYPER9_EL0}}, x12
+# CHECK: msr {{pmevtyper10_el0|PMEVTYPER10_EL0}}, x12
+# CHECK: msr {{pmevtyper11_el0|PMEVTYPER11_EL0}}, x12
+# CHECK: msr {{pmevtyper12_el0|PMEVTYPER12_EL0}}, x12
+# CHECK: msr {{pmevtyper13_el0|PMEVTYPER13_EL0}}, x12
+# CHECK: msr {{pmevtyper14_el0|PMEVTYPER14_EL0}}, x12
+# CHECK: msr {{pmevtyper15_el0|PMEVTYPER15_EL0}}, x12
+# CHECK: msr {{pmevtyper16_el0|PMEVTYPER16_EL0}}, x12
+# CHECK: msr {{pmevtyper17_el0|PMEVTYPER17_EL0}}, x12
+# CHECK: msr {{pmevtyper18_el0|PMEVTYPER18_EL0}}, x12
+# CHECK: msr {{pmevtyper19_el0|PMEVTYPER19_EL0}}, x12
+# CHECK: msr {{pmevtyper20_el0|PMEVTYPER20_EL0}}, x12
+# CHECK: msr {{pmevtyper21_el0|PMEVTYPER21_EL0}}, x12
+# CHECK: msr {{pmevtyper22_el0|PMEVTYPER22_EL0}}, x12
+# CHECK: msr {{pmevtyper23_el0|PMEVTYPER23_EL0}}, x12
+# CHECK: msr {{pmevtyper24_el0|PMEVTYPER24_EL0}}, x12
+# CHECK: msr {{pmevtyper25_el0|PMEVTYPER25_EL0}}, x12
+# CHECK: msr {{pmevtyper26_el0|PMEVTYPER26_EL0}}, x12
+# CHECK: msr {{pmevtyper27_el0|PMEVTYPER27_EL0}}, x12
+# CHECK: msr {{pmevtyper28_el0|PMEVTYPER28_EL0}}, x12
+# CHECK: msr {{pmevtyper29_el0|PMEVTYPER29_EL0}}, x12
+# CHECK: msr {{pmevtyper30_el0|PMEVTYPER30_EL0}}, x12
+# CHECK: mrs x9, {{teecr32_el1|TEECR32_EL1}}
+# CHECK: mrs x9, {{osdtrrx_el1|OSDTRRX_EL1}}
+# CHECK: mrs x9, {{mdccsr_el0|MDCCSR_EL0}}
+# CHECK: mrs x9, {{mdccint_el1|MDCCINT_EL1}}
+# CHECK: mrs x9, {{mdscr_el1|MDSCR_EL1}}
+# CHECK: mrs x9, {{osdtrtx_el1|OSDTRTX_EL1}}
+# CHECK: mrs x9, {{dbgdtr_el0|DBGDTR_EL0}}
+# CHECK: mrs x9, {{dbgdtrrx_el0|DBGDTRRX_EL0}}
+# CHECK: mrs x9, {{oseccr_el1|OSECCR_EL1}}
+# CHECK: mrs x9, {{dbgvcr32_el2|DBGVCR32_EL2}}
+# CHECK: mrs x9, {{dbgbvr0_el1|DBGBVR0_EL1}}
+# CHECK: mrs x9, {{dbgbvr1_el1|DBGBVR1_EL1}}
+# CHECK: mrs x9, {{dbgbvr2_el1|DBGBVR2_EL1}}
+# CHECK: mrs x9, {{dbgbvr3_el1|DBGBVR3_EL1}}
+# CHECK: mrs x9, {{dbgbvr4_el1|DBGBVR4_EL1}}
+# CHECK: mrs x9, {{dbgbvr5_el1|DBGBVR5_EL1}}
+# CHECK: mrs x9, {{dbgbvr6_el1|DBGBVR6_EL1}}
+# CHECK: mrs x9, {{dbgbvr7_el1|DBGBVR7_EL1}}
+# CHECK: mrs x9, {{dbgbvr8_el1|DBGBVR8_EL1}}
+# CHECK: mrs x9, {{dbgbvr9_el1|DBGBVR9_EL1}}
+# CHECK: mrs x9, {{dbgbvr10_el1|DBGBVR10_EL1}}
+# CHECK: mrs x9, {{dbgbvr11_el1|DBGBVR11_EL1}}
+# CHECK: mrs x9, {{dbgbvr12_el1|DBGBVR12_EL1}}
+# CHECK: mrs x9, {{dbgbvr13_el1|DBGBVR13_EL1}}
+# CHECK: mrs x9, {{dbgbvr14_el1|DBGBVR14_EL1}}
+# CHECK: mrs x9, {{dbgbvr15_el1|DBGBVR15_EL1}}
+# CHECK: mrs x9, {{dbgbcr0_el1|DBGBCR0_EL1}}
+# CHECK: mrs x9, {{dbgbcr1_el1|DBGBCR1_EL1}}
+# CHECK: mrs x9, {{dbgbcr2_el1|DBGBCR2_EL1}}
+# CHECK: mrs x9, {{dbgbcr3_el1|DBGBCR3_EL1}}
+# CHECK: mrs x9, {{dbgbcr4_el1|DBGBCR4_EL1}}
+# CHECK: mrs x9, {{dbgbcr5_el1|DBGBCR5_EL1}}
+# CHECK: mrs x9, {{dbgbcr6_el1|DBGBCR6_EL1}}
+# CHECK: mrs x9, {{dbgbcr7_el1|DBGBCR7_EL1}}
+# CHECK: mrs x9, {{dbgbcr8_el1|DBGBCR8_EL1}}
+# CHECK: mrs x9, {{dbgbcr9_el1|DBGBCR9_EL1}}
+# CHECK: mrs x9, {{dbgbcr10_el1|DBGBCR10_EL1}}
+# CHECK: mrs x9, {{dbgbcr11_el1|DBGBCR11_EL1}}
+# CHECK: mrs x9, {{dbgbcr12_el1|DBGBCR12_EL1}}
+# CHECK: mrs x9, {{dbgbcr13_el1|DBGBCR13_EL1}}
+# CHECK: mrs x9, {{dbgbcr14_el1|DBGBCR14_EL1}}
+# CHECK: mrs x9, {{dbgbcr15_el1|DBGBCR15_EL1}}
+# CHECK: mrs x9, {{dbgwvr0_el1|DBGWVR0_EL1}}
+# CHECK: mrs x9, {{dbgwvr1_el1|DBGWVR1_EL1}}
+# CHECK: mrs x9, {{dbgwvr2_el1|DBGWVR2_EL1}}
+# CHECK: mrs x9, {{dbgwvr3_el1|DBGWVR3_EL1}}
+# CHECK: mrs x9, {{dbgwvr4_el1|DBGWVR4_EL1}}
+# CHECK: mrs x9, {{dbgwvr5_el1|DBGWVR5_EL1}}
+# CHECK: mrs x9, {{dbgwvr6_el1|DBGWVR6_EL1}}
+# CHECK: mrs x9, {{dbgwvr7_el1|DBGWVR7_EL1}}
+# CHECK: mrs x9, {{dbgwvr8_el1|DBGWVR8_EL1}}
+# CHECK: mrs x9, {{dbgwvr9_el1|DBGWVR9_EL1}}
+# CHECK: mrs x9, {{dbgwvr10_el1|DBGWVR10_EL1}}
+# CHECK: mrs x9, {{dbgwvr11_el1|DBGWVR11_EL1}}
+# CHECK: mrs x9, {{dbgwvr12_el1|DBGWVR12_EL1}}
+# CHECK: mrs x9, {{dbgwvr13_el1|DBGWVR13_EL1}}
+# CHECK: mrs x9, {{dbgwvr14_el1|DBGWVR14_EL1}}
+# CHECK: mrs x9, {{dbgwvr15_el1|DBGWVR15_EL1}}
+# CHECK: mrs x9, {{dbgwcr0_el1|DBGWCR0_EL1}}
+# CHECK: mrs x9, {{dbgwcr1_el1|DBGWCR1_EL1}}
+# CHECK: mrs x9, {{dbgwcr2_el1|DBGWCR2_EL1}}
+# CHECK: mrs x9, {{dbgwcr3_el1|DBGWCR3_EL1}}
+# CHECK: mrs x9, {{dbgwcr4_el1|DBGWCR4_EL1}}
+# CHECK: mrs x9, {{dbgwcr5_el1|DBGWCR5_EL1}}
+# CHECK: mrs x9, {{dbgwcr6_el1|DBGWCR6_EL1}}
+# CHECK: mrs x9, {{dbgwcr7_el1|DBGWCR7_EL1}}
+# CHECK: mrs x9, {{dbgwcr8_el1|DBGWCR8_EL1}}
+# CHECK: mrs x9, {{dbgwcr9_el1|DBGWCR9_EL1}}
+# CHECK: mrs x9, {{dbgwcr10_el1|DBGWCR10_EL1}}
+# CHECK: mrs x9, {{dbgwcr11_el1|DBGWCR11_EL1}}
+# CHECK: mrs x9, {{dbgwcr12_el1|DBGWCR12_EL1}}
+# CHECK: mrs x9, {{dbgwcr13_el1|DBGWCR13_EL1}}
+# CHECK: mrs x9, {{dbgwcr14_el1|DBGWCR14_EL1}}
+# CHECK: mrs x9, {{dbgwcr15_el1|DBGWCR15_EL1}}
+# CHECK: mrs x9, {{mdrar_el1|MDRAR_EL1}}
+# CHECK: mrs x9, {{teehbr32_el1|TEEHBR32_EL1}}
+# CHECK: mrs x9, {{oslsr_el1|OSLSR_EL1}}
+# CHECK: mrs x9, {{osdlr_el1|OSDLR_EL1}}
+# CHECK: mrs x9, {{dbgprcr_el1|DBGPRCR_EL1}}
+# CHECK: mrs x9, {{dbgclaimset_el1|DBGCLAIMSET_EL1}}
+# CHECK: mrs x9, {{dbgclaimclr_el1|DBGCLAIMCLR_EL1}}
+# CHECK: mrs x9, {{dbgauthstatus_el1|DBGAUTHSTATUS_EL1}}
+# CHECK: mrs x9, {{midr_el1|MIDR_EL1}}
+# CHECK: mrs x9, {{ccsidr_el1|CCSIDR_EL1}}
+# CHECK: mrs x9, {{csselr_el1|CSSELR_EL1}}
+# CHECK: mrs x9, {{vpidr_el2|VPIDR_EL2}}
+# CHECK: mrs x9, {{clidr_el1|CLIDR_EL1}}
+# CHECK: mrs x9, {{ctr_el0|CTR_EL0}}
+# CHECK: mrs x9, {{mpidr_el1|MPIDR_EL1}}
+# CHECK: mrs x9, {{vmpidr_el2|VMPIDR_EL2}}
+# CHECK: mrs x9, {{revidr_el1|REVIDR_EL1}}
+# CHECK: mrs x9, {{aidr_el1|AIDR_EL1}}
+# CHECK: mrs x9, {{dczid_el0|DCZID_EL0}}
+# CHECK: mrs x9, {{id_pfr0_el1|ID_PFR0_EL1}}
+# CHECK: mrs x9, {{id_pfr1_el1|ID_PFR1_EL1}}
+# CHECK: mrs x9, {{id_dfr0_el1|ID_DFR0_EL1}}
+# CHECK: mrs x9, {{id_afr0_el1|ID_AFR0_EL1}}
+# CHECK: mrs x9, {{id_mmfr0_el1|ID_MMFR0_EL1}}
+# CHECK: mrs x9, {{id_mmfr1_el1|ID_MMFR1_EL1}}
+# CHECK: mrs x9, {{id_mmfr2_el1|ID_MMFR2_EL1}}
+# CHECK: mrs x9, {{id_mmfr3_el1|ID_MMFR3_EL1}}
+# CHECK: mrs x9, {{id_isar0_el1|ID_ISAR0_EL1}}
+# CHECK: mrs x9, {{id_isar1_el1|ID_ISAR1_EL1}}
+# CHECK: mrs x9, {{id_isar2_el1|ID_ISAR2_EL1}}
+# CHECK: mrs x9, {{id_isar3_el1|ID_ISAR3_EL1}}
+# CHECK: mrs x9, {{id_isar4_el1|ID_ISAR4_EL1}}
+# CHECK: mrs x9, {{id_isar5_el1|ID_ISAR5_EL1}}
+# CHECK: mrs x9, {{mvfr0_el1|MVFR0_EL1}}
+# CHECK: mrs x9, {{mvfr1_el1|MVFR1_EL1}}
+# CHECK: mrs x9, {{mvfr2_el1|MVFR2_EL1}}
+# CHECK: mrs x9, {{id_aa64pfr0_el1|ID_AA64PFR0_EL1}}
+# CHECK: mrs x9, {{id_aa64pfr1_el1|ID_AA64PFR1_EL1}}
+# CHECK: mrs x9, {{id_aa64dfr0_el1|ID_AA64DFR0_EL1}}
+# CHECK: mrs x9, {{id_aa64dfr1_el1|ID_AA64DFR1_EL1}}
+# CHECK: mrs x9, {{id_aa64afr0_el1|ID_AA64AFR0_EL1}}
+# CHECK: mrs x9, {{id_aa64afr1_el1|ID_AA64AFR1_EL1}}
+# CHECK: mrs x9, {{id_aa64isar0_el1|ID_AA64ISAR0_EL1}}
+# CHECK: mrs x9, {{id_aa64isar1_el1|ID_AA64ISAR1_EL1}}
+# CHECK: mrs x9, {{id_aa64mmfr0_el1|ID_AA64MMFR0_EL1}}
+# CHECK: mrs x9, {{id_aa64mmfr1_el1|ID_AA64MMFR1_EL1}}
+# CHECK: mrs x9, {{sctlr_el1|SCTLR_EL1}}
+# CHECK: mrs x9, {{sctlr_el2|SCTLR_EL2}}
+# CHECK: mrs x9, {{sctlr_el3|SCTLR_EL3}}
+# CHECK: mrs x9, {{actlr_el1|ACTLR_EL1}}
+# CHECK: mrs x9, {{actlr_el2|ACTLR_EL2}}
+# CHECK: mrs x9, {{actlr_el3|ACTLR_EL3}}
+# CHECK: mrs x9, {{cpacr_el1|CPACR_EL1}}
+# CHECK: mrs x9, {{hcr_el2|HCR_EL2}}
+# CHECK: mrs x9, {{scr_el3|SCR_EL3}}
+# CHECK: mrs x9, {{mdcr_el2|MDCR_EL2}}
+# CHECK: mrs x9, {{sder32_el3|SDER32_EL3}}
+# CHECK: mrs x9, {{cptr_el2|CPTR_EL2}}
+# CHECK: mrs x9, {{cptr_el3|CPTR_EL3}}
+# CHECK: mrs x9, {{hstr_el2|HSTR_EL2}}
+# CHECK: mrs x9, {{hacr_el2|HACR_EL2}}
+# CHECK: mrs x9, {{mdcr_el3|MDCR_EL3}}
+# CHECK: mrs x9, {{ttbr0_el1|TTBR0_EL1}}
+# CHECK: mrs x9, {{ttbr0_el2|TTBR0_EL2}}
+# CHECK: mrs x9, {{ttbr0_el3|TTBR0_EL3}}
+# CHECK: mrs x9, {{ttbr1_el1|TTBR1_EL1}}
+# CHECK: mrs x9, {{tcr_el1|TCR_EL1}}
+# CHECK: mrs x9, {{tcr_el2|TCR_EL2}}
+# CHECK: mrs x9, {{tcr_el3|TCR_EL3}}
+# CHECK: mrs x9, {{vttbr_el2|VTTBR_EL2}}
+# CHECK: mrs x9, {{vtcr_el2|VTCR_EL2}}
+# CHECK: mrs x9, {{dacr32_el2|DACR32_EL2}}
+# CHECK: mrs x9, {{spsr_el1|SPSR_EL1}}
+# CHECK: mrs x9, {{spsr_el2|SPSR_EL2}}
+# CHECK: mrs x9, {{spsr_el3|SPSR_EL3}}
+# CHECK: mrs x9, {{elr_el1|ELR_EL1}}
+# CHECK: mrs x9, {{elr_el2|ELR_EL2}}
+# CHECK: mrs x9, {{elr_el3|ELR_EL3}}
+# CHECK: mrs x9, {{sp_el0|SP_EL0}}
+# CHECK: mrs x9, {{sp_el1|SP_EL1}}
+# CHECK: mrs x9, {{sp_el2|SP_EL2}}
+# CHECK: mrs x9, {{spsel|SPSEL}}
+# CHECK: mrs x9, {{nzcv|NZCV}}
+# CHECK: mrs x9, {{daif|DAIF}}
+# CHECK: mrs x9, {{currentel|CURRENTEL}}
+# CHECK: mrs x9, {{spsr_irq|SPSR_IRQ}}
+# CHECK: mrs x9, {{spsr_abt|SPSR_ABT}}
+# CHECK: mrs x9, {{spsr_und|SPSR_UND}}
+# CHECK: mrs x9, {{spsr_fiq|SPSR_FIQ}}
+# CHECK: mrs x9, {{fpcr|FPCR}}
+# CHECK: mrs x9, {{fpsr|FPSR}}
+# CHECK: mrs x9, {{dspsr_el0|DSPSR_EL0}}
+# CHECK: mrs x9, {{dlr_el0|DLR_EL0}}
+# CHECK: mrs x9, {{ifsr32_el2|IFSR32_EL2}}
+# CHECK: mrs x9, {{afsr0_el1|AFSR0_EL1}}
+# CHECK: mrs x9, {{afsr0_el2|AFSR0_EL2}}
+# CHECK: mrs x9, {{afsr0_el3|AFSR0_EL3}}
+# CHECK: mrs x9, {{afsr1_el1|AFSR1_EL1}}
+# CHECK: mrs x9, {{afsr1_el2|AFSR1_EL2}}
+# CHECK: mrs x9, {{afsr1_el3|AFSR1_EL3}}
+# CHECK: mrs x9, {{esr_el1|ESR_EL1}}
+# CHECK: mrs x9, {{esr_el2|ESR_EL2}}
+# CHECK: mrs x9, {{esr_el3|ESR_EL3}}
+# CHECK: mrs x9, {{fpexc32_el2|FPEXC32_EL2}}
+# CHECK: mrs x9, {{far_el1|FAR_EL1}}
+# CHECK: mrs x9, {{far_el2|FAR_EL2}}
+# CHECK: mrs x9, {{far_el3|FAR_EL3}}
+# CHECK: mrs x9, {{hpfar_el2|HPFAR_EL2}}
+# CHECK: mrs x9, {{par_el1|PAR_EL1}}
+# CHECK: mrs x9, {{pmcr_el0|PMCR_EL0}}
+# CHECK: mrs x9, {{pmcntenset_el0|PMCNTENSET_EL0}}
+# CHECK: mrs x9, {{pmcntenclr_el0|PMCNTENCLR_EL0}}
+# CHECK: mrs x9, {{pmovsclr_el0|PMOVSCLR_EL0}}
+# CHECK: mrs x9, {{pmselr_el0|PMSELR_EL0}}
+# CHECK: mrs x9, {{pmceid0_el0|PMCEID0_EL0}}
+# CHECK: mrs x9, {{pmceid1_el0|PMCEID1_EL0}}
+# CHECK: mrs x9, {{pmccntr_el0|PMCCNTR_EL0}}
+# CHECK: mrs x9, {{pmxevtyper_el0|PMXEVTYPER_EL0}}
+# CHECK: mrs x9, {{pmxevcntr_el0|PMXEVCNTR_EL0}}
+# CHECK: mrs x9, {{pmuserenr_el0|PMUSERENR_EL0}}
+# CHECK: mrs x9, {{pmintenset_el1|PMINTENSET_EL1}}
+# CHECK: mrs x9, {{pmintenclr_el1|PMINTENCLR_EL1}}
+# CHECK: mrs x9, {{pmovsset_el0|PMOVSSET_EL0}}
+# CHECK: mrs x9, {{mair_el1|MAIR_EL1}}
+# CHECK: mrs x9, {{mair_el2|MAIR_EL2}}
+# CHECK: mrs x9, {{mair_el3|MAIR_EL3}}
+# CHECK: mrs x9, {{amair_el1|AMAIR_EL1}}
+# CHECK: mrs x9, {{amair_el2|AMAIR_EL2}}
+# CHECK: mrs x9, {{amair_el3|AMAIR_EL3}}
+# CHECK: mrs x9, {{vbar_el1|VBAR_EL1}}
+# CHECK: mrs x9, {{vbar_el2|VBAR_EL2}}
+# CHECK: mrs x9, {{vbar_el3|VBAR_EL3}}
+# CHECK: mrs x9, {{rvbar_el1|RVBAR_EL1}}
+# CHECK: mrs x9, {{rvbar_el2|RVBAR_EL2}}
+# CHECK: mrs x9, {{rvbar_el3|RVBAR_EL3}}
+# CHECK: mrs x9, {{rmr_el1|RMR_EL1}}
+# CHECK: mrs x9, {{rmr_el2|RMR_EL2}}
+# CHECK: mrs x9, {{rmr_el3|RMR_EL3}}
+# CHECK: mrs x9, {{isr_el1|ISR_EL1}}
+# CHECK: mrs x9, {{contextidr_el1|CONTEXTIDR_EL1}}
+# CHECK: mrs x9, {{tpidr_el0|TPIDR_EL0}}
+# CHECK: mrs x9, {{tpidr_el2|TPIDR_EL2}}
+# CHECK: mrs x9, {{tpidr_el3|TPIDR_EL3}}
+# CHECK: mrs x9, {{tpidrro_el0|TPIDRRO_EL0}}
+# CHECK: mrs x9, {{tpidr_el1|TPIDR_EL1}}
+# CHECK: mrs x9, {{cntfrq_el0|CNTFRQ_EL0}}
+# CHECK: mrs x9, {{cntpct_el0|CNTPCT_EL0}}
+# CHECK: mrs x9, {{cntvct_el0|CNTVCT_EL0}}
+# CHECK: mrs x9, {{cntvoff_el2|CNTVOFF_EL2}}
+# CHECK: mrs x9, {{cntkctl_el1|CNTKCTL_EL1}}
+# CHECK: mrs x9, {{cnthctl_el2|CNTHCTL_EL2}}
+# CHECK: mrs x9, {{cntp_tval_el0|CNTP_TVAL_EL0}}
+# CHECK: mrs x9, {{cnthp_tval_el2|CNTHP_TVAL_EL2}}
+# CHECK: mrs x9, {{cntps_tval_el1|CNTPS_TVAL_EL1}}
+# CHECK: mrs x9, {{cntp_ctl_el0|CNTP_CTL_EL0}}
+# CHECK: mrs x9, {{cnthp_ctl_el2|CNTHP_CTL_EL2}}
+# CHECK: mrs x9, {{cntps_ctl_el1|CNTPS_CTL_EL1}}
+# CHECK: mrs x9, {{cntp_cval_el0|CNTP_CVAL_EL0}}
+# CHECK: mrs x9, {{cnthp_cval_el2|CNTHP_CVAL_EL2}}
+# CHECK: mrs x9, {{cntps_cval_el1|CNTPS_CVAL_EL1}}
+# CHECK: mrs x9, {{cntv_tval_el0|CNTV_TVAL_EL0}}
+# CHECK: mrs x9, {{cntv_ctl_el0|CNTV_CTL_EL0}}
+# CHECK: mrs x9, {{cntv_cval_el0|CNTV_CVAL_EL0}}
+# CHECK: mrs x9, {{pmevcntr0_el0|PMEVCNTR0_EL0}}
+# CHECK: mrs x9, {{pmevcntr1_el0|PMEVCNTR1_EL0}}
+# CHECK: mrs x9, {{pmevcntr2_el0|PMEVCNTR2_EL0}}
+# CHECK: mrs x9, {{pmevcntr3_el0|PMEVCNTR3_EL0}}
+# CHECK: mrs x9, {{pmevcntr4_el0|PMEVCNTR4_EL0}}
+# CHECK: mrs x9, {{pmevcntr5_el0|PMEVCNTR5_EL0}}
+# CHECK: mrs x9, {{pmevcntr6_el0|PMEVCNTR6_EL0}}
+# CHECK: mrs x9, {{pmevcntr7_el0|PMEVCNTR7_EL0}}
+# CHECK: mrs x9, {{pmevcntr8_el0|PMEVCNTR8_EL0}}
+# CHECK: mrs x9, {{pmevcntr9_el0|PMEVCNTR9_EL0}}
+# CHECK: mrs x9, {{pmevcntr10_el0|PMEVCNTR10_EL0}}
+# CHECK: mrs x9, {{pmevcntr11_el0|PMEVCNTR11_EL0}}
+# CHECK: mrs x9, {{pmevcntr12_el0|PMEVCNTR12_EL0}}
+# CHECK: mrs x9, {{pmevcntr13_el0|PMEVCNTR13_EL0}}
+# CHECK: mrs x9, {{pmevcntr14_el0|PMEVCNTR14_EL0}}
+# CHECK: mrs x9, {{pmevcntr15_el0|PMEVCNTR15_EL0}}
+# CHECK: mrs x9, {{pmevcntr16_el0|PMEVCNTR16_EL0}}
+# CHECK: mrs x9, {{pmevcntr17_el0|PMEVCNTR17_EL0}}
+# CHECK: mrs x9, {{pmevcntr18_el0|PMEVCNTR18_EL0}}
+# CHECK: mrs x9, {{pmevcntr19_el0|PMEVCNTR19_EL0}}
+# CHECK: mrs x9, {{pmevcntr20_el0|PMEVCNTR20_EL0}}
+# CHECK: mrs x9, {{pmevcntr21_el0|PMEVCNTR21_EL0}}
+# CHECK: mrs x9, {{pmevcntr22_el0|PMEVCNTR22_EL0}}
+# CHECK: mrs x9, {{pmevcntr23_el0|PMEVCNTR23_EL0}}
+# CHECK: mrs x9, {{pmevcntr24_el0|PMEVCNTR24_EL0}}
+# CHECK: mrs x9, {{pmevcntr25_el0|PMEVCNTR25_EL0}}
+# CHECK: mrs x9, {{pmevcntr26_el0|PMEVCNTR26_EL0}}
+# CHECK: mrs x9, {{pmevcntr27_el0|PMEVCNTR27_EL0}}
+# CHECK: mrs x9, {{pmevcntr28_el0|PMEVCNTR28_EL0}}
+# CHECK: mrs x9, {{pmevcntr29_el0|PMEVCNTR29_EL0}}
+# CHECK: mrs x9, {{pmevcntr30_el0|PMEVCNTR30_EL0}}
+# CHECK: mrs x9, {{pmccfiltr_el0|PMCCFILTR_EL0}}
+# CHECK: mrs x9, {{pmevtyper0_el0|PMEVTYPER0_EL0}}
+# CHECK: mrs x9, {{pmevtyper1_el0|PMEVTYPER1_EL0}}
+# CHECK: mrs x9, {{pmevtyper2_el0|PMEVTYPER2_EL0}}
+# CHECK: mrs x9, {{pmevtyper3_el0|PMEVTYPER3_EL0}}
+# CHECK: mrs x9, {{pmevtyper4_el0|PMEVTYPER4_EL0}}
+# CHECK: mrs x9, {{pmevtyper5_el0|PMEVTYPER5_EL0}}
+# CHECK: mrs x9, {{pmevtyper6_el0|PMEVTYPER6_EL0}}
+# CHECK: mrs x9, {{pmevtyper7_el0|PMEVTYPER7_EL0}}
+# CHECK: mrs x9, {{pmevtyper8_el0|PMEVTYPER8_EL0}}
+# CHECK: mrs x9, {{pmevtyper9_el0|PMEVTYPER9_EL0}}
+# CHECK: mrs x9, {{pmevtyper10_el0|PMEVTYPER10_EL0}}
+# CHECK: mrs x9, {{pmevtyper11_el0|PMEVTYPER11_EL0}}
+# CHECK: mrs x9, {{pmevtyper12_el0|PMEVTYPER12_EL0}}
+# CHECK: mrs x9, {{pmevtyper13_el0|PMEVTYPER13_EL0}}
+# CHECK: mrs x9, {{pmevtyper14_el0|PMEVTYPER14_EL0}}
+# CHECK: mrs x9, {{pmevtyper15_el0|PMEVTYPER15_EL0}}
+# CHECK: mrs x9, {{pmevtyper16_el0|PMEVTYPER16_EL0}}
+# CHECK: mrs x9, {{pmevtyper17_el0|PMEVTYPER17_EL0}}
+# CHECK: mrs x9, {{pmevtyper18_el0|PMEVTYPER18_EL0}}
+# CHECK: mrs x9, {{pmevtyper19_el0|PMEVTYPER19_EL0}}
+# CHECK: mrs x9, {{pmevtyper20_el0|PMEVTYPER20_EL0}}
+# CHECK: mrs x9, {{pmevtyper21_el0|PMEVTYPER21_EL0}}
+# CHECK: mrs x9, {{pmevtyper22_el0|PMEVTYPER22_EL0}}
+# CHECK: mrs x9, {{pmevtyper23_el0|PMEVTYPER23_EL0}}
+# CHECK: mrs x9, {{pmevtyper24_el0|PMEVTYPER24_EL0}}
+# CHECK: mrs x9, {{pmevtyper25_el0|PMEVTYPER25_EL0}}
+# CHECK: mrs x9, {{pmevtyper26_el0|PMEVTYPER26_EL0}}
+# CHECK: mrs x9, {{pmevtyper27_el0|PMEVTYPER27_EL0}}
+# CHECK: mrs x9, {{pmevtyper28_el0|PMEVTYPER28_EL0}}
+# CHECK: mrs x9, {{pmevtyper29_el0|PMEVTYPER29_EL0}}
+# CHECK: mrs x9, {{pmevtyper30_el0|PMEVTYPER30_EL0}}
0xc 0x0 0x12 0xd5
0x4c 0x0 0x10 0xd5
@@ -4147,10 +4148,10 @@
0xa9 0xef 0x3b 0xd5
0xc9 0xef 0x3b 0xd5
-# CHECK: mrs x12, s3_7_c15_c1_5
-# CHECK: mrs x13, s3_2_c11_c15_7
-# CHECK: msr s3_0_c15_c0_0, x12
-# CHECK: msr s3_7_c11_c13_7, x5
+# CHECK: mrs x12, {{s3_7_c15_c1_5|S3_7_C15_C1_5}}
+# CHECK: mrs x13, {{s3_2_c11_c15_7|S3_2_C11_C15_7}}
+# CHECK: msr {{s3_0_c15_c0_0|S3_0_C15_C0_0}}, x12
+# CHECK: msr {{s3_7_c11_c13_7|S3_7_C11_C13_7}}, x5
0xac 0xf1 0x3f 0xd5
0xed 0xbf 0x3a 0xd5
0x0c 0xf0 0x18 0xd5
diff --git a/test/MC/Disassembler/AArch64/basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
index a17579c..968a454 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
@@ -1,43 +1,66 @@
-# These spawn another process so they're rather expensive. Not many.
+# RUN: not llvm-mc -disassemble -triple=aarch64 %s 2> %t
+# RUN: FileCheck %s < %t
+# RUN: not llvm-mc -disassemble -triple=arm64 %s 2> %t
+# RUN: FileCheck %s < %t
# Instructions notionally in the add/sub (extended register) sheet, but with
# invalid shift amount or "opt" field.
-# RUN: echo "0x00 0x10 0xa0 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x00 0x10 0x60 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x00 0x14 0x20 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+[0x00 0x10 0xa0 0x0b]
+[0x00 0x10 0x60 0x0b]
+[0x00 0x14 0x20 0x0b]
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
# Instructions notionally in the add/sub (immediate) sheet, but with
# invalid "shift" field.
-# RUN: echo "0xdf 0x3 0x80 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0xed 0x8e 0xc4 0x31" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x62 0xfc 0xbf 0x11" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x3 0xff 0xff 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+[0xdf 0x3 0x80 0x91]
+[0xed 0x8e 0xc4 0x31]
+[0x62 0xfc 0xbf 0x11]
+[0x3 0xff 0xff 0x91]
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
# Instructions notionally in the load/store (unsigned immediate) sheet.
# Only unallocated (int-register) variants are: opc=0b11, size=0b10, 0b11
-# RUN: echo "0xd7 0xfc 0xff 0xb9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0xd7 0xfc 0xcf 0xf9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+[0xd7 0xfc 0xff 0xb9]
+[0xd7 0xfc 0xcf 0xf9]
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
# Instructions notionally in the floating-point <-> fixed-point conversion
# Scale field is 64-<imm> and <imm> should be 1-32 for a 32-bit int register.
-# RUN: echo "0x23 0x01 0x18 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x23 0x25 0x42 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+[0x23 0x01 0x18 0x1e]
+[0x23 0x25 0x42 0x1e]
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
# Instructions notionally in the logical (shifted register) sheet, but with out
# of range shift: w-registers can only have 0-31.
-# RUN: echo "0x00 0x80 0x00 0x0a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+[0x00 0x80 0x00 0x0a]
+# CHECK: invalid instruction encoding
# Instructions notionally in the move wide (immediate) sheet, but with out
# of range shift: w-registers can only have 0 or 16.
-# RUN: echo "0x00 0x00 0xc0 0x12" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x12 0x34 0xe0 0x52" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-
-# Data-processing instructions are undefined when S=1 and for the 0b0000111 value in opcode:sf
-# RUN: echo "0x00 0x00 0xc0 0x5f" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x56 0x0c 0xc0 0x5a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+[0x00 0x00 0xc0 0x12]
+[0x12 0x34 0xe0 0x52]
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
-# Data-processing instructions (2 source) are undefined for a value of 0001xx:0:x or 0011xx:0:x for opcode:S:sf
-# RUN: echo "0x00 0x30 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
-# RUN: echo "0x00 0x10 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# Data-processing instructions are undefined when S=1 and for the 0b0000111
+# value in opcode:sf
+[0x00 0x00 0xc0 0x5f]
+[0x56 0x0c 0xc0 0x5a]
+# CHECK: invalid instruction encoding
+# CHECK: invalid instruction encoding
+# Data-processing instructions (2 source) are undefined for a value of
+# 0001xx:0:x or 0011xx:0:x for opcode:S:sf
+[0x00 0x30 0xc1 0x1a]
+[0x00 0x10 0xc1 0x1a]
+# CHECK: invalid instruction encoding
# CHECK: invalid instruction encoding
+
+
diff --git a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
index 5363863..2fccccb 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
#------------------------------------------------------------------------------
# Load-store exclusive
diff --git a/test/MC/Disassembler/AArch64/gicv3-regs.txt b/test/MC/Disassembler/AArch64/gicv3-regs.txt
index 4351f64..851e83d 100644
--- a/test/MC/Disassembler/AArch64/gicv3-regs.txt
+++ b/test/MC/Disassembler/AArch64/gicv3-regs.txt
@@ -1,222 +1,223 @@
# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-none-linux-gnu -disassemble < %s | FileCheck %s
0x8 0xcc 0x38 0xd5
-# CHECK: mrs x8, icc_iar1_el1
+# CHECK: mrs x8, {{icc_iar1_el1|ICC_IAR1_EL1}}
0x1a 0xc8 0x38 0xd5
-# CHECK: mrs x26, icc_iar0_el1
+# CHECK: mrs x26, {{icc_iar0_el1|ICC_IAR0_EL1}}
0x42 0xcc 0x38 0xd5
-# CHECK: mrs x2, icc_hppir1_el1
+# CHECK: mrs x2, {{icc_hppir1_el1|ICC_HPPIR1_EL1}}
0x51 0xc8 0x38 0xd5
-# CHECK: mrs x17, icc_hppir0_el1
+# CHECK: mrs x17, {{icc_hppir0_el1|ICC_HPPIR0_EL1}}
0x7d 0xcb 0x38 0xd5
-# CHECK: mrs x29, icc_rpr_el1
+# CHECK: mrs x29, {{icc_rpr_el1|ICC_RPR_EL1}}
0x24 0xcb 0x3c 0xd5
-# CHECK: mrs x4, ich_vtr_el2
+# CHECK: mrs x4, {{ich_vtr_el2|ICH_VTR_EL2}}
0x78 0xcb 0x3c 0xd5
-# CHECK: mrs x24, ich_eisr_el2
+# CHECK: mrs x24, {{ich_eisr_el2|ICH_EISR_EL2}}
0xa9 0xcb 0x3c 0xd5
-# CHECK: mrs x9, ich_elsr_el2
+# CHECK: mrs x9, {{ich_elsr_el2|ICH_ELSR_EL2}}
0x78 0xcc 0x38 0xd5
-# CHECK: mrs x24, icc_bpr1_el1
+# CHECK: mrs x24, {{icc_bpr1_el1|ICC_BPR1_EL1}}
0x6e 0xc8 0x38 0xd5
-# CHECK: mrs x14, icc_bpr0_el1
+# CHECK: mrs x14, {{icc_bpr0_el1|ICC_BPR0_EL1}}
0x13 0x46 0x38 0xd5
-# CHECK: mrs x19, icc_pmr_el1
+# CHECK: mrs x19, {{icc_pmr_el1|ICC_PMR_EL1}}
0x97 0xcc 0x38 0xd5
-# CHECK: mrs x23, icc_ctlr_el1
+# CHECK: mrs x23, {{icc_ctlr_el1|ICC_CTLR_EL1}}
0x94 0xcc 0x3e 0xd5
-# CHECK: mrs x20, icc_ctlr_el3
+# CHECK: mrs x20, {{icc_ctlr_el3|ICC_CTLR_EL3}}
0xbc 0xcc 0x38 0xd5
-# CHECK: mrs x28, icc_sre_el1
+# CHECK: mrs x28, {{icc_sre_el1|ICC_SRE_EL1}}
0xb9 0xc9 0x3c 0xd5
-# CHECK: mrs x25, icc_sre_el2
+# CHECK: mrs x25, {{icc_sre_el2|ICC_SRE_EL2}}
0xa8 0xcc 0x3e 0xd5
-# CHECK: mrs x8, icc_sre_el3
+# CHECK: mrs x8, {{icc_sre_el3|ICC_SRE_EL3}}
0xd6 0xcc 0x38 0xd5
-# CHECK: mrs x22, icc_igrpen0_el1
+# CHECK: mrs x22, {{icc_igrpen0_el1|ICC_IGRPEN0_EL1}}
0xe5 0xcc 0x38 0xd5
-# CHECK: mrs x5, icc_igrpen1_el1
+# CHECK: mrs x5, {{icc_igrpen1_el1|ICC_IGRPEN1_EL1}}
0xe7 0xcc 0x3e 0xd5
-# CHECK: mrs x7, icc_igrpen1_el3
+# CHECK: mrs x7, {{icc_igrpen1_el3|ICC_IGRPEN1_EL3}}
0x16 0xcd 0x38 0xd5
-# CHECK: mrs x22, icc_seien_el1
+# CHECK: mrs x22, {{icc_seien_el1|ICC_SEIEN_EL1}}
0x84 0xc8 0x38 0xd5
-# CHECK: mrs x4, icc_ap0r0_el1
+# CHECK: mrs x4, {{icc_ap0r0_el1|ICC_AP0R0_EL1}}
0xab 0xc8 0x38 0xd5
-# CHECK: mrs x11, icc_ap0r1_el1
+# CHECK: mrs x11, {{icc_ap0r1_el1|ICC_AP0R1_EL1}}
0xdb 0xc8 0x38 0xd5
-# CHECK: mrs x27, icc_ap0r2_el1
+# CHECK: mrs x27, {{icc_ap0r2_el1|ICC_AP0R2_EL1}}
0xf5 0xc8 0x38 0xd5
-# CHECK: mrs x21, icc_ap0r3_el1
+# CHECK: mrs x21, {{icc_ap0r3_el1|ICC_AP0R3_EL1}}
0x2 0xc9 0x38 0xd5
-# CHECK: mrs x2, icc_ap1r0_el1
+# CHECK: mrs x2, {{icc_ap1r0_el1|ICC_AP1R0_EL1}}
0x35 0xc9 0x38 0xd5
-# CHECK: mrs x21, icc_ap1r1_el1
+# CHECK: mrs x21, {{icc_ap1r1_el1|ICC_AP1R1_EL1}}
0x4a 0xc9 0x38 0xd5
-# CHECK: mrs x10, icc_ap1r2_el1
+# CHECK: mrs x10, {{icc_ap1r2_el1|ICC_AP1R2_EL1}}
0x7b 0xc9 0x38 0xd5
-# CHECK: mrs x27, icc_ap1r3_el1
+# CHECK: mrs x27, {{icc_ap1r3_el1|ICC_AP1R3_EL1}}
0x14 0xc8 0x3c 0xd5
-# CHECK: mrs x20, ich_ap0r0_el2
+# CHECK: mrs x20, {{ich_ap0r0_el2|ICH_AP0R0_EL2}}
0x35 0xc8 0x3c 0xd5
-# CHECK: mrs x21, ich_ap0r1_el2
+# CHECK: mrs x21, {{ich_ap0r1_el2|ICH_AP0R1_EL2}}
0x45 0xc8 0x3c 0xd5
-# CHECK: mrs x5, ich_ap0r2_el2
+# CHECK: mrs x5, {{ich_ap0r2_el2|ICH_AP0R2_EL2}}
0x64 0xc8 0x3c 0xd5
-# CHECK: mrs x4, ich_ap0r3_el2
+# CHECK: mrs x4, {{ich_ap0r3_el2|ICH_AP0R3_EL2}}
0xf 0xc9 0x3c 0xd5
-# CHECK: mrs x15, ich_ap1r0_el2
+# CHECK: mrs x15, {{ich_ap1r0_el2|ICH_AP1R0_EL2}}
0x2c 0xc9 0x3c 0xd5
-# CHECK: mrs x12, ich_ap1r1_el2
+# CHECK: mrs x12, {{ich_ap1r1_el2|ICH_AP1R1_EL2}}
0x5b 0xc9 0x3c 0xd5
-# CHECK: mrs x27, ich_ap1r2_el2
+# CHECK: mrs x27, {{ich_ap1r2_el2|ICH_AP1R2_EL2}}
0x74 0xc9 0x3c 0xd5
-# CHECK: mrs x20, ich_ap1r3_el2
+# CHECK: mrs x20, {{ich_ap1r3_el2|ICH_AP1R3_EL2}}
0xa 0xcb 0x3c 0xd5
-# CHECK: mrs x10, ich_hcr_el2
+# CHECK: mrs x10, {{ich_hcr_el2|ICH_HCR_EL2}}
0x5b 0xcb 0x3c 0xd5
-# CHECK: mrs x27, ich_misr_el2
+# CHECK: mrs x27, {{ich_misr_el2|ICH_MISR_EL2}}
0xe6 0xcb 0x3c 0xd5
-# CHECK: mrs x6, ich_vmcr_el2
+# CHECK: mrs x6, {{ich_vmcr_el2|ICH_VMCR_EL2}}
0x93 0xc9 0x3c 0xd5
-# CHECK: mrs x19, ich_vseir_el2
+# CHECK: mrs x19, {{ich_vseir_el2|ICH_VSEIR_EL2}}
0x3 0xcc 0x3c 0xd5
-# CHECK: mrs x3, ich_lr0_el2
+# CHECK: mrs x3, {{ich_lr0_el2|ICH_LR0_EL2}}
0x21 0xcc 0x3c 0xd5
-# CHECK: mrs x1, ich_lr1_el2
+# CHECK: mrs x1, {{ich_lr1_el2|ICH_LR1_EL2}}
0x56 0xcc 0x3c 0xd5
-# CHECK: mrs x22, ich_lr2_el2
+# CHECK: mrs x22, {{ich_lr2_el2|ICH_LR2_EL2}}
0x75 0xcc 0x3c 0xd5
-# CHECK: mrs x21, ich_lr3_el2
+# CHECK: mrs x21, {{ich_lr3_el2|ICH_LR3_EL2}}
0x86 0xcc 0x3c 0xd5
-# CHECK: mrs x6, ich_lr4_el2
+# CHECK: mrs x6, {{ich_lr4_el2|ICH_LR4_EL2}}
0xaa 0xcc 0x3c 0xd5
-# CHECK: mrs x10, ich_lr5_el2
+# CHECK: mrs x10, {{ich_lr5_el2|ICH_LR5_EL2}}
0xcb 0xcc 0x3c 0xd5
-# CHECK: mrs x11, ich_lr6_el2
+# CHECK: mrs x11, {{ich_lr6_el2|ICH_LR6_EL2}}
0xec 0xcc 0x3c 0xd5
-# CHECK: mrs x12, ich_lr7_el2
+# CHECK: mrs x12, {{ich_lr7_el2|ICH_LR7_EL2}}
0x0 0xcd 0x3c 0xd5
-# CHECK: mrs x0, ich_lr8_el2
+# CHECK: mrs x0, {{ich_lr8_el2|ICH_LR8_EL2}}
0x35 0xcd 0x3c 0xd5
-# CHECK: mrs x21, ich_lr9_el2
+# CHECK: mrs x21, {{ich_lr9_el2|ICH_LR9_EL2}}
0x4d 0xcd 0x3c 0xd5
-# CHECK: mrs x13, ich_lr10_el2
+# CHECK: mrs x13, {{ich_lr10_el2|ICH_LR10_EL2}}
0x7a 0xcd 0x3c 0xd5
-# CHECK: mrs x26, ich_lr11_el2
+# CHECK: mrs x26, {{ich_lr11_el2|ICH_LR11_EL2}}
0x81 0xcd 0x3c 0xd5
-# CHECK: mrs x1, ich_lr12_el2
+# CHECK: mrs x1, {{ich_lr12_el2|ICH_LR12_EL2}}
0xa8 0xcd 0x3c 0xd5
-# CHECK: mrs x8, ich_lr13_el2
+# CHECK: mrs x8, {{ich_lr13_el2|ICH_LR13_EL2}}
0xc2 0xcd 0x3c 0xd5
-# CHECK: mrs x2, ich_lr14_el2
+# CHECK: mrs x2, {{ich_lr14_el2|ICH_LR14_EL2}}
0xe8 0xcd 0x3c 0xd5
-# CHECK: mrs x8, ich_lr15_el2
+# CHECK: mrs x8, {{ich_lr15_el2|ICH_LR15_EL2}}
0x3b 0xcc 0x18 0xd5
-# CHECK: msr icc_eoir1_el1, x27
+# CHECK: msr {{icc_eoir1_el1|ICC_EOIR1_EL1}}, x27
0x25 0xc8 0x18 0xd5
-# CHECK: msr icc_eoir0_el1, x5
+# CHECK: msr {{icc_eoir0_el1|ICC_EOIR0_EL1}}, x5
0x2d 0xcb 0x18 0xd5
-# CHECK: msr icc_dir_el1, x13
+# CHECK: msr {{icc_dir_el1|ICC_DIR_EL1}}, x13
0xb5 0xcb 0x18 0xd5
-# CHECK: msr icc_sgi1r_el1, x21
+# CHECK: msr {{icc_sgi1r_el1|ICC_SGI1R_EL1}}, x21
0xd9 0xcb 0x18 0xd5
-# CHECK: msr icc_asgi1r_el1, x25
+# CHECK: msr {{icc_asgi1r_el1|ICC_ASGI1R_EL1}}, x25
0xfc 0xcb 0x18 0xd5
-# CHECK: msr icc_sgi0r_el1, x28
+# CHECK: msr {{icc_sgi0r_el1|ICC_SGI0R_EL1}}, x28
0x67 0xcc 0x18 0xd5
-# CHECK: msr icc_bpr1_el1, x7
+# CHECK: msr {{icc_bpr1_el1|ICC_BPR1_EL1}}, x7
0x69 0xc8 0x18 0xd5
-# CHECK: msr icc_bpr0_el1, x9
+# CHECK: msr {{icc_bpr0_el1|ICC_BPR0_EL1}}, x9
0x1d 0x46 0x18 0xd5
-# CHECK: msr icc_pmr_el1, x29
+# CHECK: msr {{icc_pmr_el1|ICC_PMR_EL1}}, x29
0x98 0xcc 0x18 0xd5
-# CHECK: msr icc_ctlr_el1, x24
+# CHECK: msr {{icc_ctlr_el1|ICC_CTLR_EL1}}, x24
0x80 0xcc 0x1e 0xd5
-# CHECK: msr icc_ctlr_el3, x0
+# CHECK: msr {{icc_ctlr_el3|ICC_CTLR_EL3}}, x0
0xa2 0xcc 0x18 0xd5
-# CHECK: msr icc_sre_el1, x2
+# CHECK: msr {{icc_sre_el1|ICC_SRE_EL1}}, x2
0xa5 0xc9 0x1c 0xd5
-# CHECK: msr icc_sre_el2, x5
+# CHECK: msr {{icc_sre_el2|ICC_SRE_EL2}}, x5
0xaa 0xcc 0x1e 0xd5
-# CHECK: msr icc_sre_el3, x10
+# CHECK: msr {{icc_sre_el3|ICC_SRE_EL3}}, x10
0xd6 0xcc 0x18 0xd5
-# CHECK: msr icc_igrpen0_el1, x22
+# CHECK: msr {{icc_igrpen0_el1|ICC_IGRPEN0_EL1}}, x22
0xeb 0xcc 0x18 0xd5
-# CHECK: msr icc_igrpen1_el1, x11
+# CHECK: msr {{icc_igrpen1_el1|ICC_IGRPEN1_EL1}}, x11
0xe8 0xcc 0x1e 0xd5
-# CHECK: msr icc_igrpen1_el3, x8
+# CHECK: msr {{icc_igrpen1_el3|ICC_IGRPEN1_EL3}}, x8
0x4 0xcd 0x18 0xd5
-# CHECK: msr icc_seien_el1, x4
+# CHECK: msr {{icc_seien_el1|ICC_SEIEN_EL1}}, x4
0x9b 0xc8 0x18 0xd5
-# CHECK: msr icc_ap0r0_el1, x27
+# CHECK: msr {{icc_ap0r0_el1|ICC_AP0R0_EL1}}, x27
0xa5 0xc8 0x18 0xd5
-# CHECK: msr icc_ap0r1_el1, x5
+# CHECK: msr {{icc_ap0r1_el1|ICC_AP0R1_EL1}}, x5
0xd4 0xc8 0x18 0xd5
-# CHECK: msr icc_ap0r2_el1, x20
+# CHECK: msr {{icc_ap0r2_el1|ICC_AP0R2_EL1}}, x20
0xe0 0xc8 0x18 0xd5
-# CHECK: msr icc_ap0r3_el1, x0
+# CHECK: msr {{icc_ap0r3_el1|ICC_AP0R3_EL1}}, x0
0x2 0xc9 0x18 0xd5
-# CHECK: msr icc_ap1r0_el1, x2
+# CHECK: msr {{icc_ap1r0_el1|ICC_AP1R0_EL1}}, x2
0x3d 0xc9 0x18 0xd5
-# CHECK: msr icc_ap1r1_el1, x29
+# CHECK: msr {{icc_ap1r1_el1|ICC_AP1R1_EL1}}, x29
0x57 0xc9 0x18 0xd5
-# CHECK: msr icc_ap1r2_el1, x23
+# CHECK: msr {{icc_ap1r2_el1|ICC_AP1R2_EL1}}, x23
0x6b 0xc9 0x18 0xd5
-# CHECK: msr icc_ap1r3_el1, x11
+# CHECK: msr {{icc_ap1r3_el1|ICC_AP1R3_EL1}}, x11
0x2 0xc8 0x1c 0xd5
-# CHECK: msr ich_ap0r0_el2, x2
+# CHECK: msr {{ich_ap0r0_el2|ICH_AP0R0_EL2}}, x2
0x3b 0xc8 0x1c 0xd5
-# CHECK: msr ich_ap0r1_el2, x27
+# CHECK: msr {{ich_ap0r1_el2|ICH_AP0R1_EL2}}, x27
0x47 0xc8 0x1c 0xd5
-# CHECK: msr ich_ap0r2_el2, x7
+# CHECK: msr {{ich_ap0r2_el2|ICH_AP0R2_EL2}}, x7
0x61 0xc8 0x1c 0xd5
-# CHECK: msr ich_ap0r3_el2, x1
+# CHECK: msr {{ich_ap0r3_el2|ICH_AP0R3_EL2}}, x1
0x7 0xc9 0x1c 0xd5
-# CHECK: msr ich_ap1r0_el2, x7
+# CHECK: msr {{ich_ap1r0_el2|ICH_AP1R0_EL2}}, x7
0x2c 0xc9 0x1c 0xd5
-# CHECK: msr ich_ap1r1_el2, x12
+# CHECK: msr {{ich_ap1r1_el2|ICH_AP1R1_EL2}}, x12
0x4e 0xc9 0x1c 0xd5
-# CHECK: msr ich_ap1r2_el2, x14
+# CHECK: msr {{ich_ap1r2_el2|ICH_AP1R2_EL2}}, x14
0x6d 0xc9 0x1c 0xd5
-# CHECK: msr ich_ap1r3_el2, x13
+# CHECK: msr {{ich_ap1r3_el2|ICH_AP1R3_EL2}}, x13
0x1 0xcb 0x1c 0xd5
-# CHECK: msr ich_hcr_el2, x1
+# CHECK: msr {{ich_hcr_el2|ICH_HCR_EL2}}, x1
0x4a 0xcb 0x1c 0xd5
-# CHECK: msr ich_misr_el2, x10
+# CHECK: msr {{ich_misr_el2|ICH_MISR_EL2}}, x10
0xf8 0xcb 0x1c 0xd5
-# CHECK: msr ich_vmcr_el2, x24
+# CHECK: msr {{ich_vmcr_el2|ICH_VMCR_EL2}}, x24
0x9d 0xc9 0x1c 0xd5
-# CHECK: msr ich_vseir_el2, x29
+# CHECK: msr {{ich_vseir_el2|ICH_VSEIR_EL2}}, x29
0x1a 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr0_el2, x26
+# CHECK: msr {{ich_lr0_el2|ICH_LR0_EL2}}, x26
0x29 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr1_el2, x9
+# CHECK: msr {{ich_lr1_el2|ICH_LR1_EL2}}, x9
0x52 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr2_el2, x18
+# CHECK: msr {{ich_lr2_el2|ICH_LR2_EL2}}, x18
0x7a 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr3_el2, x26
+# CHECK: msr {{ich_lr3_el2|ICH_LR3_EL2}}, x26
0x96 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr4_el2, x22
+# CHECK: msr {{ich_lr4_el2|ICH_LR4_EL2}}, x22
0xba 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr5_el2, x26
+# CHECK: msr {{ich_lr5_el2|ICH_LR5_EL2}}, x26
0xdb 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr6_el2, x27
+# CHECK: msr {{ich_lr6_el2|ICH_LR6_EL2}}, x27
0xe8 0xcc 0x1c 0xd5
-# CHECK: msr ich_lr7_el2, x8
+# CHECK: msr {{ich_lr7_el2|ICH_LR7_EL2}}, x8
0x11 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr8_el2, x17
+# CHECK: msr {{ich_lr8_el2|ICH_LR8_EL2}}, x17
0x33 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr9_el2, x19
+# CHECK: msr {{ich_lr9_el2|ICH_LR9_EL2}}, x19
0x51 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr10_el2, x17
+# CHECK: msr {{ich_lr10_el2|ICH_LR10_EL2}}, x17
0x65 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr11_el2, x5
+# CHECK: msr {{ich_lr11_el2|ICH_LR11_EL2}}, x5
0x9d 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr12_el2, x29
+# CHECK: msr {{ich_lr12_el2|ICH_LR12_EL2}}, x29
0xa2 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr13_el2, x2
+# CHECK: msr {{ich_lr13_el2|ICH_LR13_EL2}}, x2
0xcd 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr14_el2, x13
+# CHECK: msr {{ich_lr14_el2|ICH_LR14_EL2}}, x13
0xfb 0xcd 0x1c 0xd5
-# CHECK: msr ich_lr15_el2, x27
+# CHECK: msr {{ich_lr15_el2|ICH_LR15_EL2}}, x27
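Every CHECK line in gicv3-regs.txt now wraps the system-register name in FileCheck's {{...}} regex syntax: the aarch64 printer emits the lower-case spelling and the arm64 printer the upper-case one, and the alternation accepts either, so one expectation serves both RUN lines. The first pair from the hunk shows the whole pattern:

# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple arm64-none-linux-gnu -disassemble < %s | FileCheck %s
0x8 0xcc 0x38 0xd5
# CHECK: mrs x8, {{icc_iar1_el1|ICC_IAR1_EL1}}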
diff --git a/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
index 7ff495f..3c443a9 100644
--- a/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
+++ b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -disassemble < %s 2>&1 | FileCheck %s
# Stores are OK.
0xe0 0x83 0x00 0xa9
diff --git a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
index 637ebdb..6ba33ad 100644
--- a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
+++ b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
# None of these instructions should be classified as unpredictable:
diff --git a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
index f52d37f..1915340 100644
--- a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
+++ b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=+fp-armv8 -disassemble < %s 2>&1 | FileCheck %s
# None of these instructions should be classified as unpredictable:
diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg
index 9a66a00..2c423d1 100644
--- a/test/MC/Disassembler/AArch64/lit.local.cfg
+++ b/test/MC/Disassembler/AArch64/lit.local.cfg
@@ -1,4 +1,4 @@
targets = set(config.root.targets_to_build.split())
-if not 'AArch64' in targets:
+if 'AArch64' not in targets:
config.unsupported = True
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index 863730a..3590668 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
#------------------------------------------------------------------------------
# Vector Integer Add/Sub
@@ -87,7 +88,7 @@
# Vector Bitwise OR - immediate
#------------------------------------------------------------------------------
# CHECK: movi v31.4s, #0xff, lsl #24
-# CHECK: mvni v0.2s, #0x0
+# CHECK: mvni v0.2s, #{{0x0|0}}
# CHECK: bic v15.4h, #0xf, lsl #8
# CHECK: orr v16.8h, #0x1f
0xff 0x67 0x07 0x4f
@@ -132,10 +133,8 @@
# Vector Move - register
#------------------------------------------------------------------------------
-# FIXME: these should print as "mov", but TableGen can't handle it.
-
-# CHECK: orr v1.16b, v15.16b, v15.16b
-# CHECK: orr v25.8b, v4.8b, v4.8b
+# CHECK: mov v1.16b, v15.16b
+# CHECK: mov v25.8b, v4.8b
0xe1 0x1d 0xaf 0x4e
0x99 0x1c 0xa4 0x0e
@@ -246,31 +245,31 @@
#----------------------------------------------------------------------
# Vector Compare Mask Equal to Zero (Integer)
#----------------------------------------------------------------------
-# CHECK: cmeq v31.16b, v15.16b, #0x0
+# CHECK: cmeq v31.16b, v15.16b, #{{0x0|0}}
0xff 0x99 0x20 0x4e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
#----------------------------------------------------------------------
-# CHECK: cmge v3.8b, v15.8b, #0x0
+# CHECK: cmge v3.8b, v15.8b, #{{0x0|0}}
0xe3 0x89 0x20 0x2e
#----------------------------------------------------------------------
# Vector Compare Mask Greater Than Zero (Signed Integer)
#----------------------------------------------------------------------
-# CHECK: cmgt v22.2s, v9.2s, #0x0
+# CHECK: cmgt v22.2s, v9.2s, #{{0x0|0}}
0x36 0x89 0xa0 0x0e
#----------------------------------------------------------------------
# Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
#----------------------------------------------------------------------
-# CHECK: cmle v5.2d, v14.2d, #0x0
+# CHECK: cmle v5.2d, v14.2d, #{{0x0|0}}
0xc5 0x99 0xe0 0x6e
#----------------------------------------------------------------------
# Vector Compare Mask Less Than Zero (Signed Integer)
#----------------------------------------------------------------------
-# CHECK: cmlt v13.8h, v11.8h, #0x0
+# CHECK: cmlt v13.8h, v11.8h, #{{0x0|0}}
0x6d 0xa9 0x60 0x4e
#----------------------------------------------------------------------
@@ -1559,7 +1558,7 @@
#----------------------------------------------------------------------
# Scalar Compare Bitwise Equal To Zero
#----------------------------------------------------------------------
-# CHECK: cmeq d20, d21, #0x0
+# CHECK: cmeq d20, d21, #{{0x0|0}}
0xb4,0x9a,0xe0,0x5e
#----------------------------------------------------------------------
@@ -1578,7 +1577,7 @@
#----------------------------------------------------------------------
# Scalar Compare Signed Greater Than Or Equal To Zero
#----------------------------------------------------------------------
-# CHECK: cmge d20, d21, #0x0
+# CHECK: cmge d20, d21, #{{0x0|0}}
0xb4,0x8a,0xe0,0x7e
#----------------------------------------------------------------------
@@ -1596,19 +1595,19 @@
#----------------------------------------------------------------------
# Scalar Compare Signed Greater Than Zero
#----------------------------------------------------------------------
-# CHECK: cmgt d20, d21, #0x0
+# CHECK: cmgt d20, d21, #{{0x0|0}}
0xb4,0x8a,0xe0,0x5e
#----------------------------------------------------------------------
# Scalar Compare Signed Less Than Or Equal To Zero
#----------------------------------------------------------------------
-# CHECK: cmle d20, d21, #0x0
+# CHECK: cmle d20, d21, #{{0x0|0}}
0xb4,0x9a,0xe0,0x7e
#----------------------------------------------------------------------
# Scalar Compare Less Than Zero
#----------------------------------------------------------------------
-# CHECK: cmlt d20, d21, #0x0
+# CHECK: cmlt d20, d21, #{{0x0|0}}
0xb4,0xaa,0xe0,0x5e
#----------------------------------------------------------------------
@@ -2008,34 +2007,34 @@
#----------------------------------------------------------------------
# Vector load/store multiple N-element structure
#----------------------------------------------------------------------
-# CHECK: ld1 {v0.16b}, [x0]
-# CHECK: ld1 {v15.8h, v16.8h}, [x15]
-# CHECK: ld1 {v31.4s, v0.4s, v1.4s}, [sp]
-# CHECK: ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+# CHECK: ld1 { v0.16b }, [x0]
+# CHECK: ld1 { v15.8h, v16.8h }, [x15]
+# CHECK: ld1 { v31.4s, v0.4s, v1.4s }, [sp]
+# CHECK: ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
0x00,0x70,0x40,0x4c
0xef,0xa5,0x40,0x4c
0xff,0x6b,0x40,0x4c
0x00,0x2c,0x40,0x4c
-# CHECK: ld2 {v0.8b, v1.8b}, [x0]
-# CHECK: ld3 {v15.4h, v16.4h, v17.4h}, [x15]
-# CHECK: ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+# CHECK: ld2 { v0.8b, v1.8b }, [x0]
+# CHECK: ld3 { v15.4h, v16.4h, v17.4h }, [x15]
+# CHECK: ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
0x00,0x80,0x40,0x0c
0xef,0x45,0x40,0x0c
0xff,0x0b,0x40,0x0c
-# CHECK: st1 {v0.16b}, [x0]
-# CHECK: st1 {v15.8h, v16.8h}, [x15]
-# CHECK: st1 {v31.4s, v0.4s, v1.4s}, [sp]
-# CHECK: st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0]
+# CHECK: st1 { v0.16b }, [x0]
+# CHECK: st1 { v15.8h, v16.8h }, [x15]
+# CHECK: st1 { v31.4s, v0.4s, v1.4s }, [sp]
+# CHECK: st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
0x00,0x70,0x00,0x4c
0xef,0xa5,0x00,0x4c
0xff,0x6b,0x00,0x4c
0x00,0x2c,0x00,0x4c
-# CHECK: st2 {v0.8b, v1.8b}, [x0]
-# CHECK: st3 {v15.4h, v16.4h, v17.4h}, [x15]
-# CHECK: st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
+# CHECK: st2 { v0.8b, v1.8b }, [x0]
+# CHECK: st3 { v15.4h, v16.4h, v17.4h }, [x15]
+# CHECK: st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
0x00,0x80,0x00,0x0c
0xef,0x45,0x00,0x0c
0xff,0x0b,0x00,0x0c
@@ -2043,35 +2042,35 @@
#----------------------------------------------------------------------
# Vector load/store multiple N-element structure (post-index)
#----------------------------------------------------------------------
-# CHECK: ld1 {v15.8h}, [x15], x2
-# CHECK: ld1 {v31.4s, v0.4s}, [sp], #32
-# CHECK: ld1 {v0.2d, v1.2d, v2.2d}, [x0], #48
-# CHECK: ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+# CHECK: ld1 { v15.8h }, [x15], x2
+# CHECK: ld1 { v31.4s, v0.4s }, [sp], #32
+# CHECK: ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK: ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
0xef,0x75,0xc2,0x4c
0xff,0xab,0xdf,0x4c
0x00,0x6c,0xdf,0x4c
0x00,0x20,0xc3,0x0c
-# CHECK: ld2 {v0.16b, v1.16b}, [x0], x1
-# CHECK: ld3 {v15.8h, v16.8h, v17.8h}, [x15], x2
-# CHECK: ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+# CHECK: ld2 { v0.16b, v1.16b }, [x0], x1
+# CHECK: ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2
+# CHECK: ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
0x00,0x80,0xc1,0x4c
0xef,0x45,0xc2,0x4c
0xff,0x0b,0xdf,0x4c
-# CHECK: st1 {v15.8h}, [x15], x2
-# CHECK: st1 {v31.4s, v0.4s}, [sp], #32
-# CHECK: st1 {v0.2d, v1.2d, v2.2d}, [x0], #48
-# CHECK: st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0], x3
+# CHECK: st1 { v15.8h }, [x15], x2
+# CHECK: st1 { v31.4s, v0.4s }, [sp], #32
+# CHECK: st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK: st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
0xef,0x75,0x82,0x4c
0xff,0xab,0x9f,0x4c
0x00,0x6c,0x9f,0x4c
0x00,0x20,0x83,0x0c
-# CHECK: st2 {v0.16b, v1.16b}, [x0], x1
-# CHECK: st3 {v15.8h, v16.8h, v17.8h}, [x15], x2
-# CHECK: st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp], #64
+# CHECK: st2 { v0.16b, v1.16b }, [x0], x1
+# CHECK: st3 { v15.8h, v16.8h, v17.8h }, [x15], x2
+# CHECK: st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64
0x00,0x80,0x81,0x4c
0xef,0x45,0x82,0x4c
0xff,0x0b,0x9f,0x4c
@@ -2080,14 +2079,14 @@
# Vector load single N-element structure to all lanes of N
# consecutive registers (N = 1,2,3,4)
#----------------------------------------------------------------------
-# CHECK: ld1r {v0.16b}, [x0]
-# CHECK: ld1r {v15.8h}, [x15]
-# CHECK: ld2r {v31.4s, v0.4s}, [sp]
-# CHECK: ld2r {v0.2d, v1.2d}, [x0]
-# CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0]
-# CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15]
-# CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp]
-# CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp]
+# CHECK: ld1r { v0.16b }, [x0]
+# CHECK: ld1r { v15.8h }, [x15]
+# CHECK: ld2r { v31.4s, v0.4s }, [sp]
+# CHECK: ld2r { v0.2d, v1.2d }, [x0]
+# CHECK: ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK: ld3r { v15.4h, v16.4h, v17.4h }, [x15]
+# CHECK: ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp]
+# CHECK: ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp]
0x00,0xc0,0x40,0x4d
0xef,0xc5,0x40,0x4d
0xff,0xcb,0x60,0x4d
@@ -2101,14 +2100,14 @@
# Vector load/store single N-element structure to/from one lane of N
# consecutive registers (N = 1,2,3,4)
#----------------------------------------------------------------------
-# CHECK: ld1 {v0.b}[9], [x0]
-# CHECK: ld2 {v15.h, v16.h}[7], [x15]
-# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp]
-# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0]
-# CHECK: st1 {v0.d}[1], [x0]
-# CHECK: st2 {v31.s, v0.s}[3], [sp]
-# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15]
-# CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0]
+# CHECK: ld1 { v0.b }[9], [x0]
+# CHECK: ld2 { v15.h, v16.h }[7], [x15]
+# CHECK: ld3 { v31.s, v0.s, v1.s }[3], [sp]
+# CHECK: ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK: st1 { v0.d }[1], [x0]
+# CHECK: st2 { v31.s, v0.s }[3], [sp]
+# CHECK: st3 { v15.h, v16.h, v17.h }[7], [x15]
+# CHECK: st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
0x00,0x04,0x40,0x4d
0xef,0x59,0x60,0x4d
0xff,0xb3,0x40,0x4d
@@ -2122,14 +2121,14 @@
# Post-index of vector load single N-element structure to all lanes of N
# consecutive registers (N = 1,2,3,4)
#----------------------------------------------------------------------
-# CHECK: ld1r {v0.16b}, [x0], #1
-# CHECK: ld1r {v15.8h}, [x15], #2
-# CHECK: ld2r {v31.4s, v0.4s}, [sp], #8
-# CHECK: ld2r {v0.2d, v1.2d}, [x0], #16
-# CHECK: ld3r {v0.8b, v1.8b, v2.8b}, [x0], #3
-# CHECK: ld3r {v15.4h, v16.4h, v17.4h}, [x15], #6
-# CHECK: ld4r {v31.2s, v0.2s, v1.2s, v2.2s}, [sp], x30
-# CHECK: ld4r {v31.1d, v0.1d, v1.1d, v2.1d}, [sp], x7
+# CHECK: ld1r { v0.16b }, [x0], #1
+# CHECK: ld1r { v15.8h }, [x15], #2
+# CHECK: ld2r { v31.4s, v0.4s }, [sp], #8
+# CHECK: ld2r { v0.2d, v1.2d }, [x0], #16
+# CHECK: ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK: ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6
+# CHECK: ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30
+# CHECK: ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7
0x00,0xc0,0xdf,0x4d
0xef,0xc5,0xdf,0x4d
0xff,0xcb,0xff,0x4d
@@ -2143,15 +2142,15 @@
# Post-index of vector load/store single N-element structure to/from
# one lane of N consecutive registers (N = 1,2,3,4)
#----------------------------------------------------------------------
-# CHECK: ld1 {v0.b}[9], [x0], #1
-# CHECK: ld2 {v15.h, v16.h}[7], [x15], #4
-# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3
-# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
-# CHECK: ld4 {v0.h, v1.h, v2.h, v3.h}[7], [x0], x0
-# CHECK: st1 {v0.d}[1], [x0], #8
-# CHECK: st2 {v31.s, v0.s}[3], [sp], #8
-# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6
-# CHECK: st4 {v0.b, v1.b, v2.b, v3.b}[9], [x0], x5
+# CHECK: ld1 { v0.b }[9], [x0], #1
+# CHECK: ld2 { v15.h, v16.h }[7], [x15], #4
+# CHECK: ld3 { v31.s, v0.s, v1.s }[3], [sp], x3
+# CHECK: ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK: ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK: st1 { v0.d }[1], [x0], #8
+# CHECK: st2 { v31.s, v0.s }[3], [sp], #8
+# CHECK: st3 { v15.h, v16.h, v17.h }[7], [x15], #6
+# CHECK: st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5
0x00,0x04,0xdf,0x4d
0xef,0x59,0xff,0x4d
0xff,0xb3,0xc3,0x4d
@@ -2167,8 +2166,8 @@
#----------------------------------------------------------------------
0x20,0x18,0x02,0x2e
0x20,0x18,0x02,0x6e
-# CHECK: ext v0.8b, v1.8b, v2.8b, #0x3
-# CHECK: ext v0.16b, v1.16b, v2.16b, #0x3
+# CHECK: ext v0.8b, v1.8b, v2.8b, #{{0x3|3}}
+# CHECK: ext v0.16b, v1.16b, v2.16b, #{{0x3|3}}
#----------------------------------------------------------------------
# unzip with 3 same vectors to get primary result
@@ -2481,10 +2480,10 @@
#----------------------------------------------------------------------
# Duplicate element (scalar)
#----------------------------------------------------------------------
-# CHECK: dup b0, v0.b[15]
-# CHECK: dup h2, v31.h[5]
-# CHECK: dup s17, v2.s[2]
-# CHECK: dup d6, v12.d[1]
+# CHECK: {{dup|mov}} b0, v0.b[15]
+# CHECK: {{dup|mov}} h2, v31.h[5]
+# CHECK: {{dup|mov}} s17, v2.s[2]
+# CHECK: {{dup|mov}} d6, v12.d[1]
0x00 0x04 0x1f 0x5e
0xe2 0x07 0x16 0x5e
0x51 0x04 0x14 0x5e
@@ -2497,37 +2496,37 @@
0xf0,0x23,0x02,0x0e
0x20,0x40,0x02,0x0e
0xf0,0x62,0x02,0x0e
-# CHECK: tbl v0.8b, {v1.16b}, v2.8b
-# CHECK: tbl v16.8b, {v31.16b, v0.16b}, v2.8b
-# CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
-# CHECK: tbl v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
+# CHECK: tbl v0.8b, { v1.16b }, v2.8b
+# CHECK: tbl v16.8b, { v31.16b, v0.16b }, v2.8b
+# CHECK: tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b
+# CHECK: tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b
0x20,0x00,0x02,0x4e
0xf0,0x23,0x02,0x4e
0x20,0x40,0x02,0x4e
0xe0,0x63,0x02,0x4e
-# CHECK: tbl v0.16b, {v1.16b}, v2.16b
-# CHECK: tbl v16.16b, {v31.16b, v0.16b}, v2.16b
-# CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
-# CHECK: tbl v0.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
+# CHECK: tbl v0.16b, { v1.16b }, v2.16b
+# CHECK: tbl v16.16b, { v31.16b, v0.16b }, v2.16b
+# CHECK: tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b
+# CHECK: tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b
0x20,0x10,0x02,0x0e
0xf0,0x33,0x02,0x0e
0x20,0x50,0x02,0x0e
0xf0,0x72,0x02,0x0e
-# CHECK: tbx v0.8b, {v1.16b}, v2.8b
-# CHECK: tbx v16.8b, {v31.16b, v0.16b}, v2.8b
-# CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
-# CHECK: tbx v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
+# CHECK: tbx v0.8b, { v1.16b }, v2.8b
+# CHECK: tbx v16.8b, { v31.16b, v0.16b }, v2.8b
+# CHECK: tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b
+# CHECK: tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b
0x20,0x10,0x02,0x4e
0xf0,0x33,0x02,0x4e
0x20,0x50,0x02,0x4e
0xf0,0x73,0x02,0x4e
-# CHECK: tbx v0.16b, {v1.16b}, v2.16b
-# CHECK: tbx v16.16b, {v31.16b, v0.16b}, v2.16b
-# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
-# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
+# CHECK: tbx v0.16b, { v1.16b }, v2.16b
+# CHECK: tbx v16.16b, { v31.16b, v0.16b }, v2.16b
+# CHECK: tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b
+# CHECK: tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b
#----------------------------------------------------------------------
# Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
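Two further printer differences drive the NEON changes above: small immediates, which one printer writes as hex (0x0) and the other as decimal (0), and vector register lists, which gain spaces inside the braces ({ v0.16b } instead of {v0.16b}). Only the immediate needs the {{0x0|0}} alternation; a single brace is literal text to FileCheck, so the respaced lists can be matched verbatim. Both patterns in one sketch, reusing lines from the hunk:

# CHECK: cmeq v31.16b, v15.16b, #{{0x0|0}}
0xff 0x99 0x20 0x4e
# CHECK: ld1 { v0.16b }, [x0]
0x00,0x70,0x40,0x4c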
diff --git a/test/MC/Disassembler/AArch64/trace-regs.txt b/test/MC/Disassembler/AArch64/trace-regs.txt
index 10c5937..43171e3 100644
--- a/test/MC/Disassembler/AArch64/trace-regs.txt
+++ b/test/MC/Disassembler/AArch64/trace-regs.txt
@@ -1,736 +1,737 @@
# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-none-linux-gnu -disassemble < %s | FileCheck %s
0x8 0x3 0x31 0xd5
-# CHECK: mrs x8, trcstatr
+# CHECK: mrs x8, {{trcstatr|TRCSTATR}}
0xc9 0x0 0x31 0xd5
-# CHECK: mrs x9, trcidr8
+# CHECK: mrs x9, {{trcidr8|TRCIDR8}}
0xcb 0x1 0x31 0xd5
-# CHECK: mrs x11, trcidr9
+# CHECK: mrs x11, {{trcidr9|TRCIDR9}}
0xd9 0x2 0x31 0xd5
-# CHECK: mrs x25, trcidr10
+# CHECK: mrs x25, {{trcidr10|TRCIDR10}}
0xc7 0x3 0x31 0xd5
-# CHECK: mrs x7, trcidr11
+# CHECK: mrs x7, {{trcidr11|TRCIDR11}}
0xc7 0x4 0x31 0xd5
-# CHECK: mrs x7, trcidr12
+# CHECK: mrs x7, {{trcidr12|TRCIDR12}}
0xc6 0x5 0x31 0xd5
-# CHECK: mrs x6, trcidr13
+# CHECK: mrs x6, {{trcidr13|TRCIDR13}}
0xfb 0x8 0x31 0xd5
-# CHECK: mrs x27, trcidr0
+# CHECK: mrs x27, {{trcidr0|TRCIDR0}}
0xfd 0x9 0x31 0xd5
-# CHECK: mrs x29, trcidr1
+# CHECK: mrs x29, {{trcidr1|TRCIDR1}}
0xe4 0xa 0x31 0xd5
-# CHECK: mrs x4, trcidr2
+# CHECK: mrs x4, {{trcidr2|TRCIDR2}}
0xe8 0xb 0x31 0xd5
-# CHECK: mrs x8, trcidr3
+# CHECK: mrs x8, {{trcidr3|TRCIDR3}}
0xef 0xc 0x31 0xd5
-# CHECK: mrs x15, trcidr4
+# CHECK: mrs x15, {{trcidr4|TRCIDR4}}
0xf4 0xd 0x31 0xd5
-# CHECK: mrs x20, trcidr5
+# CHECK: mrs x20, {{trcidr5|TRCIDR5}}
0xe6 0xe 0x31 0xd5
-# CHECK: mrs x6, trcidr6
+# CHECK: mrs x6, {{trcidr6|TRCIDR6}}
0xe6 0xf 0x31 0xd5
-# CHECK: mrs x6, trcidr7
+# CHECK: mrs x6, {{trcidr7|TRCIDR7}}
0x98 0x11 0x31 0xd5
-# CHECK: mrs x24, trcoslsr
+# CHECK: mrs x24, {{trcoslsr|TRCOSLSR}}
0x92 0x15 0x31 0xd5
-# CHECK: mrs x18, trcpdsr
+# CHECK: mrs x18, {{trcpdsr|TRCPDSR}}
0xdc 0x7a 0x31 0xd5
-# CHECK: mrs x28, trcdevaff0
+# CHECK: mrs x28, {{trcdevaff0|TRCDEVAFF0}}
0xc5 0x7b 0x31 0xd5
-# CHECK: mrs x5, trcdevaff1
+# CHECK: mrs x5, {{trcdevaff1|TRCDEVAFF1}}
0xc5 0x7d 0x31 0xd5
-# CHECK: mrs x5, trclsr
+# CHECK: mrs x5, {{trclsr|TRCLSR}}
0xcb 0x7e 0x31 0xd5
-# CHECK: mrs x11, trcauthstatus
+# CHECK: mrs x11, {{trcauthstatus|TRCAUTHSTATUS}}
0xcd 0x7f 0x31 0xd5
-# CHECK: mrs x13, trcdevarch
+# CHECK: mrs x13, {{trcdevarch|TRCDEVARCH}}
0xf2 0x72 0x31 0xd5
-# CHECK: mrs x18, trcdevid
+# CHECK: mrs x18, {{trcdevid|TRCDEVID}}
0xf6 0x73 0x31 0xd5
-# CHECK: mrs x22, trcdevtype
+# CHECK: mrs x22, {{trcdevtype|TRCDEVTYPE}}
0xee 0x74 0x31 0xd5
-# CHECK: mrs x14, trcpidr4
+# CHECK: mrs x14, {{trcpidr4|TRCPIDR4}}
0xe5 0x75 0x31 0xd5
-# CHECK: mrs x5, trcpidr5
+# CHECK: mrs x5, {{trcpidr5|TRCPIDR5}}
0xe5 0x76 0x31 0xd5
-# CHECK: mrs x5, trcpidr6
+# CHECK: mrs x5, {{trcpidr6|TRCPIDR6}}
0xe9 0x77 0x31 0xd5
-# CHECK: mrs x9, trcpidr7
+# CHECK: mrs x9, {{trcpidr7|TRCPIDR7}}
0xef 0x78 0x31 0xd5
-# CHECK: mrs x15, trcpidr0
+# CHECK: mrs x15, {{trcpidr0|TRCPIDR0}}
0xe6 0x79 0x31 0xd5
-# CHECK: mrs x6, trcpidr1
+# CHECK: mrs x6, {{trcpidr1|TRCPIDR1}}
0xeb 0x7a 0x31 0xd5
-# CHECK: mrs x11, trcpidr2
+# CHECK: mrs x11, {{trcpidr2|TRCPIDR2}}
0xf4 0x7b 0x31 0xd5
-# CHECK: mrs x20, trcpidr3
+# CHECK: mrs x20, {{trcpidr3|TRCPIDR3}}
0xf1 0x7c 0x31 0xd5
-# CHECK: mrs x17, trccidr0
+# CHECK: mrs x17, {{trccidr0|TRCCIDR0}}
0xe2 0x7d 0x31 0xd5
-# CHECK: mrs x2, trccidr1
+# CHECK: mrs x2, {{trccidr1|TRCCIDR1}}
0xf4 0x7e 0x31 0xd5
-# CHECK: mrs x20, trccidr2
+# CHECK: mrs x20, {{trccidr2|TRCCIDR2}}
0xe4 0x7f 0x31 0xd5
-# CHECK: mrs x4, trccidr3
+# CHECK: mrs x4, {{trccidr3|TRCCIDR3}}
0xb 0x1 0x31 0xd5
-# CHECK: mrs x11, trcprgctlr
+# CHECK: mrs x11, {{trcprgctlr|TRCPRGCTLR}}
0x17 0x2 0x31 0xd5
-# CHECK: mrs x23, trcprocselr
+# CHECK: mrs x23, {{trcprocselr|TRCPROCSELR}}
0xd 0x4 0x31 0xd5
-# CHECK: mrs x13, trcconfigr
+# CHECK: mrs x13, {{trcconfigr|TRCCONFIGR}}
0x17 0x6 0x31 0xd5
-# CHECK: mrs x23, trcauxctlr
+# CHECK: mrs x23, {{trcauxctlr|TRCAUXCTLR}}
0x9 0x8 0x31 0xd5
-# CHECK: mrs x9, trceventctl0r
+# CHECK: mrs x9, {{trceventctl0r|TRCEVENTCTL0R}}
0x10 0x9 0x31 0xd5
-# CHECK: mrs x16, trceventctl1r
+# CHECK: mrs x16, {{trceventctl1r|TRCEVENTCTL1R}}
0x4 0xb 0x31 0xd5
-# CHECK: mrs x4, trcstallctlr
+# CHECK: mrs x4, {{trcstallctlr|TRCSTALLCTLR}}
0xe 0xc 0x31 0xd5
-# CHECK: mrs x14, trctsctlr
+# CHECK: mrs x14, {{trctsctlr|TRCTSCTLR}}
0x18 0xd 0x31 0xd5
-# CHECK: mrs x24, trcsyncpr
+# CHECK: mrs x24, {{trcsyncpr|TRCSYNCPR}}
0x1c 0xe 0x31 0xd5
-# CHECK: mrs x28, trcccctlr
+# CHECK: mrs x28, {{trcccctlr|TRCCCCTLR}}
0xf 0xf 0x31 0xd5
-# CHECK: mrs x15, trcbbctlr
+# CHECK: mrs x15, {{trcbbctlr|TRCBBCTLR}}
0x21 0x0 0x31 0xd5
-# CHECK: mrs x1, trctraceidr
+# CHECK: mrs x1, {{trctraceidr|TRCTRACEIDR}}
0x34 0x1 0x31 0xd5
-# CHECK: mrs x20, trcqctlr
+# CHECK: mrs x20, {{trcqctlr|TRCQCTLR}}
0x42 0x0 0x31 0xd5
-# CHECK: mrs x2, trcvictlr
+# CHECK: mrs x2, {{trcvictlr|TRCVICTLR}}
0x4c 0x1 0x31 0xd5
-# CHECK: mrs x12, trcviiectlr
+# CHECK: mrs x12, {{trcviiectlr|TRCVIIECTLR}}
0x50 0x2 0x31 0xd5
-# CHECK: mrs x16, trcvissctlr
+# CHECK: mrs x16, {{trcvissctlr|TRCVISSCTLR}}
0x48 0x3 0x31 0xd5
-# CHECK: mrs x8, trcvipcssctlr
+# CHECK: mrs x8, {{trcvipcssctlr|TRCVIPCSSCTLR}}
0x5b 0x8 0x31 0xd5
-# CHECK: mrs x27, trcvdctlr
+# CHECK: mrs x27, {{trcvdctlr|TRCVDCTLR}}
0x49 0x9 0x31 0xd5
-# CHECK: mrs x9, trcvdsacctlr
+# CHECK: mrs x9, {{trcvdsacctlr|TRCVDSACCTLR}}
0x40 0xa 0x31 0xd5
-# CHECK: mrs x0, trcvdarcctlr
+# CHECK: mrs x0, {{trcvdarcctlr|TRCVDARCCTLR}}
0x8d 0x0 0x31 0xd5
-# CHECK: mrs x13, trcseqevr0
+# CHECK: mrs x13, {{trcseqevr0|TRCSEQEVR0}}
0x8b 0x1 0x31 0xd5
-# CHECK: mrs x11, trcseqevr1
+# CHECK: mrs x11, {{trcseqevr1|TRCSEQEVR1}}
0x9a 0x2 0x31 0xd5
-# CHECK: mrs x26, trcseqevr2
+# CHECK: mrs x26, {{trcseqevr2|TRCSEQEVR2}}
0x8e 0x6 0x31 0xd5
-# CHECK: mrs x14, trcseqrstevr
+# CHECK: mrs x14, {{trcseqrstevr|TRCSEQRSTEVR}}
0x84 0x7 0x31 0xd5
-# CHECK: mrs x4, trcseqstr
+# CHECK: mrs x4, {{trcseqstr|TRCSEQSTR}}
0x91 0x8 0x31 0xd5
-# CHECK: mrs x17, trcextinselr
+# CHECK: mrs x17, {{trcextinselr|TRCEXTINSELR}}
0xb5 0x0 0x31 0xd5
-# CHECK: mrs x21, trccntrldvr0
+# CHECK: mrs x21, {{trccntrldvr0|TRCCNTRLDVR0}}
0xaa 0x1 0x31 0xd5
-# CHECK: mrs x10, trccntrldvr1
+# CHECK: mrs x10, {{trccntrldvr1|TRCCNTRLDVR1}}
0xb4 0x2 0x31 0xd5
-# CHECK: mrs x20, trccntrldvr2
+# CHECK: mrs x20, {{trccntrldvr2|TRCCNTRLDVR2}}
0xa5 0x3 0x31 0xd5
-# CHECK: mrs x5, trccntrldvr3
+# CHECK: mrs x5, {{trccntrldvr3|TRCCNTRLDVR3}}
0xb1 0x4 0x31 0xd5
-# CHECK: mrs x17, trccntctlr0
+# CHECK: mrs x17, {{trccntctlr0|TRCCNTCTLR0}}
0xa1 0x5 0x31 0xd5
-# CHECK: mrs x1, trccntctlr1
+# CHECK: mrs x1, {{trccntctlr1|TRCCNTCTLR1}}
0xb1 0x6 0x31 0xd5
-# CHECK: mrs x17, trccntctlr2
+# CHECK: mrs x17, {{trccntctlr2|TRCCNTCTLR2}}
0xa6 0x7 0x31 0xd5
-# CHECK: mrs x6, trccntctlr3
+# CHECK: mrs x6, {{trccntctlr3|TRCCNTCTLR3}}
0xbc 0x8 0x31 0xd5
-# CHECK: mrs x28, trccntvr0
+# CHECK: mrs x28, {{trccntvr0|TRCCNTVR0}}
0xb7 0x9 0x31 0xd5
-# CHECK: mrs x23, trccntvr1
+# CHECK: mrs x23, {{trccntvr1|TRCCNTVR1}}
0xa9 0xa 0x31 0xd5
-# CHECK: mrs x9, trccntvr2
+# CHECK: mrs x9, {{trccntvr2|TRCCNTVR2}}
0xa6 0xb 0x31 0xd5
-# CHECK: mrs x6, trccntvr3
+# CHECK: mrs x6, {{trccntvr3|TRCCNTVR3}}
0xf8 0x0 0x31 0xd5
-# CHECK: mrs x24, trcimspec0
+# CHECK: mrs x24, {{trcimspec0|TRCIMSPEC0}}
0xf8 0x1 0x31 0xd5
-# CHECK: mrs x24, trcimspec1
+# CHECK: mrs x24, {{trcimspec1|TRCIMSPEC1}}
0xef 0x2 0x31 0xd5
-# CHECK: mrs x15, trcimspec2
+# CHECK: mrs x15, {{trcimspec2|TRCIMSPEC2}}
0xea 0x3 0x31 0xd5
-# CHECK: mrs x10, trcimspec3
+# CHECK: mrs x10, {{trcimspec3|TRCIMSPEC3}}
0xfd 0x4 0x31 0xd5
-# CHECK: mrs x29, trcimspec4
+# CHECK: mrs x29, {{trcimspec4|TRCIMSPEC4}}
0xf2 0x5 0x31 0xd5
-# CHECK: mrs x18, trcimspec5
+# CHECK: mrs x18, {{trcimspec5|TRCIMSPEC5}}
0xfd 0x6 0x31 0xd5
-# CHECK: mrs x29, trcimspec6
+# CHECK: mrs x29, {{trcimspec6|TRCIMSPEC6}}
0xe2 0x7 0x31 0xd5
-# CHECK: mrs x2, trcimspec7
+# CHECK: mrs x2, {{trcimspec7|TRCIMSPEC7}}
0x8 0x12 0x31 0xd5
-# CHECK: mrs x8, trcrsctlr2
+# CHECK: mrs x8, {{trcrsctlr2|TRCRSCTLR2}}
0x0 0x13 0x31 0xd5
-# CHECK: mrs x0, trcrsctlr3
+# CHECK: mrs x0, {{trcrsctlr3|TRCRSCTLR3}}
0xc 0x14 0x31 0xd5
-# CHECK: mrs x12, trcrsctlr4
+# CHECK: mrs x12, {{trcrsctlr4|TRCRSCTLR4}}
0x1a 0x15 0x31 0xd5
-# CHECK: mrs x26, trcrsctlr5
+# CHECK: mrs x26, {{trcrsctlr5|TRCRSCTLR5}}
0x1d 0x16 0x31 0xd5
-# CHECK: mrs x29, trcrsctlr6
+# CHECK: mrs x29, {{trcrsctlr6|TRCRSCTLR6}}
0x11 0x17 0x31 0xd5
-# CHECK: mrs x17, trcrsctlr7
+# CHECK: mrs x17, {{trcrsctlr7|TRCRSCTLR7}}
0x0 0x18 0x31 0xd5
-# CHECK: mrs x0, trcrsctlr8
+# CHECK: mrs x0, {{trcrsctlr8|TRCRSCTLR8}}
0x1 0x19 0x31 0xd5
-# CHECK: mrs x1, trcrsctlr9
+# CHECK: mrs x1, {{trcrsctlr9|TRCRSCTLR9}}
0x11 0x1a 0x31 0xd5
-# CHECK: mrs x17, trcrsctlr10
+# CHECK: mrs x17, {{trcrsctlr10|TRCRSCTLR10}}
0x15 0x1b 0x31 0xd5
-# CHECK: mrs x21, trcrsctlr11
+# CHECK: mrs x21, {{trcrsctlr11|TRCRSCTLR11}}
0x1 0x1c 0x31 0xd5
-# CHECK: mrs x1, trcrsctlr12
+# CHECK: mrs x1, {{trcrsctlr12|TRCRSCTLR12}}
0x8 0x1d 0x31 0xd5
-# CHECK: mrs x8, trcrsctlr13
+# CHECK: mrs x8, {{trcrsctlr13|TRCRSCTLR13}}
0x18 0x1e 0x31 0xd5
-# CHECK: mrs x24, trcrsctlr14
+# CHECK: mrs x24, {{trcrsctlr14|TRCRSCTLR14}}
0x0 0x1f 0x31 0xd5
-# CHECK: mrs x0, trcrsctlr15
+# CHECK: mrs x0, {{trcrsctlr15|TRCRSCTLR15}}
0x22 0x10 0x31 0xd5
-# CHECK: mrs x2, trcrsctlr16
+# CHECK: mrs x2, {{trcrsctlr16|TRCRSCTLR16}}
0x3d 0x11 0x31 0xd5
-# CHECK: mrs x29, trcrsctlr17
+# CHECK: mrs x29, {{trcrsctlr17|TRCRSCTLR17}}
0x36 0x12 0x31 0xd5
-# CHECK: mrs x22, trcrsctlr18
+# CHECK: mrs x22, {{trcrsctlr18|TRCRSCTLR18}}
0x26 0x13 0x31 0xd5
-# CHECK: mrs x6, trcrsctlr19
+# CHECK: mrs x6, {{trcrsctlr19|TRCRSCTLR19}}
0x3a 0x14 0x31 0xd5
-# CHECK: mrs x26, trcrsctlr20
+# CHECK: mrs x26, {{trcrsctlr20|TRCRSCTLR20}}
0x3a 0x15 0x31 0xd5
-# CHECK: mrs x26, trcrsctlr21
+# CHECK: mrs x26, {{trcrsctlr21|TRCRSCTLR21}}
0x24 0x16 0x31 0xd5
-# CHECK: mrs x4, trcrsctlr22
+# CHECK: mrs x4, {{trcrsctlr22|TRCRSCTLR22}}
0x2c 0x17 0x31 0xd5
-# CHECK: mrs x12, trcrsctlr23
+# CHECK: mrs x12, {{trcrsctlr23|TRCRSCTLR23}}
0x21 0x18 0x31 0xd5
-# CHECK: mrs x1, trcrsctlr24
+# CHECK: mrs x1, {{trcrsctlr24|TRCRSCTLR24}}
0x20 0x19 0x31 0xd5
-# CHECK: mrs x0, trcrsctlr25
+# CHECK: mrs x0, {{trcrsctlr25|TRCRSCTLR25}}
0x31 0x1a 0x31 0xd5
-# CHECK: mrs x17, trcrsctlr26
+# CHECK: mrs x17, {{trcrsctlr26|TRCRSCTLR26}}
0x28 0x1b 0x31 0xd5
-# CHECK: mrs x8, trcrsctlr27
+# CHECK: mrs x8, {{trcrsctlr27|TRCRSCTLR27}}
0x2a 0x1c 0x31 0xd5
-# CHECK: mrs x10, trcrsctlr28
+# CHECK: mrs x10, {{trcrsctlr28|TRCRSCTLR28}}
0x39 0x1d 0x31 0xd5
-# CHECK: mrs x25, trcrsctlr29
+# CHECK: mrs x25, {{trcrsctlr29|TRCRSCTLR29}}
0x2c 0x1e 0x31 0xd5
-# CHECK: mrs x12, trcrsctlr30
+# CHECK: mrs x12, {{trcrsctlr30|TRCRSCTLR30}}
0x2b 0x1f 0x31 0xd5
-# CHECK: mrs x11, trcrsctlr31
+# CHECK: mrs x11, {{trcrsctlr31|TRCRSCTLR31}}
0x52 0x10 0x31 0xd5
-# CHECK: mrs x18, trcssccr0
+# CHECK: mrs x18, {{trcssccr0|TRCSSCCR0}}
0x4c 0x11 0x31 0xd5
-# CHECK: mrs x12, trcssccr1
+# CHECK: mrs x12, {{trcssccr1|TRCSSCCR1}}
0x43 0x12 0x31 0xd5
-# CHECK: mrs x3, trcssccr2
+# CHECK: mrs x3, {{trcssccr2|TRCSSCCR2}}
0x42 0x13 0x31 0xd5
-# CHECK: mrs x2, trcssccr3
+# CHECK: mrs x2, {{trcssccr3|TRCSSCCR3}}
0x55 0x14 0x31 0xd5
-# CHECK: mrs x21, trcssccr4
+# CHECK: mrs x21, {{trcssccr4|TRCSSCCR4}}
0x4a 0x15 0x31 0xd5
-# CHECK: mrs x10, trcssccr5
+# CHECK: mrs x10, {{trcssccr5|TRCSSCCR5}}
0x56 0x16 0x31 0xd5
-# CHECK: mrs x22, trcssccr6
+# CHECK: mrs x22, {{trcssccr6|TRCSSCCR6}}
0x57 0x17 0x31 0xd5
-# CHECK: mrs x23, trcssccr7
+# CHECK: mrs x23, {{trcssccr7|TRCSSCCR7}}
0x57 0x18 0x31 0xd5
-# CHECK: mrs x23, trcsscsr0
+# CHECK: mrs x23, {{trcsscsr0|TRCSSCSR0}}
0x53 0x19 0x31 0xd5
-# CHECK: mrs x19, trcsscsr1
+# CHECK: mrs x19, {{trcsscsr1|TRCSSCSR1}}
0x59 0x1a 0x31 0xd5
-# CHECK: mrs x25, trcsscsr2
+# CHECK: mrs x25, {{trcsscsr2|TRCSSCSR2}}
0x51 0x1b 0x31 0xd5
-# CHECK: mrs x17, trcsscsr3
+# CHECK: mrs x17, {{trcsscsr3|TRCSSCSR3}}
0x53 0x1c 0x31 0xd5
-# CHECK: mrs x19, trcsscsr4
+# CHECK: mrs x19, {{trcsscsr4|TRCSSCSR4}}
0x4b 0x1d 0x31 0xd5
-# CHECK: mrs x11, trcsscsr5
+# CHECK: mrs x11, {{trcsscsr5|TRCSSCSR5}}
0x45 0x1e 0x31 0xd5
-# CHECK: mrs x5, trcsscsr6
+# CHECK: mrs x5, {{trcsscsr6|TRCSSCSR6}}
0x49 0x1f 0x31 0xd5
-# CHECK: mrs x9, trcsscsr7
+# CHECK: mrs x9, {{trcsscsr7|TRCSSCSR7}}
0x9a 0x14 0x31 0xd5
-# CHECK: mrs x26, trcpdcr
+# CHECK: mrs x26, {{trcpdcr|TRCPDCR}}
0x8 0x20 0x31 0xd5
-# CHECK: mrs x8, trcacvr0
+# CHECK: mrs x8, {{trcacvr0|TRCACVR0}}
0xf 0x22 0x31 0xd5
-# CHECK: mrs x15, trcacvr1
+# CHECK: mrs x15, {{trcacvr1|TRCACVR1}}
0x13 0x24 0x31 0xd5
-# CHECK: mrs x19, trcacvr2
+# CHECK: mrs x19, {{trcacvr2|TRCACVR2}}
0x8 0x26 0x31 0xd5
-# CHECK: mrs x8, trcacvr3
+# CHECK: mrs x8, {{trcacvr3|TRCACVR3}}
0x1c 0x28 0x31 0xd5
-# CHECK: mrs x28, trcacvr4
+# CHECK: mrs x28, {{trcacvr4|TRCACVR4}}
0x3 0x2a 0x31 0xd5
-# CHECK: mrs x3, trcacvr5
+# CHECK: mrs x3, {{trcacvr5|TRCACVR5}}
0x19 0x2c 0x31 0xd5
-# CHECK: mrs x25, trcacvr6
+# CHECK: mrs x25, {{trcacvr6|TRCACVR6}}
0x18 0x2e 0x31 0xd5
-# CHECK: mrs x24, trcacvr7
+# CHECK: mrs x24, {{trcacvr7|TRCACVR7}}
0x26 0x20 0x31 0xd5
-# CHECK: mrs x6, trcacvr8
+# CHECK: mrs x6, {{trcacvr8|TRCACVR8}}
0x23 0x22 0x31 0xd5
-# CHECK: mrs x3, trcacvr9
+# CHECK: mrs x3, {{trcacvr9|TRCACVR9}}
0x38 0x24 0x31 0xd5
-# CHECK: mrs x24, trcacvr10
+# CHECK: mrs x24, {{trcacvr10|TRCACVR10}}
0x23 0x26 0x31 0xd5
-# CHECK: mrs x3, trcacvr11
+# CHECK: mrs x3, {{trcacvr11|TRCACVR11}}
0x2c 0x28 0x31 0xd5
-# CHECK: mrs x12, trcacvr12
+# CHECK: mrs x12, {{trcacvr12|TRCACVR12}}
0x29 0x2a 0x31 0xd5
-# CHECK: mrs x9, trcacvr13
+# CHECK: mrs x9, {{trcacvr13|TRCACVR13}}
0x2e 0x2c 0x31 0xd5
-# CHECK: mrs x14, trcacvr14
+# CHECK: mrs x14, {{trcacvr14|TRCACVR14}}
0x23 0x2e 0x31 0xd5
-# CHECK: mrs x3, trcacvr15
+# CHECK: mrs x3, {{trcacvr15|TRCACVR15}}
0x55 0x20 0x31 0xd5
-# CHECK: mrs x21, trcacatr0
+# CHECK: mrs x21, {{trcacatr0|TRCACATR0}}
0x5a 0x22 0x31 0xd5
-# CHECK: mrs x26, trcacatr1
+# CHECK: mrs x26, {{trcacatr1|TRCACATR1}}
0x48 0x24 0x31 0xd5
-# CHECK: mrs x8, trcacatr2
+# CHECK: mrs x8, {{trcacatr2|TRCACATR2}}
0x56 0x26 0x31 0xd5
-# CHECK: mrs x22, trcacatr3
+# CHECK: mrs x22, {{trcacatr3|TRCACATR3}}
0x46 0x28 0x31 0xd5
-# CHECK: mrs x6, trcacatr4
+# CHECK: mrs x6, {{trcacatr4|TRCACATR4}}
0x5d 0x2a 0x31 0xd5
-# CHECK: mrs x29, trcacatr5
+# CHECK: mrs x29, {{trcacatr5|TRCACATR5}}
0x45 0x2c 0x31 0xd5
-# CHECK: mrs x5, trcacatr6
+# CHECK: mrs x5, {{trcacatr6|TRCACATR6}}
0x52 0x2e 0x31 0xd5
-# CHECK: mrs x18, trcacatr7
+# CHECK: mrs x18, {{trcacatr7|TRCACATR7}}
0x62 0x20 0x31 0xd5
-# CHECK: mrs x2, trcacatr8
+# CHECK: mrs x2, {{trcacatr8|TRCACATR8}}
0x73 0x22 0x31 0xd5
-# CHECK: mrs x19, trcacatr9
+# CHECK: mrs x19, {{trcacatr9|TRCACATR9}}
0x6d 0x24 0x31 0xd5
-# CHECK: mrs x13, trcacatr10
+# CHECK: mrs x13, {{trcacatr10|TRCACATR10}}
0x79 0x26 0x31 0xd5
-# CHECK: mrs x25, trcacatr11
+# CHECK: mrs x25, {{trcacatr11|TRCACATR11}}
0x72 0x28 0x31 0xd5
-# CHECK: mrs x18, trcacatr12
+# CHECK: mrs x18, {{trcacatr12|TRCACATR12}}
0x7d 0x2a 0x31 0xd5
-# CHECK: mrs x29, trcacatr13
+# CHECK: mrs x29, {{trcacatr13|TRCACATR13}}
0x69 0x2c 0x31 0xd5
-# CHECK: mrs x9, trcacatr14
+# CHECK: mrs x9, {{trcacatr14|TRCACATR14}}
0x72 0x2e 0x31 0xd5
-# CHECK: mrs x18, trcacatr15
+# CHECK: mrs x18, {{trcacatr15|TRCACATR15}}
0x9d 0x20 0x31 0xd5
-# CHECK: mrs x29, trcdvcvr0
+# CHECK: mrs x29, {{trcdvcvr0|TRCDVCVR0}}
0x8f 0x24 0x31 0xd5
-# CHECK: mrs x15, trcdvcvr1
+# CHECK: mrs x15, {{trcdvcvr1|TRCDVCVR1}}
0x8f 0x28 0x31 0xd5
-# CHECK: mrs x15, trcdvcvr2
+# CHECK: mrs x15, {{trcdvcvr2|TRCDVCVR2}}
0x8f 0x2c 0x31 0xd5
-# CHECK: mrs x15, trcdvcvr3
+# CHECK: mrs x15, {{trcdvcvr3|TRCDVCVR3}}
0xb3 0x20 0x31 0xd5
-# CHECK: mrs x19, trcdvcvr4
+# CHECK: mrs x19, {{trcdvcvr4|TRCDVCVR4}}
0xb6 0x24 0x31 0xd5
-# CHECK: mrs x22, trcdvcvr5
+# CHECK: mrs x22, {{trcdvcvr5|TRCDVCVR5}}
0xbb 0x28 0x31 0xd5
-# CHECK: mrs x27, trcdvcvr6
+# CHECK: mrs x27, {{trcdvcvr6|TRCDVCVR6}}
0xa1 0x2c 0x31 0xd5
-# CHECK: mrs x1, trcdvcvr7
+# CHECK: mrs x1, {{trcdvcvr7|TRCDVCVR7}}
0xdd 0x20 0x31 0xd5
-# CHECK: mrs x29, trcdvcmr0
+# CHECK: mrs x29, {{trcdvcmr0|TRCDVCMR0}}
0xc9 0x24 0x31 0xd5
-# CHECK: mrs x9, trcdvcmr1
+# CHECK: mrs x9, {{trcdvcmr1|TRCDVCMR1}}
0xc1 0x28 0x31 0xd5
-# CHECK: mrs x1, trcdvcmr2
+# CHECK: mrs x1, {{trcdvcmr2|TRCDVCMR2}}
0xc2 0x2c 0x31 0xd5
-# CHECK: mrs x2, trcdvcmr3
+# CHECK: mrs x2, {{trcdvcmr3|TRCDVCMR3}}
0xe5 0x20 0x31 0xd5
-# CHECK: mrs x5, trcdvcmr4
+# CHECK: mrs x5, {{trcdvcmr4|TRCDVCMR4}}
0xf5 0x24 0x31 0xd5
-# CHECK: mrs x21, trcdvcmr5
+# CHECK: mrs x21, {{trcdvcmr5|TRCDVCMR5}}
0xe5 0x28 0x31 0xd5
-# CHECK: mrs x5, trcdvcmr6
+# CHECK: mrs x5, {{trcdvcmr6|TRCDVCMR6}}
0xe1 0x2c 0x31 0xd5
-# CHECK: mrs x1, trcdvcmr7
+# CHECK: mrs x1, {{trcdvcmr7|TRCDVCMR7}}
0x15 0x30 0x31 0xd5
-# CHECK: mrs x21, trccidcvr0
+# CHECK: mrs x21, {{trccidcvr0|TRCCIDCVR0}}
0x18 0x32 0x31 0xd5
-# CHECK: mrs x24, trccidcvr1
+# CHECK: mrs x24, {{trccidcvr1|TRCCIDCVR1}}
0x18 0x34 0x31 0xd5
-# CHECK: mrs x24, trccidcvr2
+# CHECK: mrs x24, {{trccidcvr2|TRCCIDCVR2}}
0xc 0x36 0x31 0xd5
-# CHECK: mrs x12, trccidcvr3
+# CHECK: mrs x12, {{trccidcvr3|TRCCIDCVR3}}
0xa 0x38 0x31 0xd5
-# CHECK: mrs x10, trccidcvr4
+# CHECK: mrs x10, {{trccidcvr4|TRCCIDCVR4}}
0x9 0x3a 0x31 0xd5
-# CHECK: mrs x9, trccidcvr5
+# CHECK: mrs x9, {{trccidcvr5|TRCCIDCVR5}}
0x6 0x3c 0x31 0xd5
-# CHECK: mrs x6, trccidcvr6
+# CHECK: mrs x6, {{trccidcvr6|TRCCIDCVR6}}
0x14 0x3e 0x31 0xd5
-# CHECK: mrs x20, trccidcvr7
+# CHECK: mrs x20, {{trccidcvr7|TRCCIDCVR7}}
0x34 0x30 0x31 0xd5
-# CHECK: mrs x20, trcvmidcvr0
+# CHECK: mrs x20, {{trcvmidcvr0|TRCVMIDCVR0}}
0x34 0x32 0x31 0xd5
-# CHECK: mrs x20, trcvmidcvr1
+# CHECK: mrs x20, {{trcvmidcvr1|TRCVMIDCVR1}}
0x3a 0x34 0x31 0xd5
-# CHECK: mrs x26, trcvmidcvr2
+# CHECK: mrs x26, {{trcvmidcvr2|TRCVMIDCVR2}}
0x21 0x36 0x31 0xd5
-# CHECK: mrs x1, trcvmidcvr3
+# CHECK: mrs x1, {{trcvmidcvr3|TRCVMIDCVR3}}
0x2e 0x38 0x31 0xd5
-# CHECK: mrs x14, trcvmidcvr4
+# CHECK: mrs x14, {{trcvmidcvr4|TRCVMIDCVR4}}
0x3b 0x3a 0x31 0xd5
-# CHECK: mrs x27, trcvmidcvr5
+# CHECK: mrs x27, {{trcvmidcvr5|TRCVMIDCVR5}}
0x3d 0x3c 0x31 0xd5
-# CHECK: mrs x29, trcvmidcvr6
+# CHECK: mrs x29, {{trcvmidcvr6|TRCVMIDCVR6}}
0x31 0x3e 0x31 0xd5
-# CHECK: mrs x17, trcvmidcvr7
+# CHECK: mrs x17, {{trcvmidcvr7|TRCVMIDCVR7}}
0x4a 0x30 0x31 0xd5
-# CHECK: mrs x10, trccidcctlr0
+# CHECK: mrs x10, {{trccidcctlr0|TRCCIDCCTLR0}}
0x44 0x31 0x31 0xd5
-# CHECK: mrs x4, trccidcctlr1
+# CHECK: mrs x4, {{trccidcctlr1|TRCCIDCCTLR1}}
0x49 0x32 0x31 0xd5
-# CHECK: mrs x9, trcvmidcctlr0
+# CHECK: mrs x9, {{trcvmidcctlr0|TRCVMIDCCTLR0}}
0x4b 0x33 0x31 0xd5
-# CHECK: mrs x11, trcvmidcctlr1
+# CHECK: mrs x11, {{trcvmidcctlr1|TRCVMIDCCTLR1}}
0x96 0x70 0x31 0xd5
-# CHECK: mrs x22, trcitctrl
+# CHECK: mrs x22, {{trcitctrl|TRCITCTRL}}
0xd7 0x78 0x31 0xd5
-# CHECK: mrs x23, trcclaimset
+# CHECK: mrs x23, {{trcclaimset|TRCCLAIMSET}}
0xce 0x79 0x31 0xd5
-# CHECK: mrs x14, trcclaimclr
+# CHECK: mrs x14, {{trcclaimclr|TRCCLAIMCLR}}
0x9c 0x10 0x11 0xd5
-# CHECK: msr trcoslar, x28
+# CHECK: msr {{trcoslar|TRCOSLAR}}, x28
0xce 0x7c 0x11 0xd5
-# CHECK: msr trclar, x14
+# CHECK: msr {{trclar|TRCLAR}}, x14
0xa 0x1 0x11 0xd5
-# CHECK: msr trcprgctlr, x10
+# CHECK: msr {{trcprgctlr|TRCPRGCTLR}}, x10
0x1b 0x2 0x11 0xd5
-# CHECK: msr trcprocselr, x27
+# CHECK: msr {{trcprocselr|TRCPROCSELR}}, x27
0x18 0x4 0x11 0xd5
-# CHECK: msr trcconfigr, x24
+# CHECK: msr {{trcconfigr|TRCCONFIGR}}, x24
0x8 0x6 0x11 0xd5
-# CHECK: msr trcauxctlr, x8
+# CHECK: msr {{trcauxctlr|TRCAUXCTLR}}, x8
0x10 0x8 0x11 0xd5
-# CHECK: msr trceventctl0r, x16
+# CHECK: msr {{trceventctl0r|TRCEVENTCTL0R}}, x16
0x1b 0x9 0x11 0xd5
-# CHECK: msr trceventctl1r, x27
+# CHECK: msr {{trceventctl1r|TRCEVENTCTL1R}}, x27
0x1a 0xb 0x11 0xd5
-# CHECK: msr trcstallctlr, x26
+# CHECK: msr {{trcstallctlr|TRCSTALLCTLR}}, x26
0x0 0xc 0x11 0xd5
-# CHECK: msr trctsctlr, x0
+# CHECK: msr {{trctsctlr|TRCTSCTLR}}, x0
0xe 0xd 0x11 0xd5
-# CHECK: msr trcsyncpr, x14
+# CHECK: msr {{trcsyncpr|TRCSYNCPR}}, x14
0x8 0xe 0x11 0xd5
-# CHECK: msr trcccctlr, x8
+# CHECK: msr {{trcccctlr|TRCCCCTLR}}, x8
0x6 0xf 0x11 0xd5
-# CHECK: msr trcbbctlr, x6
+# CHECK: msr {{trcbbctlr|TRCBBCTLR}}, x6
0x37 0x0 0x11 0xd5
-# CHECK: msr trctraceidr, x23
+# CHECK: msr {{trctraceidr|TRCTRACEIDR}}, x23
0x25 0x1 0x11 0xd5
-# CHECK: msr trcqctlr, x5
+# CHECK: msr {{trcqctlr|TRCQCTLR}}, x5
0x40 0x0 0x11 0xd5
-# CHECK: msr trcvictlr, x0
+# CHECK: msr {{trcvictlr|TRCVICTLR}}, x0
0x40 0x1 0x11 0xd5
-# CHECK: msr trcviiectlr, x0
+# CHECK: msr {{trcviiectlr|TRCVIIECTLR}}, x0
0x41 0x2 0x11 0xd5
-# CHECK: msr trcvissctlr, x1
+# CHECK: msr {{trcvissctlr|TRCVISSCTLR}}, x1
0x40 0x3 0x11 0xd5
-# CHECK: msr trcvipcssctlr, x0
+# CHECK: msr {{trcvipcssctlr|TRCVIPCSSCTLR}}, x0
0x47 0x8 0x11 0xd5
-# CHECK: msr trcvdctlr, x7
+# CHECK: msr {{trcvdctlr|TRCVDCTLR}}, x7
0x52 0x9 0x11 0xd5
-# CHECK: msr trcvdsacctlr, x18
+# CHECK: msr {{trcvdsacctlr|TRCVDSACCTLR}}, x18
0x58 0xa 0x11 0xd5
-# CHECK: msr trcvdarcctlr, x24
+# CHECK: msr {{trcvdarcctlr|TRCVDARCCTLR}}, x24
0x9c 0x0 0x11 0xd5
-# CHECK: msr trcseqevr0, x28
+# CHECK: msr {{trcseqevr0|TRCSEQEVR0}}, x28
0x95 0x1 0x11 0xd5
-# CHECK: msr trcseqevr1, x21
+# CHECK: msr {{trcseqevr1|TRCSEQEVR1}}, x21
0x90 0x2 0x11 0xd5
-# CHECK: msr trcseqevr2, x16
+# CHECK: msr {{trcseqevr2|TRCSEQEVR2}}, x16
0x90 0x6 0x11 0xd5
-# CHECK: msr trcseqrstevr, x16
+# CHECK: msr {{trcseqrstevr|TRCSEQRSTEVR}}, x16
0x99 0x7 0x11 0xd5
-# CHECK: msr trcseqstr, x25
+# CHECK: msr {{trcseqstr|TRCSEQSTR}}, x25
0x9d 0x8 0x11 0xd5
-# CHECK: msr trcextinselr, x29
+# CHECK: msr {{trcextinselr|TRCEXTINSELR}}, x29
0xb4 0x0 0x11 0xd5
-# CHECK: msr trccntrldvr0, x20
+# CHECK: msr {{trccntrldvr0|TRCCNTRLDVR0}}, x20
0xb4 0x1 0x11 0xd5
-# CHECK: msr trccntrldvr1, x20
+# CHECK: msr {{trccntrldvr1|TRCCNTRLDVR1}}, x20
0xb6 0x2 0x11 0xd5
-# CHECK: msr trccntrldvr2, x22
+# CHECK: msr {{trccntrldvr2|TRCCNTRLDVR2}}, x22
0xac 0x3 0x11 0xd5
-# CHECK: msr trccntrldvr3, x12
+# CHECK: msr {{trccntrldvr3|TRCCNTRLDVR3}}, x12
0xb4 0x4 0x11 0xd5
-# CHECK: msr trccntctlr0, x20
+# CHECK: msr {{trccntctlr0|TRCCNTCTLR0}}, x20
0xa4 0x5 0x11 0xd5
-# CHECK: msr trccntctlr1, x4
+# CHECK: msr {{trccntctlr1|TRCCNTCTLR1}}, x4
0xa8 0x6 0x11 0xd5
-# CHECK: msr trccntctlr2, x8
+# CHECK: msr {{trccntctlr2|TRCCNTCTLR2}}, x8
0xb0 0x7 0x11 0xd5
-# CHECK: msr trccntctlr3, x16
+# CHECK: msr {{trccntctlr3|TRCCNTCTLR3}}, x16
0xa5 0x8 0x11 0xd5
-# CHECK: msr trccntvr0, x5
+# CHECK: msr {{trccntvr0|TRCCNTVR0}}, x5
0xbb 0x9 0x11 0xd5
-# CHECK: msr trccntvr1, x27
+# CHECK: msr {{trccntvr1|TRCCNTVR1}}, x27
0xb5 0xa 0x11 0xd5
-# CHECK: msr trccntvr2, x21
+# CHECK: msr {{trccntvr2|TRCCNTVR2}}, x21
0xa8 0xb 0x11 0xd5
-# CHECK: msr trccntvr3, x8
+# CHECK: msr {{trccntvr3|TRCCNTVR3}}, x8
0xe6 0x0 0x11 0xd5
-# CHECK: msr trcimspec0, x6
+# CHECK: msr {{trcimspec0|TRCIMSPEC0}}, x6
0xfb 0x1 0x11 0xd5
-# CHECK: msr trcimspec1, x27
+# CHECK: msr {{trcimspec1|TRCIMSPEC1}}, x27
0xf7 0x2 0x11 0xd5
-# CHECK: msr trcimspec2, x23
+# CHECK: msr {{trcimspec2|TRCIMSPEC2}}, x23
0xef 0x3 0x11 0xd5
-# CHECK: msr trcimspec3, x15
+# CHECK: msr {{trcimspec3|TRCIMSPEC3}}, x15
0xed 0x4 0x11 0xd5
-# CHECK: msr trcimspec4, x13
+# CHECK: msr {{trcimspec4|TRCIMSPEC4}}, x13
0xf9 0x5 0x11 0xd5
-# CHECK: msr trcimspec5, x25
+# CHECK: msr {{trcimspec5|TRCIMSPEC5}}, x25
0xf3 0x6 0x11 0xd5
-# CHECK: msr trcimspec6, x19
+# CHECK: msr {{trcimspec6|TRCIMSPEC6}}, x19
0xfb 0x7 0x11 0xd5
-# CHECK: msr trcimspec7, x27
+# CHECK: msr {{trcimspec7|TRCIMSPEC7}}, x27
0x4 0x12 0x11 0xd5
-# CHECK: msr trcrsctlr2, x4
+# CHECK: msr {{trcrsctlr2|TRCRSCTLR2}}, x4
0x0 0x13 0x11 0xd5
-# CHECK: msr trcrsctlr3, x0
+# CHECK: msr {{trcrsctlr3|TRCRSCTLR3}}, x0
0x15 0x14 0x11 0xd5
-# CHECK: msr trcrsctlr4, x21
+# CHECK: msr {{trcrsctlr4|TRCRSCTLR4}}, x21
0x8 0x15 0x11 0xd5
-# CHECK: msr trcrsctlr5, x8
+# CHECK: msr {{trcrsctlr5|TRCRSCTLR5}}, x8
0x14 0x16 0x11 0xd5
-# CHECK: msr trcrsctlr6, x20
+# CHECK: msr {{trcrsctlr6|TRCRSCTLR6}}, x20
0xb 0x17 0x11 0xd5
-# CHECK: msr trcrsctlr7, x11
+# CHECK: msr {{trcrsctlr7|TRCRSCTLR7}}, x11
0x12 0x18 0x11 0xd5
-# CHECK: msr trcrsctlr8, x18
+# CHECK: msr {{trcrsctlr8|TRCRSCTLR8}}, x18
0x18 0x19 0x11 0xd5
-# CHECK: msr trcrsctlr9, x24
+# CHECK: msr {{trcrsctlr9|TRCRSCTLR9}}, x24
0xf 0x1a 0x11 0xd5
-# CHECK: msr trcrsctlr10, x15
+# CHECK: msr {{trcrsctlr10|TRCRSCTLR10}}, x15
0x15 0x1b 0x11 0xd5
-# CHECK: msr trcrsctlr11, x21
+# CHECK: msr {{trcrsctlr11|TRCRSCTLR11}}, x21
0x4 0x1c 0x11 0xd5
-# CHECK: msr trcrsctlr12, x4
+# CHECK: msr {{trcrsctlr12|TRCRSCTLR12}}, x4
0x1c 0x1d 0x11 0xd5
-# CHECK: msr trcrsctlr13, x28
+# CHECK: msr {{trcrsctlr13|TRCRSCTLR13}}, x28
0x3 0x1e 0x11 0xd5
-# CHECK: msr trcrsctlr14, x3
+# CHECK: msr {{trcrsctlr14|TRCRSCTLR14}}, x3
0x14 0x1f 0x11 0xd5
-# CHECK: msr trcrsctlr15, x20
+# CHECK: msr {{trcrsctlr15|TRCRSCTLR15}}, x20
0x2c 0x10 0x11 0xd5
-# CHECK: msr trcrsctlr16, x12
+# CHECK: msr {{trcrsctlr16|TRCRSCTLR16}}, x12
0x31 0x11 0x11 0xd5
-# CHECK: msr trcrsctlr17, x17
+# CHECK: msr {{trcrsctlr17|TRCRSCTLR17}}, x17
0x2a 0x12 0x11 0xd5
-# CHECK: msr trcrsctlr18, x10
+# CHECK: msr {{trcrsctlr18|TRCRSCTLR18}}, x10
0x2b 0x13 0x11 0xd5
-# CHECK: msr trcrsctlr19, x11
+# CHECK: msr {{trcrsctlr19|TRCRSCTLR19}}, x11
0x23 0x14 0x11 0xd5
-# CHECK: msr trcrsctlr20, x3
+# CHECK: msr {{trcrsctlr20|TRCRSCTLR20}}, x3
0x32 0x15 0x11 0xd5
-# CHECK: msr trcrsctlr21, x18
+# CHECK: msr {{trcrsctlr21|TRCRSCTLR21}}, x18
0x3a 0x16 0x11 0xd5
-# CHECK: msr trcrsctlr22, x26
+# CHECK: msr {{trcrsctlr22|TRCRSCTLR22}}, x26
0x25 0x17 0x11 0xd5
-# CHECK: msr trcrsctlr23, x5
+# CHECK: msr {{trcrsctlr23|TRCRSCTLR23}}, x5
0x39 0x18 0x11 0xd5
-# CHECK: msr trcrsctlr24, x25
+# CHECK: msr {{trcrsctlr24|TRCRSCTLR24}}, x25
0x25 0x19 0x11 0xd5
-# CHECK: msr trcrsctlr25, x5
+# CHECK: msr {{trcrsctlr25|TRCRSCTLR25}}, x5
0x24 0x1a 0x11 0xd5
-# CHECK: msr trcrsctlr26, x4
+# CHECK: msr {{trcrsctlr26|TRCRSCTLR26}}, x4
0x34 0x1b 0x11 0xd5
-# CHECK: msr trcrsctlr27, x20
+# CHECK: msr {{trcrsctlr27|TRCRSCTLR27}}, x20
0x25 0x1c 0x11 0xd5
-# CHECK: msr trcrsctlr28, x5
+# CHECK: msr {{trcrsctlr28|TRCRSCTLR28}}, x5
0x2a 0x1d 0x11 0xd5
-# CHECK: msr trcrsctlr29, x10
+# CHECK: msr {{trcrsctlr29|TRCRSCTLR29}}, x10
0x38 0x1e 0x11 0xd5
-# CHECK: msr trcrsctlr30, x24
+# CHECK: msr {{trcrsctlr30|TRCRSCTLR30}}, x24
0x34 0x1f 0x11 0xd5
-# CHECK: msr trcrsctlr31, x20
+# CHECK: msr {{trcrsctlr31|TRCRSCTLR31}}, x20
0x57 0x10 0x11 0xd5
-# CHECK: msr trcssccr0, x23
+# CHECK: msr {{trcssccr0|TRCSSCCR0}}, x23
0x5b 0x11 0x11 0xd5
-# CHECK: msr trcssccr1, x27
+# CHECK: msr {{trcssccr1|TRCSSCCR1}}, x27
0x5b 0x12 0x11 0xd5
-# CHECK: msr trcssccr2, x27
+# CHECK: msr {{trcssccr2|TRCSSCCR2}}, x27
0x46 0x13 0x11 0xd5
-# CHECK: msr trcssccr3, x6
+# CHECK: msr {{trcssccr3|TRCSSCCR3}}, x6
0x43 0x14 0x11 0xd5
-# CHECK: msr trcssccr4, x3
+# CHECK: msr {{trcssccr4|TRCSSCCR4}}, x3
0x4c 0x15 0x11 0xd5
-# CHECK: msr trcssccr5, x12
+# CHECK: msr {{trcssccr5|TRCSSCCR5}}, x12
0x47 0x16 0x11 0xd5
-# CHECK: msr trcssccr6, x7
+# CHECK: msr {{trcssccr6|TRCSSCCR6}}, x7
0x46 0x17 0x11 0xd5
-# CHECK: msr trcssccr7, x6
+# CHECK: msr {{trcssccr7|TRCSSCCR7}}, x6
0x54 0x18 0x11 0xd5
-# CHECK: msr trcsscsr0, x20
+# CHECK: msr {{trcsscsr0|TRCSSCSR0}}, x20
0x51 0x19 0x11 0xd5
-# CHECK: msr trcsscsr1, x17
+# CHECK: msr {{trcsscsr1|TRCSSCSR1}}, x17
0x4b 0x1a 0x11 0xd5
-# CHECK: msr trcsscsr2, x11
+# CHECK: msr {{trcsscsr2|TRCSSCSR2}}, x11
0x44 0x1b 0x11 0xd5
-# CHECK: msr trcsscsr3, x4
+# CHECK: msr {{trcsscsr3|TRCSSCSR3}}, x4
0x4e 0x1c 0x11 0xd5
-# CHECK: msr trcsscsr4, x14
+# CHECK: msr {{trcsscsr4|TRCSSCSR4}}, x14
0x56 0x1d 0x11 0xd5
-# CHECK: msr trcsscsr5, x22
+# CHECK: msr {{trcsscsr5|TRCSSCSR5}}, x22
0x43 0x1e 0x11 0xd5
-# CHECK: msr trcsscsr6, x3
+# CHECK: msr {{trcsscsr6|TRCSSCSR6}}, x3
0x4b 0x1f 0x11 0xd5
-# CHECK: msr trcsscsr7, x11
+# CHECK: msr {{trcsscsr7|TRCSSCSR7}}, x11
0x83 0x14 0x11 0xd5
-# CHECK: msr trcpdcr, x3
+# CHECK: msr {{trcpdcr|TRCPDCR}}, x3
0x6 0x20 0x11 0xd5
-# CHECK: msr trcacvr0, x6
+# CHECK: msr {{trcacvr0|TRCACVR0}}, x6
0x14 0x22 0x11 0xd5
-# CHECK: msr trcacvr1, x20
+# CHECK: msr {{trcacvr1|TRCACVR1}}, x20
0x19 0x24 0x11 0xd5
-# CHECK: msr trcacvr2, x25
+# CHECK: msr {{trcacvr2|TRCACVR2}}, x25
0x1 0x26 0x11 0xd5
-# CHECK: msr trcacvr3, x1
+# CHECK: msr {{trcacvr3|TRCACVR3}}, x1
0x1c 0x28 0x11 0xd5
-# CHECK: msr trcacvr4, x28
+# CHECK: msr {{trcacvr4|TRCACVR4}}, x28
0xf 0x2a 0x11 0xd5
-# CHECK: msr trcacvr5, x15
+# CHECK: msr {{trcacvr5|TRCACVR5}}, x15
0x19 0x2c 0x11 0xd5
-# CHECK: msr trcacvr6, x25
+# CHECK: msr {{trcacvr6|TRCACVR6}}, x25
0xc 0x2e 0x11 0xd5
-# CHECK: msr trcacvr7, x12
+# CHECK: msr {{trcacvr7|TRCACVR7}}, x12
0x25 0x20 0x11 0xd5
-# CHECK: msr trcacvr8, x5
+# CHECK: msr {{trcacvr8|TRCACVR8}}, x5
0x39 0x22 0x11 0xd5
-# CHECK: msr trcacvr9, x25
+# CHECK: msr {{trcacvr9|TRCACVR9}}, x25
0x2d 0x24 0x11 0xd5
-# CHECK: msr trcacvr10, x13
+# CHECK: msr {{trcacvr10|TRCACVR10}}, x13
0x2a 0x26 0x11 0xd5
-# CHECK: msr trcacvr11, x10
+# CHECK: msr {{trcacvr11|TRCACVR11}}, x10
0x33 0x28 0x11 0xd5
-# CHECK: msr trcacvr12, x19
+# CHECK: msr {{trcacvr12|TRCACVR12}}, x19
0x2a 0x2a 0x11 0xd5
-# CHECK: msr trcacvr13, x10
+# CHECK: msr {{trcacvr13|TRCACVR13}}, x10
0x33 0x2c 0x11 0xd5
-# CHECK: msr trcacvr14, x19
+# CHECK: msr {{trcacvr14|TRCACVR14}}, x19
0x22 0x2e 0x11 0xd5
-# CHECK: msr trcacvr15, x2
+# CHECK: msr {{trcacvr15|TRCACVR15}}, x2
0x4f 0x20 0x11 0xd5
-# CHECK: msr trcacatr0, x15
+# CHECK: msr {{trcacatr0|TRCACATR0}}, x15
0x4d 0x22 0x11 0xd5
-# CHECK: msr trcacatr1, x13
+# CHECK: msr {{trcacatr1|TRCACATR1}}, x13
0x48 0x24 0x11 0xd5
-# CHECK: msr trcacatr2, x8
+# CHECK: msr {{trcacatr2|TRCACATR2}}, x8
0x41 0x26 0x11 0xd5
-# CHECK: msr trcacatr3, x1
+# CHECK: msr {{trcacatr3|TRCACATR3}}, x1
0x4b 0x28 0x11 0xd5
-# CHECK: msr trcacatr4, x11
+# CHECK: msr {{trcacatr4|TRCACATR4}}, x11
0x48 0x2a 0x11 0xd5
-# CHECK: msr trcacatr5, x8
+# CHECK: msr {{trcacatr5|TRCACATR5}}, x8
0x58 0x2c 0x11 0xd5
-# CHECK: msr trcacatr6, x24
+# CHECK: msr {{trcacatr6|TRCACATR6}}, x24
0x46 0x2e 0x11 0xd5
-# CHECK: msr trcacatr7, x6
+# CHECK: msr {{trcacatr7|TRCACATR7}}, x6
0x77 0x20 0x11 0xd5
-# CHECK: msr trcacatr8, x23
+# CHECK: msr {{trcacatr8|TRCACATR8}}, x23
0x65 0x22 0x11 0xd5
-# CHECK: msr trcacatr9, x5
+# CHECK: msr {{trcacatr9|TRCACATR9}}, x5
0x6b 0x24 0x11 0xd5
-# CHECK: msr trcacatr10, x11
+# CHECK: msr {{trcacatr10|TRCACATR10}}, x11
0x6b 0x26 0x11 0xd5
-# CHECK: msr trcacatr11, x11
+# CHECK: msr {{trcacatr11|TRCACATR11}}, x11
0x63 0x28 0x11 0xd5
-# CHECK: msr trcacatr12, x3
+# CHECK: msr {{trcacatr12|TRCACATR12}}, x3
0x7c 0x2a 0x11 0xd5
-# CHECK: msr trcacatr13, x28
+# CHECK: msr {{trcacatr13|TRCACATR13}}, x28
0x79 0x2c 0x11 0xd5
-# CHECK: msr trcacatr14, x25
+# CHECK: msr {{trcacatr14|TRCACATR14}}, x25
0x64 0x2e 0x11 0xd5
-# CHECK: msr trcacatr15, x4
+# CHECK: msr {{trcacatr15|TRCACATR15}}, x4
0x86 0x20 0x11 0xd5
-# CHECK: msr trcdvcvr0, x6
+# CHECK: msr {{trcdvcvr0|TRCDVCVR0}}, x6
0x83 0x24 0x11 0xd5
-# CHECK: msr trcdvcvr1, x3
+# CHECK: msr {{trcdvcvr1|TRCDVCVR1}}, x3
0x85 0x28 0x11 0xd5
-# CHECK: msr trcdvcvr2, x5
+# CHECK: msr {{trcdvcvr2|TRCDVCVR2}}, x5
0x8b 0x2c 0x11 0xd5
-# CHECK: msr trcdvcvr3, x11
+# CHECK: msr {{trcdvcvr3|TRCDVCVR3}}, x11
0xa9 0x20 0x11 0xd5
-# CHECK: msr trcdvcvr4, x9
+# CHECK: msr {{trcdvcvr4|TRCDVCVR4}}, x9
0xae 0x24 0x11 0xd5
-# CHECK: msr trcdvcvr5, x14
+# CHECK: msr {{trcdvcvr5|TRCDVCVR5}}, x14
0xaa 0x28 0x11 0xd5
-# CHECK: msr trcdvcvr6, x10
+# CHECK: msr {{trcdvcvr6|TRCDVCVR6}}, x10
0xac 0x2c 0x11 0xd5
-# CHECK: msr trcdvcvr7, x12
+# CHECK: msr {{trcdvcvr7|TRCDVCVR7}}, x12
0xc8 0x20 0x11 0xd5
-# CHECK: msr trcdvcmr0, x8
+# CHECK: msr {{trcdvcmr0|TRCDVCMR0}}, x8
0xc8 0x24 0x11 0xd5
-# CHECK: msr trcdvcmr1, x8
+# CHECK: msr {{trcdvcmr1|TRCDVCMR1}}, x8
0xd6 0x28 0x11 0xd5
-# CHECK: msr trcdvcmr2, x22
+# CHECK: msr {{trcdvcmr2|TRCDVCMR2}}, x22
0xd6 0x2c 0x11 0xd5
-# CHECK: msr trcdvcmr3, x22
+# CHECK: msr {{trcdvcmr3|TRCDVCMR3}}, x22
0xe5 0x20 0x11 0xd5
-# CHECK: msr trcdvcmr4, x5
+# CHECK: msr {{trcdvcmr4|TRCDVCMR4}}, x5
0xf0 0x24 0x11 0xd5
-# CHECK: msr trcdvcmr5, x16
+# CHECK: msr {{trcdvcmr5|TRCDVCMR5}}, x16
0xfb 0x28 0x11 0xd5
-# CHECK: msr trcdvcmr6, x27
+# CHECK: msr {{trcdvcmr6|TRCDVCMR6}}, x27
0xf5 0x2c 0x11 0xd5
-# CHECK: msr trcdvcmr7, x21
+# CHECK: msr {{trcdvcmr7|TRCDVCMR7}}, x21
0x8 0x30 0x11 0xd5
-# CHECK: msr trccidcvr0, x8
+# CHECK: msr {{trccidcvr0|TRCCIDCVR0}}, x8
0x6 0x32 0x11 0xd5
-# CHECK: msr trccidcvr1, x6
+# CHECK: msr {{trccidcvr1|TRCCIDCVR1}}, x6
0x9 0x34 0x11 0xd5
-# CHECK: msr trccidcvr2, x9
+# CHECK: msr {{trccidcvr2|TRCCIDCVR2}}, x9
0x8 0x36 0x11 0xd5
-# CHECK: msr trccidcvr3, x8
+# CHECK: msr {{trccidcvr3|TRCCIDCVR3}}, x8
0x3 0x38 0x11 0xd5
-# CHECK: msr trccidcvr4, x3
+# CHECK: msr {{trccidcvr4|TRCCIDCVR4}}, x3
0x15 0x3a 0x11 0xd5
-# CHECK: msr trccidcvr5, x21
+# CHECK: msr {{trccidcvr5|TRCCIDCVR5}}, x21
0xc 0x3c 0x11 0xd5
-# CHECK: msr trccidcvr6, x12
+# CHECK: msr {{trccidcvr6|TRCCIDCVR6}}, x12
0x7 0x3e 0x11 0xd5
-# CHECK: msr trccidcvr7, x7
+# CHECK: msr {{trccidcvr7|TRCCIDCVR7}}, x7
0x24 0x30 0x11 0xd5
-# CHECK: msr trcvmidcvr0, x4
+# CHECK: msr {{trcvmidcvr0|TRCVMIDCVR0}}, x4
0x23 0x32 0x11 0xd5
-# CHECK: msr trcvmidcvr1, x3
+# CHECK: msr {{trcvmidcvr1|TRCVMIDCVR1}}, x3
0x29 0x34 0x11 0xd5
-# CHECK: msr trcvmidcvr2, x9
+# CHECK: msr {{trcvmidcvr2|TRCVMIDCVR2}}, x9
0x31 0x36 0x11 0xd5
-# CHECK: msr trcvmidcvr3, x17
+# CHECK: msr {{trcvmidcvr3|TRCVMIDCVR3}}, x17
0x2e 0x38 0x11 0xd5
-# CHECK: msr trcvmidcvr4, x14
+# CHECK: msr {{trcvmidcvr4|TRCVMIDCVR4}}, x14
0x2c 0x3a 0x11 0xd5
-# CHECK: msr trcvmidcvr5, x12
+# CHECK: msr {{trcvmidcvr5|TRCVMIDCVR5}}, x12
0x2a 0x3c 0x11 0xd5
-# CHECK: msr trcvmidcvr6, x10
+# CHECK: msr {{trcvmidcvr6|TRCVMIDCVR6}}, x10
0x23 0x3e 0x11 0xd5
-# CHECK: msr trcvmidcvr7, x3
+# CHECK: msr {{trcvmidcvr7|TRCVMIDCVR7}}, x3
0x4e 0x30 0x11 0xd5
-# CHECK: msr trccidcctlr0, x14
+# CHECK: msr {{trccidcctlr0|TRCCIDCCTLR0}}, x14
0x56 0x31 0x11 0xd5
-# CHECK: msr trccidcctlr1, x22
+# CHECK: msr {{trccidcctlr1|TRCCIDCCTLR1}}, x22
0x48 0x32 0x11 0xd5
-# CHECK: msr trcvmidcctlr0, x8
+# CHECK: msr {{trcvmidcctlr0|TRCVMIDCCTLR0}}, x8
0x4f 0x33 0x11 0xd5
-# CHECK: msr trcvmidcctlr1, x15
+# CHECK: msr {{trcvmidcctlr1|TRCVMIDCCTLR1}}, x15
0x81 0x70 0x11 0xd5
-# CHECK: msr trcitctrl, x1
+# CHECK: msr {{trcitctrl|TRCITCTRL}}, x1
0xc7 0x78 0x11 0xd5
-# CHECK: msr trcclaimset, x7
+# CHECK: msr {{trcclaimset|TRCCLAIMSET}}, x7
0xdd 0x79 0x11 0xd5
-# CHECK: msr trcclaimclr, x29
+# CHECK: msr {{trcclaimclr|TRCCLAIMCLR}}, x29
diff --git a/test/MC/Disassembler/ARM/invalid-thumbv7.txt b/test/MC/Disassembler/ARM/invalid-thumbv7.txt
index 2c84b8a..5257633 100644
--- a/test/MC/Disassembler/ARM/invalid-thumbv7.txt
+++ b/test/MC/Disassembler/ARM/invalid-thumbv7.txt
@@ -21,17 +21,6 @@
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0xaf 0xf7 0x44 0x8b]
-# Opcode=2249 Name=tBcc Format=ARM_FORMAT_THUMBFRM(25)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 1: 1: 0: 1| 1: 1: 1: 0| 0: 1: 1: 0| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# if cond = '1110' then UNDEFINED
-[0x6f 0xde]
-# CHECK: invalid instruction encoding
-# CHECK-NEXT: [0x6f 0xde]
-
#------------------------------------------------------------------------------
# Undefined encoding for it
#------------------------------------------------------------------------------
@@ -249,34 +238,6 @@
# CHECK-NEXT: [0xe4 0xe9 0x02 0x46]
#------------------------------------------------------------------------------
-# Undefined encodings for NEON/VFP instructions with invalid predicate bits
-#------------------------------------------------------------------------------
-
-# VABS
-[0x40 0xde 0x00 0x0a]
-# CHECK: invalid instruction encoding
-# CHECK-NEXT: [0x40 0xde 0x00 0x0a]
-
-
-# VMLA
-[0xf0 0xde 0xe0 0x0b]
-# CHECK: invalid instruction encoding
-# CHECK-NEXT: [0xf0 0xde 0xe0 0x0b]
-
-# VMOV/VDUP between scalar and core registers with invalid predicate bits (pred != 0b1110)
-
-# VMOV
-[0x00 0xde 0x10 0x0b]
-# CHECK: invalid instruction encoding
-# CHECK-NEXT: [0x00 0xde 0x10 0x0b]
-
-# VDUP
-[0xff 0xde 0xf0 0xfb]
-# CHECK: invalid instruction encoding
-# CHECK-NEXT: [0xff 0xde 0xf0 0xfb]
-
-
-#------------------------------------------------------------------------------
# Undefined encodings for NEON vld instructions
#------------------------------------------------------------------------------
diff --git a/test/MC/Disassembler/ARM64/advsimd.txt b/test/MC/Disassembler/ARM64/advsimd.txt
deleted file mode 100644
index 486dd16..0000000
--- a/test/MC/Disassembler/ARM64/advsimd.txt
+++ /dev/null
@@ -1,2282 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 --disassemble < %s | FileCheck %s
-
-0x00 0xb8 0x20 0x0e
-0x00 0xb8 0x20 0x4e
-0x00 0xb8 0x60 0x0e
-0x00 0xb8 0x60 0x4e
-0x00 0xb8 0xa0 0x0e
-0x00 0xb8 0xa0 0x4e
-
-# CHECK: abs.8b v0, v0
-# CHECK: abs.16b v0, v0
-# CHECK: abs.4h v0, v0
-# CHECK: abs.8h v0, v0
-# CHECK: abs.2s v0, v0
-# CHECK: abs.4s v0, v0
-
-0x00 0x84 0x20 0x0e
-0x00 0x84 0x20 0x4e
-0x00 0x84 0x60 0x0e
-0x00 0x84 0x60 0x4e
-0x00 0x84 0xa0 0x0e
-0x00 0x84 0xa0 0x4e
-0x00 0x84 0xe0 0x4e
-
-# CHECK: add.8b v0, v0, v0
-# CHECK: add.16b v0, v0, v0
-# CHECK: add.4h v0, v0, v0
-# CHECK: add.8h v0, v0, v0
-# CHECK: add.2s v0, v0, v0
-# CHECK: add.4s v0, v0, v0
-# CHECK: add.2d v0, v0, v0
-
-0x41 0x84 0xe3 0x5e
-
-# CHECK: add d1, d2, d3
-
-0x00 0x40 0x20 0x0e
-0x00 0x40 0x20 0x4e
-0x00 0x40 0x60 0x0e
-0x00 0x40 0x60 0x4e
-0x00 0x40 0xa0 0x0e
-0x00 0x40 0xa0 0x4e
-
-# CHECK: addhn.8b v0, v0, v0
-# CHECK: addhn2.16b v0, v0, v0
-# CHECK: addhn.4h v0, v0, v0
-# CHECK: addhn2.8h v0, v0, v0
-# CHECK: addhn.2s v0, v0, v0
-# CHECK: addhn2.4s v0, v0, v0
-
-0x00 0xbc 0x20 0x0e
-0x00 0xbc 0x20 0x4e
-0x00 0xbc 0x60 0x0e
-0x00 0xbc 0x60 0x4e
-0x00 0xbc 0xa0 0x0e
-0x00 0xbc 0xa0 0x4e
-0x00 0xbc 0xe0 0x4e
-
-# CHECK: addp.8b v0, v0, v0
-# CHECK: addp.16b v0, v0, v0
-# CHECK: addp.4h v0, v0, v0
-# CHECK: addp.8h v0, v0, v0
-# CHECK: addp.2s v0, v0, v0
-# CHECK: addp.4s v0, v0, v0
-# CHECK: addp.2d v0, v0, v0
-
-0x00 0xb8 0xf1 0x5e
-
-# CHECK: addp.2d d0, v0
-
-0x00 0xb8 0x31 0x0e
-0x00 0xb8 0x31 0x4e
-0x00 0xb8 0x71 0x0e
-0x00 0xb8 0x71 0x4e
-0x00 0xb8 0xb1 0x4e
-
-# CHECK: addv.8b b0, v0
-# CHECK: addv.16b b0, v0
-# CHECK: addv.4h h0, v0
-# CHECK: addv.8h h0, v0
-# CHECK: addv.4s s0, v0
-
-
-# INS/DUP
-0x60 0x0c 0x08 0x4e
-0x60 0x0c 0x04 0x4e
-0x60 0x0c 0x04 0x0e
-0x60 0x0c 0x02 0x4e
-0x60 0x0c 0x02 0x0e
-0x60 0x0c 0x01 0x4e
-0x60 0x0c 0x01 0x0e
-
-# CHECK: dup.2d v0, x3
-# CHECK: dup.4s v0, w3
-# CHECK: dup.2s v0, w3
-# CHECK: dup.8h v0, w3
-# CHECK: dup.4h v0, w3
-# CHECK: dup.16b v0, w3
-# CHECK: dup.8b v0, w3
-
-0x60 0x04 0x18 0x4e
-0x60 0x04 0x0c 0x0e
-0x60 0x04 0x0c 0x4e
-0x60 0x04 0x06 0x0e
-0x60 0x04 0x06 0x4e
-0x60 0x04 0x03 0x0e
-0x60 0x04 0x03 0x4e
-
-# CHECK: dup.2d v0, v3[1]
-# CHECK: dup.2s v0, v3[1]
-# CHECK: dup.4s v0, v3[1]
-# CHECK: dup.4h v0, v3[1]
-# CHECK: dup.8h v0, v3[1]
-# CHECK: dup.8b v0, v3[1]
-# CHECK: dup.16b v0, v3[1]
-
-
-0x43 0x2c 0x14 0x4e
-0x43 0x2c 0x14 0x4e
-0x43 0x3c 0x14 0x0e
-0x43 0x3c 0x14 0x0e
-0x43 0x3c 0x18 0x4e
-0x43 0x3c 0x18 0x4e
-
-# CHECK: smov.s x3, v2[2]
-# CHECK: smov.s x3, v2[2]
-# CHECK: umov.s w3, v2[2]
-# CHECK: umov.s w3, v2[2]
-# CHECK: umov.d x3, v2[1]
-# CHECK: umov.d x3, v2[1]
-
-0xa2 0x1c 0x18 0x4e
-0xa2 0x1c 0x0c 0x4e
-0xa2 0x1c 0x06 0x4e
-0xa2 0x1c 0x03 0x4e
-
-0xa2 0x1c 0x18 0x4e
-0xa2 0x1c 0x0c 0x4e
-0xa2 0x1c 0x06 0x4e
-0xa2 0x1c 0x03 0x4e
-
-# CHECK: ins.d v2[1], x5
-# CHECK: ins.s v2[1], w5
-# CHECK: ins.h v2[1], w5
-# CHECK: ins.b v2[1], w5
-
-# CHECK: ins.d v2[1], x5
-# CHECK: ins.s v2[1], w5
-# CHECK: ins.h v2[1], w5
-# CHECK: ins.b v2[1], w5
-
-0xe2 0x45 0x18 0x6e
-0xe2 0x25 0x0c 0x6e
-0xe2 0x15 0x06 0x6e
-0xe2 0x0d 0x03 0x6e
-
-0xe2 0x05 0x18 0x6e
-0xe2 0x45 0x1c 0x6e
-0xe2 0x35 0x1e 0x6e
-0xe2 0x2d 0x15 0x6e
-
-# CHECK: ins.d v2[1], v15[1]
-# CHECK: ins.s v2[1], v15[1]
-# CHECK: ins.h v2[1], v15[1]
-# CHECK: ins.b v2[1], v15[1]
-
-# CHECK: ins.d v2[1], v15[0]
-# CHECK: ins.s v2[3], v15[2]
-# CHECK: ins.h v2[7], v15[3]
-# CHECK: ins.b v2[10], v15[5]
-
-0x00 0x1c 0x20 0x0e
-0x00 0x1c 0x20 0x4e
-
-# CHECK: and.8b v0, v0, v0
-# CHECK: and.16b v0, v0, v0
-
-0x00 0x1c 0x60 0x0e
-
-# CHECK: bic.8b v0, v0, v0
-
-0x00 0x8c 0x20 0x2e
-0x00 0x3c 0x20 0x0e
-0x00 0x34 0x20 0x0e
-0x00 0x34 0x20 0x2e
-0x00 0x3c 0x20 0x2e
-0x00 0x8c 0x20 0x0e
-0x00 0xd4 0xa0 0x2e
-0x00 0xec 0x20 0x2e
-0x00 0xec 0xa0 0x2e
-0x00 0xd4 0x20 0x2e
-0x00 0xd4 0x20 0x0e
-0x00 0xe4 0x20 0x0e
-0x00 0xe4 0x20 0x2e
-0x00 0xe4 0xa0 0x2e
-0x00 0xfc 0x20 0x2e
-0x00 0xc4 0x20 0x2e
-0x00 0xc4 0x20 0x0e
-0x00 0xf4 0x20 0x2e
-0x00 0xf4 0x20 0x0e
-0x00 0xc4 0xa0 0x2e
-0x00 0xc4 0xa0 0x0e
-0x00 0xf4 0xa0 0x2e
-0x00 0xf4 0xa0 0x0e
-0x00 0xcc 0x20 0x0e
-0x00 0xcc 0xa0 0x0e
-0x00 0xdc 0x20 0x0e
-0x00 0xdc 0x20 0x2e
-0x00 0xfc 0x20 0x0e
-0x00 0xfc 0xa0 0x0e
-0x00 0xd4 0xa0 0x0e
-0x00 0x94 0x20 0x0e
-0x00 0x94 0x20 0x2e
-0x00 0x9c 0x20 0x0e
-0x00 0x9c 0x20 0x2e
-0x00 0x7c 0x20 0x0e
-0x00 0x74 0x20 0x0e
-0x00 0x04 0x20 0x0e
-0x00 0x24 0x20 0x0e
-0x00 0xa4 0x20 0x0e
-0x00 0x64 0x20 0x0e
-0x00 0xac 0x20 0x0e
-0x00 0x6c 0x20 0x0e
-0x00 0x0c 0x20 0x0e
-0x00 0xb4 0x60 0x0e
-0x00 0xb4 0x60 0x2e
-0x00 0x5c 0x20 0x0e
-0x00 0x4c 0x20 0x0e
-0x00 0x2c 0x20 0x0e
-0x00 0x14 0x20 0x0e
-0x00 0x54 0x20 0x0e
-0x00 0x44 0x20 0x0e
-0x00 0x84 0x20 0x2e
-0x00 0x7c 0x20 0x2e
-0x00 0x74 0x20 0x2e
-0x00 0x04 0x20 0x2e
-0x00 0x24 0x20 0x2e
-0x00 0xa4 0x20 0x2e
-0x00 0x64 0x20 0x2e
-0x00 0xac 0x20 0x2e
-0x00 0x6c 0x20 0x2e
-0x00 0x0c 0x20 0x2e
-0x00 0x5c 0x20 0x2e
-0x00 0x4c 0x20 0x2e
-0x00 0x2c 0x20 0x2e
-0x00 0x14 0x20 0x2e
-0x00 0x54 0x20 0x2e
-0x00 0x44 0x20 0x2e
-
-# CHECK: cmeq.8b v0, v0, v0
-# CHECK: cmge.8b v0, v0, v0
-# CHECK: cmgt.8b v0, v0, v0
-# CHECK: cmhi.8b v0, v0, v0
-# CHECK: cmhs.8b v0, v0, v0
-# CHECK: cmtst.8b v0, v0, v0
-# CHECK: fabd.2s v0, v0, v0
-# CHECK: facge.2s v0, v0, v0
-# CHECK: facgt.2s v0, v0, v0
-# CHECK: faddp.2s v0, v0, v0
-# CHECK: fadd.2s v0, v0, v0
-# CHECK: fcmeq.2s v0, v0, v0
-# CHECK: fcmge.2s v0, v0, v0
-# CHECK: fcmgt.2s v0, v0, v0
-# CHECK: fdiv.2s v0, v0, v0
-# CHECK: fmaxnmp.2s v0, v0, v0
-# CHECK: fmaxnm.2s v0, v0, v0
-# CHECK: fmaxp.2s v0, v0, v0
-# CHECK: fmax.2s v0, v0, v0
-# CHECK: fminnmp.2s v0, v0, v0
-# CHECK: fminnm.2s v0, v0, v0
-# CHECK: fminp.2s v0, v0, v0
-# CHECK: fmin.2s v0, v0, v0
-# CHECK: fmla.2s v0, v0, v0
-# CHECK: fmls.2s v0, v0, v0
-# CHECK: fmulx.2s v0, v0, v0
-# CHECK: fmul.2s v0, v0, v0
-# CHECK: frecps.2s v0, v0, v0
-# CHECK: frsqrts.2s v0, v0, v0
-# CHECK: fsub.2s v0, v0, v0
-# CHECK: mla.8b v0, v0, v0
-# CHECK: mls.8b v0, v0, v0
-# CHECK: mul.8b v0, v0, v0
-# CHECK: pmul.8b v0, v0, v0
-# CHECK: saba.8b v0, v0, v0
-# CHECK: sabd.8b v0, v0, v0
-# CHECK: shadd.8b v0, v0, v0
-# CHECK: shsub.8b v0, v0, v0
-# CHECK: smaxp.8b v0, v0, v0
-# CHECK: smax.8b v0, v0, v0
-# CHECK: sminp.8b v0, v0, v0
-# CHECK: smin.8b v0, v0, v0
-# CHECK: sqadd.8b v0, v0, v0
-# CHECK: sqdmulh.4h v0, v0, v0
-# CHECK: sqrdmulh.4h v0, v0, v0
-# CHECK: sqrshl.8b v0, v0, v0
-# CHECK: sqshl.8b v0, v0, v0
-# CHECK: sqsub.8b v0, v0, v0
-# CHECK: srhadd.8b v0, v0, v0
-# CHECK: srshl.8b v0, v0, v0
-# CHECK: sshl.8b v0, v0, v0
-# CHECK: sub.8b v0, v0, v0
-# CHECK: uaba.8b v0, v0, v0
-# CHECK: uabd.8b v0, v0, v0
-# CHECK: uhadd.8b v0, v0, v0
-# CHECK: uhsub.8b v0, v0, v0
-# CHECK: umaxp.8b v0, v0, v0
-# CHECK: umax.8b v0, v0, v0
-# CHECK: uminp.8b v0, v0, v0
-# CHECK: umin.8b v0, v0, v0
-# CHECK: uqadd.8b v0, v0, v0
-# CHECK: uqrshl.8b v0, v0, v0
-# CHECK: uqshl.8b v0, v0, v0
-# CHECK: uqsub.8b v0, v0, v0
-# CHECK: urhadd.8b v0, v0, v0
-# CHECK: urshl.8b v0, v0, v0
-# CHECK: ushl.8b v0, v0, v0
-
-0x00 0x1c 0xe0 0x2e
-0x00 0x1c 0xa0 0x2e
-0x00 0x1c 0x60 0x2e
-0x00 0x1c 0x20 0x2e
-0x00 0x1c 0xe0 0x0e
-0x00 0x1c 0xa0 0x0e
-
-# CHECK: bif.8b v0, v0, v0
-# CHECK: bit.8b v0, v0, v0
-# CHECK: bsl.8b v0, v0, v0
-# CHECK: eor.8b v0, v0, v0
-# CHECK: orn.8b v0, v0, v0
-# CHECK: orr.8b v0, v0, v0
-
-0x00 0x68 0x20 0x0e
-0x00 0x68 0x20 0x4e
-0x00 0x68 0x60 0x0e
-0x00 0x68 0x60 0x4e
-0x00 0x68 0xa0 0x0e
-0x00 0x68 0xa0 0x4e
-
-# CHECK: sadalp.4h v0, v0
-# CHECK: sadalp.8h v0, v0
-# CHECK: sadalp.2s v0, v0
-# CHECK: sadalp.4s v0, v0
-# CHECK: sadalp.1d v0, v0
-# CHECK: sadalp.2d v0, v0
-
-0x00 0x48 0x20 0x0e
-0x00 0x48 0x20 0x2e
-0x00 0x58 0x20 0x0e
-0x00 0xf8 0xa0 0x0e
-0x00 0xc8 0x21 0x0e
-0x00 0xc8 0x21 0x2e
-0x00 0xb8 0x21 0x0e
-0x00 0xb8 0x21 0x2e
-0x00 0xa8 0x21 0x0e
-0x00 0xa8 0x21 0x2e
-0x00 0xa8 0xa1 0x0e
-0x00 0xa8 0xa1 0x2e
-0x00 0xb8 0xa1 0x0e
-0x00 0xb8 0xa1 0x2e
-0x00 0xf8 0xa0 0x2e
-0x00 0xd8 0xa1 0x0e
-0x00 0xd8 0xa1 0x2e
-0x00 0xf8 0xa1 0x2e
-0x00 0xb8 0x20 0x2e
-0x00 0x58 0x20 0x2e
-0x00 0x58 0x60 0x2e
-0x00 0x18 0x20 0x0e
-0x00 0x08 0x20 0x2e
-0x00 0x08 0x20 0x0e
-0x00 0x68 0x20 0x0e
-0x00 0x28 0x20 0x0e
-0x00 0xd8 0x21 0x0e
-0x00 0x38 0x21 0x2e
-0x00 0x78 0x20 0x0e
-0x00 0x78 0x20 0x2e
-0x00 0x48 0x21 0x0e
-0x00 0x28 0x21 0x2e
-0x00 0x38 0x20 0x0e
-0x00 0x68 0x20 0x2e
-0x00 0x28 0x20 0x2e
-0x00 0xd8 0x21 0x2e
-0x00 0x48 0x21 0x2e
-0x00 0xc8 0xa1 0x0e
-0x00 0xc8 0xa1 0x2e
-0x00 0x38 0x20 0x2e
-0x00 0x28 0x21 0x0e
-0x00 0x48 0x20 0x0e
-0x00 0x48 0x20 0x2e
-0x00 0x58 0x20 0x0e
-0x00 0xf8 0xa0 0x0e
-0x00 0xc8 0x21 0x0e
-0x00 0xc8 0x21 0x2e
-0x00 0xb8 0x21 0x0e
-0x00 0xb8 0x21 0x2e
-0x00 0xa8 0x21 0x0e
-0x00 0xa8 0x21 0x2e
-0x00 0xa8 0xa1 0x0e
-0x00 0xa8 0xa1 0x2e
-0x00 0xb8 0xa1 0x0e
-0x00 0xb8 0xa1 0x2e
-0x00 0xf8 0xa0 0x2e
-0x00 0xd8 0xa1 0x0e
-0x00 0xd8 0xa1 0x2e
-0x00 0xf8 0xa1 0x2e
-0x00 0xb8 0x20 0x2e
-0x00 0x58 0x20 0x2e
-0x00 0x58 0x60 0x2e
-0x00 0x18 0x20 0x0e
-0x00 0x08 0x20 0x2e
-0x00 0x08 0x20 0x0e
-0x00 0x68 0x20 0x0e
-0x00 0x28 0x20 0x0e
-0x00 0xd8 0x21 0x0e
-0x00 0x38 0x21 0x2e
-0x00 0x78 0x20 0x0e
-0x00 0x78 0x20 0x2e
-0x00 0x48 0x21 0x0e
-0x00 0x28 0x21 0x2e
-0x00 0x38 0x20 0x0e
-0x00 0x68 0x20 0x2e
-0x00 0x28 0x20 0x2e
-0x00 0xd8 0x21 0x2e
-0x00 0x48 0x21 0x2e
-0x00 0xc8 0xa1 0x0e
-0x00 0xc8 0xa1 0x2e
-0x00 0x38 0x20 0x2e
-0x00 0x28 0x21 0x0e
-
-# CHECK: cls.8b v0, v0
-# CHECK: clz.8b v0, v0
-# CHECK: cnt.8b v0, v0
-# CHECK: fabs.2s v0, v0
-# CHECK: fcvtas.2s v0, v0
-# CHECK: fcvtau.2s v0, v0
-# CHECK: fcvtms.2s v0, v0
-# CHECK: fcvtmu.2s v0, v0
-# CHECK: fcvtns.2s v0, v0
-# CHECK: fcvtnu.2s v0, v0
-# CHECK: fcvtps.2s v0, v0
-# CHECK: fcvtpu.2s v0, v0
-# CHECK: fcvtzs.2s v0, v0
-# CHECK: fcvtzu.2s v0, v0
-# CHECK: fneg.2s v0, v0
-# CHECK: frecpe.2s v0, v0
-# CHECK: frsqrte.2s v0, v0
-# CHECK: fsqrt.2s v0, v0
-# CHECK: neg.8b v0, v0
-# CHECK: not.8b v0, v0
-# CHECK: rbit.8b v0, v0
-# CHECK: rev16.8b v0, v0
-# CHECK: rev32.8b v0, v0
-# CHECK: rev64.8b v0, v0
-# CHECK: sadalp.4h v0, v0
-# CHECK: saddlp.4h v0, v0
-# CHECK: scvtf.2s v0, v0
-# CHECK: shll.8h v0, v0, #8
-# CHECK: sqabs.8b v0, v0
-# CHECK: sqneg.8b v0, v0
-# CHECK: sqxtn.8b v0, v0
-# CHECK: sqxtun.8b v0, v0
-# CHECK: suqadd.8b v0, v0
-# CHECK: uadalp.4h v0, v0
-# CHECK: uaddlp.4h v0, v0
-# CHECK: ucvtf.2s v0, v0
-# CHECK: uqxtn.8b v0, v0
-# CHECK: urecpe.2s v0, v0
-# CHECK: ursqrte.2s v0, v0
-# CHECK: usqadd.8b v0, v0
-# CHECK: xtn.8b v0, v0
-
-0x00 0x98 0x20 0x0e
-0x00 0x98 0x20 0x4e
-0x00 0x98 0x60 0x0e
-0x00 0x98 0x60 0x4e
-0x00 0x98 0xa0 0x0e
-0x00 0x98 0xa0 0x4e
-0x00 0x98 0xe0 0x4e
-
-# CHECK: cmeq.8b v0, v0, #0
-# CHECK: cmeq.16b v0, v0, #0
-# CHECK: cmeq.4h v0, v0, #0
-# CHECK: cmeq.8h v0, v0, #0
-# CHECK: cmeq.2s v0, v0, #0
-# CHECK: cmeq.4s v0, v0, #0
-# CHECK: cmeq.2d v0, v0, #0
-
-0x00 0x88 0x20 0x2e
-0x00 0x88 0x20 0x0e
-0x00 0x98 0x20 0x2e
-0x00 0xa8 0x20 0x0e
-0x00 0xd8 0xa0 0x0e
-0x00 0xc8 0xa0 0x2e
-0x00 0xc8 0xa0 0x0e
-0x00 0xd8 0xa0 0x2e
-0x00 0xe8 0xa0 0x0e
-
-# CHECK: cmge.8b v0, v0, #0
-# CHECK: cmgt.8b v0, v0, #0
-# CHECK: cmle.8b v0, v0, #0
-# CHECK: cmlt.8b v0, v0, #0
-# CHECK: fcmeq.2s v0, v0, #0
-# CHECK: fcmge.2s v0, v0, #0
-# CHECK: fcmgt.2s v0, v0, #0
-# CHECK: fcmle.2s v0, v0, #0
-# CHECK: fcmlt.2s v0, v0, #0
-
-0x00 0x78 0x21 0x0e
-0x00 0x78 0x21 0x4e
-0x00 0x78 0x61 0x0e
-0x00 0x78 0x61 0x4e
-0x00 0x68 0x21 0x0e
-0x00 0x68 0x21 0x4e
-0x00 0x68 0x61 0x0e
-0x00 0x68 0x61 0x4e
-0x00 0x68 0x61 0x2e
-0x00 0x68 0x61 0x6e
-
-# CHECK: fcvtl v0.4s, v0.4h
-# CHECK: fcvtl2 v0.4s, v0.8h
-# CHECK: fcvtl v0.2d, v0.2s
-# CHECK: fcvtl2 v0.2d, v0.4s
-# CHECK: fcvtn v0.4h, v0.4s
-# CHECK: fcvtn2 v0.8h, v0.4s
-# CHECK: fcvtn v0.2s, v0.2d
-# CHECK: fcvtn2 v0.4s, v0.2d
-# CHECK: fcvtxn v0.2s, v0.2d
-# CHECK: fcvtxn2 v0.4s, v0.2d
-
-#===-------------------------------------------------------------------------===
-# AdvSIMD modified immediate instructions
-#===-------------------------------------------------------------------------===
-
-0x20 0x14 0x00 0x2f
-0x20 0x34 0x00 0x2f
-0x20 0x54 0x00 0x2f
-0x20 0x74 0x00 0x2f
-
-# CHECK: bic.2s v0, #1
-# CHECK: bic.2s v0, #1, lsl #8
-# CHECK: bic.2s v0, #1, lsl #16
-# CHECK: bic.2s v0, #1, lsl #24
-
-0x20 0x94 0x00 0x2f
-0x20 0x94 0x00 0x2f
-0x20 0xb4 0x00 0x2f
-
-# CHECK: bic.4h v0, #1
-# CHECK: bic.4h v0, #1
-# FIXME: bic.4h v0, #1, lsl #8
-# 'bic.4h' should be selected over "fcvtnu.2s v0, v1, #0"
-
-0x20 0x14 0x00 0x6f
-0x20 0x34 0x00 0x6f
-0x20 0x54 0x00 0x6f
-0x20 0x74 0x00 0x6f
-
-# CHECK: bic.4s v0, #1
-# CHECK: bic.4s v0, #1, lsl #8
-# CHECK: bic.4s v0, #1, lsl #16
-# CHECK: bic.4s v0, #1, lsl #24
-
-0x20 0x94 0x00 0x6f
-0x20 0xb4 0x00 0x6f
-
-# CHECK: bic.8h v0, #1
-# FIXME: bic.8h v0, #1, lsl #8
-# "bic.8h" should be selected over "fcvtnu.4s v0, v1, #0"
-
-0x00 0xf4 0x02 0x6f
-
-# CHECK: fmov.2d v0, #1.250000e-01
-
-0x00 0xf4 0x02 0x0f
-0x00 0xf4 0x02 0x4f
-
-# CHECK: fmov.2s v0, #1.250000e-01
-# CHECK: fmov.4s v0, #1.250000e-01
-
-0x20 0x14 0x00 0x0f
-0x20 0x34 0x00 0x0f
-0x20 0x54 0x00 0x0f
-0x20 0x74 0x00 0x0f
-
-# CHECK: orr.2s v0, #1
-# CHECK: orr.2s v0, #1, lsl #8
-# CHECK: orr.2s v0, #1, lsl #16
-# CHECK: orr.2s v0, #1, lsl #24
-
-0x20 0x94 0x00 0x0f
-0x20 0xb4 0x00 0x0f
-
-# CHECK: orr.4h v0, #1
-# FIXME: orr.4h v0, #1, lsl #8
-# 'orr.4h' should be selected over "fcvtns.2s v0, v1, #0"
-
-0x20 0x14 0x00 0x4f
-0x20 0x34 0x00 0x4f
-0x20 0x54 0x00 0x4f
-0x20 0x74 0x00 0x4f
-
-# CHECK: orr.4s v0, #1
-# CHECK: orr.4s v0, #1, lsl #8
-# CHECK: orr.4s v0, #1, lsl #16
-# CHECK: orr.4s v0, #1, lsl #24
-
-0x20 0x94 0x00 0x4f
-0x20 0xb4 0x00 0x4f
-
-# CHECK: orr.8h v0, #1
-# FIXME: orr.8h v0, #1, lsl #8
-# "orr.8h" should be selected over "fcvtns.4s v0, v1, #0"
-
-0x21 0x70 0x40 0x0c
-0x42 0xa0 0x40 0x4c
-0x64 0x64 0x40 0x0c
-0x87 0x24 0x40 0x4c
-0x0c 0xa8 0x40 0x0c
-0x0a 0x68 0x40 0x4c
-0x2d 0xac 0x40 0x0c
-0x4f 0x7c 0x40 0x4c
-
-# CHECK: ld1.8b { v1 }, [x1]
-# CHECK: ld1.16b { v2, v3 }, [x2]
-# CHECK: ld1.4h { v4, v5, v6 }, [x3]
-# CHECK: ld1.8h { v7, v8, v9, v10 }, [x4]
-# CHECK: ld1.2s { v12, v13 }, [x0]
-# CHECK: ld1.4s { v10, v11, v12 }, [x0]
-# CHECK: ld1.1d { v13, v14 }, [x1]
-# CHECK: ld1.2d { v15 }, [x2]
-
-0x41 0x70 0xdf 0x0c
-0x41 0xa0 0xdf 0x0c
-0x41 0x60 0xdf 0x0c
-0x41 0x20 0xdf 0x0c
-0x42 0x70 0xdf 0x4c
-0x42 0xa0 0xdf 0x4c
-0x42 0x60 0xdf 0x4c
-0x42 0x20 0xdf 0x4c
-0x64 0x74 0xdf 0x0c
-0x64 0xa4 0xdf 0x0c
-0x64 0x64 0xdf 0x0c
-0x64 0x24 0xdf 0x0c
-0x87 0x74 0xdf 0x4c
-0x87 0xa4 0xdf 0x4c
-0x87 0x64 0xdf 0x4c
-0x87 0x24 0xdf 0x4c
-0x0c 0x78 0xdf 0x0c
-0x0c 0xa8 0xdf 0x0c
-0x0c 0x68 0xdf 0x0c
-0x0c 0x28 0xdf 0x0c
-0x0a 0x78 0xdf 0x4c
-0x0a 0xa8 0xdf 0x4c
-0x0a 0x68 0xdf 0x4c
-0x0a 0x28 0xdf 0x4c
-0x2d 0x7c 0xdf 0x0c
-0x2d 0xac 0xdf 0x0c
-0x2d 0x6c 0xdf 0x0c
-0x2d 0x2c 0xdf 0x0c
-0x4f 0x7c 0xdf 0x4c
-0x4f 0xac 0xdf 0x4c
-0x4f 0x6c 0xdf 0x4c
-0x4f 0x2c 0xdf 0x4c
-
-# CHECK: ld1.8b { v1 }, [x2], #8
-# CHECK: ld1.8b { v1, v2 }, [x2], #16
-# CHECK: ld1.8b { v1, v2, v3 }, [x2], #24
-# CHECK: ld1.8b { v1, v2, v3, v4 }, [x2], #32
-# CHECK: ld1.16b { v2 }, [x2], #16
-# CHECK: ld1.16b { v2, v3 }, [x2], #32
-# CHECK: ld1.16b { v2, v3, v4 }, [x2], #48
-# CHECK: ld1.16b { v2, v3, v4, v5 }, [x2], #64
-# CHECK: ld1.4h { v4 }, [x3], #8
-# CHECK: ld1.4h { v4, v5 }, [x3], #16
-# CHECK: ld1.4h { v4, v5, v6 }, [x3], #24
-# CHECK: ld1.4h { v4, v5, v6, v7 }, [x3], #32
-# CHECK: ld1.8h { v7 }, [x4], #16
-# CHECK: ld1.8h { v7, v8 }, [x4], #32
-# CHECK: ld1.8h { v7, v8, v9 }, [x4], #48
-# CHECK: ld1.8h { v7, v8, v9, v10 }, [x4], #64
-# CHECK: ld1.2s { v12 }, [x0], #8
-# CHECK: ld1.2s { v12, v13 }, [x0], #16
-# CHECK: ld1.2s { v12, v13, v14 }, [x0], #24
-# CHECK: ld1.2s { v12, v13, v14, v15 }, [x0], #32
-# CHECK: ld1.4s { v10 }, [x0], #16
-# CHECK: ld1.4s { v10, v11 }, [x0], #32
-# CHECK: ld1.4s { v10, v11, v12 }, [x0], #48
-# CHECK: ld1.4s { v10, v11, v12, v13 }, [x0], #64
-# CHECK: ld1.1d { v13 }, [x1], #8
-# CHECK: ld1.1d { v13, v14 }, [x1], #16
-# CHECK: ld1.1d { v13, v14, v15 }, [x1], #24
-# CHECK: ld1.1d { v13, v14, v15, v16 }, [x1], #32
-# CHECK: ld1.2d { v15 }, [x2], #16
-# CHECK: ld1.2d { v15, v16 }, [x2], #32
-# CHECK: ld1.2d { v15, v16, v17 }, [x2], #48
-# CHECK: ld1.2d { v15, v16, v17, v18 }, [x2], #64
-
-0x21 0x70 0x00 0x0c
-0x42 0xa0 0x00 0x4c
-0x64 0x64 0x00 0x0c
-0x87 0x24 0x00 0x4c
-0x0c 0xa8 0x00 0x0c
-0x0a 0x68 0x00 0x4c
-0x2d 0xac 0x00 0x0c
-0x4f 0x7c 0x00 0x4c
-
-# CHECK: st1.8b { v1 }, [x1]
-# CHECK: st1.16b { v2, v3 }, [x2]
-# CHECK: st1.4h { v4, v5, v6 }, [x3]
-# CHECK: st1.8h { v7, v8, v9, v10 }, [x4]
-# CHECK: st1.2s { v12, v13 }, [x0]
-# CHECK: st1.4s { v10, v11, v12 }, [x0]
-# CHECK: st1.1d { v13, v14 }, [x1]
-# CHECK: st1.2d { v15 }, [x2]
-
-0x61 0x08 0x40 0x0d
-0x82 0x84 0x40 0x4d
-0xa3 0x58 0x40 0x0d
-0xc4 0x80 0x40 0x4d
-
-# CHECK: ld1.b { v1 }[2], [x3]
-# CHECK: ld1.d { v2 }[1], [x4]
-# CHECK: ld1.h { v3 }[3], [x5]
-# CHECK: ld1.s { v4 }[2], [x6]
-
-0x61 0x08 0xdf 0x0d
-0x82 0x84 0xdf 0x4d
-0xa3 0x58 0xdf 0x0d
-0xc4 0x80 0xdf 0x4d
-
-# CHECK: ld1.b { v1 }[2], [x3], #1
-# CHECK: ld1.d { v2 }[1], [x4], #8
-# CHECK: ld1.h { v3 }[3], [x5], #2
-# CHECK: ld1.s { v4 }[2], [x6], #4
-
-0x61 0x08 0x00 0x0d
-0x82 0x84 0x00 0x4d
-0xa3 0x58 0x00 0x0d
-0xc4 0x80 0x00 0x4d
-
-# CHECK: st1.b { v1 }[2], [x3]
-# CHECK: st1.d { v2 }[1], [x4]
-# CHECK: st1.h { v3 }[3], [x5]
-# CHECK: st1.s { v4 }[2], [x6]
-
-0x61 0x08 0x9f 0x0d
-0x82 0x84 0x9f 0x4d
-0xa3 0x58 0x9f 0x0d
-0xc4 0x80 0x9f 0x4d
-
-# CHECK: st1.b { v1 }[2], [x3], #1
-# CHECK: st1.d { v2 }[1], [x4], #8
-# CHECK: st1.h { v3 }[3], [x5], #2
-# CHECK: st1.s { v4 }[2], [x6], #4
-
-0x61 0x08 0xc4 0x0d
-0x82 0x84 0xc5 0x4d
-0xa3 0x58 0xc6 0x0d
-0xc4 0x80 0xc7 0x4d
-
-# CHECK: ld1.b { v1 }[2], [x3], x4
-# CHECK: ld1.d { v2 }[1], [x4], x5
-# CHECK: ld1.h { v3 }[3], [x5], x6
-# CHECK: ld1.s { v4 }[2], [x6], x7
-
-0x61 0x08 0x84 0x0d
-0x82 0x84 0x85 0x4d
-0xa3 0x58 0x86 0x0d
-0xc4 0x80 0x87 0x4d
-
-# CHECK: st1.b { v1 }[2], [x3], x4
-# CHECK: st1.d { v2 }[1], [x4], x5
-# CHECK: st1.h { v3 }[3], [x5], x6
-# CHECK: st1.s { v4 }[2], [x6], x7
-
-0x41 0x70 0xc3 0x0c
-0x42 0xa0 0xc4 0x4c
-0x64 0x64 0xc5 0x0c
-0x87 0x24 0xc6 0x4c
-0x0c 0xa8 0xc7 0x0c
-0x0a 0x68 0xc8 0x4c
-0x2d 0xac 0xc9 0x0c
-0x4f 0x7c 0xca 0x4c
-
-# CHECK: ld1.8b { v1 }, [x2], x3
-# CHECK: ld1.16b { v2, v3 }, [x2], x4
-# CHECK: ld1.4h { v4, v5, v6 }, [x3], x5
-# CHECK: ld1.8h { v7, v8, v9, v10 }, [x4], x6
-# CHECK: ld1.2s { v12, v13 }, [x0], x7
-# CHECK: ld1.4s { v10, v11, v12 }, [x0], x8
-# CHECK: ld1.1d { v13, v14 }, [x1], x9
-# CHECK: ld1.2d { v15 }, [x2], x10
-
-0x41 0x70 0x83 0x0c
-0x42 0xa0 0x84 0x4c
-0x64 0x64 0x85 0x0c
-0x87 0x24 0x86 0x4c
-0x0c 0xa8 0x87 0x0c
-0x0a 0x68 0x88 0x4c
-0x2d 0xac 0x89 0x0c
-0x4f 0x7c 0x8a 0x4c
-
-# CHECK: st1.8b { v1 }, [x2], x3
-# CHECK: st1.16b { v2, v3 }, [x2], x4
-# CHECK: st1.4h { v4, v5, v6 }, [x3], x5
-# CHECK: st1.8h { v7, v8, v9, v10 }, [x4], x6
-# CHECK: st1.2s { v12, v13 }, [x0], x7
-# CHECK: st1.4s { v10, v11, v12 }, [x0], x8
-# CHECK: st1.1d { v13, v14 }, [x1], x9
-# CHECK: st1.2d { v15 }, [x2], x10
-
-0x41 0x70 0x9f 0x0c
-0x41 0xa0 0x9f 0x0c
-0x41 0x60 0x9f 0x0c
-0x41 0x20 0x9f 0x0c
-0x42 0x70 0x9f 0x4c
-0x42 0xa0 0x9f 0x4c
-0x42 0x60 0x9f 0x4c
-0x42 0x20 0x9f 0x4c
-0x64 0x74 0x9f 0x0c
-0x64 0xa4 0x9f 0x0c
-0x64 0x64 0x9f 0x0c
-0x64 0x24 0x9f 0x0c
-0x87 0x74 0x9f 0x4c
-0x87 0xa4 0x9f 0x4c
-0x87 0x64 0x9f 0x4c
-0x87 0x24 0x9f 0x4c
-0x0c 0x78 0x9f 0x0c
-0x0c 0xa8 0x9f 0x0c
-0x0c 0x68 0x9f 0x0c
-0x0c 0x28 0x9f 0x0c
-0x0a 0x78 0x9f 0x4c
-0x0a 0xa8 0x9f 0x4c
-0x0a 0x68 0x9f 0x4c
-0x0a 0x28 0x9f 0x4c
-0x2d 0x7c 0x9f 0x0c
-0x2d 0xac 0x9f 0x0c
-0x2d 0x6c 0x9f 0x0c
-0x2d 0x2c 0x9f 0x0c
-0x4f 0x7c 0x9f 0x4c
-0x4f 0xac 0x9f 0x4c
-0x4f 0x6c 0x9f 0x4c
-0x4f 0x2c 0x9f 0x4c
-
-# CHECK: st1.8b { v1 }, [x2], #8
-# CHECK: st1.8b { v1, v2 }, [x2], #16
-# CHECK: st1.8b { v1, v2, v3 }, [x2], #24
-# CHECK: st1.8b { v1, v2, v3, v4 }, [x2], #32
-# CHECK: st1.16b { v2 }, [x2], #16
-# CHECK: st1.16b { v2, v3 }, [x2], #32
-# CHECK: st1.16b { v2, v3, v4 }, [x2], #48
-# CHECK: st1.16b { v2, v3, v4, v5 }, [x2], #64
-# CHECK: st1.4h { v4 }, [x3], #8
-# CHECK: st1.4h { v4, v5 }, [x3], #16
-# CHECK: st1.4h { v4, v5, v6 }, [x3], #24
-# CHECK: st1.4h { v4, v5, v6, v7 }, [x3], #32
-# CHECK: st1.8h { v7 }, [x4], #16
-# CHECK: st1.8h { v7, v8 }, [x4], #32
-# CHECK: st1.8h { v7, v8, v9 }, [x4], #48
-# CHECK: st1.8h { v7, v8, v9, v10 }, [x4], #64
-# CHECK: st1.2s { v12 }, [x0], #8
-# CHECK: st1.2s { v12, v13 }, [x0], #16
-# CHECK: st1.2s { v12, v13, v14 }, [x0], #24
-# CHECK: st1.2s { v12, v13, v14, v15 }, [x0], #32
-# CHECK: st1.4s { v10 }, [x0], #16
-# CHECK: st1.4s { v10, v11 }, [x0], #32
-# CHECK: st1.4s { v10, v11, v12 }, [x0], #48
-# CHECK: st1.4s { v10, v11, v12, v13 }, [x0], #64
-# CHECK: st1.1d { v13 }, [x1], #8
-# CHECK: st1.1d { v13, v14 }, [x1], #16
-# CHECK: st1.1d { v13, v14, v15 }, [x1], #24
-# CHECK: st1.1d { v13, v14, v15, v16 }, [x1], #32
-# CHECK: st1.2d { v15 }, [x2], #16
-# CHECK: st1.2d { v15, v16 }, [x2], #32
-# CHECK: st1.2d { v15, v16, v17 }, [x2], #48
-# CHECK: st1.2d { v15, v16, v17, v18 }, [x2], #64
-
-0x21 0xc0 0x40 0x0d
-0x21 0xc0 0xc2 0x0d
-0x64 0xc4 0x40 0x0d
-0x64 0xc4 0xc5 0x0d
-0xa9 0xc8 0x40 0x0d
-0xa9 0xc8 0xc6 0x0d
-0xec 0xcc 0x40 0x0d
-0xec 0xcc 0xc8 0x0d
-
-# CHECK: ld1r.8b { v1 }, [x1]
-# CHECK: ld1r.8b { v1 }, [x1], x2
-# CHECK: ld1r.4h { v4 }, [x3]
-# CHECK: ld1r.4h { v4 }, [x3], x5
-# CHECK: ld1r.2s { v9 }, [x5]
-# CHECK: ld1r.2s { v9 }, [x5], x6
-# CHECK: ld1r.1d { v12 }, [x7]
-# CHECK: ld1r.1d { v12 }, [x7], x8
-
-0x21 0xc0 0xdf 0x0d
-0x21 0xc4 0xdf 0x0d
-0x21 0xc8 0xdf 0x0d
-0x21 0xcc 0xdf 0x0d
-
-# CHECK: ld1r.8b { v1 }, [x1], #1
-# CHECK: ld1r.4h { v1 }, [x1], #2
-# CHECK: ld1r.2s { v1 }, [x1], #4
-# CHECK: ld1r.1d { v1 }, [x1], #8
-
-0x45 0x80 0x40 0x4c
-0x0a 0x88 0x40 0x0c
-
-# CHECK: ld2.16b { v5, v6 }, [x2]
-# CHECK: ld2.2s { v10, v11 }, [x0]
-
-0x45 0x80 0x00 0x4c
-0x0a 0x88 0x00 0x0c
-
-# CHECK: st2.16b { v5, v6 }, [x2]
-# CHECK: st2.2s { v10, v11 }, [x0]
-
-0x61 0x08 0x20 0x0d
-0x82 0x84 0x20 0x4d
-0xc3 0x50 0x20 0x0d
-0xe4 0x90 0x20 0x4d
-
-# CHECK: st2.b { v1, v2 }[2], [x3]
-# CHECK: st2.d { v2, v3 }[1], [x4]
-# CHECK: st2.h { v3, v4 }[2], [x6]
-# CHECK: st2.s { v4, v5 }[3], [x7]
-
-0x61 0x08 0xbf 0x0d
-0x82 0x84 0xbf 0x4d
-0xa3 0x58 0xbf 0x0d
-0xc4 0x80 0xbf 0x4d
-
-# CHECK: st2.b { v1, v2 }[2], [x3], #2
-# CHECK: st2.d { v2, v3 }[1], [x4], #16
-# CHECK: st2.h { v3, v4 }[3], [x5], #4
-# CHECK: st2.s { v4, v5 }[2], [x6], #8
-
-0x61 0x08 0x60 0x0d
-0x82 0x84 0x60 0x4d
-0xc3 0x50 0x60 0x0d
-0xe4 0x90 0x60 0x4d
-
-# CHECK: ld2.b { v1, v2 }[2], [x3]
-# CHECK: ld2.d { v2, v3 }[1], [x4]
-# CHECK: ld2.h { v3, v4 }[2], [x6]
-# CHECK: ld2.s { v4, v5 }[3], [x7]
-
-0x61 0x08 0xff 0x0d
-0x82 0x84 0xff 0x4d
-0xa3 0x58 0xff 0x0d
-0xc4 0x80 0xff 0x4d
-
-# CHECK: ld2.b { v1, v2 }[2], [x3], #2
-# CHECK: ld2.d { v2, v3 }[1], [x4], #16
-# CHECK: ld2.h { v3, v4 }[3], [x5], #4
-# CHECK: ld2.s { v4, v5 }[2], [x6], #8
-
-0x61 0x08 0xe4 0x0d
-0x82 0x84 0xe6 0x4d
-0xa3 0x58 0xe8 0x0d
-0xc4 0x80 0xea 0x4d
-
-# CHECK: ld2.b { v1, v2 }[2], [x3], x4
-# CHECK: ld2.d { v2, v3 }[1], [x4], x6
-# CHECK: ld2.h { v3, v4 }[3], [x5], x8
-# CHECK: ld2.s { v4, v5 }[2], [x6], x10
-
-0x61 0x08 0xa4 0x0d
-0x82 0x84 0xa6 0x4d
-0xa3 0x58 0xa8 0x0d
-0xc4 0x80 0xaa 0x4d
-
-# CHECK: st2.b { v1, v2 }[2], [x3], x4
-# CHECK: st2.d { v2, v3 }[1], [x4], x6
-# CHECK: st2.h { v3, v4 }[3], [x5], x8
-# CHECK: st2.s { v4, v5 }[2], [x6], x10
-
-0x64 0x84 0xc5 0x0c
-0x0c 0x88 0xc7 0x0c
-
-# CHECK: ld2.4h { v4, v5 }, [x3], x5
-# CHECK: ld2.2s { v12, v13 }, [x0], x7
-
-0x00 0x80 0xdf 0x0c
-0x00 0x80 0xdf 0x4c
-0x00 0x84 0xdf 0x0c
-0x00 0x84 0xdf 0x4c
-0x00 0x88 0xdf 0x0c
-0x00 0x88 0xdf 0x4c
-0x00 0x8c 0xdf 0x4c
-
-# CHECK: ld2.8b { v0, v1 }, [x0], #16
-# CHECK: ld2.16b { v0, v1 }, [x0], #32
-# CHECK: ld2.4h { v0, v1 }, [x0], #16
-# CHECK: ld2.8h { v0, v1 }, [x0], #32
-# CHECK: ld2.2s { v0, v1 }, [x0], #16
-# CHECK: ld2.4s { v0, v1 }, [x0], #32
-# CHECK: ld2.2d { v0, v1 }, [x0], #32
-
-0x64 0x84 0x85 0x0c
-0x0c 0x88 0x87 0x0c
-
-# CHECK: st2.4h { v4, v5 }, [x3], x5
-# CHECK: st2.2s { v12, v13 }, [x0], x7
-
-0x00 0x80 0x9f 0x0c
-0x00 0x80 0x9f 0x4c
-0x00 0x84 0x9f 0x0c
-0x00 0x84 0x9f 0x4c
-0x00 0x88 0x9f 0x0c
-0x00 0x88 0x9f 0x4c
-0x00 0x8c 0x9f 0x4c
-
-# CHECK: st2.8b { v0, v1 }, [x0], #16
-# CHECK: st2.16b { v0, v1 }, [x0], #32
-# CHECK: st2.4h { v0, v1 }, [x0], #16
-# CHECK: st2.8h { v0, v1 }, [x0], #32
-# CHECK: st2.2s { v0, v1 }, [x0], #16
-# CHECK: st2.4s { v0, v1 }, [x0], #32
-# CHECK: st2.2d { v0, v1 }, [x0], #32
-
-0x21 0xc0 0x60 0x0d
-0x21 0xc0 0xe2 0x0d
-0x21 0xc0 0x60 0x4d
-0x21 0xc0 0xe2 0x4d
-0x21 0xc4 0x60 0x0d
-0x21 0xc4 0xe2 0x0d
-0x21 0xc4 0x60 0x4d
-0x21 0xc4 0xe2 0x4d
-0x21 0xc8 0x60 0x0d
-0x21 0xc8 0xe2 0x0d
-0x21 0xcc 0x60 0x4d
-0x21 0xcc 0xe2 0x4d
-0x21 0xcc 0x60 0x0d
-0x21 0xcc 0xe2 0x0d
-
-# CHECK: ld2r.8b { v1, v2 }, [x1]
-# CHECK: ld2r.8b { v1, v2 }, [x1], x2
-# CHECK: ld2r.16b { v1, v2 }, [x1]
-# CHECK: ld2r.16b { v1, v2 }, [x1], x2
-# CHECK: ld2r.4h { v1, v2 }, [x1]
-# CHECK: ld2r.4h { v1, v2 }, [x1], x2
-# CHECK: ld2r.8h { v1, v2 }, [x1]
-# CHECK: ld2r.8h { v1, v2 }, [x1], x2
-# CHECK: ld2r.2s { v1, v2 }, [x1]
-# CHECK: ld2r.2s { v1, v2 }, [x1], x2
-# CHECK: ld2r.2d { v1, v2 }, [x1]
-# CHECK: ld2r.2d { v1, v2 }, [x1], x2
-# CHECK: ld2r.1d { v1, v2 }, [x1]
-# CHECK: ld2r.1d { v1, v2 }, [x1], x2
-
-0x21 0xc0 0xff 0x0d
-0x21 0xc0 0xff 0x4d
-0x21 0xc4 0xff 0x0d
-0x21 0xc4 0xff 0x4d
-0x21 0xc8 0xff 0x0d
-0x21 0xcc 0xff 0x4d
-0x21 0xcc 0xff 0x0d
-
-# CHECK: ld2r.8b { v1, v2 }, [x1], #2
-# CHECK: ld2r.16b { v1, v2 }, [x1], #2
-# CHECK: ld2r.4h { v1, v2 }, [x1], #4
-# CHECK: ld2r.8h { v1, v2 }, [x1], #4
-# CHECK: ld2r.2s { v1, v2 }, [x1], #8
-# CHECK: ld2r.2d { v1, v2 }, [x1], #16
-# CHECK: ld2r.1d { v1, v2 }, [x1], #16
-
-0x21 0x40 0x40 0x0c
-0x45 0x40 0x40 0x4c
-0x0a 0x48 0x40 0x0c
-
-# CHECK: ld3.8b { v1, v2, v3 }, [x1]
-# CHECK: ld3.16b { v5, v6, v7 }, [x2]
-# CHECK: ld3.2s { v10, v11, v12 }, [x0]
-
-0x21 0x40 0x00 0x0c
-0x45 0x40 0x00 0x4c
-0x0a 0x48 0x00 0x0c
-
-# CHECK: st3.8b { v1, v2, v3 }, [x1]
-# CHECK: st3.16b { v5, v6, v7 }, [x2]
-# CHECK: st3.2s { v10, v11, v12 }, [x0]
-
-0x61 0x28 0xc4 0x0d
-0x82 0xa4 0xc5 0x4d
-0xa3 0x78 0xc6 0x0d
-0xc4 0xa0 0xc7 0x4d
-
-# CHECK: ld3.b { v1, v2, v3 }[2], [x3], x4
-# CHECK: ld3.d { v2, v3, v4 }[1], [x4], x5
-# CHECK: ld3.h { v3, v4, v5 }[3], [x5], x6
-# CHECK: ld3.s { v4, v5, v6 }[2], [x6], x7
-
-0x61 0x28 0x84 0x0d
-0x82 0xa4 0x85 0x4d
-0xa3 0x78 0x86 0x0d
-0xc4 0xa0 0x87 0x4d
-
-# CHECK: st3.b { v1, v2, v3 }[2], [x3], x4
-# CHECK: st3.d { v2, v3, v4 }[1], [x4], x5
-# CHECK: st3.h { v3, v4, v5 }[3], [x5], x6
-# CHECK: st3.s { v4, v5, v6 }[2], [x6], x7
-
-0x61 0x28 0x9f 0x0d
-0x82 0xa4 0x9f 0x4d
-0xa3 0x78 0x9f 0x0d
-0xc4 0xa0 0x9f 0x4d
-
-# CHECK: st3.b { v1, v2, v3 }[2], [x3], #3
-# CHECK: st3.d { v2, v3, v4 }[1], [x4], #24
-# CHECK: st3.h { v3, v4, v5 }[3], [x5], #6
-# CHECK: st3.s { v4, v5, v6 }[2], [x6], #12
-
-0x41 0x40 0xc3 0x0c
-0x42 0x40 0xc4 0x4c
-0x64 0x44 0xc5 0x0c
-0x87 0x44 0xc6 0x4c
-0x0c 0x48 0xc7 0x0c
-0x0a 0x48 0xc8 0x4c
-0x4f 0x4c 0xca 0x4c
-
-# CHECK: ld3.8b { v1, v2, v3 }, [x2], x3
-# CHECK: ld3.16b { v2, v3, v4 }, [x2], x4
-# CHECK: ld3.4h { v4, v5, v6 }, [x3], x5
-# CHECK: ld3.8h { v7, v8, v9 }, [x4], x6
-# CHECK: ld3.2s { v12, v13, v14 }, [x0], x7
-# CHECK: ld3.4s { v10, v11, v12 }, [x0], x8
-# CHECK: ld3.2d { v15, v16, v17 }, [x2], x10
-
-0x00 0x40 0xdf 0x0c
-0x00 0x40 0xdf 0x4c
-0x00 0x44 0xdf 0x0c
-0x00 0x44 0xdf 0x4c
-0x00 0x48 0xdf 0x0c
-0x00 0x48 0xdf 0x4c
-0x00 0x4c 0xdf 0x4c
-
-# CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
-# CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
-# CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
-# CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
-# CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
-# CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
-# CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
-
-0x41 0x40 0x83 0x0c
-0x42 0x40 0x84 0x4c
-0x64 0x44 0x85 0x0c
-0x87 0x44 0x86 0x4c
-0x0c 0x48 0x87 0x0c
-0x0a 0x48 0x88 0x4c
-0x4f 0x4c 0x8a 0x4c
-
-# CHECK: st3.8b { v1, v2, v3 }, [x2], x3
-# CHECK: st3.16b { v2, v3, v4 }, [x2], x4
-# CHECK: st3.4h { v4, v5, v6 }, [x3], x5
-# CHECK: st3.8h { v7, v8, v9 }, [x4], x6
-# CHECK: st3.2s { v12, v13, v14 }, [x0], x7
-# CHECK: st3.4s { v10, v11, v12 }, [x0], x8
-# CHECK: st3.2d { v15, v16, v17 }, [x2], x10
-
-0x00 0x40 0x9f 0x0c
-0x00 0x40 0x9f 0x4c
-0x00 0x44 0x9f 0x0c
-0x00 0x44 0x9f 0x4c
-0x00 0x48 0x9f 0x0c
-0x00 0x48 0x9f 0x4c
-0x00 0x4c 0x9f 0x4c
-
-# CHECK: st3.8b { v0, v1, v2 }, [x0], #24
-# CHECK: st3.16b { v0, v1, v2 }, [x0], #48
-# CHECK: st3.4h { v0, v1, v2 }, [x0], #24
-# CHECK: st3.8h { v0, v1, v2 }, [x0], #48
-# CHECK: st3.2s { v0, v1, v2 }, [x0], #24
-# CHECK: st3.4s { v0, v1, v2 }, [x0], #48
-# CHECK: st3.2d { v0, v1, v2 }, [x0], #48
-
-0x61 0x28 0x40 0x0d
-0x82 0xa4 0x40 0x4d
-0xc3 0x70 0x40 0x0d
-0xe4 0xb0 0x40 0x4d
-
-# CHECK: ld3.b { v1, v2, v3 }[2], [x3]
-# CHECK: ld3.d { v2, v3, v4 }[1], [x4]
-# CHECK: ld3.h { v3, v4, v5 }[2], [x6]
-# CHECK: ld3.s { v4, v5, v6 }[3], [x7]
-
-0x61 0x28 0xdf 0x0d
-0x82 0xa4 0xdf 0x4d
-0xa3 0x78 0xdf 0x0d
-0xc4 0xa0 0xdf 0x4d
-
-# CHECK: ld3.b { v1, v2, v3 }[2], [x3], #3
-# CHECK: ld3.d { v2, v3, v4 }[1], [x4], #24
-# CHECK: ld3.h { v3, v4, v5 }[3], [x5], #6
-# CHECK: ld3.s { v4, v5, v6 }[2], [x6], #12
-
-0x61 0x28 0x00 0x0d
-0x82 0xa4 0x00 0x4d
-0xc3 0x70 0x00 0x0d
-0xe4 0xb0 0x00 0x4d
-
-# CHECK: st3.b { v1, v2, v3 }[2], [x3]
-# CHECK: st3.d { v2, v3, v4 }[1], [x4]
-# CHECK: st3.h { v3, v4, v5 }[2], [x6]
-# CHECK: st3.s { v4, v5, v6 }[3], [x7]
-
-0x21 0xe0 0x40 0x0d
-0x21 0xe0 0xc2 0x0d
-0x21 0xe0 0x40 0x4d
-0x21 0xe0 0xc2 0x4d
-0x21 0xe4 0x40 0x0d
-0x21 0xe4 0xc2 0x0d
-0x21 0xe4 0x40 0x4d
-0x21 0xe4 0xc2 0x4d
-0x21 0xe8 0x40 0x0d
-0x21 0xe8 0xc2 0x0d
-0x21 0xec 0x40 0x4d
-0x21 0xec 0xc2 0x4d
-0x21 0xec 0x40 0x0d
-0x21 0xec 0xc2 0x0d
-
-# CHECK: ld3r.8b { v1, v2, v3 }, [x1]
-# CHECK: ld3r.8b { v1, v2, v3 }, [x1], x2
-# CHECK: ld3r.16b { v1, v2, v3 }, [x1]
-# CHECK: ld3r.16b { v1, v2, v3 }, [x1], x2
-# CHECK: ld3r.4h { v1, v2, v3 }, [x1]
-# CHECK: ld3r.4h { v1, v2, v3 }, [x1], x2
-# CHECK: ld3r.8h { v1, v2, v3 }, [x1]
-# CHECK: ld3r.8h { v1, v2, v3 }, [x1], x2
-# CHECK: ld3r.2s { v1, v2, v3 }, [x1]
-# CHECK: ld3r.2s { v1, v2, v3 }, [x1], x2
-# CHECK: ld3r.2d { v1, v2, v3 }, [x1]
-# CHECK: ld3r.2d { v1, v2, v3 }, [x1], x2
-# CHECK: ld3r.1d { v1, v2, v3 }, [x1]
-# CHECK: ld3r.1d { v1, v2, v3 }, [x1], x2
-
-0x21 0xe0 0xdf 0x0d
-0x21 0xe0 0xdf 0x4d
-0x21 0xe4 0xdf 0x0d
-0x21 0xe4 0xdf 0x4d
-0x21 0xe8 0xdf 0x0d
-0x21 0xec 0xdf 0x4d
-0x21 0xec 0xdf 0x0d
-
-# CHECK: ld3r.8b { v1, v2, v3 }, [x1], #3
-# CHECK: ld3r.16b { v1, v2, v3 }, [x1], #3
-# CHECK: ld3r.4h { v1, v2, v3 }, [x1], #6
-# CHECK: ld3r.8h { v1, v2, v3 }, [x1], #6
-# CHECK: ld3r.2s { v1, v2, v3 }, [x1], #12
-# CHECK: ld3r.2d { v1, v2, v3 }, [x1], #24
-# CHECK: ld3r.1d { v1, v2, v3 }, [x1], #24
-
-0x21 0x00 0x40 0x0c
-0x45 0x00 0x40 0x4c
-0x0a 0x08 0x40 0x0c
-
-# CHECK: ld4.8b { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4.16b { v5, v6, v7, v8 }, [x2]
-# CHECK: ld4.2s { v10, v11, v12, v13 }, [x0]
-
-0x21 0x00 0x00 0x0c
-0x45 0x00 0x00 0x4c
-0x0a 0x08 0x00 0x0c
-
-# CHECK: st4.8b { v1, v2, v3, v4 }, [x1]
-# CHECK: st4.16b { v5, v6, v7, v8 }, [x2]
-# CHECK: st4.2s { v10, v11, v12, v13 }, [x0]
-
-0x61 0x28 0xe4 0x0d
-0x82 0xa4 0xe5 0x4d
-0xa3 0x78 0xe6 0x0d
-0xc4 0xa0 0xe7 0x4d
-
-# CHECK: ld4.b { v1, v2, v3, v4 }[2], [x3], x4
-# CHECK: ld4.d { v2, v3, v4, v5 }[1], [x4], x5
-# CHECK: ld4.h { v3, v4, v5, v6 }[3], [x5], x6
-# CHECK: ld4.s { v4, v5, v6, v7 }[2], [x6], x7
-
-0x61 0x28 0xff 0x0d
-0x82 0xa4 0xff 0x4d
-0xa3 0x78 0xff 0x0d
-0xc4 0xa0 0xff 0x4d
-
-# CHECK: ld4.b { v1, v2, v3, v4 }[2], [x3], #4
-# CHECK: ld4.d { v2, v3, v4, v5 }[1], [x4], #32
-# CHECK: ld4.h { v3, v4, v5, v6 }[3], [x5], #8
-# CHECK: ld4.s { v4, v5, v6, v7 }[2], [x6], #16
-
-0x61 0x28 0xa4 0x0d
-0x82 0xa4 0xa5 0x4d
-0xa3 0x78 0xa6 0x0d
-0xc4 0xa0 0xa7 0x4d
-
-# CHECK: st4.b { v1, v2, v3, v4 }[2], [x3], x4
-# CHECK: st4.d { v2, v3, v4, v5 }[1], [x4], x5
-# CHECK: st4.h { v3, v4, v5, v6 }[3], [x5], x6
-# CHECK: st4.s { v4, v5, v6, v7 }[2], [x6], x7
-
-0x61 0x28 0xbf 0x0d
-0x82 0xa4 0xbf 0x4d
-0xa3 0x78 0xbf 0x0d
-0xc4 0xa0 0xbf 0x4d
-
-# CHECK: st4.b { v1, v2, v3, v4 }[2], [x3], #4
-# CHECK: st4.d { v2, v3, v4, v5 }[1], [x4], #32
-# CHECK: st4.h { v3, v4, v5, v6 }[3], [x5], #8
-# CHECK: st4.s { v4, v5, v6, v7 }[2], [x6], #16
-
-0x41 0x00 0xc3 0x0c
-0x42 0x00 0xc4 0x4c
-0x64 0x04 0xc5 0x0c
-0x87 0x04 0xc6 0x4c
-0x0c 0x08 0xc7 0x0c
-0x0a 0x08 0xc8 0x4c
-0x4f 0x0c 0xca 0x4c
-
-# CHECK: ld4.8b { v1, v2, v3, v4 }, [x2], x3
-# CHECK: ld4.16b { v2, v3, v4, v5 }, [x2], x4
-# CHECK: ld4.4h { v4, v5, v6, v7 }, [x3], x5
-# CHECK: ld4.8h { v7, v8, v9, v10 }, [x4], x6
-# CHECK: ld4.2s { v12, v13, v14, v15 }, [x0], x7
-# CHECK: ld4.4s { v10, v11, v12, v13 }, [x0], x8
-# CHECK: ld4.2d { v15, v16, v17, v18 }, [x2], x10
-
-0x00 0x00 0xdf 0x0c
-0x00 0x00 0xdf 0x4c
-0x00 0x04 0xdf 0x0c
-0x00 0x04 0xdf 0x4c
-0x00 0x08 0xdf 0x0c
-0x00 0x08 0xdf 0x4c
-0x00 0x0c 0xdf 0x4c
-
-# CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
-# CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
-# CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
-# CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
-# CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
-# CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
-# CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
-
-0x00 0x00 0x9f 0x0c
-0x00 0x00 0x9f 0x4c
-0x00 0x04 0x9f 0x0c
-0x00 0x04 0x9f 0x4c
-0x00 0x08 0x9f 0x0c
-0x00 0x08 0x9f 0x4c
-0x00 0x0c 0x9f 0x4c
-
-# CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
-# CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
-# CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
-# CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
-# CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
-# CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
-# CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
-
-0x41 0x00 0x83 0x0c
-0x42 0x00 0x84 0x4c
-0x64 0x04 0x85 0x0c
-0x87 0x04 0x86 0x4c
-0x0c 0x08 0x87 0x0c
-0x0a 0x08 0x88 0x4c
-0x4f 0x0c 0x8a 0x4c
-
-# CHECK: st4.8b { v1, v2, v3, v4 }, [x2], x3
-# CHECK: st4.16b { v2, v3, v4, v5 }, [x2], x4
-# CHECK: st4.4h { v4, v5, v6, v7 }, [x3], x5
-# CHECK: st4.8h { v7, v8, v9, v10 }, [x4], x6
-# CHECK: st4.2s { v12, v13, v14, v15 }, [x0], x7
-# CHECK: st4.4s { v10, v11, v12, v13 }, [x0], x8
-# CHECK: st4.2d { v15, v16, v17, v18 }, [x2], x10
-
-0x61 0x28 0x60 0x0d
-0x82 0xa4 0x60 0x4d
-0xc3 0x70 0x60 0x0d
-0xe4 0xb0 0x60 0x4d
-
-# CHECK: ld4.b { v1, v2, v3, v4 }[2], [x3]
-# CHECK: ld4.d { v2, v3, v4, v5 }[1], [x4]
-# CHECK: ld4.h { v3, v4, v5, v6 }[2], [x6]
-# CHECK: ld4.s { v4, v5, v6, v7 }[3], [x7]
-
-0x61 0x28 0x20 0x0d
-0x82 0xa4 0x20 0x4d
-0xc3 0x70 0x20 0x0d
-0xe4 0xb0 0x20 0x4d
-
-# CHECK: st4.b { v1, v2, v3, v4 }[2], [x3]
-# CHECK: st4.d { v2, v3, v4, v5 }[1], [x4]
-# CHECK: st4.h { v3, v4, v5, v6 }[2], [x6]
-# CHECK: st4.s { v4, v5, v6, v7 }[3], [x7]
-
-0x21 0xe0 0x60 0x0d
-0x21 0xe0 0xe2 0x0d
-0x21 0xe0 0x60 0x4d
-0x21 0xe0 0xe2 0x4d
-0x21 0xe4 0x60 0x0d
-0x21 0xe4 0xe2 0x0d
-0x21 0xe4 0x60 0x4d
-0x21 0xe4 0xe2 0x4d
-0x21 0xe8 0x60 0x0d
-0x21 0xe8 0xe2 0x0d
-0x21 0xec 0x60 0x4d
-0x21 0xec 0xe2 0x4d
-0x21 0xec 0x60 0x0d
-0x21 0xec 0xe2 0x0d
-
-# CHECK: ld4r.8b { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.8b { v1, v2, v3, v4 }, [x1], x2
-# CHECK: ld4r.16b { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.16b { v1, v2, v3, v4 }, [x1], x2
-# CHECK: ld4r.4h { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.4h { v1, v2, v3, v4 }, [x1], x2
-# CHECK: ld4r.8h { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.8h { v1, v2, v3, v4 }, [x1], x2
-# CHECK: ld4r.2s { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.2s { v1, v2, v3, v4 }, [x1], x2
-# CHECK: ld4r.2d { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.2d { v1, v2, v3, v4 }, [x1], x2
-# CHECK: ld4r.1d { v1, v2, v3, v4 }, [x1]
-# CHECK: ld4r.1d { v1, v2, v3, v4 }, [x1], x2
-
-0x21 0xe0 0xff 0x0d
-0x21 0xe0 0xff 0x4d
-0x21 0xe4 0xff 0x0d
-0x21 0xe4 0xff 0x4d
-0x21 0xe8 0xff 0x0d
-0x21 0xec 0xff 0x4d
-0x21 0xec 0xff 0x0d
-
-# CHECK: ld4r.8b { v1, v2, v3, v4 }, [x1], #4
-# CHECK: ld4r.16b { v1, v2, v3, v4 }, [x1], #4
-# CHECK: ld4r.4h { v1, v2, v3, v4 }, [x1], #8
-# CHECK: ld4r.8h { v1, v2, v3, v4 }, [x1], #8
-# CHECK: ld4r.2s { v1, v2, v3, v4 }, [x1], #16
-# CHECK: ld4r.2d { v1, v2, v3, v4 }, [x1], #32
-# CHECK: ld4r.1d { v1, v2, v3, v4 }, [x1], #32
-
-0x20 0xe4 0x00 0x2f
-0x20 0xe4 0x00 0x6f
-0x20 0xe4 0x00 0x0f
-0x20 0xe4 0x00 0x4f
-
-# CHECK: movi d0, #0x000000000000ff
-# CHECK: movi.2d v0, #0x000000000000ff
-# CHECK: movi.8b v0, #1
-# CHECK: movi.16b v0, #1
-
-0x20 0x04 0x00 0x0f
-0x20 0x24 0x00 0x0f
-0x20 0x44 0x00 0x0f
-0x20 0x64 0x00 0x0f
-
-# CHECK: movi.2s v0, #1
-# CHECK: movi.2s v0, #1, lsl #8
-# CHECK: movi.2s v0, #1, lsl #16
-# CHECK: movi.2s v0, #1, lsl #24
-
-0x20 0x04 0x00 0x4f
-0x20 0x24 0x00 0x4f
-0x20 0x44 0x00 0x4f
-0x20 0x64 0x00 0x4f
-
-# CHECK: movi.4s v0, #1
-# CHECK: movi.4s v0, #1, lsl #8
-# CHECK: movi.4s v0, #1, lsl #16
-# CHECK: movi.4s v0, #1, lsl #24
-
-0x20 0x84 0x00 0x0f
-0x20 0xa4 0x00 0x0f
-
-# CHECK: movi.4h v0, #1
-# CHECK: movi.4h v0, #1, lsl #8
-
-0x20 0x84 0x00 0x4f
-0x20 0xa4 0x00 0x4f
-
-# CHECK: movi.8h v0, #1
-# CHECK: movi.8h v0, #1, lsl #8
-
-0x20 0x04 0x00 0x2f
-0x20 0x24 0x00 0x2f
-0x20 0x44 0x00 0x2f
-0x20 0x64 0x00 0x2f
-
-# CHECK: mvni.2s v0, #1
-# CHECK: mvni.2s v0, #1, lsl #8
-# CHECK: mvni.2s v0, #1, lsl #16
-# CHECK: mvni.2s v0, #1, lsl #24
-
-0x20 0x04 0x00 0x6f
-0x20 0x24 0x00 0x6f
-0x20 0x44 0x00 0x6f
-0x20 0x64 0x00 0x6f
-
-# CHECK: mvni.4s v0, #1
-# CHECK: mvni.4s v0, #1, lsl #8
-# CHECK: mvni.4s v0, #1, lsl #16
-# CHECK: mvni.4s v0, #1, lsl #24
-
-0x20 0x84 0x00 0x2f
-0x20 0xa4 0x00 0x2f
-
-# CHECK: mvni.4h v0, #1
-# CHECK: mvni.4h v0, #1, lsl #8
-
-0x20 0x84 0x00 0x6f
-0x20 0xa4 0x00 0x6f
-
-# CHECK: mvni.8h v0, #1
-# CHECK: mvni.8h v0, #1, lsl #8
-
-0x20 0xc4 0x00 0x2f
-0x20 0xd4 0x00 0x2f
-0x20 0xc4 0x00 0x6f
-0x20 0xd4 0x00 0x6f
-
-# CHECK: mvni.2s v0, #1, msl #8
-# CHECK: mvni.2s v0, #1, msl #16
-# CHECK: mvni.4s v0, #1, msl #8
-# CHECK: mvni.4s v0, #1, msl #16
-
-0x00 0x88 0x21 0x2e
-0x00 0x98 0x21 0x2e
-0x00 0x98 0xa1 0x2e
-0x00 0x98 0x21 0x0e
-0x00 0x88 0x21 0x0e
-0x00 0x88 0xa1 0x0e
-0x00 0x98 0xa1 0x0e
-
-# CHECK: frinta.2s v0, v0
-# CHECK: frintx.2s v0, v0
-# CHECK: frinti.2s v0, v0
-# CHECK: frintm.2s v0, v0
-# CHECK: frintn.2s v0, v0
-# CHECK: frintp.2s v0, v0
-# CHECK: frintz.2s v0, v0
-
-#===-------------------------------------------------------------------------===
-# AdvSIMD scalar x index instructions
-#===-------------------------------------------------------------------------===
-
-0x00 0x18 0xa0 0x5f
-0x00 0x18 0xc0 0x5f
-0x00 0x58 0xa0 0x5f
-0x00 0x58 0xc0 0x5f
-0x00 0x98 0xa0 0x7f
-0x00 0x98 0xc0 0x7f
-0x00 0x98 0xa0 0x5f
-0x00 0x98 0xc0 0x5f
-0x00 0x38 0x70 0x5f
-0x00 0x38 0xa0 0x5f
-0x00 0x78 0x70 0x5f
-0x00 0xc8 0x70 0x5f
-0x00 0xc8 0xa0 0x5f
-0x00 0xb8 0x70 0x5f
-0x00 0xb8 0xa0 0x5f
-0x00 0xd8 0x70 0x5f
-0x00 0xd8 0xa0 0x5f
-
-# CHECK: fmla.s s0, s0, v0[3]
-# CHECK: fmla.d d0, d0, v0[1]
-# CHECK: fmls.s s0, s0, v0[3]
-# CHECK: fmls.d d0, d0, v0[1]
-# CHECK: fmulx.s s0, s0, v0[3]
-# CHECK: fmulx.d d0, d0, v0[1]
-# CHECK: fmul.s s0, s0, v0[3]
-# CHECK: fmul.d d0, d0, v0[1]
-# CHECK: sqdmlal.h s0, h0, v0[7]
-# CHECK: sqdmlal.s d0, s0, v0[3]
-# CHECK: sqdmlsl.h s0, h0, v0[7]
-# CHECK: sqdmulh.h h0, h0, v0[7]
-# CHECK: sqdmulh.s s0, s0, v0[3]
-# CHECK: sqdmull.h s0, h0, v0[7]
-# CHECK: sqdmull.s d0, s0, v0[3]
-# CHECK: sqrdmulh.h h0, h0, v0[7]
-# CHECK: sqrdmulh.s s0, s0, v0[3]
-
-#===-------------------------------------------------------------------------===
-# AdvSIMD vector x index instructions
-#===-------------------------------------------------------------------------===
-
- 0x00 0x10 0x80 0x0f
- 0x00 0x10 0xa0 0x4f
- 0x00 0x18 0xc0 0x4f
- 0x00 0x50 0x80 0x0f
- 0x00 0x50 0xa0 0x4f
- 0x00 0x58 0xc0 0x4f
- 0x00 0x90 0x80 0x2f
- 0x00 0x90 0xa0 0x6f
- 0x00 0x98 0xc0 0x6f
- 0x00 0x90 0x80 0x0f
- 0x00 0x90 0xa0 0x4f
- 0x00 0x98 0xc0 0x4f
- 0x00 0x00 0x40 0x2f
- 0x00 0x00 0x50 0x6f
- 0x00 0x08 0x80 0x2f
- 0x00 0x08 0xa0 0x6f
- 0x00 0x40 0x40 0x2f
- 0x00 0x40 0x50 0x6f
- 0x00 0x48 0x80 0x2f
- 0x00 0x48 0xa0 0x6f
- 0x00 0x80 0x40 0x0f
- 0x00 0x80 0x50 0x4f
- 0x00 0x88 0x80 0x0f
- 0x00 0x88 0xa0 0x4f
- 0x00 0x20 0x40 0x0f
- 0x00 0x20 0x50 0x4f
- 0x00 0x28 0x80 0x0f
- 0x00 0x28 0xa0 0x4f
- 0x00 0x60 0x40 0x0f
- 0x00 0x60 0x50 0x4f
- 0x00 0x68 0x80 0x0f
- 0x00 0x68 0xa0 0x4f
- 0x00 0xa0 0x40 0x0f
- 0x00 0xa0 0x50 0x4f
- 0x00 0xa8 0x80 0x0f
- 0x00 0xa8 0xa0 0x4f
- 0x00 0x30 0x40 0x0f
- 0x00 0x30 0x50 0x4f
- 0x00 0x38 0x80 0x0f
- 0x00 0x38 0xa0 0x4f
- 0x00 0x70 0x40 0x0f
- 0x00 0x70 0x50 0x4f
- 0x00 0x78 0x80 0x0f
- 0x00 0x78 0xa0 0x4f
- 0x00 0xc0 0x40 0x0f
- 0x00 0xc0 0x50 0x4f
- 0x00 0xc8 0x80 0x0f
- 0x00 0xc8 0xa0 0x4f
- 0x00 0xb0 0x40 0x0f
- 0x00 0xb0 0x50 0x4f
- 0x00 0xb8 0x80 0x0f
- 0x00 0xb8 0xa0 0x4f
- 0x00 0xd0 0x40 0x0f
- 0x00 0xd0 0x50 0x4f
- 0x00 0xd8 0x80 0x0f
- 0x00 0xd8 0xa0 0x4f
- 0x00 0x20 0x40 0x2f
- 0x00 0x20 0x50 0x6f
- 0x00 0x28 0x80 0x2f
- 0x00 0x28 0xa0 0x6f
- 0x00 0x60 0x40 0x2f
- 0x00 0x60 0x50 0x6f
- 0x00 0x68 0x80 0x2f
- 0x00 0x68 0xa0 0x6f
- 0x00 0xa0 0x40 0x2f
- 0x00 0xa0 0x50 0x6f
- 0x00 0xa8 0x80 0x2f
- 0x00 0xa8 0xa0 0x6f
-
-# CHECK: fmla.2s v0, v0, v0[0]
-# CHECK: fmla.4s v0, v0, v0[1]
-# CHECK: fmla.2d v0, v0, v0[1]
-# CHECK: fmls.2s v0, v0, v0[0]
-# CHECK: fmls.4s v0, v0, v0[1]
-# CHECK: fmls.2d v0, v0, v0[1]
-# CHECK: fmulx.2s v0, v0, v0[0]
-# CHECK: fmulx.4s v0, v0, v0[1]
-# CHECK: fmulx.2d v0, v0, v0[1]
-# CHECK: fmul.2s v0, v0, v0[0]
-# CHECK: fmul.4s v0, v0, v0[1]
-# CHECK: fmul.2d v0, v0, v0[1]
-# CHECK: mla.4h v0, v0, v0[0]
-# CHECK: mla.8h v0, v0, v0[1]
-# CHECK: mla.2s v0, v0, v0[2]
-# CHECK: mla.4s v0, v0, v0[3]
-# CHECK: mls.4h v0, v0, v0[0]
-# CHECK: mls.8h v0, v0, v0[1]
-# CHECK: mls.2s v0, v0, v0[2]
-# CHECK: mls.4s v0, v0, v0[3]
-# CHECK: mul.4h v0, v0, v0[0]
-# CHECK: mul.8h v0, v0, v0[1]
-# CHECK: mul.2s v0, v0, v0[2]
-# CHECK: mul.4s v0, v0, v0[3]
-# CHECK: smlal.4s v0, v0, v0[0]
-# CHECK: smlal2.4s v0, v0, v0[1]
-# CHECK: smlal.2d v0, v0, v0[2]
-# CHECK: smlal2.2d v0, v0, v0[3]
-# CHECK: smlsl.4s v0, v0, v0[0]
-# CHECK: smlsl2.4s v0, v0, v0[1]
-# CHECK: smlsl.2d v0, v0, v0[2]
-# CHECK: smlsl2.2d v0, v0, v0[3]
-# CHECK: smull.4s v0, v0, v0[0]
-# CHECK: smull2.4s v0, v0, v0[1]
-# CHECK: smull.2d v0, v0, v0[2]
-# CHECK: smull2.2d v0, v0, v0[3]
-# CHECK: sqdmlal.4s v0, v0, v0[0]
-# CHECK: sqdmlal2.4s v0, v0, v0[1]
-# CHECK: sqdmlal.2d v0, v0, v0[2]
-# CHECK: sqdmlal2.2d v0, v0, v0[3]
-# CHECK: sqdmlsl.4s v0, v0, v0[0]
-# CHECK: sqdmlsl2.4s v0, v0, v0[1]
-# CHECK: sqdmlsl.2d v0, v0, v0[2]
-# CHECK: sqdmlsl2.2d v0, v0, v0[3]
-# CHECK: sqdmulh.4h v0, v0, v0[0]
-# CHECK: sqdmulh.8h v0, v0, v0[1]
-# CHECK: sqdmulh.2s v0, v0, v0[2]
-# CHECK: sqdmulh.4s v0, v0, v0[3]
-# CHECK: sqdmull.4s v0, v0, v0[0]
-# CHECK: sqdmull2.4s v0, v0, v0[1]
-# CHECK: sqdmull.2d v0, v0, v0[2]
-# CHECK: sqdmull2.2d v0, v0, v0[3]
-# CHECK: sqrdmulh.4h v0, v0, v0[0]
-# CHECK: sqrdmulh.8h v0, v0, v0[1]
-# CHECK: sqrdmulh.2s v0, v0, v0[2]
-# CHECK: sqrdmulh.4s v0, v0, v0[3]
-# CHECK: umlal.4s v0, v0, v0[0]
-# CHECK: umlal2.4s v0, v0, v0[1]
-# CHECK: umlal.2d v0, v0, v0[2]
-# CHECK: umlal2.2d v0, v0, v0[3]
-# CHECK: umlsl.4s v0, v0, v0[0]
-# CHECK: umlsl2.4s v0, v0, v0[1]
-# CHECK: umlsl.2d v0, v0, v0[2]
-# CHECK: umlsl2.2d v0, v0, v0[3]
-# CHECK: umull.4s v0, v0, v0[0]
-# CHECK: umull2.4s v0, v0, v0[1]
-# CHECK: umull.2d v0, v0, v0[2]
-# CHECK: umull2.2d v0, v0, v0[3]
-
-
-#===-------------------------------------------------------------------------===
-# AdvSIMD scalar + shift instructions
-#===-------------------------------------------------------------------------===
-
- 0x00 0x54 0x41 0x5f
- 0x00 0x54 0x41 0x7f
- 0x00 0x9c 0x09 0x5f
- 0x00 0x9c 0x12 0x5f
- 0x00 0x9c 0x23 0x5f
- 0x00 0x8c 0x09 0x7f
- 0x00 0x8c 0x12 0x7f
- 0x00 0x8c 0x23 0x7f
- 0x00 0x64 0x09 0x7f
- 0x00 0x64 0x12 0x7f
- 0x00 0x64 0x23 0x7f
- 0x00 0x64 0x44 0x7f
- 0x00 0x74 0x09 0x5f
- 0x00 0x74 0x12 0x5f
- 0x00 0x74 0x23 0x5f
- 0x00 0x74 0x44 0x5f
- 0x00 0x94 0x09 0x5f
- 0x00 0x94 0x12 0x5f
- 0x00 0x94 0x23 0x5f
- 0x00 0x84 0x09 0x7f
- 0x00 0x84 0x12 0x7f
- 0x00 0x84 0x23 0x7f
- 0x00 0x44 0x41 0x7f
- 0x00 0x24 0x41 0x5f
- 0x00 0x34 0x41 0x5f
- 0x00 0x04 0x41 0x5f
- 0x00 0xe4 0x21 0x7f
- 0x00 0xe4 0x42 0x7f
- 0x00 0x9c 0x09 0x7f
- 0x00 0x9c 0x12 0x7f
- 0x00 0x9c 0x23 0x7f
- 0x00 0x74 0x09 0x7f
- 0x00 0x74 0x12 0x7f
- 0x00 0x74 0x23 0x7f
- 0x00 0x74 0x44 0x7f
- 0x00 0x94 0x09 0x7f
- 0x00 0x94 0x12 0x7f
- 0x00 0x94 0x23 0x7f
- 0x00 0x24 0x41 0x7f
- 0x00 0x34 0x41 0x7f
- 0x00 0x04 0x41 0x7f
- 0x00 0x14 0x41 0x7f
-
-# CHECK: shl d0, d0, #1
-# CHECK: sli d0, d0, #1
-# CHECK: sqrshrn b0, h0, #7
-# CHECK: sqrshrn h0, s0, #14
-# CHECK: sqrshrn s0, d0, #29
-# CHECK: sqrshrun b0, h0, #7
-# CHECK: sqrshrun h0, s0, #14
-# CHECK: sqrshrun s0, d0, #29
-# CHECK: sqshlu b0, b0, #1
-# CHECK: sqshlu h0, h0, #2
-# CHECK: sqshlu s0, s0, #3
-# CHECK: sqshlu d0, d0, #4
-# CHECK: sqshl b0, b0, #1
-# CHECK: sqshl h0, h0, #2
-# CHECK: sqshl s0, s0, #3
-# CHECK: sqshl d0, d0, #4
-# CHECK: sqshrn b0, h0, #7
-# CHECK: sqshrn h0, s0, #14
-# CHECK: sqshrn s0, d0, #29
-# CHECK: sqshrun b0, h0, #7
-# CHECK: sqshrun h0, s0, #14
-# CHECK: sqshrun s0, d0, #29
-# CHECK: sri d0, d0, #63
-# CHECK: srshr d0, d0, #63
-# CHECK: srsra d0, d0, #63
-# CHECK: sshr d0, d0, #63
-# CHECK: ucvtf s0, s0, #31
-# CHECK: ucvtf d0, d0, #62
-# CHECK: uqrshrn b0, h0, #7
-# CHECK: uqrshrn h0, s0, #14
-# CHECK: uqrshrn s0, d0, #29
-# CHECK: uqshl b0, b0, #1
-# CHECK: uqshl h0, h0, #2
-# CHECK: uqshl s0, s0, #3
-# CHECK: uqshl d0, d0, #4
-# CHECK: uqshrn b0, h0, #7
-# CHECK: uqshrn h0, s0, #14
-# CHECK: uqshrn s0, d0, #29
-# CHECK: urshr d0, d0, #63
-# CHECK: ursra d0, d0, #63
-# CHECK: ushr d0, d0, #63
-# CHECK: usra d0, d0, #63
-
-#===-------------------------------------------------------------------------===
-# AdvSIMD vector + shift instructions
-#===-------------------------------------------------------------------------===
-
- 0x00 0xfc 0x21 0x0f
- 0x00 0xfc 0x22 0x4f
- 0x00 0xfc 0x43 0x4f
- 0x00 0xfc 0x21 0x2f
- 0x00 0xfc 0x22 0x6f
- 0x00 0xfc 0x43 0x6f
- 0x00 0x8c 0x09 0x0f
- 0x00 0x8c 0x0a 0x4f
- 0x00 0x8c 0x13 0x0f
- 0x00 0x8c 0x14 0x4f
- 0x00 0x8c 0x25 0x0f
- 0x00 0x8c 0x26 0x4f
- 0x00 0xe4 0x21 0x0f
- 0x00 0xe4 0x22 0x4f
- 0x00 0xe4 0x43 0x4f
- 0x00 0x54 0x09 0x0f
- 0x00 0x54 0x0a 0x4f
- 0x00 0x54 0x13 0x0f
- 0x00 0x54 0x14 0x4f
- 0x00 0x54 0x25 0x0f
- 0x00 0x54 0x26 0x4f
- 0x00 0x54 0x47 0x4f
- 0x00 0x84 0x09 0x0f
- 0x00 0x84 0x0a 0x4f
- 0x00 0x84 0x13 0x0f
- 0x00 0x84 0x14 0x4f
- 0x00 0x84 0x25 0x0f
- 0x00 0x84 0x26 0x4f
- 0x00 0x54 0x09 0x2f
- 0x00 0x54 0x0a 0x6f
- 0x00 0x54 0x13 0x2f
- 0x00 0x54 0x14 0x6f
- 0x00 0x54 0x25 0x2f
- 0x00 0x54 0x26 0x6f
- 0x00 0x54 0x47 0x6f
- 0x00 0x9c 0x09 0x0f
- 0x00 0x9c 0x0a 0x4f
- 0x00 0x9c 0x13 0x0f
- 0x00 0x9c 0x14 0x4f
- 0x00 0x9c 0x25 0x0f
- 0x00 0x9c 0x26 0x4f
- 0x00 0x8c 0x09 0x2f
- 0x00 0x8c 0x0a 0x6f
- 0x00 0x8c 0x13 0x2f
- 0x00 0x8c 0x14 0x6f
- 0x00 0x8c 0x25 0x2f
- 0x00 0x8c 0x26 0x6f
- 0x00 0x64 0x09 0x2f
- 0x00 0x64 0x0a 0x6f
- 0x00 0x64 0x13 0x2f
- 0x00 0x64 0x14 0x6f
- 0x00 0x64 0x25 0x2f
- 0x00 0x64 0x26 0x6f
- 0x00 0x64 0x47 0x6f
- 0x00 0x74 0x09 0x0f
- 0x00 0x74 0x0a 0x4f
- 0x00 0x74 0x13 0x0f
- 0x00 0x74 0x14 0x4f
- 0x00 0x74 0x25 0x0f
- 0x00 0x74 0x26 0x4f
- 0x00 0x74 0x47 0x4f
- 0x00 0x94 0x09 0x0f
- 0x00 0x94 0x0a 0x4f
- 0x00 0x94 0x13 0x0f
- 0x00 0x94 0x14 0x4f
- 0x00 0x94 0x25 0x0f
- 0x00 0x94 0x26 0x4f
- 0x00 0x84 0x09 0x2f
- 0x00 0x84 0x0a 0x6f
- 0x00 0x84 0x13 0x2f
- 0x00 0x84 0x14 0x6f
- 0x00 0x84 0x25 0x2f
- 0x00 0x84 0x26 0x6f
- 0x00 0x44 0x09 0x2f
- 0x00 0x44 0x0a 0x6f
- 0x00 0x44 0x13 0x2f
- 0x00 0x44 0x14 0x6f
- 0x00 0x44 0x25 0x2f
- 0x00 0x44 0x26 0x6f
- 0x00 0x44 0x47 0x6f
- 0x00 0x24 0x09 0x0f
- 0x00 0x24 0x0a 0x4f
- 0x00 0x24 0x13 0x0f
- 0x00 0x24 0x14 0x4f
- 0x00 0x24 0x25 0x0f
- 0x00 0x24 0x26 0x4f
- 0x00 0x24 0x47 0x4f
- 0x00 0x34 0x09 0x0f
- 0x00 0x34 0x0a 0x4f
- 0x00 0x34 0x13 0x0f
- 0x00 0x34 0x14 0x4f
- 0x00 0x34 0x25 0x0f
- 0x00 0x34 0x26 0x4f
- 0x00 0x34 0x47 0x4f
- 0x00 0xa4 0x09 0x0f
- 0x00 0xa4 0x0a 0x4f
- 0x00 0xa4 0x13 0x0f
- 0x00 0xa4 0x14 0x4f
- 0x00 0xa4 0x25 0x0f
- 0x00 0xa4 0x26 0x4f
- 0x00 0x04 0x09 0x0f
- 0x00 0x04 0x0a 0x4f
- 0x00 0x04 0x13 0x0f
- 0x00 0x04 0x14 0x4f
- 0x00 0x04 0x25 0x0f
- 0x00 0x04 0x26 0x4f
- 0x00 0x04 0x47 0x4f
- 0x00 0x04 0x09 0x0f
- 0x00 0x14 0x0a 0x4f
- 0x00 0x14 0x13 0x0f
- 0x00 0x14 0x14 0x4f
- 0x00 0x14 0x25 0x0f
- 0x00 0x14 0x26 0x4f
- 0x00 0x14 0x47 0x4f
- 0x00 0x14 0x40 0x5f
- 0x00 0xe4 0x21 0x2f
- 0x00 0xe4 0x22 0x6f
- 0x00 0xe4 0x43 0x6f
- 0x00 0x9c 0x09 0x2f
- 0x00 0x9c 0x0a 0x6f
- 0x00 0x9c 0x13 0x2f
- 0x00 0x9c 0x14 0x6f
- 0x00 0x9c 0x25 0x2f
- 0x00 0x9c 0x26 0x6f
- 0x00 0x74 0x09 0x2f
- 0x00 0x74 0x0a 0x6f
- 0x00 0x74 0x13 0x2f
- 0x00 0x74 0x14 0x6f
- 0x00 0x74 0x25 0x2f
- 0x00 0x74 0x26 0x6f
- 0x00 0x74 0x47 0x6f
- 0x00 0x94 0x09 0x2f
- 0x00 0x94 0x0a 0x6f
- 0x00 0x94 0x13 0x2f
- 0x00 0x94 0x14 0x6f
- 0x00 0x94 0x25 0x2f
- 0x00 0x94 0x26 0x6f
- 0x00 0x24 0x09 0x2f
- 0x00 0x24 0x0a 0x6f
- 0x00 0x24 0x13 0x2f
- 0x00 0x24 0x14 0x6f
- 0x00 0x24 0x25 0x2f
- 0x00 0x24 0x26 0x6f
- 0x00 0x24 0x47 0x6f
- 0x00 0x34 0x09 0x2f
- 0x00 0x34 0x0a 0x6f
- 0x00 0x34 0x13 0x2f
- 0x00 0x34 0x14 0x6f
- 0x00 0x34 0x25 0x2f
- 0x00 0x34 0x26 0x6f
- 0x00 0x34 0x47 0x6f
- 0x00 0xa4 0x09 0x2f
- 0x00 0xa4 0x0a 0x6f
- 0x00 0xa4 0x13 0x2f
- 0x00 0xa4 0x14 0x6f
- 0x00 0xa4 0x25 0x2f
- 0x00 0xa4 0x26 0x6f
- 0x00 0x04 0x09 0x2f
- 0x00 0x04 0x0a 0x6f
- 0x00 0x04 0x13 0x2f
- 0x00 0x04 0x14 0x6f
- 0x00 0x04 0x25 0x2f
- 0x00 0x04 0x26 0x6f
- 0x00 0x04 0x47 0x6f
- 0x00 0x14 0x09 0x2f
- 0x00 0x14 0x0a 0x6f
- 0x00 0x14 0x13 0x2f
- 0x00 0x14 0x14 0x6f
- 0x00 0x14 0x25 0x2f
- 0x00 0x14 0x26 0x6f
- 0x00 0x14 0x47 0x6f
-
-# CHECK: fcvtzs.2s v0, v0, #31
-# CHECK: fcvtzs.4s v0, v0, #30
-# CHECK: fcvtzs.2d v0, v0, #61
-# CHECK: fcvtzu.2s v0, v0, #31
-# CHECK: fcvtzu.4s v0, v0, #30
-# CHECK: fcvtzu.2d v0, v0, #61
-# CHECK: rshrn.8b v0, v0, #7
-# CHECK: rshrn2.16b v0, v0, #6
-# CHECK: rshrn.4h v0, v0, #13
-# CHECK: rshrn2.8h v0, v0, #12
-# CHECK: rshrn.2s v0, v0, #27
-# CHECK: rshrn2.4s v0, v0, #26
-# CHECK: scvtf.2s v0, v0, #31
-# CHECK: scvtf.4s v0, v0, #30
-# CHECK: scvtf.2d v0, v0, #61
-# CHECK: shl.8b v0, v0, #1
-# CHECK: shl.16b v0, v0, #2
-# CHECK: shl.4h v0, v0, #3
-# CHECK: shl.8h v0, v0, #4
-# CHECK: shl.2s v0, v0, #5
-# CHECK: shl.4s v0, v0, #6
-# CHECK: shl.2d v0, v0, #7
-# CHECK: shrn.8b v0, v0, #7
-# CHECK: shrn2.16b v0, v0, #6
-# CHECK: shrn.4h v0, v0, #13
-# CHECK: shrn2.8h v0, v0, #12
-# CHECK: shrn.2s v0, v0, #27
-# CHECK: shrn2.4s v0, v0, #26
-# CHECK: sli.8b v0, v0, #1
-# CHECK: sli.16b v0, v0, #2
-# CHECK: sli.4h v0, v0, #3
-# CHECK: sli.8h v0, v0, #4
-# CHECK: sli.2s v0, v0, #5
-# CHECK: sli.4s v0, v0, #6
-# CHECK: sli.2d v0, v0, #7
-# CHECK: sqrshrn.8b v0, v0, #7
-# CHECK: sqrshrn2.16b v0, v0, #6
-# CHECK: sqrshrn.4h v0, v0, #13
-# CHECK: sqrshrn2.8h v0, v0, #12
-# CHECK: sqrshrn.2s v0, v0, #27
-# CHECK: sqrshrn2.4s v0, v0, #26
-# CHECK: sqrshrun.8b v0, v0, #7
-# CHECK: sqrshrun2.16b v0, v0, #6
-# CHECK: sqrshrun.4h v0, v0, #13
-# CHECK: sqrshrun2.8h v0, v0, #12
-# CHECK: sqrshrun.2s v0, v0, #27
-# CHECK: sqrshrun2.4s v0, v0, #26
-# CHECK: sqshlu.8b v0, v0, #1
-# CHECK: sqshlu.16b v0, v0, #2
-# CHECK: sqshlu.4h v0, v0, #3
-# CHECK: sqshlu.8h v0, v0, #4
-# CHECK: sqshlu.2s v0, v0, #5
-# CHECK: sqshlu.4s v0, v0, #6
-# CHECK: sqshlu.2d v0, v0, #7
-# CHECK: sqshl.8b v0, v0, #1
-# CHECK: sqshl.16b v0, v0, #2
-# CHECK: sqshl.4h v0, v0, #3
-# CHECK: sqshl.8h v0, v0, #4
-# CHECK: sqshl.2s v0, v0, #5
-# CHECK: sqshl.4s v0, v0, #6
-# CHECK: sqshl.2d v0, v0, #7
-# CHECK: sqshrn.8b v0, v0, #7
-# CHECK: sqshrn2.16b v0, v0, #6
-# CHECK: sqshrn.4h v0, v0, #13
-# CHECK: sqshrn2.8h v0, v0, #12
-# CHECK: sqshrn.2s v0, v0, #27
-# CHECK: sqshrn2.4s v0, v0, #26
-# CHECK: sqshrun.8b v0, v0, #7
-# CHECK: sqshrun2.16b v0, v0, #6
-# CHECK: sqshrun.4h v0, v0, #13
-# CHECK: sqshrun2.8h v0, v0, #12
-# CHECK: sqshrun.2s v0, v0, #27
-# CHECK: sqshrun2.4s v0, v0, #26
-# CHECK: sri.8b v0, v0, #7
-# CHECK: sri.16b v0, v0, #6
-# CHECK: sri.4h v0, v0, #13
-# CHECK: sri.8h v0, v0, #12
-# CHECK: sri.2s v0, v0, #27
-# CHECK: sri.4s v0, v0, #26
-# CHECK: sri.2d v0, v0, #57
-# CHECK: srshr.8b v0, v0, #7
-# CHECK: srshr.16b v0, v0, #6
-# CHECK: srshr.4h v0, v0, #13
-# CHECK: srshr.8h v0, v0, #12
-# CHECK: srshr.2s v0, v0, #27
-# CHECK: srshr.4s v0, v0, #26
-# CHECK: srshr.2d v0, v0, #57
-# CHECK: srsra.8b v0, v0, #7
-# CHECK: srsra.16b v0, v0, #6
-# CHECK: srsra.4h v0, v0, #13
-# CHECK: srsra.8h v0, v0, #12
-# CHECK: srsra.2s v0, v0, #27
-# CHECK: srsra.4s v0, v0, #26
-# CHECK: srsra.2d v0, v0, #57
-# CHECK: sshll.8h v0, v0, #1
-# CHECK: sshll2.8h v0, v0, #2
-# CHECK: sshll.4s v0, v0, #3
-# CHECK: sshll2.4s v0, v0, #4
-# CHECK: sshll.2d v0, v0, #5
-# CHECK: sshll2.2d v0, v0, #6
-# CHECK: sshr.8b v0, v0, #7
-# CHECK: sshr.16b v0, v0, #6
-# CHECK: sshr.4h v0, v0, #13
-# CHECK: sshr.8h v0, v0, #12
-# CHECK: sshr.2s v0, v0, #27
-# CHECK: sshr.4s v0, v0, #26
-# CHECK: sshr.2d v0, v0, #57
-# CHECK: ssra.8b v0, v0, #7
-# CHECK: ssra.16b v0, v0, #6
-# CHECK: ssra.4h v0, v0, #13
-# CHECK: ssra.8h v0, v0, #12
-# CHECK: ssra.2s v0, v0, #27
-# CHECK: ssra.4s v0, v0, #26
-# CHECK: ssra.2d v0, v0, #57
-# CHECK: ssra d0, d0, #64
-# CHECK: ucvtf.2s v0, v0, #31
-# CHECK: ucvtf.4s v0, v0, #30
-# CHECK: ucvtf.2d v0, v0, #61
-# CHECK: uqrshrn.8b v0, v0, #7
-# CHECK: uqrshrn2.16b v0, v0, #6
-# CHECK: uqrshrn.4h v0, v0, #13
-# CHECK: uqrshrn2.8h v0, v0, #12
-# CHECK: uqrshrn.2s v0, v0, #27
-# CHECK: uqrshrn2.4s v0, v0, #26
-# CHECK: uqshl.8b v0, v0, #1
-# CHECK: uqshl.16b v0, v0, #2
-# CHECK: uqshl.4h v0, v0, #3
-# CHECK: uqshl.8h v0, v0, #4
-# CHECK: uqshl.2s v0, v0, #5
-# CHECK: uqshl.4s v0, v0, #6
-# CHECK: uqshl.2d v0, v0, #7
-# CHECK: uqshrn.8b v0, v0, #7
-# CHECK: uqshrn2.16b v0, v0, #6
-# CHECK: uqshrn.4h v0, v0, #13
-# CHECK: uqshrn2.8h v0, v0, #12
-# CHECK: uqshrn.2s v0, v0, #27
-# CHECK: uqshrn2.4s v0, v0, #26
-# CHECK: urshr.8b v0, v0, #7
-# CHECK: urshr.16b v0, v0, #6
-# CHECK: urshr.4h v0, v0, #13
-# CHECK: urshr.8h v0, v0, #12
-# CHECK: urshr.2s v0, v0, #27
-# CHECK: urshr.4s v0, v0, #26
-# CHECK: urshr.2d v0, v0, #57
-# CHECK: ursra.8b v0, v0, #7
-# CHECK: ursra.16b v0, v0, #6
-# CHECK: ursra.4h v0, v0, #13
-# CHECK: ursra.8h v0, v0, #12
-# CHECK: ursra.2s v0, v0, #27
-# CHECK: ursra.4s v0, v0, #26
-# CHECK: ursra.2d v0, v0, #57
-# CHECK: ushll.8h v0, v0, #1
-# CHECK: ushll2.8h v0, v0, #2
-# CHECK: ushll.4s v0, v0, #3
-# CHECK: ushll2.4s v0, v0, #4
-# CHECK: ushll.2d v0, v0, #5
-# CHECK: ushll2.2d v0, v0, #6
-# CHECK: ushr.8b v0, v0, #7
-# CHECK: ushr.16b v0, v0, #6
-# CHECK: ushr.4h v0, v0, #13
-# CHECK: ushr.8h v0, v0, #12
-# CHECK: ushr.2s v0, v0, #27
-# CHECK: ushr.4s v0, v0, #26
-# CHECK: ushr.2d v0, v0, #57
-# CHECK: usra.8b v0, v0, #7
-# CHECK: usra.16b v0, v0, #6
-# CHECK: usra.4h v0, v0, #13
-# CHECK: usra.8h v0, v0, #12
-# CHECK: usra.2s v0, v0, #27
-# CHECK: usra.4s v0, v0, #26
-# CHECK: usra.2d v0, v0, #57
-
-
- 0x00 0xe0 0x20 0x0e
- 0x00 0xe0 0x20 0x4e
- 0x00 0xe0 0xe0 0x0e
- 0x00 0xe0 0xe0 0x4e
-
-# CHECK: pmull.8h v0, v0, v0
-# CHECK: pmull2.8h v0, v0, v0
-# CHECK: pmull.1q v0, v0, v0
-# CHECK: pmull2.1q v0, v0, v0
-
- 0x41 0xd8 0x70 0x7e
- 0x83 0xd8 0x30 0x7e
-# CHECK: faddp.2d d1, v2
-# CHECK: faddp.2s s3, v4
-
- 0x82 0x60 0x01 0x4e
- 0x80 0x60 0x01 0x0e
- 0xa2 0x00 0x01 0x4e
- 0xa0 0x00 0x01 0x0e
- 0xa2 0x40 0x01 0x4e
- 0xa0 0x40 0x01 0x0e
- 0xc2 0x20 0x01 0x4e
- 0xc0 0x20 0x01 0x0e
-
-# CHECK: tbl.16b v2, { v4, v5, v6, v7 }, v1
-# CHECK: tbl.8b v0, { v4, v5, v6, v7 }, v1
-# CHECK: tbl.16b v2, { v5 }, v1
-# CHECK: tbl.8b v0, { v5 }, v1
-# CHECK: tbl.16b v2, { v5, v6, v7 }, v1
-# CHECK: tbl.8b v0, { v5, v6, v7 }, v1
-# CHECK: tbl.16b v2, { v6, v7 }, v1
-# CHECK: tbl.8b v0, { v6, v7 }, v1
-#
- 0x82 0x70 0x01 0x4e
- 0x80 0x70 0x01 0x0e
- 0xa2 0x10 0x01 0x4e
- 0xa0 0x10 0x01 0x0e
- 0xa2 0x50 0x01 0x4e
- 0xa0 0x50 0x01 0x0e
- 0xc2 0x30 0x01 0x4e
- 0xc0 0x30 0x01 0x0e
-
-# CHECK: tbx.16b v2, { v4, v5, v6, v7 }, v1
-# CHECK: tbx.8b v0, { v4, v5, v6, v7 }, v1
-# CHECK: tbx.16b v2, { v5 }, v1
-# CHECK: tbx.8b v0, { v5 }, v1
-# CHECK: tbx.16b v2, { v5, v6, v7 }, v1
-# CHECK: tbx.8b v0, { v5, v6, v7 }, v1
-# CHECK: tbx.16b v2, { v6, v7 }, v1
-# CHECK: tbx.8b v0, { v6, v7 }, v1
-#
-
-0x00 0x80 0x20 0x0e
-0x00 0x80 0x20 0x4e
-0x00 0x80 0xa0 0x0e
-0x00 0x80 0xa0 0x4e
-
-# CHECK: smlal.8h v0, v0, v0
-# CHECK: smlal2.8h v0, v0, v0
-# CHECK: smlal.2d v0, v0, v0
-# CHECK: smlal2.2d v0, v0, v0
-
-0x00 0x80 0x20 0x2e
-0x00 0x80 0x20 0x6e
-0x00 0x80 0xa0 0x2e
-0x00 0x80 0xa0 0x6e
-
-# CHECK: umlal.8h v0, v0, v0
-# CHECK: umlal2.8h v0, v0, v0
-# CHECK: umlal.2d v0, v0, v0
-# CHECK: umlal2.2d v0, v0, v0
-
-0x00 0x90 0x60 0x5e
-0x00 0x90 0xa0 0x5e
-0x00 0xb0 0x60 0x5e
-0x00 0xb0 0xa0 0x5e
-
-# CHECK: sqdmlal s0, h0, h0
-# CHECK: sqdmlal d0, s0, s0
-# CHECK: sqdmlsl s0, h0, h0
-# CHECK: sqdmlsl d0, s0, s0
-
-0xaa 0xc5 0xc7 0x4d
-0xaa 0xc9 0xc7 0x4d
-0xaa 0xc1 0xc7 0x4d
-
-# CHECK: ld1r.8h { v10 }, [x13], x7
-# CHECK: ld1r.4s { v10 }, [x13], x7
-# CHECK: ld1r.16b { v10 }, [x13], x7
-
-0x00 0xd0 0x60 0x5e
-0x00 0xd0 0xa0 0x5e
-# CHECK: sqdmull s0, h0, h0
-# CHECK: sqdmull d0, s0, s0
-
-0x00 0xd8 0xa1 0x7e
-0x00 0xd8 0xe1 0x7e
-
-# CHECK: frsqrte s0, s0
-# CHECK: frsqrte d0, d0
-
-0xca 0xcd 0xc7 0x4d
-0xea 0xc9 0xe7 0x4d
-0xea 0xe9 0xc7 0x4d
-0xea 0xe9 0xe7 0x4d
-# CHECK: ld1r.2d { v10 }, [x14], x7
-# CHECK: ld2r.4s { v10, v11 }, [x15], x7
-# CHECK: ld3r.4s { v10, v11, v12 }, [x15], x7
-# CHECK: ld4r.4s { v10, v11, v12, v13 }, [x15], x7
-
-#===-------------------------------------------------------------------------===
-# AdvSIMD scalar three same
-#===-------------------------------------------------------------------------===
-0x62 0xdc 0x21 0x5e
-# CHECK: fmulx s2, s3, s1
-0x62 0xdc 0x61 0x5e
-# CHECK: fmulx d2, d3, d1
-
-
-# rdar://12511369
-0xe8 0x6b 0xdf 0x4c
-# CHECK: ld1.4s { v8, v9, v10 }, [sp], #48
diff --git a/test/MC/Disassembler/ARM64/arithmetic.txt b/test/MC/Disassembler/ARM64/arithmetic.txt
deleted file mode 100644
index 3981219..0000000
--- a/test/MC/Disassembler/ARM64/arithmetic.txt
+++ /dev/null
@@ -1,522 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-
-#==---------------------------------------------------------------------------==
-# Add/Subtract with carry/borrow
-#==---------------------------------------------------------------------------==
-
-0x41 0x00 0x03 0x1a
-0x41 0x00 0x03 0x9a
-0x85 0x00 0x03 0x3a
-0x85 0x00 0x03 0xba
-
-# CHECK: adc w1, w2, w3
-# CHECK: adc x1, x2, x3
-# CHECK: adcs w5, w4, w3
-# CHECK: adcs x5, x4, x3
-
-0x41 0x00 0x03 0x5a
-0x41 0x00 0x03 0xda
-0x41 0x00 0x03 0x7a
-0x41 0x00 0x03 0xfa
-
-# CHECK: sbc w1, w2, w3
-# CHECK: sbc x1, x2, x3
-# CHECK: sbcs w1, w2, w3
-# CHECK: sbcs x1, x2, x3
-
-#==---------------------------------------------------------------------------==
-# Add/Subtract with (optionally shifted) immediate
-#==---------------------------------------------------------------------------==
-
-0x83 0x00 0x10 0x11
-0x83 0x00 0x10 0x91
-
-# CHECK: add w3, w4, #1024
-# CHECK: add x3, x4, #1024
-
-0x83 0x00 0x50 0x11
-0x83 0x00 0x40 0x11
-0x83 0x00 0x50 0x91
-0x83 0x00 0x40 0x91
-0xff 0x83 0x00 0x91
-
-# CHECK: add w3, w4, #4194304
-# CHECK: add x3, x4, #4194304
-# CHECK: add x3, x4, #0, lsl #12
-# CHECK: add sp, sp, #32
-
-0x83 0x00 0x10 0x31
-0x83 0x00 0x50 0x31
-0x83 0x00 0x10 0xb1
-0x83 0x00 0x50 0xb1
-
-# CHECK: adds w3, w4, #1024
-# CHECK: adds w3, w4, #4194304
-# CHECK: adds x3, x4, #1024
-# CHECK: adds x3, x4, #4194304
-
-0x83 0x00 0x10 0x51
-0x83 0x00 0x50 0x51
-0x83 0x00 0x10 0xd1
-0x83 0x00 0x50 0xd1
-0xff 0x83 0x00 0xd1
-
-# CHECK: sub w3, w4, #1024
-# CHECK: sub w3, w4, #4194304
-# CHECK: sub x3, x4, #1024
-# CHECK: sub x3, x4, #4194304
-# CHECK: sub sp, sp, #32
-
-0x83 0x00 0x10 0x71
-0x83 0x00 0x50 0x71
-0x83 0x00 0x10 0xf1
-0x83 0x00 0x50 0xf1
-
-# CHECK: subs w3, w4, #1024
-# CHECK: subs w3, w4, #4194304
-# CHECK: subs x3, x4, #1024
-# CHECK: subs x3, x4, #4194304
-
-#==---------------------------------------------------------------------------==
-# Add/Subtract register with (optional) shift
-#==---------------------------------------------------------------------------==
-
-0xac 0x01 0x0e 0x0b
-0xac 0x01 0x0e 0x8b
-0xac 0x31 0x0e 0x0b
-0xac 0x31 0x0e 0x8b
-0xac 0xa9 0x4e 0x0b
-0xac 0xa9 0x4e 0x8b
-0xac 0x9d 0x8e 0x0b
-0xac 0x9d 0x8e 0x8b
-
-# CHECK: add w12, w13, w14
-# CHECK: add x12, x13, x14
-# CHECK: add w12, w13, w14, lsl #12
-# CHECK: add x12, x13, x14, lsl #12
-# CHECK: add w12, w13, w14, lsr #42
-# CHECK: add x12, x13, x14, lsr #42
-# CHECK: add w12, w13, w14, asr #39
-# CHECK: add x12, x13, x14, asr #39
-
-0xac 0x01 0x0e 0x4b
-0xac 0x01 0x0e 0xcb
-0xac 0x31 0x0e 0x4b
-0xac 0x31 0x0e 0xcb
-0xac 0xa9 0x4e 0x4b
-0xac 0xa9 0x4e 0xcb
-0xac 0x9d 0x8e 0x4b
-0xac 0x9d 0x8e 0xcb
-
-# CHECK: sub w12, w13, w14
-# CHECK: sub x12, x13, x14
-# CHECK: sub w12, w13, w14, lsl #12
-# CHECK: sub x12, x13, x14, lsl #12
-# CHECK: sub w12, w13, w14, lsr #42
-# CHECK: sub x12, x13, x14, lsr #42
-# CHECK: sub w12, w13, w14, asr #39
-# CHECK: sub x12, x13, x14, asr #39
-
-0xac 0x01 0x0e 0x2b
-0xac 0x01 0x0e 0xab
-0xac 0x31 0x0e 0x2b
-0xac 0x31 0x0e 0xab
-0xac 0xa9 0x4e 0x2b
-0xac 0xa9 0x4e 0xab
-0xac 0x9d 0x8e 0x2b
-0xac 0x9d 0x8e 0xab
-
-# CHECK: adds w12, w13, w14
-# CHECK: adds x12, x13, x14
-# CHECK: adds w12, w13, w14, lsl #12
-# CHECK: adds x12, x13, x14, lsl #12
-# CHECK: adds w12, w13, w14, lsr #42
-# CHECK: adds x12, x13, x14, lsr #42
-# CHECK: adds w12, w13, w14, asr #39
-# CHECK: adds x12, x13, x14, asr #39
-
-0xac 0x01 0x0e 0x6b
-0xac 0x01 0x0e 0xeb
-0xac 0x31 0x0e 0x6b
-0xac 0x31 0x0e 0xeb
-0xac 0xa9 0x4e 0x6b
-0xac 0xa9 0x4e 0xeb
-0xac 0x9d 0x8e 0x6b
-0xac 0x9d 0x8e 0xeb
-
-# CHECK: subs w12, w13, w14
-# CHECK: subs x12, x13, x14
-# CHECK: subs w12, w13, w14, lsl #12
-# CHECK: subs x12, x13, x14, lsl #12
-# CHECK: subs w12, w13, w14, lsr #42
-# CHECK: subs x12, x13, x14, lsr #42
-# CHECK: subs w12, w13, w14, asr #39
-# CHECK: subs x12, x13, x14, asr #39
-
-#==---------------------------------------------------------------------------==
-# Add/Subtract with (optional) extend
-#==---------------------------------------------------------------------------==
-
-0x41 0x00 0x23 0x0b
-0x41 0x20 0x23 0x0b
-0x41 0x40 0x23 0x0b
-0x41 0x60 0x23 0x0b
-0x41 0x80 0x23 0x0b
-0x41 0xa0 0x23 0x0b
-0x41 0xc0 0x23 0x0b
-0x41 0xe0 0x23 0x0b
-
-# CHECK: add w1, w2, w3, uxtb
-# CHECK: add w1, w2, w3, uxth
-# CHECK: add w1, w2, w3, uxtw
-# CHECK: add w1, w2, w3, uxtx
-# CHECK: add w1, w2, w3, sxtb
-# CHECK: add w1, w2, w3, sxth
-# CHECK: add w1, w2, w3, sxtw
-# CHECK: add w1, w2, w3, sxtx
-
-0x41 0x00 0x23 0x8b
-0x41 0x20 0x23 0x8b
-0x41 0x40 0x23 0x8b
-0x41 0x80 0x23 0x8b
-0x41 0xa0 0x23 0x8b
-0x41 0xc0 0x23 0x8b
-
-# CHECK: add x1, x2, w3, uxtb
-# CHECK: add x1, x2, w3, uxth
-# CHECK: add x1, x2, w3, uxtw
-# CHECK: add x1, x2, w3, sxtb
-# CHECK: add x1, x2, w3, sxth
-# CHECK: add x1, x2, w3, sxtw
-
-0xe1 0x43 0x23 0x0b
-0xe1 0x43 0x23 0x0b
-0x5f 0x60 0x23 0x8b
-0x5f 0x60 0x23 0x8b
-
-# CHECK: add w1, wsp, w3
-# CHECK: add w1, wsp, w3
-# CHECK: add sp, x2, x3
-# CHECK: add sp, x2, x3
-
-0x41 0x00 0x23 0x4b
-0x41 0x20 0x23 0x4b
-0x41 0x40 0x23 0x4b
-0x41 0x60 0x23 0x4b
-0x41 0x80 0x23 0x4b
-0x41 0xa0 0x23 0x4b
-0x41 0xc0 0x23 0x4b
-0x41 0xe0 0x23 0x4b
-
-# CHECK: sub w1, w2, w3, uxtb
-# CHECK: sub w1, w2, w3, uxth
-# CHECK: sub w1, w2, w3, uxtw
-# CHECK: sub w1, w2, w3, uxtx
-# CHECK: sub w1, w2, w3, sxtb
-# CHECK: sub w1, w2, w3, sxth
-# CHECK: sub w1, w2, w3, sxtw
-# CHECK: sub w1, w2, w3, sxtx
-
-0x41 0x00 0x23 0xcb
-0x41 0x20 0x23 0xcb
-0x41 0x40 0x23 0xcb
-0x41 0x80 0x23 0xcb
-0x41 0xa0 0x23 0xcb
-0x41 0xc0 0x23 0xcb
-
-# CHECK: sub x1, x2, w3, uxtb
-# CHECK: sub x1, x2, w3, uxth
-# CHECK: sub x1, x2, w3, uxtw
-# CHECK: sub x1, x2, w3, sxtb
-# CHECK: sub x1, x2, w3, sxth
-# CHECK: sub x1, x2, w3, sxtw
-
-0xe1 0x43 0x23 0x4b
-0xe1 0x43 0x23 0x4b
-0x5f 0x60 0x23 0xcb
-0x5f 0x60 0x23 0xcb
-
-# CHECK: sub w1, wsp, w3
-# CHECK: sub w1, wsp, w3
-# CHECK: sub sp, x2, x3
-# CHECK: sub sp, x2, x3
-
-0x41 0x00 0x23 0x2b
-0x41 0x20 0x23 0x2b
-0x41 0x40 0x23 0x2b
-0x41 0x60 0x23 0x2b
-0x41 0x80 0x23 0x2b
-0x41 0xa0 0x23 0x2b
-0x41 0xc0 0x23 0x2b
-0x41 0xe0 0x23 0x2b
-
-# CHECK: adds w1, w2, w3, uxtb
-# CHECK: adds w1, w2, w3, uxth
-# CHECK: adds w1, w2, w3, uxtw
-# CHECK: adds w1, w2, w3, uxtx
-# CHECK: adds w1, w2, w3, sxtb
-# CHECK: adds w1, w2, w3, sxth
-# CHECK: adds w1, w2, w3, sxtw
-# CHECK: adds w1, w2, w3, sxtx
-
-0x41 0x00 0x23 0xab
-0x41 0x20 0x23 0xab
-0x41 0x40 0x23 0xab
-0x41 0x80 0x23 0xab
-0x41 0xa0 0x23 0xab
-0x41 0xc0 0x23 0xab
-
-# CHECK: adds x1, x2, w3, uxtb
-# CHECK: adds x1, x2, w3, uxth
-# CHECK: adds x1, x2, w3, uxtw
-# CHECK: adds x1, x2, w3, sxtb
-# CHECK: adds x1, x2, w3, sxth
-# CHECK: adds x1, x2, w3, sxtw
-
-0xe1 0x43 0x23 0x2b
-0xe1 0x43 0x23 0x2b
-
-# CHECK: adds w1, wsp, w3
-# CHECK: adds w1, wsp, w3
-
-0x41 0x00 0x23 0x6b
-0x41 0x20 0x23 0x6b
-0x41 0x40 0x23 0x6b
-0x41 0x60 0x23 0x6b
-0x41 0x80 0x23 0x6b
-0x41 0xa0 0x23 0x6b
-0x41 0xc0 0x23 0x6b
-0x41 0xe0 0x23 0x6b
-
-# CHECK: subs w1, w2, w3, uxtb
-# CHECK: subs w1, w2, w3, uxth
-# CHECK: subs w1, w2, w3, uxtw
-# CHECK: subs w1, w2, w3, uxtx
-# CHECK: subs w1, w2, w3, sxtb
-# CHECK: subs w1, w2, w3, sxth
-# CHECK: subs w1, w2, w3, sxtw
-# CHECK: subs w1, w2, w3, sxtx
-
-0x41 0x00 0x23 0xeb
-0x41 0x20 0x23 0xeb
-0x41 0x40 0x23 0xeb
-0x41 0x80 0x23 0xeb
-0x41 0xa0 0x23 0xeb
-0x41 0xc0 0x23 0xeb
-
-# CHECK: subs x1, x2, w3, uxtb
-# CHECK: subs x1, x2, w3, uxth
-# CHECK: subs x1, x2, w3, uxtw
-# CHECK: subs x1, x2, w3, sxtb
-# CHECK: subs x1, x2, w3, sxth
-# CHECK: subs x1, x2, w3, sxtw
-
-0xe1 0x43 0x23 0x6b
-0xe1 0x43 0x23 0x6b
-
-# CHECK: subs w1, wsp, w3
-# CHECK: subs w1, wsp, w3
-
-0x1f 0x41 0x28 0xeb
-0x3f 0x41 0x28 0x6b
-0xff 0x43 0x28 0x6b
-0xff 0x43 0x28 0xeb
-
-# CHECK: cmp x8, w8, uxtw
-# CHECK: cmp w9, w8, uxtw
-# CHECK: cmp wsp, w8
-# CHECK: cmp sp, w8
-
-0x3f 0x41 0x28 0x4b
-0xe1 0x43 0x28 0x4b
-0xff 0x43 0x28 0x4b
-0x3f 0x41 0x28 0xcb
-0xe1 0x43 0x28 0xcb
-0xff 0x43 0x28 0xcb
-0xe1 0x43 0x28 0x6b
-0xe1 0x43 0x28 0xeb
-
-# CHECK: sub wsp, w9, w8
-# CHECK: sub w1, wsp, w8
-# CHECK: sub wsp, wsp, w8
-# CHECK: sub sp, x9, w8
-# CHECK: sub x1, sp, w8
-# CHECK: sub sp, sp, w8
-# CHECK: subs w1, wsp, w8
-# CHECK: subs x1, sp, w8
-
-#==---------------------------------------------------------------------------==
-# Signed/Unsigned divide
-#==---------------------------------------------------------------------------==
-
-0x41 0x0c 0xc3 0x1a
-0x41 0x0c 0xc3 0x9a
-0x41 0x08 0xc3 0x1a
-0x41 0x08 0xc3 0x9a
-
-# CHECK: sdiv w1, w2, w3
-# CHECK: sdiv x1, x2, x3
-# CHECK: udiv w1, w2, w3
-# CHECK: udiv x1, x2, x3
-
-#==---------------------------------------------------------------------------==
-# Variable shifts
-#==---------------------------------------------------------------------------==
-
- 0x41 0x28 0xc3 0x1a
-# CHECK: asrv w1, w2, w3
- 0x41 0x28 0xc3 0x9a
-# CHECK: asrv x1, x2, x3
- 0x41 0x20 0xc3 0x1a
-# CHECK: lslv w1, w2, w3
- 0x41 0x20 0xc3 0x9a
-# CHECK: lslv x1, x2, x3
- 0x41 0x24 0xc3 0x1a
-# CHECK: lsrv w1, w2, w3
- 0x41 0x24 0xc3 0x9a
-# CHECK: lsrv x1, x2, x3
- 0x41 0x2c 0xc3 0x1a
-# CHECK: rorv w1, w2, w3
- 0x41 0x2c 0xc3 0x9a
-# CHECK: rorv x1, x2, x3
-
-#==---------------------------------------------------------------------------==
-# One operand instructions
-#==---------------------------------------------------------------------------==
-
- 0x41 0x14 0xc0 0x5a
-# CHECK: cls w1, w2
- 0x41 0x14 0xc0 0xda
-# CHECK: cls x1, x2
- 0x41 0x10 0xc0 0x5a
-# CHECK: clz w1, w2
- 0x41 0x10 0xc0 0xda
-# CHECK: clz x1, x2
- 0x41 0x00 0xc0 0x5a
-# CHECK: rbit w1, w2
- 0x41 0x00 0xc0 0xda
-# CHECK: rbit x1, x2
- 0x41 0x08 0xc0 0x5a
-# CHECK: rev w1, w2
- 0x41 0x0c 0xc0 0xda
-# CHECK: rev x1, x2
- 0x41 0x04 0xc0 0x5a
-# CHECK: rev16 w1, w2
- 0x41 0x04 0xc0 0xda
-# CHECK: rev16 x1, x2
- 0x41 0x08 0xc0 0xda
-# CHECK: rev32 x1, x2
-
-#==---------------------------------------------------------------------------==
-# 6.6.1 Multiply-add instructions
-#==---------------------------------------------------------------------------==
-
-0x41 0x10 0x03 0x1b
-0x41 0x10 0x03 0x9b
-0x41 0x90 0x03 0x1b
-0x41 0x90 0x03 0x9b
-0x41 0x10 0x23 0x9b
-0x41 0x90 0x23 0x9b
-0x41 0x10 0xa3 0x9b
-0x41 0x90 0xa3 0x9b
-
-# CHECK: madd w1, w2, w3, w4
-# CHECK: madd x1, x2, x3, x4
-# CHECK: msub w1, w2, w3, w4
-# CHECK: msub x1, x2, x3, x4
-# CHECK: smaddl x1, w2, w3, x4
-# CHECK: smsubl x1, w2, w3, x4
-# CHECK: umaddl x1, w2, w3, x4
-# CHECK: umsubl x1, w2, w3, x4
-
-#==---------------------------------------------------------------------------==
-# Multiply-high instructions
-#==---------------------------------------------------------------------------==
-
-0x41 0x7c 0x43 0x9b
-0x41 0x7c 0xc3 0x9b
-
-# CHECK: smulh x1, x2, x3
-# CHECK: umulh x1, x2, x3
-
-#==---------------------------------------------------------------------------==
-# Move immediate instructions
-#==---------------------------------------------------------------------------==
-
-0x20 0x00 0x80 0x52
-0x20 0x00 0x80 0xd2
-0x20 0x00 0xa0 0x52
-0x20 0x00 0xa0 0xd2
-
-# CHECK: movz w0, #1
-# CHECK: movz x0, #1
-# CHECK: movz w0, #1, lsl #16
-# CHECK: movz x0, #1, lsl #16
-
-0x40 0x00 0x80 0x12
-0x40 0x00 0x80 0x92
-0x40 0x00 0xa0 0x12
-0x40 0x00 0xa0 0x92
-
-# CHECK: movn w0, #2
-# CHECK: movn x0, #2
-# CHECK: movn w0, #2, lsl #16
-# CHECK: movn x0, #2, lsl #16
-
-0x20 0x00 0x80 0x72
-0x20 0x00 0x80 0xf2
-0x20 0x00 0xa0 0x72
-0x20 0x00 0xa0 0xf2
-
-# CHECK: movk w0, #1
-# CHECK: movk x0, #1
-# CHECK: movk w0, #1, lsl #16
-# CHECK: movk x0, #1, lsl #16
-
-#==---------------------------------------------------------------------------==
-# Conditionally set flags instructions
-#==---------------------------------------------------------------------------==
-
- 0x1f 0x00 0x00 0x31
-# CHECK: cmn w0, #0
- 0x1f 0xfc 0x03 0xb1
-# CHECK: cmn x0, #255
-
- 0x23 0x08 0x42 0x3a
-# CHECK: ccmn w1, #2, #3, eq
- 0x23 0x08 0x42 0xba
-# CHECK: ccmn x1, #2, #3, eq
- 0x23 0x08 0x42 0x7a
-# CHECK: ccmp w1, #2, #3, eq
- 0x23 0x08 0x42 0xfa
-# CHECK: ccmp x1, #2, #3, eq
-
- 0x23 0x00 0x42 0x3a
-# CHECK: ccmn w1, w2, #3, eq
- 0x23 0x00 0x42 0xba
-# CHECK: ccmn x1, x2, #3, eq
- 0x23 0x00 0x42 0x7a
-# CHECK: ccmp w1, w2, #3, eq
- 0x23 0x00 0x42 0xfa
-# CHECK: ccmp x1, x2, #3, eq
-
-#==---------------------------------------------------------------------------==
-# Conditional select instructions
-#==---------------------------------------------------------------------------==
-
- 0x41 0x00 0x83 0x1a
-# CHECK: csel w1, w2, w3, eq
- 0x41 0x00 0x83 0x9a
-# CHECK: csel x1, x2, x3, eq
- 0x41 0x04 0x83 0x1a
-# CHECK: csinc w1, w2, w3, eq
- 0x41 0x04 0x83 0x9a
-# CHECK: csinc x1, x2, x3, eq
- 0x41 0x00 0x83 0x5a
-# CHECK: csinv w1, w2, w3, eq
- 0x41 0x00 0x83 0xda
-# CHECK: csinv x1, x2, x3, eq
- 0x41 0x04 0x83 0x5a
-# CHECK: csneg w1, w2, w3, eq
- 0x41 0x04 0x83 0xda
-# CHECK: csneg x1, x2, x3, eq
diff --git a/test/MC/Disassembler/ARM64/bitfield.txt b/test/MC/Disassembler/ARM64/bitfield.txt
deleted file mode 100644
index 99e7af1..0000000
--- a/test/MC/Disassembler/ARM64/bitfield.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-
-#==---------------------------------------------------------------------------==
-# 5.4.4 Bitfield Operations
-#==---------------------------------------------------------------------------==
-
-0x41 0x3c 0x01 0x33
-0x41 0x3c 0x41 0xb3
-0x41 0x3c 0x01 0x13
-0x41 0x3c 0x41 0x93
-0x41 0x3c 0x01 0x53
-0x41 0x3c 0x41 0xd3
-
-# CHECK: bfm w1, w2, #1, #15
-# CHECK: bfm x1, x2, #1, #15
-# CHECK: sbfm w1, w2, #1, #15
-# CHECK: sbfm x1, x2, #1, #15
-# CHECK: ubfm w1, w2, #1, #15
-# CHECK: ubfm x1, x2, #1, #15
-
-#==---------------------------------------------------------------------------==
-# 5.4.5 Extract (immediate)
-#==---------------------------------------------------------------------------==
-
-0x41 0x3c 0x83 0x13
-0x62 0x04 0xc4 0x93
-
-# CHECK: extr w1, w2, w3, #15
-# CHECK: extr x2, x3, x4, #1
diff --git a/test/MC/Disassembler/ARM64/branch.txt b/test/MC/Disassembler/ARM64/branch.txt
deleted file mode 100644
index c5b254b..0000000
--- a/test/MC/Disassembler/ARM64/branch.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-
-#-----------------------------------------------------------------------------
-# Unconditional branch (register) instructions.
-#-----------------------------------------------------------------------------
-
- 0xc0 0x03 0x5f 0xd6
-# CHECK: ret
- 0x20 0x00 0x5f 0xd6
-# CHECK: ret x1
- 0xe0 0x03 0xbf 0xd6
-# CHECK: drps
- 0xe0 0x03 0x9f 0xd6
-# CHECK: eret
- 0xa0 0x00 0x1f 0xd6
-# CHECK: br x5
- 0x20 0x01 0x3f 0xd6
-# CHECK: blr x9
- 0x0B 0x00 0x18 0x37
-# CHECK: tbnz w11, #3, #0
-
-#-----------------------------------------------------------------------------
-# Exception generation instructions.
-#-----------------------------------------------------------------------------
-
- 0x20 0x00 0x20 0xd4
-# CHECK: brk #1
- 0x41 0x00 0xa0 0xd4
-# CHECK: dcps1 #2
- 0x62 0x00 0xa0 0xd4
-# CHECK: dcps2 #3
- 0x83 0x00 0xa0 0xd4
-# CHECK: dcps3 #4
- 0xa0 0x00 0x40 0xd4
-# CHECK: hlt #5
- 0xc2 0x00 0x00 0xd4
-# CHECK: hvc #6
- 0xe3 0x00 0x00 0xd4
-# CHECK: smc #7
- 0x01 0x01 0x00 0xd4
-# CHECK: svc #8
-
-#-----------------------------------------------------------------------------
-# PC-relative branches (both positive and negative displacement)
-#-----------------------------------------------------------------------------
-
- 0x07 0x00 0x00 0x14
-# CHECK: b #28
- 0x06 0x00 0x00 0x94
-# CHECK: bl #24
- 0xa1 0x00 0x00 0x54
-# CHECK: b.ne #20
- 0x80 0x00 0x08 0x36
-# CHECK: tbz w0, #1, #16
- 0xe1 0xff 0xf7 0x36
-# CHECK: tbz w1, #30, #-4
- 0x60 0x00 0x08 0x37
-# CHECK: tbnz w0, #1, #12
- 0x40 0x00 0x00 0xb4
-# CHECK: cbz x0, #8
- 0x20 0x00 0x00 0xb5
-# CHECK: cbnz x0, #4
- 0x1f 0x20 0x03 0xd5
-# CHECK: nop
- 0xff 0xff 0xff 0x17
-# CHECK: b #-4
- 0xc1 0xff 0xff 0x54
-# CHECK: b.ne #-8
- 0xa0 0xff 0x0f 0x36
-# CHECK: tbz w0, #1, #-12
- 0x80 0xff 0xff 0xb4
-# CHECK: cbz x0, #-16
- 0x1f 0x20 0x03 0xd5
-# CHECK: nop
-
diff --git a/test/MC/Disassembler/ARM64/crc32.txt b/test/MC/Disassembler/ARM64/crc32.txt
deleted file mode 100644
index ef0a26e..0000000
--- a/test/MC/Disassembler/ARM64/crc32.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-# RUN: llvm-mc -triple=arm64 -disassemble < %s | FileCheck %s
-
-# CHECK: crc32b w5, w7, w20
-# CHECK: crc32h w28, wzr, w30
-# CHECK: crc32w w0, w1, w2
-# CHECK: crc32x w7, w9, x20
-# CHECK: crc32cb w9, w5, w4
-# CHECK: crc32ch w13, w17, w25
-# CHECK: crc32cw wzr, w3, w5
-# CHECK: crc32cx w18, w16, xzr
-0xe5 0x40 0xd4 0x1a
-0xfc 0x47 0xde 0x1a
-0x20 0x48 0xc2 0x1a
-0x27 0x4d 0xd4 0x9a
-0xa9 0x50 0xc4 0x1a
-0x2d 0x56 0xd9 0x1a
-0x7f 0x58 0xc5 0x1a
-0x12 0x5e 0xdf 0x9a
diff --git a/test/MC/Disassembler/ARM64/crypto.txt b/test/MC/Disassembler/ARM64/crypto.txt
deleted file mode 100644
index e163b2c..0000000
--- a/test/MC/Disassembler/ARM64/crypto.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple arm64-apple-darwin -output-asm-variant=1 --disassemble < %s | FileCheck %s --check-prefix=CHECK-APPLE
-
- 0x20 0x48 0x28 0x4e
- 0x20 0x58 0x28 0x4e
- 0x20 0x68 0x28 0x4e
- 0x20 0x78 0x28 0x4e
- 0x20 0x00 0x02 0x5e
- 0x20 0x10 0x02 0x5e
- 0x20 0x20 0x02 0x5e
- 0x20 0x30 0x02 0x5e
- 0x20 0x40 0x02 0x5e
- 0x20 0x50 0x02 0x5e
- 0x20 0x60 0x02 0x5e
- 0x20 0x08 0x28 0x5e
- 0x20 0x18 0x28 0x5e
- 0x20 0x28 0x28 0x5e
-
-# CHECK: aese v0.16b, v1.16b
-# CHECK: aesd v0.16b, v1.16b
-# CHECK: aesmc v0.16b, v1.16b
-# CHECK: aesimc v0.16b, v1.16b
-# CHECK: sha1c q0, s1, v2.4s
-# CHECK: sha1p q0, s1, v2.4s
-# CHECK: sha1m q0, s1, v2.4s
-# CHECK: sha1su0 v0.4s, v1.4s, v2.4s
-# CHECK: sha256h q0, q1, v2.4s
-# CHECK: sha256h2 q0, q1, v2.4s
-# CHECK: sha256su1 v0.4s, v1.4s, v2.4s
-# CHECK: sha1h s0, s1
-# CHECK: sha1su1 v0.4s, v1.4s
-# CHECK: sha256su0 v0.4s, v1.4s
-
-# CHECK-APPLE: aese.16b v0, v1
-# CHECK-APPLE: aesd.16b v0, v1
-# CHECK-APPLE: aesmc.16b v0, v1
-# CHECK-APPLE: aesimc.16b v0, v1
-# CHECK-APPLE: sha1c.4s q0, s1, v2
-# CHECK-APPLE: sha1p.4s q0, s1, v2
-# CHECK-APPLE: sha1m.4s q0, s1, v2
-# CHECK-APPLE: sha1su0.4s v0, v1, v2
-# CHECK-APPLE: sha256h.4s q0, q1, v2
-# CHECK-APPLE: sha256h2.4s q0, q1, v2
-# CHECK-APPLE: sha256su1.4s v0, v1, v2
-# CHECK-APPLE: sha1h s0, s1
-# CHECK-APPLE: sha1su1.4s v0, v1
-# CHECK-APPLE: sha256su0.4s v0, v1
diff --git a/test/MC/Disassembler/ARM64/lit.local.cfg b/test/MC/Disassembler/ARM64/lit.local.cfg
deleted file mode 100644
index 46a9468..0000000
--- a/test/MC/Disassembler/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-config.suffixes = ['.txt']
-
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
diff --git a/test/MC/Disassembler/ARM64/logical.txt b/test/MC/Disassembler/ARM64/logical.txt
deleted file mode 100644
index 29db8cb..0000000
--- a/test/MC/Disassembler/ARM64/logical.txt
+++ /dev/null
@@ -1,217 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-
-#==---------------------------------------------------------------------------==
-# 5.4.2 Logical (immediate)
-#==---------------------------------------------------------------------------==
-
-0x00 0x00 0x00 0x12
-0x00 0x00 0x40 0x92
-0x41 0x0c 0x00 0x12
-0x41 0x0c 0x40 0x92
-0xbf 0xec 0x7c 0x92
-0x00 0x00 0x00 0x72
-0x00 0x00 0x40 0xf2
-0x41 0x0c 0x00 0x72
-0x41 0x0c 0x40 0xf2
-
-# CHECK: and w0, w0, #0x1
-# CHECK: and x0, x0, #0x1
-# CHECK: and w1, w2, #0xf
-# CHECK: and x1, x2, #0xf
-# CHECK: and sp, x5, #0xfffffffffffffff0
-# CHECK: ands w0, w0, #0x1
-# CHECK: ands x0, x0, #0x1
-# CHECK: ands w1, w2, #0xf
-# CHECK: ands x1, x2, #0xf
-
-0x41 0x00 0x12 0x52
-0x41 0x00 0x71 0xd2
-
-# CHECK: eor w1, w2, #0x4000
-# CHECK: eor x1, x2, #0x8000
-
-0x41 0x00 0x12 0x32
-0x41 0x00 0x71 0xb2
-
-# CHECK: orr w1, w2, #0x4000
-# CHECK: orr x1, x2, #0x8000
-
-#==---------------------------------------------------------------------------==
-# 5.5.3 Logical (shifted register)
-#==---------------------------------------------------------------------------==
-
-0x41 0x00 0x03 0x0a
-0x41 0x00 0x03 0x8a
-0x41 0x08 0x03 0x0a
-0x41 0x08 0x03 0x8a
-0x41 0x08 0x43 0x0a
-0x41 0x08 0x43 0x8a
-0x41 0x08 0x83 0x0a
-0x41 0x08 0x83 0x8a
-0x41 0x08 0xc3 0x0a
-0x41 0x08 0xc3 0x8a
-
-# CHECK: and w1, w2, w3
-# CHECK: and x1, x2, x3
-# CHECK: and w1, w2, w3, lsl #2
-# CHECK: and x1, x2, x3, lsl #2
-# CHECK: and w1, w2, w3, lsr #2
-# CHECK: and x1, x2, x3, lsr #2
-# CHECK: and w1, w2, w3, asr #2
-# CHECK: and x1, x2, x3, asr #2
-# CHECK: and w1, w2, w3, ror #2
-# CHECK: and x1, x2, x3, ror #2
-
-0x41 0x00 0x03 0x6a
-0x41 0x00 0x03 0xea
-0x41 0x08 0x03 0x6a
-0x41 0x08 0x03 0xea
-0x41 0x08 0x43 0x6a
-0x41 0x08 0x43 0xea
-0x41 0x08 0x83 0x6a
-0x41 0x08 0x83 0xea
-0x41 0x08 0xc3 0x6a
-0x41 0x08 0xc3 0xea
-
-# CHECK: ands w1, w2, w3
-# CHECK: ands x1, x2, x3
-# CHECK: ands w1, w2, w3, lsl #2
-# CHECK: ands x1, x2, x3, lsl #2
-# CHECK: ands w1, w2, w3, lsr #2
-# CHECK: ands x1, x2, x3, lsr #2
-# CHECK: ands w1, w2, w3, asr #2
-# CHECK: ands x1, x2, x3, asr #2
-# CHECK: ands w1, w2, w3, ror #2
-# CHECK: ands x1, x2, x3, ror #2
-
-0x41 0x00 0x23 0x0a
-0x41 0x00 0x23 0x8a
-0x41 0x0c 0x23 0x0a
-0x41 0x0c 0x23 0x8a
-0x41 0x0c 0x63 0x0a
-0x41 0x0c 0x63 0x8a
-0x41 0x0c 0xa3 0x0a
-0x41 0x0c 0xa3 0x8a
-0x41 0x0c 0xe3 0x0a
-0x41 0x0c 0xe3 0x8a
-
-# CHECK: bic w1, w2, w3
-# CHECK: bic x1, x2, x3
-# CHECK: bic w1, w2, w3, lsl #3
-# CHECK: bic x1, x2, x3, lsl #3
-# CHECK: bic w1, w2, w3, lsr #3
-# CHECK: bic x1, x2, x3, lsr #3
-# CHECK: bic w1, w2, w3, asr #3
-# CHECK: bic x1, x2, x3, asr #3
-# CHECK: bic w1, w2, w3, ror #3
-# CHECK: bic x1, x2, x3, ror #3
-
-0x41 0x00 0x23 0x6a
-0x41 0x00 0x23 0xea
-0x41 0x0c 0x23 0x6a
-0x41 0x0c 0x23 0xea
-0x41 0x0c 0x63 0x6a
-0x41 0x0c 0x63 0xea
-0x41 0x0c 0xa3 0x6a
-0x41 0x0c 0xa3 0xea
-0x41 0x0c 0xe3 0x6a
-0x41 0x0c 0xe3 0xea
-
-# CHECK: bics w1, w2, w3
-# CHECK: bics x1, x2, x3
-# CHECK: bics w1, w2, w3, lsl #3
-# CHECK: bics x1, x2, x3, lsl #3
-# CHECK: bics w1, w2, w3, lsr #3
-# CHECK: bics x1, x2, x3, lsr #3
-# CHECK: bics w1, w2, w3, asr #3
-# CHECK: bics x1, x2, x3, asr #3
-# CHECK: bics w1, w2, w3, ror #3
-# CHECK: bics x1, x2, x3, ror #3
-
-0x41 0x00 0x23 0x4a
-0x41 0x00 0x23 0xca
-0x41 0x10 0x23 0x4a
-0x41 0x10 0x23 0xca
-0x41 0x10 0x63 0x4a
-0x41 0x10 0x63 0xca
-0x41 0x10 0xa3 0x4a
-0x41 0x10 0xa3 0xca
-0x41 0x10 0xe3 0x4a
-0x41 0x10 0xe3 0xca
-
-# CHECK: eon w1, w2, w3
-# CHECK: eon x1, x2, x3
-# CHECK: eon w1, w2, w3, lsl #4
-# CHECK: eon x1, x2, x3, lsl #4
-# CHECK: eon w1, w2, w3, lsr #4
-# CHECK: eon x1, x2, x3, lsr #4
-# CHECK: eon w1, w2, w3, asr #4
-# CHECK: eon x1, x2, x3, asr #4
-# CHECK: eon w1, w2, w3, ror #4
-# CHECK: eon x1, x2, x3, ror #4
-
-0x41 0x00 0x03 0x4a
-0x41 0x00 0x03 0xca
-0x41 0x14 0x03 0x4a
-0x41 0x14 0x03 0xca
-0x41 0x14 0x43 0x4a
-0x41 0x14 0x43 0xca
-0x41 0x14 0x83 0x4a
-0x41 0x14 0x83 0xca
-0x41 0x14 0xc3 0x4a
-0x41 0x14 0xc3 0xca
-
-# CHECK: eor w1, w2, w3
-# CHECK: eor x1, x2, x3
-# CHECK: eor w1, w2, w3, lsl #5
-# CHECK: eor x1, x2, x3, lsl #5
-# CHECK: eor w1, w2, w3, lsr #5
-# CHECK: eor x1, x2, x3, lsr #5
-# CHECK: eor w1, w2, w3, asr #5
-# CHECK: eor x1, x2, x3, asr #5
-# CHECK: eor w1, w2, w3, ror #5
-# CHECK: eor x1, x2, x3, ror #5
-
-0x41 0x00 0x03 0x2a
-0x41 0x00 0x03 0xaa
-0x41 0x18 0x03 0x2a
-0x41 0x18 0x03 0xaa
-0x41 0x18 0x43 0x2a
-0x41 0x18 0x43 0xaa
-0x41 0x18 0x83 0x2a
-0x41 0x18 0x83 0xaa
-0x41 0x18 0xc3 0x2a
-0x41 0x18 0xc3 0xaa
-
-# CHECK: orr w1, w2, w3
-# CHECK: orr x1, x2, x3
-# CHECK: orr w1, w2, w3, lsl #6
-# CHECK: orr x1, x2, x3, lsl #6
-# CHECK: orr w1, w2, w3, lsr #6
-# CHECK: orr x1, x2, x3, lsr #6
-# CHECK: orr w1, w2, w3, asr #6
-# CHECK: orr x1, x2, x3, asr #6
-# CHECK: orr w1, w2, w3, ror #6
-# CHECK: orr x1, x2, x3, ror #6
-
-0x41 0x00 0x23 0x2a
-0x41 0x00 0x23 0xaa
-0x41 0x1c 0x23 0x2a
-0x41 0x1c 0x23 0xaa
-0x41 0x1c 0x63 0x2a
-0x41 0x1c 0x63 0xaa
-0x41 0x1c 0xa3 0x2a
-0x41 0x1c 0xa3 0xaa
-0x41 0x1c 0xe3 0x2a
-0x41 0x1c 0xe3 0xaa
-
-# CHECK: orn w1, w2, w3
-# CHECK: orn x1, x2, x3
-# CHECK: orn w1, w2, w3, lsl #7
-# CHECK: orn x1, x2, x3, lsl #7
-# CHECK: orn w1, w2, w3, lsr #7
-# CHECK: orn x1, x2, x3, lsr #7
-# CHECK: orn w1, w2, w3, asr #7
-# CHECK: orn x1, x2, x3, asr #7
-# CHECK: orn w1, w2, w3, ror #7
-# CHECK: orn x1, x2, x3, ror #7
diff --git a/test/MC/Disassembler/ARM64/memory.txt b/test/MC/Disassembler/ARM64/memory.txt
deleted file mode 100644
index 031bfa6..0000000
--- a/test/MC/Disassembler/ARM64/memory.txt
+++ /dev/null
@@ -1,558 +0,0 @@
-# RUN: llvm-mc --disassemble -triple arm64-apple-darwin < %s | FileCheck %s
-
-#-----------------------------------------------------------------------------
-# Indexed loads
-#-----------------------------------------------------------------------------
-
- 0x85 0x14 0x40 0xb9
- 0x64 0x00 0x40 0xf9
- 0xe2 0x13 0x40 0xf9
- 0xe5 0x07 0x40 0x3d
- 0xe6 0x07 0x40 0x7d
- 0xe7 0x07 0x40 0xbd
- 0xe8 0x07 0x40 0xfd
- 0xe9 0x07 0xc0 0x3d
- 0x64 0x00 0x40 0x39
- 0x20 0x78 0xa0 0xb8
- 0x85 0x50 0x40 0x39
-
-# CHECK: ldr w5, [x4, #20]
-# CHECK: ldr x4, [x3]
-# CHECK: ldr x2, [sp, #32]
-# CHECK: ldr b5, [sp, #1]
-# CHECK: ldr h6, [sp, #2]
-# CHECK: ldr s7, [sp, #4]
-# CHECK: ldr d8, [sp, #8]
-# CHECK: ldr q9, [sp, #16]
-# CHECK: ldrb w4, [x3]
-# CHECK: ldrsw x0, [x1, x0, lsl #2]
-# CHECK: ldrb w5, [x4, #20]
-# CHECK: ldrsb w9, [x3]
-# CHECK: ldrsb x2, [sp, #128]
-# CHECK: ldrh w2, [sp, #32]
-# CHECK: ldrsh w3, [sp, #32]
-# CHECK: ldrsh x5, [x9, #24]
-# CHECK: ldrsw x9, [sp, #512]
-# CHECK: prfm pldl3strm, [sp, #32]
-
- 0x69 0x00 0xc0 0x39
- 0xe2 0x03 0x82 0x39
- 0xe2 0x43 0x40 0x79
- 0xe3 0x43 0xc0 0x79
- 0x25 0x31 0x80 0x79
- 0xe9 0x03 0x82 0xb9
- 0xe5 0x13 0x80 0xf9
- 0x40 0x00 0x80 0xf9
- 0x41 0x00 0x80 0xf9
- 0x42 0x00 0x80 0xf9
- 0x43 0x00 0x80 0xf9
- 0x44 0x00 0x80 0xf9
- 0x45 0x00 0x80 0xf9
- 0x50 0x00 0x80 0xf9
- 0x51 0x00 0x80 0xf9
- 0x52 0x00 0x80 0xf9
- 0x53 0x00 0x80 0xf9
- 0x54 0x00 0x80 0xf9
- 0x55 0x00 0x80 0xf9
-
-# CHECK: prfm pldl1keep, [x2]
-# CHECK: prfm pldl1strm, [x2]
-# CHECK: prfm pldl2keep, [x2]
-# CHECK: prfm pldl2strm, [x2]
-# CHECK: prfm pldl3keep, [x2]
-# CHECK: prfm pldl3strm, [x2]
-# CHECK: prfm pstl1keep, [x2]
-# CHECK: prfm pstl1strm, [x2]
-# CHECK: prfm pstl2keep, [x2]
-# CHECK: prfm pstl2strm, [x2]
-# CHECK: prfm pstl3keep, [x2]
-# CHECK: prfm pstl3strm, [x2]
-
-#-----------------------------------------------------------------------------
-# Indexed stores
-#-----------------------------------------------------------------------------
-
- 0x64 0x00 0x00 0xf9
- 0xe2 0x13 0x00 0xf9
- 0x85 0x14 0x00 0xb9
- 0xe5 0x07 0x00 0x3d
- 0xe6 0x07 0x00 0x7d
- 0xe7 0x07 0x00 0xbd
- 0xe8 0x07 0x00 0xfd
- 0xe9 0x07 0x80 0x3d
- 0x64 0x00 0x00 0x39
- 0x85 0x50 0x00 0x39
- 0xe2 0x43 0x00 0x79
-
-# CHECK: str x4, [x3]
-# CHECK: str x2, [sp, #32]
-# CHECK: str w5, [x4, #20]
-# CHECK: str b5, [sp, #1]
-# CHECK: str h6, [sp, #2]
-# CHECK: str s7, [sp, #4]
-# CHECK: str d8, [sp, #8]
-# CHECK: str q9, [sp, #16]
-# CHECK: strb w4, [x3]
-# CHECK: strb w5, [x4, #20]
-# CHECK: strh w2, [sp, #32]
-
-#-----------------------------------------------------------------------------
-# Unscaled immediate loads and stores
-#-----------------------------------------------------------------------------
-
- 0x62 0x00 0x40 0xb8
- 0xe2 0x83 0x41 0xb8
- 0x62 0x00 0x40 0xf8
- 0xe2 0x83 0x41 0xf8
- 0xe5 0x13 0x40 0x3c
- 0xe6 0x23 0x40 0x7c
- 0xe7 0x43 0x40 0xbc
- 0xe8 0x83 0x40 0xfc
- 0xe9 0x03 0xc1 0x3c
- 0x69 0x00 0xc0 0x38
- 0xe2 0x03 0x88 0x38
- 0xe3 0x03 0xc2 0x78
- 0x25 0x81 0x81 0x78
- 0xe9 0x03 0x98 0xb8
-
-# CHECK: ldur w2, [x3]
-# CHECK: ldur w2, [sp, #24]
-# CHECK: ldur x2, [x3]
-# CHECK: ldur x2, [sp, #24]
-# CHECK: ldur b5, [sp, #1]
-# CHECK: ldur h6, [sp, #2]
-# CHECK: ldur s7, [sp, #4]
-# CHECK: ldur d8, [sp, #8]
-# CHECK: ldur q9, [sp, #16]
-# CHECK: ldursb w9, [x3]
-# CHECK: ldursb x2, [sp, #128]
-# CHECK: ldursh w3, [sp, #32]
-# CHECK: ldursh x5, [x9, #24]
-# CHECK: ldursw x9, [sp, #-128]
-
- 0x64 0x00 0x00 0xb8
- 0xe2 0x03 0x02 0xb8
- 0x64 0x00 0x00 0xf8
- 0xe2 0x03 0x02 0xf8
- 0x85 0x40 0x01 0xb8
- 0xe5 0x13 0x00 0x3c
- 0xe6 0x23 0x00 0x7c
- 0xe7 0x43 0x00 0xbc
- 0xe8 0x83 0x00 0xfc
- 0xe9 0x03 0x81 0x3c
- 0x64 0x00 0x00 0x38
- 0x85 0x40 0x01 0x38
- 0xe2 0x03 0x02 0x78
- 0xe5 0x03 0x82 0xf8
-
-# CHECK: stur w4, [x3]
-# CHECK: stur w2, [sp, #32]
-# CHECK: stur x4, [x3]
-# CHECK: stur x2, [sp, #32]
-# CHECK: stur w5, [x4, #20]
-# CHECK: stur b5, [sp, #1]
-# CHECK: stur h6, [sp, #2]
-# CHECK: stur s7, [sp, #4]
-# CHECK: stur d8, [sp, #8]
-# CHECK: stur q9, [sp, #16]
-# CHECK: sturb w4, [x3]
-# CHECK: sturb w5, [x4, #20]
-# CHECK: sturh w2, [sp, #32]
-# CHECK: prfum pldl3strm, [sp, #32]
-
-#-----------------------------------------------------------------------------
-# Unprivileged loads and stores
-#-----------------------------------------------------------------------------
-
- 0x83 0x08 0x41 0xb8
- 0x83 0x08 0x41 0xf8
- 0x83 0x08 0x41 0x38
- 0x69 0x08 0xc0 0x38
- 0xe2 0x0b 0x88 0x38
- 0x83 0x08 0x41 0x78
- 0xe3 0x0b 0xc2 0x78
- 0x25 0x89 0x81 0x78
- 0xe9 0x0b 0x98 0xb8
-
-# CHECK: ldtr w3, [x4, #16]
-# CHECK: ldtr x3, [x4, #16]
-# CHECK: ldtrb w3, [x4, #16]
-# CHECK: ldtrsb w9, [x3]
-# CHECK: ldtrsb x2, [sp, #128]
-# CHECK: ldtrh w3, [x4, #16]
-# CHECK: ldtrsh w3, [sp, #32]
-# CHECK: ldtrsh x5, [x9, #24]
-# CHECK: ldtrsw x9, [sp, #-128]
-
- 0x85 0x48 0x01 0xb8
- 0x64 0x08 0x00 0xf8
- 0xe2 0x0b 0x02 0xf8
- 0x64 0x08 0x00 0x38
- 0x85 0x48 0x01 0x38
- 0xe2 0x0b 0x02 0x78
-
-# CHECK: sttr w5, [x4, #20]
-# CHECK: sttr x4, [x3]
-# CHECK: sttr x2, [sp, #32]
-# CHECK: sttrb w4, [x3]
-# CHECK: sttrb w5, [x4, #20]
-# CHECK: sttrh w2, [sp, #32]
-
-#-----------------------------------------------------------------------------
-# Pre-indexed loads and stores
-#-----------------------------------------------------------------------------
-
- 0xfd 0x8c 0x40 0xf8
- 0xfe 0x8c 0x40 0xf8
- 0x05 0x1c 0x40 0x3c
- 0x06 0x2c 0x40 0x7c
- 0x07 0x4c 0x40 0xbc
- 0x08 0x8c 0x40 0xfc
- 0x09 0x0c 0xc1 0x3c
-
-# CHECK: ldr fp, [x7, #8]!
-# CHECK: ldr lr, [x7, #8]!
-# CHECK: ldr b5, [x0, #1]!
-# CHECK: ldr h6, [x0, #2]!
-# CHECK: ldr s7, [x0, #4]!
-# CHECK: ldr d8, [x0, #8]!
-# CHECK: ldr q9, [x0, #16]!
-
- 0xfe 0x8c 0x1f 0xf8
- 0xfd 0x8c 0x1f 0xf8
- 0x05 0xfc 0x1f 0x3c
- 0x06 0xec 0x1f 0x7c
- 0x07 0xcc 0x1f 0xbc
- 0x08 0x8c 0x1f 0xfc
- 0x09 0x0c 0x9f 0x3c
-
-# CHECK: str lr, [x7, #-8]!
-# CHECK: str fp, [x7, #-8]!
-# CHECK: str b5, [x0, #-1]!
-# CHECK: str h6, [x0, #-2]!
-# CHECK: str s7, [x0, #-4]!
-# CHECK: str d8, [x0, #-8]!
-# CHECK: str q9, [x0, #-16]!
-
-#-----------------------------------------------------------------------------
-# post-indexed loads and stores
-#-----------------------------------------------------------------------------
-
- 0xfe 0x84 0x1f 0xf8
- 0xfd 0x84 0x1f 0xf8
- 0x05 0xf4 0x1f 0x3c
- 0x06 0xe4 0x1f 0x7c
- 0x07 0xc4 0x1f 0xbc
- 0x08 0x84 0x1f 0xfc
- 0x09 0x04 0x9f 0x3c
-
-# CHECK: str lr, [x7], #-8
-# CHECK: str fp, [x7], #-8
-# CHECK: str b5, [x0], #-1
-# CHECK: str h6, [x0], #-2
-# CHECK: str s7, [x0], #-4
-# CHECK: str d8, [x0], #-8
-# CHECK: str q9, [x0], #-16
-
- 0xfd 0x84 0x40 0xf8
- 0xfe 0x84 0x40 0xf8
- 0x05 0x14 0x40 0x3c
- 0x06 0x24 0x40 0x7c
- 0x07 0x44 0x40 0xbc
- 0x08 0x84 0x40 0xfc
- 0x09 0x04 0xc1 0x3c
-
-# CHECK: ldr fp, [x7], #8
-# CHECK: ldr lr, [x7], #8
-# CHECK: ldr b5, [x0], #1
-# CHECK: ldr h6, [x0], #2
-# CHECK: ldr s7, [x0], #4
-# CHECK: ldr d8, [x0], #8
-# CHECK: ldr q9, [x0], #16
-
-#-----------------------------------------------------------------------------
-# Load/Store pair (indexed offset)
-#-----------------------------------------------------------------------------
-
- 0xe3 0x09 0x42 0x29
- 0xe4 0x27 0x7f 0xa9
- 0xc2 0x0d 0x42 0x69
- 0xe2 0x0f 0x7e 0x69
- 0x4a 0x04 0x48 0x2d
- 0x4a 0x04 0x40 0x6d
-
-# CHECK: ldp w3, w2, [x15, #16]
-# CHECK: ldp x4, x9, [sp, #-16]
-# CHECK: ldpsw x2, x3, [x14, #16]
-# CHECK: ldpsw x2, x3, [sp, #-16]
-# CHECK: ldp s10, s1, [x2, #64]
-# CHECK: ldp d10, d1, [x2]
-
- 0xe3 0x09 0x02 0x29
- 0xe4 0x27 0x3f 0xa9
- 0x4a 0x04 0x08 0x2d
- 0x4a 0x04 0x00 0x6d
-
-# CHECK: stp w3, w2, [x15, #16]
-# CHECK: stp x4, x9, [sp, #-16]
-# CHECK: stp s10, s1, [x2, #64]
-# CHECK: stp d10, d1, [x2]
-
-#-----------------------------------------------------------------------------
-# Load/Store pair (pre-indexed)
-#-----------------------------------------------------------------------------
-
- 0xe3 0x09 0xc2 0x29
- 0xe4 0x27 0xff 0xa9
- 0xc2 0x0d 0xc2 0x69
- 0xe2 0x0f 0xfe 0x69
- 0x4a 0x04 0xc8 0x2d
- 0x4a 0x04 0xc1 0x6d
-
-# CHECK: ldp w3, w2, [x15, #16]!
-# CHECK: ldp x4, x9, [sp, #-16]!
-# CHECK: ldpsw x2, x3, [x14, #16]!
-# CHECK: ldpsw x2, x3, [sp, #-16]!
-# CHECK: ldp s10, s1, [x2, #64]!
-# CHECK: ldp d10, d1, [x2, #16]!
-
- 0xe3 0x09 0x82 0x29
- 0xe4 0x27 0xbf 0xa9
- 0x4a 0x04 0x88 0x2d
- 0x4a 0x04 0x81 0x6d
-
-# CHECK: stp w3, w2, [x15, #16]!
-# CHECK: stp x4, x9, [sp, #-16]!
-# CHECK: stp s10, s1, [x2, #64]!
-# CHECK: stp d10, d1, [x2, #16]!
-
-#-----------------------------------------------------------------------------
-# Load/Store pair (post-indexed)
-#-----------------------------------------------------------------------------
-
- 0xe3 0x09 0xc2 0x28
- 0xe4 0x27 0xff 0xa8
- 0xc2 0x0d 0xc2 0x68
- 0xe2 0x0f 0xfe 0x68
- 0x4a 0x04 0xc8 0x2c
- 0x4a 0x04 0xc1 0x6c
-
-# CHECK: ldp w3, w2, [x15], #16
-# CHECK: ldp x4, x9, [sp], #-16
-# CHECK: ldpsw x2, x3, [x14], #16
-# CHECK: ldpsw x2, x3, [sp], #-16
-# CHECK: ldp s10, s1, [x2], #64
-# CHECK: ldp d10, d1, [x2], #16
-
- 0xe3 0x09 0x82 0x28
- 0xe4 0x27 0xbf 0xa8
- 0x4a 0x04 0x88 0x2c
- 0x4a 0x04 0x81 0x6c
-
-# CHECK: stp w3, w2, [x15], #16
-# CHECK: stp x4, x9, [sp], #-16
-# CHECK: stp s10, s1, [x2], #64
-# CHECK: stp d10, d1, [x2], #16
-
-#-----------------------------------------------------------------------------
-# Load/Store pair (no-allocate)
-#-----------------------------------------------------------------------------
-
- 0xe3 0x09 0x42 0x28
- 0xe4 0x27 0x7f 0xa8
- 0x4a 0x04 0x48 0x2c
- 0x4a 0x04 0x40 0x6c
-
-# CHECK: ldnp w3, w2, [x15, #16]
-# CHECK: ldnp x4, x9, [sp, #-16]
-# CHECK: ldnp s10, s1, [x2, #64]
-# CHECK: ldnp d10, d1, [x2]
-
- 0xe3 0x09 0x02 0x28
- 0xe4 0x27 0x3f 0xa8
- 0x4a 0x04 0x08 0x2c
- 0x4a 0x04 0x00 0x6c
-
-# CHECK: stnp w3, w2, [x15, #16]
-# CHECK: stnp x4, x9, [sp, #-16]
-# CHECK: stnp s10, s1, [x2, #64]
-# CHECK: stnp d10, d1, [x2]
-
-#-----------------------------------------------------------------------------
-# Load/Store register offset
-#-----------------------------------------------------------------------------
-
- 0x00 0x68 0x60 0xb8
- 0x00 0x78 0x60 0xb8
- 0x00 0x68 0x60 0xf8
- 0x00 0x78 0x60 0xf8
- 0x00 0xe8 0x60 0xf8
-
-# CHECK: ldr w0, [x0, x0]
-# CHECK: ldr w0, [x0, x0, lsl #2]
-# CHECK: ldr x0, [x0, x0]
-# CHECK: ldr x0, [x0, x0, lsl #3]
-# CHECK: ldr x0, [x0, x0, sxtx]
-
- 0x21 0x68 0x62 0x3c
- 0x21 0x78 0x62 0x3c
- 0x21 0x68 0x62 0x7c
- 0x21 0x78 0x62 0x7c
- 0x21 0x68 0x62 0xbc
- 0x21 0x78 0x62 0xbc
- 0x21 0x68 0x62 0xfc
- 0x21 0x78 0x62 0xfc
- 0x21 0x68 0xe2 0x3c
- 0x21 0x78 0xe2 0x3c
-
-# CHECK: ldr b1, [x1, x2]
-# CHECK: ldr b1, [x1, x2, lsl #0]
-# CHECK: ldr h1, [x1, x2]
-# CHECK: ldr h1, [x1, x2, lsl #1]
-# CHECK: ldr s1, [x1, x2]
-# CHECK: ldr s1, [x1, x2, lsl #2]
-# CHECK: ldr d1, [x1, x2]
-# CHECK: ldr d1, [x1, x2, lsl #3]
-# CHECK: ldr q1, [x1, x2]
-# CHECK: ldr q1, [x1, x2, lsl #4]
-
- 0xe1 0x6b 0x23 0xfc
- 0xe1 0x5b 0x23 0xfc
- 0xe1 0x6b 0xa3 0x3c
- 0xe1 0x5b 0xa3 0x3c
-
-# CHECK: str d1, [sp, x3]
-# CHECK: str d1, [sp, x3, uxtw #3]
-# CHECK: str q1, [sp, x3]
-# CHECK: str q1, [sp, x3, uxtw #4]
-
-#-----------------------------------------------------------------------------
-# Load/Store exclusive
-#-----------------------------------------------------------------------------
-
- 0x26 0x7c 0x5f 0x08
- 0x26 0x7c 0x5f 0x48
- 0x27 0x0d 0x7f 0x88
- 0x27 0x0d 0x7f 0xc8
-
-# CHECK: ldxrb w6, [x1]
-# CHECK: ldxrh w6, [x1]
-# CHECK: ldxp w7, w3, [x9]
-# CHECK: ldxp x7, x3, [x9]
-
- 0x64 0x7c 0x01 0xc8
- 0x64 0x7c 0x01 0x88
- 0x64 0x7c 0x01 0x08
- 0x64 0x7c 0x01 0x48
- 0x22 0x18 0x21 0xc8
- 0x22 0x18 0x21 0x88
-
-# CHECK: stxr w1, x4, [x3]
-# CHECK: stxr w1, w4, [x3]
-# CHECK: stxrb w1, w4, [x3]
-# CHECK: stxrh w1, w4, [x3]
-# CHECK: stxp w1, x2, x6, [x1]
-# CHECK: stxp w1, w2, w6, [x1]
-
-#-----------------------------------------------------------------------------
-# Load-acquire/Store-release non-exclusive
-#-----------------------------------------------------------------------------
-
- 0xe4 0xff 0xdf 0x88
- 0xe4 0xff 0xdf 0xc8
- 0xe4 0xff 0xdf 0x08
- 0xe4 0xff 0xdf 0x48
-
-# CHECK: ldar w4, [sp]
-# CHECK: ldar x4, [sp]
-# CHECK: ldarb w4, [sp]
-# CHECK: ldarh w4, [sp]
-
- 0xc3 0xfc 0x9f 0x88
- 0xc3 0xfc 0x9f 0xc8
- 0xc3 0xfc 0x9f 0x08
- 0xc3 0xfc 0x9f 0x48
-
-# CHECK: stlr w3, [x6]
-# CHECK: stlr x3, [x6]
-# CHECK: stlrb w3, [x6]
-# CHECK: stlrh w3, [x6]
-
-#-----------------------------------------------------------------------------
-# Load-acquire/Store-release exclusive
-#-----------------------------------------------------------------------------
-
- 0x82 0xfc 0x5f 0x88
- 0x82 0xfc 0x5f 0xc8
- 0x82 0xfc 0x5f 0x08
- 0x82 0xfc 0x5f 0x48
- 0x22 0x98 0x7f 0x88
- 0x22 0x98 0x7f 0xc8
-
-# CHECK: ldaxr w2, [x4]
-# CHECK: ldaxr x2, [x4]
-# CHECK: ldaxrb w2, [x4]
-# CHECK: ldaxrh w2, [x4]
-# CHECK: ldaxp w2, w6, [x1]
-# CHECK: ldaxp x2, x6, [x1]
-
- 0x27 0xfc 0x08 0xc8
- 0x27 0xfc 0x08 0x88
- 0x27 0xfc 0x08 0x08
- 0x27 0xfc 0x08 0x48
- 0x22 0x98 0x21 0xc8
- 0x22 0x98 0x21 0x88
-
-# CHECK: stlxr w8, x7, [x1]
-# CHECK: stlxr w8, w7, [x1]
-# CHECK: stlxrb w8, w7, [x1]
-# CHECK: stlxrh w8, w7, [x1]
-# CHECK: stlxp w1, x2, x6, [x1]
-# CHECK: stlxp w1, w2, w6, [x1]
-
-#-----------------------------------------------------------------------------
-# Load/Store with explicit LSL values
-#-----------------------------------------------------------------------------
- 0x20 0x78 0xa0 0xb8
- 0x20 0x78 0x60 0xf8
- 0x20 0x78 0x20 0xf8
- 0x20 0x78 0x60 0xb8
- 0x20 0x78 0x20 0xb8
- 0x20 0x78 0xe0 0x3c
- 0x20 0x78 0xa0 0x3c
- 0x20 0x78 0x60 0xfc
- 0x20 0x78 0x20 0xfc
- 0x20 0x78 0x60 0xbc
- 0x20 0x78 0x20 0xbc
- 0x20 0x78 0x60 0x7c
- 0x20 0x78 0x60 0x3c
- 0x20 0x78 0x60 0x38
- 0x20 0x78 0x20 0x38
- 0x20 0x78 0xe0 0x38
- 0x20 0x78 0x60 0x78
- 0x20 0x78 0x20 0x78
- 0x20 0x78 0xe0 0x78
- 0x20 0x78 0xa0 0x38
- 0x20 0x78 0xa0 0x78
-
-# CHECK: ldrsw x0, [x1, x0, lsl #2]
-# CHECK: ldr x0, [x1, x0, lsl #3]
-# CHECK: str x0, [x1, x0, lsl #3]
-# CHECK: ldr w0, [x1, x0, lsl #2]
-# CHECK: str w0, [x1, x0, lsl #2]
-# CHECK: ldr q0, [x1, x0, lsl #4]
-# CHECK: str q0, [x1, x0, lsl #4]
-# CHECK: ldr d0, [x1, x0, lsl #3]
-# CHECK: str d0, [x1, x0, lsl #3]
-# CHECK: ldr s0, [x1, x0, lsl #2]
-# CHECK: str s0, [x1, x0, lsl #2]
-# CHECK: ldr h0, [x1, x0, lsl #1]
-# CHECK: ldr b0, [x1, x0, lsl #0]
-# CHECK: ldrb w0, [x1, x0, lsl #0]
-# CHECK: strb w0, [x1, x0, lsl #0]
-# CHECK: ldrsb w0, [x1, x0, lsl #0]
-# CHECK: ldrh w0, [x1, x0, lsl #1]
-# CHECK: strh w0, [x1, x0, lsl #1]
-# CHECK: ldrsh w0, [x1, x0, lsl #1]
-# CHECK: ldrsb x0, [x1, x0, lsl #0]
-# CHECK: ldrsh x0, [x1, x0, lsl #1]
diff --git a/test/MC/Disassembler/ARM64/scalar-fp.txt b/test/MC/Disassembler/ARM64/scalar-fp.txt
deleted file mode 100644
index b242df5..0000000
--- a/test/MC/Disassembler/ARM64/scalar-fp.txt
+++ /dev/null
@@ -1,255 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-
-#-----------------------------------------------------------------------------
-# Floating-point arithmetic
-#-----------------------------------------------------------------------------
-
-0x41 0xc0 0x20 0x1e
-0x41 0xc0 0x60 0x1e
-
-# CHECK: fabs s1, s2
-# CHECK: fabs d1, d2
-
-0x41 0x28 0x23 0x1e
-0x41 0x28 0x63 0x1e
-
-# CHECK: fadd s1, s2, s3
-# CHECK: fadd d1, d2, d3
-
-0x41 0x18 0x23 0x1e
-0x41 0x18 0x63 0x1e
-
-# CHECK: fdiv s1, s2, s3
-# CHECK: fdiv d1, d2, d3
-
-0x41 0x10 0x03 0x1f
-0x41 0x10 0x43 0x1f
-
-# CHECK: fmadd s1, s2, s3, s4
-# CHECK: fmadd d1, d2, d3, d4
-
-0x41 0x48 0x23 0x1e
-0x41 0x48 0x63 0x1e
-0x41 0x68 0x23 0x1e
-0x41 0x68 0x63 0x1e
-
-# CHECK: fmax s1, s2, s3
-# CHECK: fmax d1, d2, d3
-# CHECK: fmaxnm s1, s2, s3
-# CHECK: fmaxnm d1, d2, d3
-
-0x41 0x58 0x23 0x1e
-0x41 0x58 0x63 0x1e
-0x41 0x78 0x23 0x1e
-0x41 0x78 0x63 0x1e
-
-# CHECK: fmin s1, s2, s3
-# CHECK: fmin d1, d2, d3
-# CHECK: fminnm s1, s2, s3
-# CHECK: fminnm d1, d2, d3
-
-0x41 0x90 0x03 0x1f
-0x41 0x90 0x43 0x1f
-
-# CHECK: fmsub s1, s2, s3, s4
-# CHECK: fmsub d1, d2, d3, d4
-
-0x41 0x08 0x23 0x1e
-0x41 0x08 0x63 0x1e
-
-# CHECK: fmul s1, s2, s3
-# CHECK: fmul d1, d2, d3
-
-0x41 0x40 0x21 0x1e
-0x41 0x40 0x61 0x1e
-
-# CHECK: fneg s1, s2
-# CHECK: fneg d1, d2
-
-0x41 0x10 0x23 0x1f
-0x41 0x10 0x63 0x1f
-
-# CHECK: fnmadd s1, s2, s3, s4
-# CHECK: fnmadd d1, d2, d3, d4
-
-0x41 0x90 0x23 0x1f
-0x41 0x90 0x63 0x1f
-
-# CHECK: fnmsub s1, s2, s3, s4
-# CHECK: fnmsub d1, d2, d3, d4
-
-0x41 0x88 0x23 0x1e
-0x41 0x88 0x63 0x1e
-
-# CHECK: fnmul s1, s2, s3
-# CHECK: fnmul d1, d2, d3
-
-0x41 0xc0 0x21 0x1e
-0x41 0xc0 0x61 0x1e
-
-# CHECK: fsqrt s1, s2
-# CHECK: fsqrt d1, d2
-
-0x41 0x38 0x23 0x1e
-0x41 0x38 0x63 0x1e
-
-# CHECK: fsub s1, s2, s3
-# CHECK: fsub d1, d2, d3
-
-#-----------------------------------------------------------------------------
-# Floating-point comparison
-#-----------------------------------------------------------------------------
-
-0x20 0x04 0x22 0x1e
-0x20 0x04 0x62 0x1e
-0x30 0x04 0x22 0x1e
-0x30 0x04 0x62 0x1e
-
-# CHECK: fccmp s1, s2, #0, eq
-# CHECK: fccmp d1, d2, #0, eq
-# CHECK: fccmpe s1, s2, #0, eq
-# CHECK: fccmpe d1, d2, #0, eq
-
-0x20 0x20 0x22 0x1e
-0x20 0x20 0x62 0x1e
-0x28 0x20 0x20 0x1e
-0x28 0x20 0x60 0x1e
-0x30 0x20 0x22 0x1e
-0x30 0x20 0x62 0x1e
-0x38 0x20 0x20 0x1e
-0x38 0x20 0x60 0x1e
-
-# CHECK: fcmp s1, s2
-# CHECK: fcmp d1, d2
-# CHECK: fcmp s1, #0.0
-# CHECK: fcmp d1, #0.0
-# CHECK: fcmpe s1, s2
-# CHECK: fcmpe d1, d2
-# CHECK: fcmpe s1, #0.0
-# CHECK: fcmpe d1, #0.0
-
-#-----------------------------------------------------------------------------
-# Floating-point conditional select
-#-----------------------------------------------------------------------------
-
-0x41 0x0c 0x23 0x1e
-0x41 0x0c 0x63 0x1e
-
-# CHECK: fcsel s1, s2, s3, eq
-# CHECK: fcsel d1, d2, d3, eq
-
-#-----------------------------------------------------------------------------
-# Floating-point convert
-#-----------------------------------------------------------------------------
-
-0x41 0xc0 0x63 0x1e
-0x41 0x40 0x62 0x1e
-0x41 0xc0 0xe2 0x1e
-0x41 0x40 0xe2 0x1e
-0x41 0xc0 0x22 0x1e
-0x41 0xc0 0x23 0x1e
-
-# CHECK: fcvt h1, d2
-# CHECK: fcvt s1, d2
-# CHECK: fcvt d1, h2
-# CHECK: fcvt s1, h2
-# CHECK: fcvt d1, s2
-# CHECK: fcvt h1, s2
-
-0x41 0x00 0x44 0x1e
-0x41 0x04 0x44 0x1e
-0x41 0x00 0x44 0x9e
-0x41 0x04 0x44 0x9e
-0x41 0x00 0x04 0x1e
-0x41 0x04 0x04 0x1e
-0x41 0x00 0x04 0x9e
-0x41 0x04 0x04 0x9e
-
-#-----------------------------------------------------------------------------
-# Floating-point move
-#-----------------------------------------------------------------------------
-
-0x41 0x00 0x27 0x1e
-0x41 0x00 0x26 0x1e
-0x41 0x00 0x67 0x9e
-0x41 0x00 0x66 0x9e
-
-# CHECK: fmov s1, w2
-# CHECK: fmov w1, s2
-# CHECK: fmov d1, x2
-# CHECK: fmov x1, d2
-
-0x01 0x10 0x28 0x1e
-0x01 0x10 0x68 0x1e
-0x01 0xf0 0x7b 0x1e
-0x01 0xf0 0x6b 0x1e
-
-# CHECK: fmov s1, #1.250000e-01
-# CHECK: fmov d1, #1.250000e-01
-# CHECK: fmov d1, #-4.843750e-01
-# CHECK: fmov d1, #4.843750e-01
-
-0x41 0x40 0x20 0x1e
-0x41 0x40 0x60 0x1e
-
-# CHECK: fmov s1, s2
-# CHECK: fmov d1, d2
-
-#-----------------------------------------------------------------------------
-# Floating-point round to integral
-#-----------------------------------------------------------------------------
-
-0x41 0x40 0x26 0x1e
-0x41 0x40 0x66 0x1e
-
-# CHECK: frinta s1, s2
-# CHECK: frinta d1, d2
-
-0x41 0xc0 0x27 0x1e
-0x41 0xc0 0x67 0x1e
-
-# CHECK: frinti s1, s2
-# CHECK: frinti d1, d2
-
-0x41 0x40 0x25 0x1e
-0x41 0x40 0x65 0x1e
-
-# CHECK: frintm s1, s2
-# CHECK: frintm d1, d2
-
-0x41 0x40 0x24 0x1e
-0x41 0x40 0x64 0x1e
-
-# CHECK: frintn s1, s2
-# CHECK: frintn d1, d2
-
-0x41 0xc0 0x24 0x1e
-0x41 0xc0 0x64 0x1e
-
-# CHECK: frintp s1, s2
-# CHECK: frintp d1, d2
-
-0x41 0x40 0x27 0x1e
-0x41 0x40 0x67 0x1e
-
-# CHECK: frintx s1, s2
-# CHECK: frintx d1, d2
-
-0x41 0xc0 0x25 0x1e
-0x41 0xc0 0x65 0x1e
-
-# CHECK: frintz s1, s2
-# CHECK: frintz d1, d2
-
- 0x00 0x3c 0xe0 0x7e
- 0x00 0x8c 0xe0 0x5e
-
-# CHECK: cmhs d0, d0, d0
-# CHECK: cmtst d0, d0, d0
-
-0x00 0x00 0xaf 0x9e
-0x00 0x00 0xae 0x9e
-
-# CHECK: fmov.d v0[1], x0
-# CHECK: fmov.d x0, v0[1]
-
diff --git a/test/MC/Disassembler/ARM64/system.txt b/test/MC/Disassembler/ARM64/system.txt
deleted file mode 100644
index cefa635..0000000
--- a/test/MC/Disassembler/ARM64/system.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-# RUN: llvm-mc -triple arm64-apple-darwin --disassemble < %s | FileCheck %s
-
-
-#-----------------------------------------------------------------------------
-# Hint encodings
-#-----------------------------------------------------------------------------
-
- 0x1f 0x20 0x03 0xd5
-# CHECK: nop
- 0x9f 0x20 0x03 0xd5
-# CHECK: sev
- 0xbf 0x20 0x03 0xd5
-# CHECK: sevl
- 0x5f 0x20 0x03 0xd5
-# CHECK: wfe
- 0x7f 0x20 0x03 0xd5
-# CHECK: wfi
- 0x3f 0x20 0x03 0xd5
-# CHECK: yield
-
-#-----------------------------------------------------------------------------
-# Single-immediate operand instructions
-#-----------------------------------------------------------------------------
-
- 0x5f 0x3a 0x03 0xd5
-# CHECK: clrex #10
- 0xdf 0x3f 0x03 0xd5
-# CHECK: isb{{$}}
- 0xbf 0x33 0x03 0xd5
-# CHECK: dmb osh
- 0x9f 0x37 0x03 0xd5
-# CHECK: dsb nsh
-
-#-----------------------------------------------------------------------------
-# Generic system instructions
-#-----------------------------------------------------------------------------
- 0xff 0x05 0x0a 0xd5
- 0xe7 0x6a 0x0f 0xd5
- 0xf4 0x3f 0x2e 0xd5
- 0xbf 0x40 0x00 0xd5
- 0x00 0x00 0x10 0xd5
- 0x00 0x00 0x30 0xd5
-
-# CHECK: sys #2, c0, c5, #7
-# CHECK: sys #7, c6, c10, #7, x7
-# CHECK: sysl x20, #6, c3, c15, #7
-# CHECK: msr SPSel, #0
-# CHECK: msr S2_0_C0_C0_0, x0
-# CHECK: mrs x0, S2_0_C0_C0_0
-
- 0x40 0xc0 0x1e 0xd5
- 0x40 0xc0 0x1a 0xd5
- 0x40 0xc0 0x19 0xd5
-
-# CHECK: msr RMR_EL3, x0
-# CHECK: msr RMR_EL2, x0
-# CHECK: msr RMR_EL1, x0
-
diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt
new file mode 100644
index 0000000..adbcd99
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r6.txt
@@ -0,0 +1,116 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 | FileCheck %s
+
+0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
+0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
+0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
+0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
+0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
+0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
+0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
+
+# FIXME: Don't check the immediate on these for the moment, the encode/decode
+# functions are not inverses of eachother.
+# The immediate should be 4 but the disassembler currently emits 8
+0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0,
+0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31,
+0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0,
+0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31,
+# FIXME: Don't check the immediate on these for the moment, the encode/decode
+# functions are not inverses of eachother.
+# The immediate should be 8 but the disassembler currently emits 12
+0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0,
+0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31,
+0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0,
+0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31,
+
+0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
+# FIXME: Don't check the immediates on the b<cc>zalc instructions for the
+# moment; the encode/decode functions are not inverses of each other.
+0x20 0x02 0x01 0x4d # CHECK: beqzalc $2,
+0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
+0x60 0x02 0x01 0x4d # CHECK: bnezalc $2,
+0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0x18 0x42 0x01 0x4d # CHECK: bgezalc $2,
+0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
+0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
+0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
+0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2,
+0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
+0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2,
+0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
+0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
+0x18 0x02 0x01 0x4d # CHECK: blezalc $2,
+0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
+0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
+0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
+0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
+0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
+0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
+0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4
+0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
+0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
+0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4
+0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
+0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4
+0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4
+0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
+0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
+# 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256
+# 0xd8 0x05 0x01 0x00 # CHECK-TODO: jic $5, 256
+0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
+0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
+0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
+0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
+0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
+0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
+0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
+0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
+0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
+0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
+0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
+0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
+0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
+0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
+0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
+0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
+0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
+0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
+0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
+0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
+0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
+0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
+0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
+0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
+0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
+0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
+0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
+0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt
new file mode 100644
index 0000000..f5bb14e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r6.txt
@@ -0,0 +1,129 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips64r6 | FileCheck %s
+
+0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
+0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
+0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
+0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
+0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
+0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
+0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
+
+# FIXME: Don't check the immediate on these for the moment; the encode/decode
+# functions are not inverses of each other.
+# The immediate should be 4 but the disassembler currently emits 8
+0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0,
+0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31,
+0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0,
+0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31,
+# FIXME: Don't check the immediate on these for the moment; the encode/decode
+# functions are not inverses of each other.
+# The immediate should be 8 but the disassembler currently emits 12
+0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0,
+0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31,
+0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0,
+0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31,
+
+0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
+# FIXME: Don't check the immediate on the bcczalc instructions for the
+# moment; the encode/decode functions are not inverses of each other.
+0x20 0x02 0x01 0x4d # CHECK: beqzalc $2,
+0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
+0x60 0x02 0x01 0x4d # CHECK: bnezalc $2,
+0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0x18 0x42 0x01 0x4d # CHECK: bgezalc $2,
+0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
+0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
+0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
+0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2,
+0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
+0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2,
+0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
+0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
+0x18 0x02 0x01 0x4d # CHECK: blezalc $2,
+0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
+0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
+0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
+0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
+0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
+0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
+0x46 0x84 0x18 0x80 # CHECK: cmp.f.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x80 # CHECK: cmp.f.d $f2, $f3, $f4
+0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
+0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x84 # CHECK: cmp.olt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x84 # CHECK: cmp.olt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
+0x46 0x84 0x18 0x86 # CHECK: cmp.ole.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x86 # CHECK: cmp.ole.d $f2, $f3, $f4
+0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
+0x46 0x84 0x18 0x88 # CHECK: cmp.sf.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x88 # CHECK: cmp.sf.d $f2, $f3, $f4
+0x46 0x84 0x18 0x89 # CHECK: cmp.ngle.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x89 # CHECK: cmp.ngle.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8b # CHECK: cmp.ngl.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8b # CHECK: cmp.ngl.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8c # CHECK: cmp.lt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8c # CHECK: cmp.lt.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8d # CHECK: cmp.nge.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8d # CHECK: cmp.nge.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8e # CHECK: cmp.le.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8e # CHECK: cmp.le.d $f2, $f3, $f4
+0x46 0x84 0x18 0x8f # CHECK: cmp.ngt.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x8f # CHECK: cmp.ngt.d $f2, $f3, $f4
+0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5
+0x74 0x62 0x12 0x34 # CHECK: daui $3, $2, 4660
+0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136
+0x04 0x7e 0xab 0xcd # CHECK: dati $3, -21555
+0x7c 0x02 0x20 0x24 # CHECK: dbitswap $4, $2
+0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
+0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
+# 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256
+# 0xd8 0x05 0x01 0x00 # CHECK-TODO: jic $5, 256
+0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
+0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
+0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
+0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
+0x00 0x64 0x10 0x9e # CHECK: ddiv $2, $3, $4
+0x00 0x64 0x10 0x9f # CHECK: ddivu $2, $3, $4
+0x00 0x64 0x10 0xde # CHECK: dmod $2, $3, $4
+0x00 0x64 0x10 0xdf # CHECK: dmodu $2, $3, $4
+0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
+0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
+0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
+0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
+0x00 0x64 0x10 0xb8 # CHECK: dmul $2, $3, $4
+0x00 0x64 0x10 0xf8 # CHECK: dmuh $2, $3, $4
+0x00 0x64 0x10 0xb9 # CHECK: dmulu $2, $3, $4
+0x00 0x64 0x10 0xf9 # CHECK: dmuhu $2, $3, $4
+0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
+0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
+0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
+0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
+0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
+0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
+0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
+0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
+0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
+0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
+0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
+0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
+0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
+0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
+0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
+0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
+0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
+0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
+0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
+0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
+0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
diff --git a/test/MC/Disassembler/Mips/msa/test_2r.txt b/test/MC/Disassembler/Mips/msa/test_2r.txt
new file mode 100644
index 0000000..7faa13c
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_2r.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x7b 0x00 0x4f 0x9e # CHECK: fill.b $w30, $9
+0x7b 0x01 0xbf 0xde # CHECK: fill.h $w31, $23
+0x7b 0x02 0xc4 0x1e # CHECK: fill.w $w16, $24
+0x7b 0x08 0x05 0x5e # CHECK: nloc.b $w21, $w0
+0x7b 0x09 0xfc 0x9e # CHECK: nloc.h $w18, $w31
+0x7b 0x0a 0xb8 0x9e # CHECK: nloc.w $w2, $w23
+0x7b 0x0b 0x51 0x1e # CHECK: nloc.d $w4, $w10
+0x7b 0x0c 0x17 0xde # CHECK: nlzc.b $w31, $w2
+0x7b 0x0d 0xb6 0xde # CHECK: nlzc.h $w27, $w22
+0x7b 0x0e 0xea 0x9e # CHECK: nlzc.w $w10, $w29
+0x7b 0x0f 0x4e 0x5e # CHECK: nlzc.d $w25, $w9
+0x7b 0x04 0x95 0x1e # CHECK: pcnt.b $w20, $w18
+0x7b 0x05 0x40 0x1e # CHECK: pcnt.h $w0, $w8
+0x7b 0x06 0x4d 0xde # CHECK: pcnt.w $w23, $w9
+0x7b 0x07 0xc5 0x5e # CHECK: pcnt.d $w21, $w24
diff --git a/test/MC/Disassembler/Mips/msa/test_2r_msa64.txt b/test/MC/Disassembler/Mips/msa/test_2r_msa64.txt
new file mode 100644
index 0000000..f212390
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_2r_msa64.txt
@@ -0,0 +1,3 @@
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r2 -mattr=+msa | FileCheck %s
+
+0x7b 0x03 0x4e 0xde # CHECK: fill.d $w27, $9
diff --git a/test/MC/Disassembler/Mips/msa/test_2rf.txt b/test/MC/Disassembler/Mips/msa/test_2rf.txt
new file mode 100644
index 0000000..e004f11
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_2rf.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x7b 0x20 0x66 0x9e # CHECK: fclass.w $w26, $w12
+0x7b 0x21 0x8e 0x1e # CHECK: fclass.d $w24, $w17
+0x7b 0x30 0x02 0x1e # CHECK: fexupl.w $w8, $w0
+0x7b 0x31 0xec 0x5e # CHECK: fexupl.d $w17, $w29
+0x7b 0x32 0x23 0x5e # CHECK: fexupr.w $w13, $w4
+0x7b 0x33 0x11 0x5e # CHECK: fexupr.d $w5, $w2
+0x7b 0x3c 0xed 0x1e # CHECK: ffint_s.w $w20, $w29
+0x7b 0x3d 0x7b 0x1e # CHECK: ffint_s.d $w12, $w15
+0x7b 0x3e 0xd9 0xde # CHECK: ffint_u.w $w7, $w27
+0x7b 0x3f 0x84 0xde # CHECK: ffint_u.d $w19, $w16
+0x7b 0x34 0x6f 0xde # CHECK: ffql.w $w31, $w13
+0x7b 0x35 0x6b 0x1e # CHECK: ffql.d $w12, $w13
+0x7b 0x36 0xf6 0xde # CHECK: ffqr.w $w27, $w30
+0x7b 0x37 0x7f 0x9e # CHECK: ffqr.d $w30, $w15
+0x7b 0x2e 0xfe 0x5e # CHECK: flog2.w $w25, $w31
+0x7b 0x2f 0x54 0x9e # CHECK: flog2.d $w18, $w10
+0x7b 0x2c 0x79 0xde # CHECK: frint.w $w7, $w15
+0x7b 0x2d 0xb5 0x5e # CHECK: frint.d $w21, $w22
+0x7b 0x2a 0x04 0xde # CHECK: frcp.w $w19, $w0
+0x7b 0x2b 0x71 0x1e # CHECK: frcp.d $w4, $w14
+0x7b 0x28 0x8b 0x1e # CHECK: frsqrt.w $w12, $w17
+0x7b 0x29 0x5d 0xde # CHECK: frsqrt.d $w23, $w11
+0x7b 0x26 0x58 0x1e # CHECK: fsqrt.w $w0, $w11
+0x7b 0x27 0x63 0xde # CHECK: fsqrt.d $w15, $w12
+0x7b 0x38 0x2f 0x9e # CHECK: ftint_s.w $w30, $w5
+0x7b 0x39 0xb9 0x5e # CHECK: ftint_s.d $w5, $w23
+0x7b 0x3a 0x75 0x1e # CHECK: ftint_u.w $w20, $w14
+0x7b 0x3b 0xad 0xde # CHECK: ftint_u.d $w23, $w21
+0x7b 0x22 0x8f 0x5e # CHECK: ftrunc_s.w $w29, $w17
+0x7b 0x23 0xdb 0x1e # CHECK: ftrunc_s.d $w12, $w27
+0x7b 0x24 0x7c 0x5e # CHECK: ftrunc_u.w $w17, $w15
+0x7b 0x25 0xd9 0x5e # CHECK: ftrunc_u.d $w5, $w27
diff --git a/test/MC/Disassembler/Mips/msa/test_3r.txt b/test/MC/Disassembler/Mips/msa/test_3r.txt
new file mode 100644
index 0000000..2ef3a89
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_3r.txt
@@ -0,0 +1,244 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x78 0x04 0x4e 0x90 # CHECK: add_a.b $w26, $w9, $w4
+0x78 0x3f 0xdd 0xd0 # CHECK: add_a.h $w23, $w27, $w31
+0x78 0x56 0x32 0xd0 # CHECK: add_a.w $w11, $w6, $w22
+0x78 0x60 0x51 0x90 # CHECK: add_a.d $w6, $w10, $w0
+0x78 0x93 0xc4 0xd0 # CHECK: adds_a.b $w19, $w24, $w19
+0x78 0xa4 0x36 0x50 # CHECK: adds_a.h $w25, $w6, $w4
+0x78 0xdb 0x8e 0x50 # CHECK: adds_a.w $w25, $w17, $w27
+0x78 0xfa 0x93 0xd0 # CHECK: adds_a.d $w15, $w18, $w26
+0x79 0x13 0x5f 0x50 # CHECK: adds_s.b $w29, $w11, $w19
+0x79 0x3a 0xb9 0x50 # CHECK: adds_s.h $w5, $w23, $w26
+0x79 0x4d 0x74 0x10 # CHECK: adds_s.w $w16, $w14, $w13
+0x79 0x7c 0x70 0x90 # CHECK: adds_s.d $w2, $w14, $w28
+0x79 0x8e 0x88 0xd0 # CHECK: adds_u.b $w3, $w17, $w14
+0x79 0xa4 0xf2 0x90 # CHECK: adds_u.h $w10, $w30, $w4
+0x79 0xd4 0x93 0xd0 # CHECK: adds_u.w $w15, $w18, $w20
+0x79 0xe9 0x57 0x90 # CHECK: adds_u.d $w30, $w10, $w9
+0x78 0x15 0xa6 0x0e # CHECK: addv.b $w24, $w20, $w21
+0x78 0x3b 0x69 0x0e # CHECK: addv.h $w4, $w13, $w27
+0x78 0x4e 0x5c 0xce # CHECK: addv.w $w19, $w11, $w14
+0x78 0x7f 0xa8 0x8e # CHECK: addv.d $w2, $w21, $w31
+0x7a 0x03 0x85 0xd1 # CHECK: asub_s.b $w23, $w16, $w3
+0x7a 0x39 0x8d 0x91 # CHECK: asub_s.h $w22, $w17, $w25
+0x7a 0x49 0x0e 0x11 # CHECK: asub_s.w $w24, $w1, $w9
+0x7a 0x6c 0x63 0x51 # CHECK: asub_s.d $w13, $w12, $w12
+0x7a 0x8b 0xea 0x91 # CHECK: asub_u.b $w10, $w29, $w11
+0x7a 0xaf 0x4c 0x91 # CHECK: asub_u.h $w18, $w9, $w15
+0x7a 0xdf 0x9a 0x91 # CHECK: asub_u.w $w10, $w19, $w31
+0x7a 0xe0 0x54 0x51 # CHECK: asub_u.d $w17, $w10, $w0
+0x7a 0x01 0x28 0x90 # CHECK: ave_s.b $w2, $w5, $w1
+0x7a 0x29 0x9c 0x10 # CHECK: ave_s.h $w16, $w19, $w9
+0x7a 0x45 0xfc 0x50 # CHECK: ave_s.w $w17, $w31, $w5
+0x7a 0x6a 0xce 0xd0 # CHECK: ave_s.d $w27, $w25, $w10
+0x7a 0x89 0x9c 0x10 # CHECK: ave_u.b $w16, $w19, $w9
+0x7a 0xab 0xe7 0x10 # CHECK: ave_u.h $w28, $w28, $w11
+0x7a 0xcb 0x62 0xd0 # CHECK: ave_u.w $w11, $w12, $w11
+0x7a 0xfc 0x9f 0x90 # CHECK: ave_u.d $w30, $w19, $w28
+0x7b 0x02 0x86 0x90 # CHECK: aver_s.b $w26, $w16, $w2
+0x7b 0x3b 0xdf 0xd0 # CHECK: aver_s.h $w31, $w27, $w27
+0x7b 0x59 0x97 0x10 # CHECK: aver_s.w $w28, $w18, $w25
+0x7b 0x7b 0xaf 0x50 # CHECK: aver_s.d $w29, $w21, $w27
+0x7b 0x83 0xd7 0x50 # CHECK: aver_u.b $w29, $w26, $w3
+0x7b 0xa9 0x94 0x90 # CHECK: aver_u.h $w18, $w18, $w9
+0x7b 0xdd 0xcc 0x50 # CHECK: aver_u.w $w17, $w25, $w29
+0x7b 0xf3 0xb5 0x90 # CHECK: aver_u.d $w22, $w22, $w19
+0x79 0x9d 0x78 0x8d # CHECK: bclr.b $w2, $w15, $w29
+0x79 0xbc 0xac 0x0d # CHECK: bclr.h $w16, $w21, $w28
+0x79 0xc9 0x14 0xcd # CHECK: bclr.w $w19, $w2, $w9
+0x79 0xe4 0xfe 0xcd # CHECK: bclr.d $w27, $w31, $w4
+0x7b 0x18 0x81 0x4d # CHECK: binsl.b $w5, $w16, $w24
+0x7b 0x2a 0x2f 0x8d # CHECK: binsl.h $w30, $w5, $w10
+0x7b 0x4d 0x7b 0x8d # CHECK: binsl.w $w14, $w15, $w13
+0x7b 0x6c 0xa5 0xcd # CHECK: binsl.d $w23, $w20, $w12
+0x7b 0x82 0x5d 0x8d # CHECK: binsr.b $w22, $w11, $w2
+0x7b 0xa6 0xd0 0x0d # CHECK: binsr.h $w0, $w26, $w6
+0x7b 0xdc 0x1e 0x8d # CHECK: binsr.w $w26, $w3, $w28
+0x7b 0xf5 0x00 0x0d # CHECK: binsr.d $w0, $w0, $w21
+0x7a 0x98 0x58 0x0d # CHECK: bneg.b $w0, $w11, $w24
+0x7a 0xa4 0x87 0x0d # CHECK: bneg.h $w28, $w16, $w4
+0x7a 0xd3 0xd0 0xcd # CHECK: bneg.w $w3, $w26, $w19
+0x7a 0xef 0xeb 0x4d # CHECK: bneg.d $w13, $w29, $w15
+0x7a 0x1f 0x2f 0xcd # CHECK: bset.b $w31, $w5, $w31
+0x7a 0x26 0x63 0x8d # CHECK: bset.h $w14, $w12, $w6
+0x7a 0x4c 0x4f 0xcd # CHECK: bset.w $w31, $w9, $w12
+0x7a 0x65 0xb1 0x4d # CHECK: bset.d $w5, $w22, $w5
+0x78 0x12 0xff 0xcf # CHECK: ceq.b $w31, $w31, $w18
+0x78 0x29 0xda 0x8f # CHECK: ceq.h $w10, $w27, $w9
+0x78 0x4e 0x2a 0x4f # CHECK: ceq.w $w9, $w5, $w14
+0x78 0x60 0x89 0x4f # CHECK: ceq.d $w5, $w17, $w0
+0x7a 0x09 0x25 0xcf # CHECK: cle_s.b $w23, $w4, $w9
+0x7a 0x33 0xdd 0x8f # CHECK: cle_s.h $w22, $w27, $w19
+0x7a 0x4a 0xd7 0x8f # CHECK: cle_s.w $w30, $w26, $w10
+0x7a 0x6a 0x2c 0x8f # CHECK: cle_s.d $w18, $w5, $w10
+0x7a 0x80 0xc8 0x4f # CHECK: cle_u.b $w1, $w25, $w0
+0x7a 0xbd 0x01 0xcf # CHECK: cle_u.h $w7, $w0, $w29
+0x7a 0xc1 0x96 0x4f # CHECK: cle_u.w $w25, $w18, $w1
+0x7a 0xfe 0x01 0x8f # CHECK: cle_u.d $w6, $w0, $w30
+0x79 0x15 0x16 0x4f # CHECK: clt_s.b $w25, $w2, $w21
+0x79 0x29 0x98 0x8f # CHECK: clt_s.h $w2, $w19, $w9
+0x79 0x50 0x45 0xcf # CHECK: clt_s.w $w23, $w8, $w16
+0x79 0x6c 0xf1 0xcf # CHECK: clt_s.d $w7, $w30, $w12
+0x79 0x8d 0xf8 0x8f # CHECK: clt_u.b $w2, $w31, $w13
+0x79 0xb7 0xfc 0x0f # CHECK: clt_u.h $w16, $w31, $w23
+0x79 0xc9 0xc0 0xcf # CHECK: clt_u.w $w3, $w24, $w9
+0x79 0xe1 0x01 0xcf # CHECK: clt_u.d $w7, $w0, $w1
+0x7a 0x12 0x1f 0x52 # CHECK: div_s.b $w29, $w3, $w18
+0x7a 0x2d 0x84 0x52 # CHECK: div_s.h $w17, $w16, $w13
+0x7a 0x5e 0xc9 0x12 # CHECK: div_s.w $w4, $w25, $w30
+0x7a 0x74 0x4f 0xd2 # CHECK: div_s.d $w31, $w9, $w20
+0x7a 0x8a 0xe9 0x92 # CHECK: div_u.b $w6, $w29, $w10
+0x7a 0xae 0xae 0x12 # CHECK: div_u.h $w24, $w21, $w14
+0x7a 0xd9 0x77 0x52 # CHECK: div_u.w $w29, $w14, $w25
+0x7a 0xf5 0x0f 0xd2 # CHECK: div_u.d $w31, $w1, $w21
+0x78 0x39 0xb5 0xd3 # CHECK: dotp_s.h $w23, $w22, $w25
+0x78 0x45 0x75 0x13 # CHECK: dotp_s.w $w20, $w14, $w5
+0x78 0x76 0x14 0x53 # CHECK: dotp_s.d $w17, $w2, $w22
+0x78 0xa6 0x13 0x53 # CHECK: dotp_u.h $w13, $w2, $w6
+0x78 0xd5 0xb3 0xd3 # CHECK: dotp_u.w $w15, $w22, $w21
+0x78 0xfa 0x81 0x13 # CHECK: dotp_u.d $w4, $w16, $w26
+0x79 0x36 0xe0 0x53 # CHECK: dpadd_s.h $w1, $w28, $w22
+0x79 0x4c 0x0a 0x93 # CHECK: dpadd_s.w $w10, $w1, $w12
+0x79 0x7b 0xa8 0xd3 # CHECK: dpadd_s.d $w3, $w21, $w27
+0x79 0xb4 0x2c 0x53 # CHECK: dpadd_u.h $w17, $w5, $w20
+0x79 0xd0 0x46 0x13 # CHECK: dpadd_u.w $w24, $w8, $w16
+0x79 0xf0 0xeb 0xd3 # CHECK: dpadd_u.d $w15, $w29, $w16
+0x7a 0x2c 0x59 0x13 # CHECK: dpsub_s.h $w4, $w11, $w12
+0x7a 0x46 0x39 0x13 # CHECK: dpsub_s.w $w4, $w7, $w6
+0x7a 0x7c 0x67 0xd3 # CHECK: dpsub_s.d $w31, $w12, $w28
+0x7a 0xb1 0xc9 0x13 # CHECK: dpsub_u.h $w4, $w25, $w17
+0x7a 0xd0 0xcc 0xd3 # CHECK: dpsub_u.w $w19, $w25, $w16
+0x7a 0xfa 0x51 0xd3 # CHECK: dpsub_u.d $w7, $w10, $w26
+0x7a 0x22 0xc7 0x15 # CHECK: hadd_s.h $w28, $w24, $w2
+0x7a 0x4b 0x8e 0x15 # CHECK: hadd_s.w $w24, $w17, $w11
+0x7a 0x74 0x7c 0x55 # CHECK: hadd_s.d $w17, $w15, $w20
+0x7a 0xb1 0xeb 0x15 # CHECK: hadd_u.h $w12, $w29, $w17
+0x7a 0xc6 0x2a 0x55 # CHECK: hadd_u.w $w9, $w5, $w6
+0x7a 0xe6 0xa0 0x55 # CHECK: hadd_u.d $w1, $w20, $w6
+0x7b 0x3d 0x74 0x15 # CHECK: hsub_s.h $w16, $w14, $w29
+0x7b 0x4b 0x6a 0x55 # CHECK: hsub_s.w $w9, $w13, $w11
+0x7b 0x6e 0x97 0x95 # CHECK: hsub_s.d $w30, $w18, $w14
+0x7b 0xae 0x61 0xd5 # CHECK: hsub_u.h $w7, $w12, $w14
+0x7b 0xc5 0x2d 0x55 # CHECK: hsub_u.w $w21, $w5, $w5
+0x7b 0xff 0x62 0xd5 # CHECK: hsub_u.d $w11, $w12, $w31
+0x7b 0x1e 0x84 0x94 # CHECK: ilvev.b $w18, $w16, $w30
+0x7b 0x2d 0x03 0x94 # CHECK: ilvev.h $w14, $w0, $w13
+0x7b 0x56 0xcb 0x14 # CHECK: ilvev.w $w12, $w25, $w22
+0x7b 0x63 0xdf 0x94 # CHECK: ilvev.d $w30, $w27, $w3
+0x7a 0x15 0x1f 0x54 # CHECK: ilvl.b $w29, $w3, $w21
+0x7a 0x31 0x56 0xd4 # CHECK: ilvl.h $w27, $w10, $w17
+0x7a 0x40 0x09 0x94 # CHECK: ilvl.w $w6, $w1, $w0
+0x7a 0x78 0x80 0xd4 # CHECK: ilvl.d $w3, $w16, $w24
+0x7b 0x94 0x2a 0xd4 # CHECK: ilvod.b $w11, $w5, $w20
+0x7b 0xbf 0x6c 0x94 # CHECK: ilvod.h $w18, $w13, $w31
+0x7b 0xd8 0x87 0x54 # CHECK: ilvod.w $w29, $w16, $w24
+0x7b 0xfd 0x65 0x94 # CHECK: ilvod.d $w22, $w12, $w29
+0x7a 0x86 0xf1 0x14 # CHECK: ilvr.b $w4, $w30, $w6
+0x7a 0xbd 0x9f 0x14 # CHECK: ilvr.h $w28, $w19, $w29
+0x7a 0xd5 0xa4 0x94 # CHECK: ilvr.w $w18, $w20, $w21
+0x7a 0xec 0xf5 0xd4 # CHECK: ilvr.d $w23, $w30, $w12
+0x78 0x9d 0xfc 0x52 # CHECK: maddv.b $w17, $w31, $w29
+0x78 0xa9 0xc1 0xd2 # CHECK: maddv.h $w7, $w24, $w9
+0x78 0xd4 0xb5 0x92 # CHECK: maddv.w $w22, $w22, $w20
+0x78 0xf4 0xd7 0x92 # CHECK: maddv.d $w30, $w26, $w20
+0x7b 0x17 0x5d 0xce # CHECK: max_a.b $w23, $w11, $w23
+0x7b 0x3e 0x2d 0x0e # CHECK: max_a.h $w20, $w5, $w30
+0x7b 0x5e 0x91 0xce # CHECK: max_a.w $w7, $w18, $w30
+0x7b 0x7f 0x42 0x0e # CHECK: max_a.d $w8, $w8, $w31
+0x79 0x13 0x0a 0x8e # CHECK: max_s.b $w10, $w1, $w19
+0x79 0x31 0xeb 0xce # CHECK: max_s.h $w15, $w29, $w17
+0x79 0x4e 0xeb 0xce # CHECK: max_s.w $w15, $w29, $w14
+0x79 0x63 0xc6 0x4e # CHECK: max_s.d $w25, $w24, $w3
+0x79 0x85 0xc3 0x0e # CHECK: max_u.b $w12, $w24, $w5
+0x79 0xa7 0x31 0x4e # CHECK: max_u.h $w5, $w6, $w7
+0x79 0xc7 0x24 0x0e # CHECK: max_u.w $w16, $w4, $w7
+0x79 0xf8 0x66 0x8e # CHECK: max_u.d $w26, $w12, $w24
+0x7b 0x81 0xd1 0x0e # CHECK: min_a.b $w4, $w26, $w1
+0x7b 0xbf 0x6b 0x0e # CHECK: min_a.h $w12, $w13, $w31
+0x7b 0xc0 0xa7 0x0e # CHECK: min_a.w $w28, $w20, $w0
+0x7b 0xf3 0xa3 0x0e # CHECK: min_a.d $w12, $w20, $w19
+0x7a 0x0e 0x1c 0xce # CHECK: min_s.b $w19, $w3, $w14
+0x7a 0x28 0xae 0xce # CHECK: min_s.h $w27, $w21, $w8
+0x7a 0x5e 0x70 0x0e # CHECK: min_s.w $w0, $w14, $w30
+0x7a 0x75 0x41 0x8e # CHECK: min_s.d $w6, $w8, $w21
+0x7a 0x88 0xd5 0x8e # CHECK: min_u.b $w22, $w26, $w8
+0x7a 0xac 0xd9 0xce # CHECK: min_u.h $w7, $w27, $w12
+0x7a 0xce 0xa2 0x0e # CHECK: min_u.w $w8, $w20, $w14
+0x7a 0xef 0x76 0x8e # CHECK: min_u.d $w26, $w14, $w15
+0x7b 0x1a 0x0c 0x92 # CHECK: mod_s.b $w18, $w1, $w26
+0x7b 0x3c 0xf7 0xd2 # CHECK: mod_s.h $w31, $w30, $w28
+0x7b 0x4d 0x30 0x92 # CHECK: mod_s.w $w2, $w6, $w13
+0x7b 0x76 0xdd 0x52 # CHECK: mod_s.d $w21, $w27, $w22
+0x7b 0x8d 0x3c 0x12 # CHECK: mod_u.b $w16, $w7, $w13
+0x7b 0xa7 0x46 0x12 # CHECK: mod_u.h $w24, $w8, $w7
+0x7b 0xd1 0x17 0x92 # CHECK: mod_u.w $w30, $w2, $w17
+0x7b 0xf9 0x17 0xd2 # CHECK: mod_u.d $w31, $w2, $w25
+0x79 0x0c 0x2b 0x92 # CHECK: msubv.b $w14, $w5, $w12
+0x79 0x3e 0x39 0x92 # CHECK: msubv.h $w6, $w7, $w30
+0x79 0x55 0x13 0x52 # CHECK: msubv.w $w13, $w2, $w21
+0x79 0x7b 0x74 0x12 # CHECK: msubv.d $w16, $w14, $w27
+0x78 0x0d 0x1d 0x12 # CHECK: mulv.b $w20, $w3, $w13
+0x78 0x2e 0xd6 0xd2 # CHECK: mulv.h $w27, $w26, $w14
+0x78 0x43 0xea 0x92 # CHECK: mulv.w $w10, $w29, $w3
+0x78 0x7d 0x99 0xd2 # CHECK: mulv.d $w7, $w19, $w29
+0x79 0x07 0xd9 0x54 # CHECK: pckev.b $w5, $w27, $w7
+0x79 0x3b 0x20 0x54 # CHECK: pckev.h $w1, $w4, $w27
+0x79 0x40 0xa7 0x94 # CHECK: pckev.w $w30, $w20, $w0
+0x79 0x6f 0x09 0x94 # CHECK: pckev.d $w6, $w1, $w15
+0x79 0x9e 0xe4 0x94 # CHECK: pckod.b $w18, $w28, $w30
+0x79 0xa8 0x2e 0x94 # CHECK: pckod.h $w26, $w5, $w8
+0x79 0xc2 0x22 0x54 # CHECK: pckod.w $w9, $w4, $w2
+0x79 0xf4 0xb7 0x94 # CHECK: pckod.d $w30, $w22, $w20
+0x78 0x0c 0xb9 0x54 # CHECK: sld.b $w5, $w23[$12]
+0x78 0x23 0xb8 0x54 # CHECK: sld.h $w1, $w23[$3]
+0x78 0x49 0x45 0x14 # CHECK: sld.w $w20, $w8[$9]
+0x78 0x7e 0xb9 0xd4 # CHECK: sld.d $w7, $w23[$fp]
+0x78 0x11 0x00 0xcd # CHECK: sll.b $w3, $w0, $w17
+0x78 0x23 0xdc 0x4d # CHECK: sll.h $w17, $w27, $w3
+0x78 0x46 0x3c 0x0d # CHECK: sll.w $w16, $w7, $w6
+0x78 0x7a 0x02 0x4d # CHECK: sll.d $w9, $w0, $w26
+0x78 0x81 0x0f 0x14 # CHECK: splat.b $w28, $w1[$1]
+0x78 0xab 0x58 0x94 # CHECK: splat.h $w2, $w11[$11]
+0x78 0xcb 0x05 0x94 # CHECK: splat.w $w22, $w0[$11]
+0x78 0xe2 0x00 0x14 # CHECK: splat.d $w0, $w0[$2]
+0x78 0x91 0x27 0x0d # CHECK: sra.b $w28, $w4, $w17
+0x78 0xa3 0x4b 0x4d # CHECK: sra.h $w13, $w9, $w3
+0x78 0xd3 0xae 0xcd # CHECK: sra.w $w27, $w21, $w19
+0x78 0xf7 0x47 0x8d # CHECK: sra.d $w30, $w8, $w23
+0x78 0x92 0x94 0xd5 # CHECK: srar.b $w19, $w18, $w18
+0x78 0xa8 0xb9 0xd5 # CHECK: srar.h $w7, $w23, $w8
+0x78 0xc2 0x60 0x55 # CHECK: srar.w $w1, $w12, $w2
+0x78 0xee 0x3d 0x55 # CHECK: srar.d $w21, $w7, $w14
+0x79 0x13 0x1b 0x0d # CHECK: srl.b $w12, $w3, $w19
+0x79 0x34 0xfd 0xcd # CHECK: srl.h $w23, $w31, $w20
+0x79 0x4b 0xdc 0x8d # CHECK: srl.w $w18, $w27, $w11
+0x79 0x7a 0x60 0xcd # CHECK: srl.d $w3, $w12, $w26
+0x79 0x0b 0xab 0xd5 # CHECK: srlr.b $w15, $w21, $w11
+0x79 0x33 0x6d 0x55 # CHECK: srlr.h $w21, $w13, $w19
+0x79 0x43 0xf1 0x95 # CHECK: srlr.w $w6, $w30, $w3
+0x79 0x6e 0x10 0x55 # CHECK: srlr.d $w1, $w2, $w14
+0x78 0x01 0x7e 0x51 # CHECK: subs_s.b $w25, $w15, $w1
+0x78 0x36 0xcf 0x11 # CHECK: subs_s.h $w28, $w25, $w22
+0x78 0x55 0x62 0x91 # CHECK: subs_s.w $w10, $w12, $w21
+0x78 0x72 0xa1 0x11 # CHECK: subs_s.d $w4, $w20, $w18
+0x78 0x99 0x35 0x51 # CHECK: subs_u.b $w21, $w6, $w25
+0x78 0xa7 0x50 0xd1 # CHECK: subs_u.h $w3, $w10, $w7
+0x78 0xca 0x7a 0x51 # CHECK: subs_u.w $w9, $w15, $w10
+0x78 0xea 0x99 0xd1 # CHECK: subs_u.d $w7, $w19, $w10
+0x79 0x0c 0x39 0x91 # CHECK: subsus_u.b $w6, $w7, $w12
+0x79 0x33 0xe9 0x91 # CHECK: subsus_u.h $w6, $w29, $w19
+0x79 0x47 0x79 0xd1 # CHECK: subsus_u.w $w7, $w15, $w7
+0x79 0x6f 0x1a 0x51 # CHECK: subsus_u.d $w9, $w3, $w15
+0x79 0x9f 0x1d 0x91 # CHECK: subsuu_s.b $w22, $w3, $w31
+0x79 0xb6 0xbc 0xd1 # CHECK: subsuu_s.h $w19, $w23, $w22
+0x79 0xcd 0x52 0x51 # CHECK: subsuu_s.w $w9, $w10, $w13
+0x79 0xe0 0x31 0x51 # CHECK: subsuu_s.d $w5, $w6, $w0
+0x78 0x93 0x69 0x8e # CHECK: subv.b $w6, $w13, $w19
+0x78 0xac 0xc9 0x0e # CHECK: subv.h $w4, $w25, $w12
+0x78 0xcb 0xde 0xce # CHECK: subv.w $w27, $w27, $w11
+0x78 0xea 0xc2 0x4e # CHECK: subv.d $w9, $w24, $w10
+0x78 0x05 0x80 0xd5 # CHECK: vshf.b $w3, $w16, $w5
+0x78 0x28 0x9d 0x15 # CHECK: vshf.h $w20, $w19, $w8
+0x78 0x59 0xf4 0x15 # CHECK: vshf.w $w16, $w30, $w25
+0x78 0x6f 0x5c 0xd5 # CHECK: vshf.d $w19, $w11, $w15
diff --git a/test/MC/Disassembler/Mips/msa/test_3rf.txt b/test/MC/Disassembler/Mips/msa/test_3rf.txt
new file mode 100644
index 0000000..3b7b07c
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_3rf.txt
@@ -0,0 +1,84 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x78 0x1c 0x9f 0x1b # CHECK: fadd.w $w28, $w19, $w28
+0x78 0x3d 0x13 0x5b # CHECK: fadd.d $w13, $w2, $w29
+0x78 0x19 0x5b 0x9a # CHECK: fcaf.w $w14, $w11, $w25
+0x78 0x33 0x08 0x5a # CHECK: fcaf.d $w1, $w1, $w19
+0x78 0x90 0xb8 0x5a # CHECK: fceq.w $w1, $w23, $w16
+0x78 0xb0 0x40 0x1a # CHECK: fceq.d $w0, $w8, $w16
+0x79 0x98 0x4c 0x1a # CHECK: fcle.w $w16, $w9, $w24
+0x79 0xa1 0x76 0xda # CHECK: fcle.d $w27, $w14, $w1
+0x79 0x08 0x47 0x1a # CHECK: fclt.w $w28, $w8, $w8
+0x79 0x2b 0xcf 0x9a # CHECK: fclt.d $w30, $w25, $w11
+0x78 0xd7 0x90 0x9c # CHECK: fcne.w $w2, $w18, $w23
+0x78 0xef 0xa3 0x9c # CHECK: fcne.d $w14, $w20, $w15
+0x78 0x59 0x92 0x9c # CHECK: fcor.w $w10, $w18, $w25
+0x78 0x6b 0xcc 0x5c # CHECK: fcor.d $w17, $w25, $w11
+0x78 0xd5 0x13 0x9a # CHECK: fcueq.w $w14, $w2, $w21
+0x78 0xe7 0x1f 0x5a # CHECK: fcueq.d $w29, $w3, $w7
+0x79 0xc3 0x2c 0x5a # CHECK: fcule.w $w17, $w5, $w3
+0x79 0xfe 0x0f 0xda # CHECK: fcule.d $w31, $w1, $w30
+0x79 0x49 0xc9 0x9a # CHECK: fcult.w $w6, $w25, $w9
+0x79 0x71 0x46 0xda # CHECK: fcult.d $w27, $w8, $w17
+0x78 0x48 0xa1 0x1a # CHECK: fcun.w $w4, $w20, $w8
+0x78 0x63 0x5f 0x5a # CHECK: fcun.d $w29, $w11, $w3
+0x78 0x93 0x93 0x5c # CHECK: fcune.w $w13, $w18, $w19
+0x78 0xb5 0xd4 0x1c # CHECK: fcune.d $w16, $w26, $w21
+0x78 0xc2 0xc3 0x5b # CHECK: fdiv.w $w13, $w24, $w2
+0x78 0xf9 0x24 0xdb # CHECK: fdiv.d $w19, $w4, $w25
+0x7a 0x10 0x02 0x1b # CHECK: fexdo.h $w8, $w0, $w16
+0x7a 0x3b 0x68 0x1b # CHECK: fexdo.w $w0, $w13, $w27
+0x79 0xc3 0x04 0x5b # CHECK: fexp2.w $w17, $w0, $w3
+0x79 0xea 0x05 0x9b # CHECK: fexp2.d $w22, $w0, $w10
+0x79 0x17 0x37 0x5b # CHECK: fmadd.w $w29, $w6, $w23
+0x79 0x35 0xe2 0xdb # CHECK: fmadd.d $w11, $w28, $w21
+0x7b 0x8d 0xb8 0x1b # CHECK: fmax.w $w0, $w23, $w13
+0x7b 0xa8 0x96 0x9b # CHECK: fmax.d $w26, $w18, $w8
+0x7b 0xca 0x82 0x9b # CHECK: fmax_a.w $w10, $w16, $w10
+0x7b 0xf6 0x4f 0x9b # CHECK: fmax_a.d $w30, $w9, $w22
+0x7b 0x1e 0x0e 0x1b # CHECK: fmin.w $w24, $w1, $w30
+0x7b 0x2a 0xde 0xdb # CHECK: fmin.d $w27, $w27, $w10
+0x7b 0x54 0xea 0x9b # CHECK: fmin_a.w $w10, $w29, $w20
+0x7b 0x78 0xf3 0x5b # CHECK: fmin_a.d $w13, $w30, $w24
+0x79 0x40 0xcc 0x5b # CHECK: fmsub.w $w17, $w25, $w0
+0x79 0x70 0x92 0x1b # CHECK: fmsub.d $w8, $w18, $w16
+0x78 0x8f 0x78 0xdb # CHECK: fmul.w $w3, $w15, $w15
+0x78 0xaa 0xf2 0x5b # CHECK: fmul.d $w9, $w30, $w10
+0x7a 0x0a 0x2e 0x5a # CHECK: fsaf.w $w25, $w5, $w10
+0x7a 0x3d 0x1e 0x5a # CHECK: fsaf.d $w25, $w3, $w29
+0x7a 0x8d 0x8a 0xda # CHECK: fseq.w $w11, $w17, $w13
+0x7a 0xbf 0x07 0x5a # CHECK: fseq.d $w29, $w0, $w31
+0x7b 0x9f 0xff 0x9a # CHECK: fsle.w $w30, $w31, $w31
+0x7b 0xb8 0xbc 0x9a # CHECK: fsle.d $w18, $w23, $w24
+0x7b 0x06 0x2b 0x1a # CHECK: fslt.w $w12, $w5, $w6
+0x7b 0x35 0xd4 0x1a # CHECK: fslt.d $w16, $w26, $w21
+0x7a 0xcc 0x0f 0x9c # CHECK: fsne.w $w30, $w1, $w12
+0x7a 0xf7 0x6b 0x9c # CHECK: fsne.d $w14, $w13, $w23
+0x7a 0x5b 0x6e 0xdc # CHECK: fsor.w $w27, $w13, $w27
+0x7a 0x6b 0xc3 0x1c # CHECK: fsor.d $w12, $w24, $w11
+0x78 0x41 0xd7 0xdb # CHECK: fsub.w $w31, $w26, $w1
+0x78 0x7b 0x8c 0xdb # CHECK: fsub.d $w19, $w17, $w27
+0x7a 0xd9 0xc4 0x1a # CHECK: fsueq.w $w16, $w24, $w25
+0x7a 0xee 0x74 0x9a # CHECK: fsueq.d $w18, $w14, $w14
+0x7b 0xcd 0xf5 0xda # CHECK: fsule.w $w23, $w30, $w13
+0x7b 0xfa 0x58 0x9a # CHECK: fsule.d $w2, $w11, $w26
+0x7b 0x56 0xd2 0xda # CHECK: fsult.w $w11, $w26, $w22
+0x7b 0x7e 0xb9 0x9a # CHECK: fsult.d $w6, $w23, $w30
+0x7a 0x5c 0x90 0xda # CHECK: fsun.w $w3, $w18, $w28
+0x7a 0x73 0x5c 0x9a # CHECK: fsun.d $w18, $w11, $w19
+0x7a 0x82 0xfc 0x1c # CHECK: fsune.w $w16, $w31, $w2
+0x7a 0xb1 0xd0 0xdc # CHECK: fsune.d $w3, $w26, $w17
+0x7a 0x98 0x24 0x1b # CHECK: ftq.h $w16, $w4, $w24
+0x7a 0xb9 0x29 0x5b # CHECK: ftq.w $w5, $w5, $w25
+0x79 0x4a 0xa4 0x1c # CHECK: madd_q.h $w16, $w20, $w10
+0x79 0x69 0x17 0x1c # CHECK: madd_q.w $w28, $w2, $w9
+0x7b 0x49 0x92 0x1c # CHECK: maddr_q.h $w8, $w18, $w9
+0x7b 0x70 0x67 0x5c # CHECK: maddr_q.w $w29, $w12, $w16
+0x79 0x8a 0xd6 0x1c # CHECK: msub_q.h $w24, $w26, $w10
+0x79 0xbc 0xf3 0x5c # CHECK: msub_q.w $w13, $w30, $w28
+0x7b 0x8b 0xab 0x1c # CHECK: msubr_q.h $w12, $w21, $w11
+0x7b 0xb4 0x70 0x5c # CHECK: msubr_q.w $w1, $w14, $w20
+0x79 0x1e 0x81 0x9c # CHECK: mul_q.h $w6, $w16, $w30
+0x79 0x24 0x0c 0x1c # CHECK: mul_q.w $w16, $w1, $w4
+0x7b 0x13 0xa1 0x9c # CHECK: mulr_q.h $w6, $w20, $w19
+0x7b 0x34 0x0e 0xdc # CHECK: mulr_q.w $w27, $w1, $w20
diff --git a/test/MC/Disassembler/Mips/msa/test_bit.txt b/test/MC/Disassembler/Mips/msa/test_bit.txt
new file mode 100644
index 0000000..422d71e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_bit.txt
@@ -0,0 +1,50 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x79 0xf2 0xf5 0x49 # CHECK: bclri.b $w21, $w30, 2
+0x79 0xe0 0xae 0x09 # CHECK: bclri.h $w24, $w21, 0
+0x79 0xc3 0xf5 0xc9 # CHECK: bclri.w $w23, $w30, 3
+0x79 0x80 0x5a 0x49 # CHECK: bclri.d $w9, $w11, 0
+0x7b 0x71 0x66 0x49 # CHECK: binsli.b $w25, $w12, 1
+0x7b 0x60 0xb5 0x49 # CHECK: binsli.h $w21, $w22, 0
+0x7b 0x40 0x25 0x89 # CHECK: binsli.w $w22, $w4, 0
+0x7b 0x06 0x11 0x89 # CHECK: binsli.d $w6, $w2, 6
+0x7b 0xf0 0x9b 0xc9 # CHECK: binsri.b $w15, $w19, 0
+0x7b 0xe1 0xf2 0x09 # CHECK: binsri.h $w8, $w30, 1
+0x7b 0xc5 0x98 0x89 # CHECK: binsri.w $w2, $w19, 5
+0x7b 0x81 0xa4 0x89 # CHECK: binsri.d $w18, $w20, 1
+0x7a 0xf0 0x9e 0x09 # CHECK: bnegi.b $w24, $w19, 0
+0x7a 0xe3 0x5f 0x09 # CHECK: bnegi.h $w28, $w11, 3
+0x7a 0xc5 0xd8 0x49 # CHECK: bnegi.w $w1, $w27, 5
+0x7a 0x81 0xa9 0x09 # CHECK: bnegi.d $w4, $w21, 1
+0x7a 0x70 0x44 0x89 # CHECK: bseti.b $w18, $w8, 0
+0x7a 0x62 0x76 0x09 # CHECK: bseti.h $w24, $w14, 2
+0x7a 0x44 0x92 0x49 # CHECK: bseti.w $w9, $w18, 4
+0x7a 0x01 0x79 0xc9 # CHECK: bseti.d $w7, $w15, 1
+0x78 0x72 0xff 0xca # CHECK: sat_s.b $w31, $w31, 2
+0x78 0x60 0x9c 0xca # CHECK: sat_s.h $w19, $w19, 0
+0x78 0x40 0xec 0xca # CHECK: sat_s.w $w19, $w29, 0
+0x78 0x00 0xb2 0xca # CHECK: sat_s.d $w11, $w22, 0
+0x78 0xf3 0x68 0x4a # CHECK: sat_u.b $w1, $w13, 3
+0x78 0xe4 0xc7 0x8a # CHECK: sat_u.h $w30, $w24, 4
+0x78 0xc0 0x6f 0xca # CHECK: sat_u.w $w31, $w13, 0
+0x78 0x85 0x87 0x4a # CHECK: sat_u.d $w29, $w16, 5
+0x78 0x71 0x55 0xc9 # CHECK: slli.b $w23, $w10, 1
+0x78 0x61 0x92 0x49 # CHECK: slli.h $w9, $w18, 1
+0x78 0x44 0xea 0xc9 # CHECK: slli.w $w11, $w29, 4
+0x78 0x01 0xa6 0x49 # CHECK: slli.d $w25, $w20, 1
+0x78 0xf1 0xee 0x09 # CHECK: srai.b $w24, $w29, 1
+0x78 0xe0 0x30 0x49 # CHECK: srai.h $w1, $w6, 0
+0x78 0xc1 0xd1 0xc9 # CHECK: srai.w $w7, $w26, 1
+0x78 0x83 0xcd 0x09 # CHECK: srai.d $w20, $w25, 3
+0x79 0x70 0xc9 0x4a # CHECK: srari.b $w5, $w25, 0
+0x79 0x64 0x31 0xca # CHECK: srari.h $w7, $w6, 4
+0x79 0x45 0x5c 0x4a # CHECK: srari.w $w17, $w11, 5
+0x79 0x05 0xcd 0x4a # CHECK: srari.d $w21, $w25, 5
+0x79 0x72 0x00 0x89 # CHECK: srli.b $w2, $w0, 2
+0x79 0x62 0xff 0xc9 # CHECK: srli.h $w31, $w31, 2
+0x79 0x44 0x49 0x49 # CHECK: srli.w $w5, $w9, 4
+0x79 0x05 0xd6 0xc9 # CHECK: srli.d $w27, $w26, 5
+0x79 0xf0 0x1c 0x8a # CHECK: srlri.b $w18, $w3, 0
+0x79 0xe3 0x10 0x4a # CHECK: srlri.h $w1, $w2, 3
+0x79 0xc2 0xb2 0xca # CHECK: srlri.w $w11, $w22, 2
+0x79 0x86 0x56 0x0a # CHECK: srlri.d $w24, $w10, 6
diff --git a/test/MC/Disassembler/Mips/msa/test_ctrlregs.txt b/test/MC/Disassembler/Mips/msa/test_ctrlregs.txt
new file mode 100644
index 0000000..fb5b0be
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_ctrlregs.txt
@@ -0,0 +1,35 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x78 0x7e 0x00 0x59 # CHECK: cfcmsa $1, $0
+0x78 0x7e 0x00 0x59 # CHECK: cfcmsa $1, $0
+0x78 0x7e 0x08 0x99 # CHECK: cfcmsa $2, $1
+0x78 0x7e 0x08 0x99 # CHECK: cfcmsa $2, $1
+0x78 0x7e 0x10 0xd9 # CHECK: cfcmsa $3, $2
+0x78 0x7e 0x10 0xd9 # CHECK: cfcmsa $3, $2
+0x78 0x7e 0x19 0x19 # CHECK: cfcmsa $4, $3
+0x78 0x7e 0x19 0x19 # CHECK: cfcmsa $4, $3
+0x78 0x7e 0x21 0x59 # CHECK: cfcmsa $5, $4
+0x78 0x7e 0x21 0x59 # CHECK: cfcmsa $5, $4
+0x78 0x7e 0x29 0x99 # CHECK: cfcmsa $6, $5
+0x78 0x7e 0x29 0x99 # CHECK: cfcmsa $6, $5
+0x78 0x7e 0x31 0xd9 # CHECK: cfcmsa $7, $6
+0x78 0x7e 0x31 0xd9 # CHECK: cfcmsa $7, $6
+0x78 0x7e 0x3a 0x19 # CHECK: cfcmsa $8, $7
+0x78 0x7e 0x3a 0x19 # CHECK: cfcmsa $8, $7
+
+0x78 0x3e 0x08 0x19 # CHECK: ctcmsa $0, $1
+0x78 0x3e 0x08 0x19 # CHECK: ctcmsa $0, $1
+0x78 0x3e 0x10 0x59 # CHECK: ctcmsa $1, $2
+0x78 0x3e 0x10 0x59 # CHECK: ctcmsa $1, $2
+0x78 0x3e 0x18 0x99 # CHECK: ctcmsa $2, $3
+0x78 0x3e 0x18 0x99 # CHECK: ctcmsa $2, $3
+0x78 0x3e 0x20 0xd9 # CHECK: ctcmsa $3, $4
+0x78 0x3e 0x20 0xd9 # CHECK: ctcmsa $3, $4
+0x78 0x3e 0x29 0x19 # CHECK: ctcmsa $4, $5
+0x78 0x3e 0x29 0x19 # CHECK: ctcmsa $4, $5
+0x78 0x3e 0x31 0x59 # CHECK: ctcmsa $5, $6
+0x78 0x3e 0x31 0x59 # CHECK: ctcmsa $5, $6
+0x78 0x3e 0x39 0x99 # CHECK: ctcmsa $6, $7
+0x78 0x3e 0x39 0x99 # CHECK: ctcmsa $6, $7
+0x78 0x3e 0x41 0xd9 # CHECK: ctcmsa $7, $8
+0x78 0x3e 0x41 0xd9 # CHECK: ctcmsa $7, $8
diff --git a/test/MC/Disassembler/Mips/msa/test_dlsa.txt b/test/MC/Disassembler/Mips/msa/test_dlsa.txt
new file mode 100644
index 0000000..2a1d90b
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_dlsa.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r2 -mattr=+msa | FileCheck %s
+
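+# The 2-bit sa field stores (shift - 1), so these four encodings walk the
+# full shift-amount range of 1 to 4.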
+0x01 0x2a 0x40 0x15 # CHECK: dlsa $8, $9, $10, 1
+0x01 0x2a 0x40 0x55 # CHECK: dlsa $8, $9, $10, 2
+0x01 0x2a 0x40 0x95 # CHECK: dlsa $8, $9, $10, 3
+0x01 0x2a 0x40 0xd5 # CHECK: dlsa $8, $9, $10, 4
diff --git a/test/MC/Disassembler/Mips/msa/test_elm.txt b/test/MC/Disassembler/Mips/msa/test_elm.txt
new file mode 100644
index 0000000..832587b
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_elm.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x78 0x82 0x43 0x59 # CHECK: copy_s.b $13, $w8[2]
+0x78 0xa0 0xc8 0x59 # CHECK: copy_s.h $1, $w25[0]
+0x78 0xb1 0x2d 0x99 # CHECK: copy_s.w $22, $w5[1]
+0x78 0xc4 0xa5 0x99 # CHECK: copy_u.b $22, $w20[4]
+0x78 0xe0 0x25 0x19 # CHECK: copy_u.h $20, $w4[0]
+0x78 0xf2 0x6f 0x99 # CHECK: copy_u.w $fp, $w13[2]
+0x78 0x04 0xe8 0x19 # CHECK: sldi.b $w0, $w29[4]
+0x78 0x20 0x8a 0x19 # CHECK: sldi.h $w8, $w17[0]
+0x78 0x32 0xdd 0x19 # CHECK: sldi.w $w20, $w27[2]
+0x78 0x38 0x61 0x19 # CHECK: sldi.d $w4, $w12[0]
+0x78 0x42 0x1e 0x59 # CHECK: splati.b $w25, $w3[2]
+0x78 0x61 0xe6 0x19 # CHECK: splati.h $w24, $w28[1]
+0x78 0x70 0x93 0x59 # CHECK: splati.w $w13, $w18[0]
+0x78 0x78 0x0f 0x19 # CHECK: splati.d $w28, $w1[0]
+0x78 0xbe 0xc5 0xd9 # CHECK: move.v $w23, $w24
diff --git a/test/MC/Disassembler/Mips/msa/test_elm_insert.txt b/test/MC/Disassembler/Mips/msa/test_elm_insert.txt
new file mode 100644
index 0000000..605d495
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_elm_insert.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x79 0x03 0xed 0xd9 # CHECK: insert.b $w23[3], $sp
+0x79 0x22 0x2d 0x19 # CHECK: insert.h $w20[2], $5
+0x79 0x32 0x7a 0x19 # CHECK: insert.w $w8[2], $15
diff --git a/test/MC/Disassembler/Mips/msa/test_elm_insert_msa64.txt b/test/MC/Disassembler/Mips/msa/test_elm_insert_msa64.txt
new file mode 100644
index 0000000..62920f3
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_elm_insert_msa64.txt
@@ -0,0 +1,3 @@
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r2 -mattr=+msa | FileCheck %s
+
+0x79 0x39 0xe8 0x59 # CHECK: insert.d $w1[1], $sp
diff --git a/test/MC/Disassembler/Mips/msa/test_elm_insve.txt b/test/MC/Disassembler/Mips/msa/test_elm_insve.txt
new file mode 100644
index 0000000..c5c3ba0
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_elm_insve.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x79 0x43 0x4e 0x59 # CHECK: insve.b $w25[3], $w9[0]
+0x79 0x62 0x16 0x19 # CHECK: insve.h $w24[2], $w2[0]
+0x79 0x72 0x68 0x19 # CHECK: insve.w $w0[2], $w13[0]
+0x79 0x78 0x90 0xd9 # CHECK: insve.d $w3[0], $w18[0]
diff --git a/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt b/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt
new file mode 100644
index 0000000..70c831a
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r2 -mattr=+msa | FileCheck %s
+
+# CHECK: copy_s.d $19, $w31[0]
+0x78 0xb8 0xfc 0xd9
+# CHECK: copy_u.d $18, $w29[1]
+0x78 0xf9 0xec 0x99
diff --git a/test/MC/Disassembler/Mips/msa/test_i10.txt b/test/MC/Disassembler/Mips/msa/test_i10.txt
new file mode 100644
index 0000000..ac95d88
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_i10.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+msa | FileCheck %s
+
+0x7b 0x06 0x32 0x07 # CHECK: ldi.b $w8, 198
+0x7b 0x29 0xcd 0x07 # CHECK: ldi.h $w20, 313
+0x7b 0x4f 0x66 0x07 # CHECK: ldi.w $w24, 492
+0x7b 0x7a 0x66 0xc7 # CHECK: ldi.d $w27, 844
diff --git a/test/MC/Disassembler/Mips/msa/test_i5.txt b/test/MC/Disassembler/Mips/msa/test_i5.txt
new file mode 100644
index 0000000..bf5bc51
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_i5.txt
@@ -0,0 +1,46 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+msa | FileCheck %s
+
+0x78 0x1e 0xf8 0xc6 # CHECK: addvi.b $w3, $w31, 30
+0x78 0x3a 0x6e 0x06 # CHECK: addvi.h $w24, $w13, 26
+0x78 0x5a 0xa6 0x86 # CHECK: addvi.w $w26, $w20, 26
+0x78 0x75 0x0c 0x06 # CHECK: addvi.d $w16, $w1, 21
+0x78 0x18 0xae 0x07 # CHECK: ceqi.b $w24, $w21, 24
+0x78 0x22 0x7f 0xc7 # CHECK: ceqi.h $w31, $w15, 2
+0x78 0x5f 0x0b 0x07 # CHECK: ceqi.w $w12, $w1, 31
+0x78 0x67 0xb6 0x07 # CHECK: ceqi.d $w24, $w22, 7
+0x7a 0x01 0x83 0x07 # CHECK: clei_s.b $w12, $w16, 1
+0x7a 0x37 0x50 0x87 # CHECK: clei_s.h $w2, $w10, 23
+0x7a 0x56 0x59 0x07 # CHECK: clei_s.w $w4, $w11, 22
+0x7a 0x76 0xe8 0x07 # CHECK: clei_s.d $w0, $w29, 22
+0x7a 0x83 0x8d 0x47 # CHECK: clei_u.b $w21, $w17, 3
+0x7a 0xb1 0x3f 0x47 # CHECK: clei_u.h $w29, $w7, 17
+0x7a 0xc2 0x08 0x47 # CHECK: clei_u.w $w1, $w1, 2
+0x7a 0xfd 0xde 0xc7 # CHECK: clei_u.d $w27, $w27, 29
+0x79 0x19 0x6c 0xc7 # CHECK: clti_s.b $w19, $w13, 25
+0x79 0x34 0x53 0xc7 # CHECK: clti_s.h $w15, $w10, 20
+0x79 0x4b 0x63 0x07 # CHECK: clti_s.w $w12, $w12, 11
+0x79 0x71 0xa7 0x47 # CHECK: clti_s.d $w29, $w20, 17
+0x79 0x9d 0x4b 0x87 # CHECK: clti_u.b $w14, $w9, 29
+0x79 0xb9 0xce 0x07 # CHECK: clti_u.h $w24, $w25, 25
+0x79 0xd6 0x08 0x47 # CHECK: clti_u.w $w1, $w1, 22
+0x79 0xe1 0xcd 0x47 # CHECK: clti_u.d $w21, $w25, 1
+0x79 0x01 0xad 0x86 # CHECK: maxi_s.b $w22, $w21, 1
+0x79 0x38 0x2f 0x46 # CHECK: maxi_s.h $w29, $w5, 24
+0x79 0x54 0x50 0x46 # CHECK: maxi_s.w $w1, $w10, 20
+0x79 0x70 0xeb 0x46 # CHECK: maxi_s.d $w13, $w29, 16
+0x79 0x8c 0x05 0x06 # CHECK: maxi_u.b $w20, $w0, 12
+0x79 0xa3 0x70 0x46 # CHECK: maxi_u.h $w1, $w14, 3
+0x79 0xcb 0xb6 0xc6 # CHECK: maxi_u.w $w27, $w22, 11
+0x79 0xe4 0x36 0x86 # CHECK: maxi_u.d $w26, $w6, 4
+0x7a 0x01 0x09 0x06 # CHECK: mini_s.b $w4, $w1, 1
+0x7a 0x37 0xde 0xc6 # CHECK: mini_s.h $w27, $w27, 23
+0x7a 0x49 0x5f 0x06 # CHECK: mini_s.w $w28, $w11, 9
+0x7a 0x6a 0x52 0xc6 # CHECK: mini_s.d $w11, $w10, 10
+0x7a 0x9b 0xbc 0x86 # CHECK: mini_u.b $w18, $w23, 27
+0x7a 0xb2 0xd1 0xc6 # CHECK: mini_u.h $w7, $w26, 18
+0x7a 0xda 0x62 0xc6 # CHECK: mini_u.w $w11, $w12, 26
+0x7a 0xe2 0x7a 0xc6 # CHECK: mini_u.d $w11, $w15, 2
+0x78 0x93 0xa6 0x06 # CHECK: subvi.b $w24, $w20, 19
+0x78 0xa4 0x9a 0xc6 # CHECK: subvi.h $w11, $w19, 4
+0x78 0xcb 0x53 0x06 # CHECK: subvi.w $w12, $w10, 11
+0x78 0xe7 0x84 0xc6 # CHECK: subvi.d $w19, $w16, 7
diff --git a/test/MC/Disassembler/Mips/msa/test_i8.txt b/test/MC/Disassembler/Mips/msa/test_i8.txt
new file mode 100644
index 0000000..e08c39b
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_i8.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+msa | FileCheck %s
+
+0x78 0x30 0xe8 0x80 # CHECK: andi.b $w2, $w29, 48
+0x78 0x7e 0xb1 0x81 # CHECK: bmnzi.b $w6, $w22, 126
+0x79 0x58 0x0e 0xc1 # CHECK: bmzi.b $w27, $w1, 88
+0x7a 0xbd 0x1f 0x41 # CHECK: bseli.b $w29, $w3, 189
+0x7a 0x38 0x88 0x40 # CHECK: nori.b $w1, $w17, 56
+0x79 0x87 0xa6 0x80 # CHECK: ori.b $w26, $w20, 135
+0x78 0x69 0xf4 0xc2 # CHECK: shf.b $w19, $w30, 105
+0x79 0x4c 0x44 0x42 # CHECK: shf.h $w17, $w8, 76
+0x7a 0x5d 0x1b 0x82 # CHECK: shf.w $w14, $w3, 93
+0x7b 0x14 0x54 0x00 # CHECK: xori.b $w16, $w10, 20
diff --git a/test/MC/Disassembler/Mips/msa/test_lsa.txt b/test/MC/Disassembler/Mips/msa/test_lsa.txt
new file mode 100644
index 0000000..c3e950b
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_lsa.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa | FileCheck %s
+
+0x01 0x2a 0x40 0x05 # CHECK: lsa $8, $9, $10, 1
+0x01 0x2a 0x40 0x45 # CHECK: lsa $8, $9, $10, 2
+0x01 0x2a 0x40 0x85 # CHECK: lsa $8, $9, $10, 3
+0x01 0x2a 0x40 0xc5 # CHECK: lsa $8, $9, $10, 4
diff --git a/test/MC/Disassembler/Mips/msa/test_mi10.txt b/test/MC/Disassembler/Mips/msa/test_mi10.txt
new file mode 100644
index 0000000..b75b49e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_mi10.txt
@@ -0,0 +1,28 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+msa | FileCheck %s
+
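+# The signed 10-bit offset is scaled by the element size, so the reachable
+# ranges are -512..511 (.b), -1024..1022 (.h), -2048..2044 (.w), and
+# -4096..4088 (.d); the extremes of each range are exercised below.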
+0x7a 0x00 0x08 0x20 # CHECK: ld.b $w0, -512($1)
+0x78 0x00 0x10 0x60 # CHECK: ld.b $w1, 0($2)
+0x79 0xff 0x18 0xa0 # CHECK: ld.b $w2, 511($3)
+
+0x7a 0x00 0x20 0xe1 # CHECK: ld.h $w3, -1024($4)
+0x7b 0x00 0x29 0x21 # CHECK: ld.h $w4, -512($5)
+0x78 0x00 0x31 0x61 # CHECK: ld.h $w5, 0($6)
+0x79 0x00 0x39 0xa1 # CHECK: ld.h $w6, 512($7)
+0x79 0xff 0x41 0xe1 # CHECK: ld.h $w7, 1022($8)
+
+0x7a 0x00 0x4a 0x22 # CHECK: ld.w $w8, -2048($9)
+0x7b 0x00 0x52 0x62 # CHECK: ld.w $w9, -1024($10)
+0x7b 0x80 0x5a 0xa2 # CHECK: ld.w $w10, -512($11)
+0x78 0x80 0x62 0xe2 # CHECK: ld.w $w11, 512($12)
+0x79 0x00 0x6b 0x22 # CHECK: ld.w $w12, 1024($13)
+0x79 0xff 0x73 0x62 # CHECK: ld.w $w13, 2044($14)
+
+0x7a 0x00 0x7b 0xa3 # CHECK: ld.d $w14, -4096($15)
+0x7b 0x00 0x83 0xe3 # CHECK: ld.d $w15, -2048($16)
+0x7b 0x80 0x8c 0x23 # CHECK: ld.d $w16, -1024($17)
+0x7b 0xc0 0x94 0x63 # CHECK: ld.d $w17, -512($18)
+0x78 0x00 0x9c 0xa3 # CHECK: ld.d $w18, 0($19)
+0x78 0x40 0xa4 0xe3 # CHECK: ld.d $w19, 512($20)
+0x78 0x80 0xad 0x23 # CHECK: ld.d $w20, 1024($21)
+0x79 0x00 0xb5 0x63 # CHECK: ld.d $w21, 2048($22)
+0x79 0xff 0xbd 0xa3 # CHECK: ld.d $w22, 4088($23)
diff --git a/test/MC/Disassembler/Mips/msa/test_vec.txt b/test/MC/Disassembler/Mips/msa/test_vec.txt
new file mode 100644
index 0000000..eff984f
--- /dev/null
+++ b/test/MC/Disassembler/Mips/msa/test_vec.txt
@@ -0,0 +1,9 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+msa | FileCheck %s
+
+0x78 0x1b 0xa6 0x5e # CHECK: and.v $w25, $w20, $w27
+0x78 0x87 0x34 0x5e # CHECK: bmnz.v $w17, $w6, $w7
+0x78 0xa9 0x88 0xde # CHECK: bmz.v $w3, $w17, $w9
+0x78 0xce 0x02 0x1e # CHECK: bsel.v $w8, $w0, $w14
+0x78 0x40 0xf9 0xde # CHECK: nor.v $w7, $w31, $w0
+0x78 0x3e 0xd6 0x1e # CHECK: or.v $w24, $w26, $w30
+0x78 0x6f 0xd9 0xde # CHECK: xor.v $w7, $w27, $w15
diff --git a/test/MC/Disassembler/Sparc/sparc-fp.txt b/test/MC/Disassembler/Sparc/sparc-fp.txt
index b279da8..b8a5017 100644
--- a/test/MC/Disassembler/Sparc/sparc-fp.txt
+++ b/test/MC/Disassembler/Sparc/sparc-fp.txt
@@ -120,13 +120,13 @@
# CHECK: fdivq %f0, %f4, %f8
0x91 0xa0 0x09 0xe4
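+# V8 fcmp* write the single condition-code field in %fsr and take no
+# %fcc operand; an explicit %fcc0-%fcc3 selector only exists on V9.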
-# CHECK: fcmps %fcc0, %f0, %f4
+# CHECK: fcmps %f0, %f4
0x81 0xa8 0x0a 0x24
-# CHECK: fcmpd %fcc0, %f0, %f4
+# CHECK: fcmpd %f0, %f4
0x81 0xa8 0x0a 0x44
-# CHECK: fcmpq %fcc0, %f0, %f4
+# CHECK: fcmpq %f0, %f4
0x81 0xa8 0x0a 0x64
# CHECK: fxtos %f0, %f4
diff --git a/test/MC/Disassembler/X86/prefixes.txt b/test/MC/Disassembler/X86/prefixes.txt
index 56596e3..b8830dc 100644
--- a/test/MC/Disassembler/X86/prefixes.txt
+++ b/test/MC/Disassembler/X86/prefixes.txt
@@ -44,6 +44,10 @@
# CHECK-NEXT: nop
0xf0 0x90
+# Test that the immediate is printed correctly when the operand-size prefix is present
+# CHECK: addw $-12, %ax
+0x66,0x83,0xc0,0xf4
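+# (0x66 is the operand-size override, 0x83 /0 is the sign-extended-imm8
+# form of ADD, modrm 0xc0 selects %ax, and 0xf4 sign-extends to -12.)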
+
# Test that multiple redundant prefixes work (redundant, but valid x86).
# CHECK: rep
# CHECK-NEXT: rep
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index a4a0b2c..c9c5086 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -708,3 +708,6 @@
# CHECK: movl $4294967295, %eax
0xc7 0xc0 0xff 0xff 0xff 0xff
+
+# CHECK: movq %mm0, %mm1
+0x0f 0x7f 0xc1
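+# (0x0f 0x7f is the MMX register/store form of MOVQ; modrm 0xc1 is
+# register-direct with %mm0 as source and %mm1 as destination.)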
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index 05d08e14..68b0f32 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -49,7 +49,7 @@
// Test that g1 and g2 are local, but g3 is an undefined global.
// CHECK: Symbol {
-// CHECK: Name: g1 (1)
+// CHECK: Name: g1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -58,7 +58,7 @@
// CHECK-NEXT: Section: .foo (0x7)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: g2 (4)
+// CHECK-NEXT: Name: g2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -68,7 +68,7 @@
// CHECK-NEXT: }
// CHECK: Symbol {
-// CHECK: Name: g3 (7)
+// CHECK: Name: g3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/common.s b/test/MC/ELF/common.s
index 9cff927..bd96564 100644
--- a/test/MC/ELF/common.s
+++ b/test/MC/ELF/common.s
@@ -9,7 +9,7 @@
.comm common1,1,1
// CHECK: Symbol {
-// CHECK: Name: common1 (1)
+// CHECK: Name: common1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 1
// CHECK-NEXT: Binding: Local
@@ -25,7 +25,7 @@
.comm common2,1,1
// CHECK: Symbol {
-// CHECK: Name: common2 (9)
+// CHECK: Name: common2
// CHECK-NEXT: Value: 0x1
// CHECK-NEXT: Size: 1
// CHECK-NEXT: Binding: Local
@@ -39,7 +39,7 @@
.comm common6,8,16
// CHECK: Symbol {
-// CHECK: Name: common6 (17)
+// CHECK: Name: common6
// CHECK-NEXT: Value: 0x10
// CHECK-NEXT: Size: 8
// CHECK-NEXT: Binding: Local
@@ -54,7 +54,7 @@
.comm common3,4,4
// CHECK: Symbol {
-// CHECK: Name: common3 (25)
+// CHECK: Name: common3
// CHECK-NEXT: Value: 0x4
// CHECK-NEXT: Size: 4
// CHECK-NEXT: Binding: Global
@@ -76,7 +76,7 @@ foo:
.comm common4,40,16
// CHECK: Symbol {
-// CHECK: Name: common4 (37)
+// CHECK: Name: common4
// CHECK-NEXT: Value: 0x10
// CHECK-NEXT: Size: 40
// CHECK-NEXT: Binding: Global
@@ -89,7 +89,7 @@ foo:
.comm common5,4,4
// CHECK: Symbol {
-// CHECK: Name: common5 (45)
+// CHECK: Name: common5
// CHECK-NEXT: Value: 0x4
// CHECK-NEXT: Size: 4
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/comp-dir.s b/test/MC/ELF/comp-dir.s
index 1b91f64..c8d996f 100644
--- a/test/MC/ELF/comp-dir.s
+++ b/test/MC/ELF/comp-dir.s
@@ -1,5 +1,4 @@
// REQUIRES: shell
-// XFAIL: mingw
// RUN: llvm-mc -triple=x86_64-linux-unknown -g -fdebug-compilation-dir=/test/comp/dir %s -filetype=obj -o %t.o
// RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
diff --git a/test/MC/ELF/compression.s b/test/MC/ELF/compression.s
index 305a84e..07b689e 100644
--- a/test/MC/ELF/compression.s
+++ b/test/MC/ELF/compression.s
@@ -1,28 +1,80 @@
-// RUN: llvm-mc -filetype=obj -compress-debug-sections -triple x86_64-pc-linux-gnu %s -o - | llvm-objdump -s - | FileCheck %s
+// RUN: llvm-mc -filetype=obj -compress-debug-sections -triple x86_64-pc-linux-gnu < %s -o %t
+// RUN: llvm-objdump -s %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck --check-prefix=INFO %s
+// RUN: llvm-mc -filetype=obj -compress-debug-sections -triple i386-pc-linux-gnu < %s \
+// RUN: | llvm-readobj -symbols - | FileCheck --check-prefix=386-SYMBOLS %s
// REQUIRES: zlib
-// CHECK: Contents of section .debug_line:
-// FIXME: Figure out how to handle debug_line that currently uses multiple section fragments
+// CHECK: Contents of section .zdebug_line:
+// Check that the 'ZLIB' file magic appears at the start of the section and nowhere else
+// CHECK-NEXT: ZLIB
// CHECK-NOT: ZLIB
+// CHECK: Contents of
-// CHECK: Contents of section .zdebug_abbrev:
-// Check for the 'ZLIB' file magic at the start of the section
-// CHECK-NEXT: ZLIB
+// Don't compress small sections, such as this simple debug_abbrev example
+// CHECK: Contents of section .debug_abbrev:
+// CHECK-NOT: ZLIB
+// CHECK-NOT: Contents of
+
+// CHECK: Contents of section .debug_info:
-// We shouldn't compress the debug_frame section, since it can be relaxed
-// CHECK: Contents of section .debug_frame
+// FIXME: Handle compressing alignment fragments to support compressing debug_frame
+// CHECK: Contents of section .debug_frame:
// CHECK-NOT: ZLIB
+// CHECK: Contents of
+
+// Decompress one valid DWARF section just to check that it round-trips
+// INFO: 0x00000000: Compile Unit: length = 0x0000000c version = 0x0004 abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000010)
+
+// On 32-bit x86, named symbols are used for temporary symbols in mergeable
+// sections, so make sure we handle symbols inside compressed sections
+// 386-SYMBOLS: Name: .Linfo_string0
+// 386-SYMBOLS-NOT: }
+// 386-SYMBOLS: Section: .zdebug_str
.section .debug_line,"",@progbits
.section .debug_abbrev,"",@progbits
+.Lsection_abbrev:
.byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+
+ .section .debug_info,"",@progbits
+ .long 12 # Length of Unit
+ .short 4 # DWARF version number
+ .long .Lsection_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] DW_TAG_compile_unit
+ .long .Linfo_string0 # DW_AT_comp_dir
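+  # (The unit length of 12 counts everything after the length field:
+  # 2-byte version + 4-byte abbrev offset + 1-byte address size +
+  # 1-byte abbrev code + 4-byte strp, matching the INFO check above.)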
+
.text
foo:
.cfi_startproc
.file 1 "Driver.ii"
+# Pad out the line table to make sure it's big enough to warrant compression
.loc 1 2 0
nop
+ .loc 1 3 0
+ nop
+ .loc 1 4 0
+ nop
+ .loc 1 5 0
+ nop
+ .loc 1 6 0
+ nop
+ .loc 1 7 0
+ nop
+ .loc 1 8 0
+ nop
.cfi_endproc
.cfi_sections .debug_frame
+
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "compress this "
diff --git a/test/MC/ELF/file-double.s b/test/MC/ELF/file-double.s
index f9b91ed..b5da8c5 100644
--- a/test/MC/ELF/file-double.s
+++ b/test/MC/ELF/file-double.s
@@ -11,7 +11,7 @@ foo.c:
bar.c:
// CHECK: Symbol {
-// CHECK: Name: foo.c (1)
+// CHECK: Name: foo.c
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -19,7 +19,7 @@ bar.c:
// CHECK-NEXT: Other: 0
// CHECK-NEXT: Section: Absolute (0xFFF1)
// CHECK-NEXT: }
-// CHECK: Name: bar.c (7)
+// CHECK: Name: bar.c
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -28,7 +28,7 @@ bar.c:
// CHECK-NEXT: Section: Absolute (0xFFF1)
// CHECK-NEXT: }
// CHECK: Symbol {
-// CHECK: Name: bar.c (7)
+// CHECK: Name: bar.c
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -37,7 +37,7 @@ bar.c:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK: Symbol {
-// CHECK: Name: foo.c (1)
+// CHECK: Name: foo.c
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
index 946119b..7f0c059 100644
--- a/test/MC/ELF/gen-dwarf.s
+++ b/test/MC/ELF/gen-dwarf.s
@@ -1,5 +1,9 @@
-// RUN: llvm-mc -g -triple i686-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
-// RUN: llvm-mc -g -triple i686-pc-linux-gnu %s -filetype=asm -o - | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -g -dwarf-version 2 -triple i686-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
+// RUN: not llvm-mc -g -dwarf-version 1 -triple i686-pc-linux-gnu %s -filetype=asm -o - 2>&1 | FileCheck --check-prefix=DWARF1 %s
+// RUN: llvm-mc -g -dwarf-version 2 -triple i686-pc-linux-gnu %s -filetype=asm -o - | FileCheck --check-prefix=ASM --check-prefix=DWARF2 %s
+// RUN: llvm-mc -g -dwarf-version 3 -triple i686-pc-linux-gnu %s -filetype=asm -o - | FileCheck --check-prefix=ASM --check-prefix=DWARF3 %s
+// RUN: llvm-mc -g -triple i686-pc-linux-gnu %s -filetype=asm -o - | FileCheck --check-prefix=ASM --check-prefix=DWARF4 %s
+// RUN: not llvm-mc -g -dwarf-version 5 -triple i686-pc-linux-gnu %s -filetype=asm -o - 2>&1 | FileCheck --check-prefix=DWARF5 %s
// Test that on ELF:
@@ -35,7 +39,9 @@ foo:
// Second instance of the section has the CU
// ASM: .section .debug_info
// Dwarf version
-// ASM: .short 2
+// DWARF2: .short 2
+// DWARF3: .short 3
+// DWARF4: .short 4
// ASM-NEXT: .long [[ABBREV_LABEL]]
// First .byte 1 is the abbreviation number for the compile_unit abbrev
// ASM: .byte 1
@@ -44,3 +50,5 @@ foo:
// ASM: .section .debug_line
// ASM-NEXT: [[LINE_LABEL]]
+// DWARF1: Dwarf version 1 is not supported.
+// DWARF5: Dwarf version 5 is not supported.
diff --git a/test/MC/ELF/lcomm.s b/test/MC/ELF/lcomm.s
index 430b79b..7d8ac3f 100644
--- a/test/MC/ELF/lcomm.s
+++ b/test/MC/ELF/lcomm.s
@@ -4,7 +4,7 @@
.lcomm B, 32 << 20
// CHECK: Symbol {
-// CHECK: Name: A (1)
+// CHECK: Name: A
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 5
// CHECK-NEXT: Binding: Local
@@ -13,7 +13,7 @@
// CHECK-NEXT: Section: .bss (0x3)
// CHECK-NEXT: }
// CHECK: Symbol {
-// CHECK: Name: B (3)
+// CHECK: Name: B
// CHECK-NEXT: Value: 0x5
// CHECK-NEXT: Size: 33554432
// CHECK-NEXT: Binding: Local
diff --git a/test/MC/ELF/many-sections-2.s b/test/MC/ELF/many-sections-2.s
index d1f9d00..88a4822 100644
--- a/test/MC/ELF/many-sections-2.s
+++ b/test/MC/ELF/many-sections-2.s
@@ -12,7 +12,7 @@
// Test that both a and b show up in the correct section.
-// SYMBOLS: Name: a (1)
+// SYMBOLS: Name: a
// SYMBOLS-NEXT: Value: 0x0
// SYMBOLS-NEXT: Size: 0
// SYMBOLS-NEXT: Binding: Local (0x0)
@@ -21,7 +21,7 @@
// SYMBOLS-NEXT: Section: last (0xFF00)
// SYMBOLS-NEXT: }
// SYMBOLS-NEXT: Symbol {
-// SYMBOLS-NEXT: Name: b (3)
+// SYMBOLS-NEXT: Name: b
// SYMBOLS-NEXT: Value: 0x1
// SYMBOLS-NEXT: Size: 0
// SYMBOLS-NEXT: Binding: Local (0x0)
@@ -32,7 +32,7 @@
// Test that this file has one section too many.
-// SYMBOLS: Name: last (0)
+// SYMBOLS: Name: last
// SYMBOLS-NEXT: Value: 0x0
// SYMBOLS-NEXT: Size: 0
// SYMBOLS-NEXT: Binding: Local (0x0)
diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s
index 33cb8ae..28f50cb 100644
--- a/test/MC/ELF/noexec.s
+++ b/test/MC/ELF/noexec.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
+// RUN: llvm-mc -no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
// CHECK: Section {
// CHECK: Index: 4
diff --git a/test/MC/ELF/offset.s b/test/MC/ELF/offset.s
index a412619..f448332 100644
--- a/test/MC/ELF/offset.s
+++ b/test/MC/ELF/offset.s
@@ -71,3 +71,62 @@ sym_f = sym_a + (1 - 1)
// CHECK-NEXT: Other: 0
// CHECK-NEXT: Section: .data
// CHECK-NEXT: }
+
+
+ .globl test2_a
+ .globl test2_b
+ .globl test2_c
+ .globl test2_d
+ .globl test2_e
+test2_a:
+ .long 0
+test2_b = test2_a
+test2_c:
+ .long 0
+test2_d = test2_c
+test2_e = test2_d - test2_b
+// CHECK: Symbol {
+// CHECK: Name: test2_a
+// CHECK-NEXT: Value: 0x5
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: test2_b
+// CHECK-NEXT: Value: 0x5
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: test2_c
+// CHECK-NEXT: Value: 0x9
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: test2_d
+// CHECK-NEXT: Value: 0x9
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: test2_e
+// CHECK-NEXT: Value: 0x4
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Absolute
+// CHECK-NEXT: }
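The new checks are easiest to read as arithmetic on the layout:

    test2_b = test2_a             ->  Value 0x5 (same as test2_a)
    test2_d = test2_c             ->  Value 0x9 (one .long further along)
    test2_e = test2_d - test2_b   ->  0x9 - 0x5 = 0x4

A difference of two symbols in the same section folds to a constant, which is why test2_e is reported as Absolute rather than as a .data symbol.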
diff --git a/test/MC/ELF/pic-diff.s b/test/MC/ELF/pic-diff.s
index 30c9278..5f0b145 100644
--- a/test/MC/ELF/pic-diff.s
+++ b/test/MC/ELF/pic-diff.s
@@ -7,7 +7,7 @@
// CHECK-NEXT: ]
// CHECK: Symbol {
-// CHECK: Name: baz (5)
+// CHECK: Name: baz
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/pr9292.s b/test/MC/ELF/pr9292.s
index a433650..1e01194 100644
--- a/test/MC/ELF/pr9292.s
+++ b/test/MC/ELF/pr9292.s
@@ -8,7 +8,7 @@ mov %eax,bar
// CHECK: Symbol {
-// CHECK: Name: bar (5)
+// CHECK: Name: bar
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -17,7 +17,7 @@ mov %eax,bar
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo (1)
+// CHECK-NEXT: Name: foo
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 4ddfd00..ba12df0 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -62,6 +62,7 @@
// CHECK-NEXT: 0x9E R_386_PC16 und_symbol 0x0
// Relocation 28 (und_symbol-bar2) is of type R_386_PC8
// CHECK-NEXT: 0xA0 R_386_PC8 und_symbol 0x0
+// CHECK-NEXT: 0xA3 R_386_GOTOFF und_symbol 0x0
// CHECK-NEXT: }
// CHECK-NEXT: ]
@@ -127,6 +128,8 @@ bar2:
.word und_symbol-bar2
.byte und_symbol-bar2
+ leal 1 + und_symbol@GOTOFF, %edi
+
.section zedsec,"awT",@progbits
zed:
.long 0
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index d2ee6af..c0e6007 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -25,9 +25,15 @@ bar:
.word foo-bar
.byte foo-bar
+ # this should probably be an error...
zed = foo +2
call zed@PLT
+ leaq -1+foo(%rip), %r11
+
+ movl $_GLOBAL_OFFSET_TABLE_, %eax
+ movabs $_GLOBAL_OFFSET_TABLE_, %rax
+
// CHECK: Section {
// CHECK: Name: .rela.text
// CHECK: Relocations [
@@ -52,7 +58,10 @@ bar:
// CHECK-NEXT: 0x85 R_X86_64_TPOFF64 baz 0x0
// CHECK-NEXT: 0x8D R_X86_64_PC16 foo 0x8D
// CHECK-NEXT: 0x8F R_X86_64_PC8 foo 0x8F
-// CHECK-NEXT: 0x91 R_X86_64_PLT32 foo 0xFFFFFFFFFFFFFFFE
+// CHECK-NEXT: 0x91 R_X86_64_PLT32 zed 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT: 0x98 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFB
+// CHECK-NEXT: 0x9D R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_ 0x1
+// CHECK-NEXT: 0xA3 R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_ 0x2
// CHECK-NEXT: ]
// CHECK-NEXT: }
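The two GOTPC lines encode a convention worth spelling out: for $_GLOBAL_OFFSET_TABLE_ the assembler sets the addend to the offset of the immediate field inside its instruction, so that GOT + A - P resolves relative to the instruction's first byte rather than to the field itself:

    movl   $_GLOBAL_OFFSET_TABLE_, %eax    # b8 imm32    -> field at byte 1 -> addend 0x1
    movabs $_GLOBAL_OFFSET_TABLE_, %rax    # 48 b8 imm64 -> field at byte 2 -> addend 0x2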
diff --git a/test/MC/ELF/set.s b/test/MC/ELF/set.s
index 80e7e53..b4f77f5 100644
--- a/test/MC/ELF/set.s
+++ b/test/MC/ELF/set.s
@@ -5,7 +5,7 @@
.set kernbase,0xffffffff80000000
// CHECK: Symbol {
-// CHECK: Name: kernbase (1)
+// CHECK: Name: kernbase
// CHECK-NEXT: Value: 0xFFFFFFFF80000000
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -26,7 +26,7 @@
// Test that there is an undefined reference to bar
// CHECK: Symbol {
-// CHECK: Name: bar (10)
+// CHECK: Name: bar
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/strtab-suffix-opt.s b/test/MC/ELF/strtab-suffix-opt.s
new file mode 100644
index 0000000..eb5da8a
--- /dev/null
+++ b/test/MC/ELF/strtab-suffix-opt.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -symbols | FileCheck %s
+
+ .text
+ .globl foobar
+ .align 16, 0x90
+ .type foobar,@function
+foobar:
+ pushl %ebp
+ movl %esp, %ebp
+ subl $8, %esp
+ calll foo
+ calll bar
+ addl $8, %esp
+ popl %ebp
+ retl
+.Ltmp3:
+ .size foobar, .Ltmp3-foobar
+
+// CHECK: Name: foobar (1)
+// CHECK: Name: bar (4)
+// CHECK: Name: foo (8)
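The offsets in these CHECK lines are the point of the new test: with suffix sharing, "bar" is stored as a tail of "foobar" rather than as its own entry, so its offset is 1 + strlen("foo") = 4, while "foo" (not a suffix of anything here) gets a fresh entry after "foobar\0":

    index:    0  1  2  3  4  5  6  7  8  9 10 11
    .strtab: \0  f  o  o  b  a  r \0  f  o  o \0
                 ^foobar=1   ^bar=4   ^foo=8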
diff --git a/test/MC/ELF/subtraction-error.s b/test/MC/ELF/subtraction-error.s
new file mode 100644
index 0000000..6b93d3a
--- /dev/null
+++ b/test/MC/ELF/subtraction-error.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux < %s 2>&1 | FileCheck %s
+
+a:
+ .section foo
+b:
+c = b - a
+
+; CHECK: symbol 'a' could not be evaluated in a subtraction expression
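The setup is minimal but deliberate: a stays in the initial .text section while b lives in foo, so b - a subtracts symbols from two different sections and has no value the assembler can compute at this point; hence the diagnostic rather than a silent guess. With both labels in one section the same expression folds away:

    a:
    b:
    c = b - a      # both in .text -> folds to the constant 0, no error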
diff --git a/test/MC/ELF/symref.s b/test/MC/ELF/symref.s
deleted file mode 100644
index 737683b..0000000
--- a/test/MC/ELF/symref.s
+++ /dev/null
@@ -1,142 +0,0 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
-
-defined1:
-defined2:
-defined3:
- .symver defined1, bar1@zed
- .symver undefined1, bar2@zed
-
- .symver defined2, bar3@@zed
-
- .symver defined3, bar5@@@zed
- .symver undefined3, bar6@@@zed
-
- .long defined1
- .long undefined1
- .long defined2
- .long defined3
- .long undefined3
-
- .global global1
- .symver global1, g1@@zed
-global1:
-
-// CHECK: Relocations [
-// CHECK-NEXT: Section (2) .rela.text {
-// CHECK-NEXT: 0x0 R_X86_64_32 .text 0x0
-// CHECK-NEXT: 0x4 R_X86_64_32 bar2@zed 0x0
-// CHECK-NEXT: 0x8 R_X86_64_32 .text 0x0
-// CHECK-NEXT: 0xC R_X86_64_32 .text 0x0
-// CHECK-NEXT: 0x10 R_X86_64_32 bar6@zed 0x0
-// CHECK-NEXT: }
-// CHECK-NEXT: ]
-
-// CHECK: Symbol {
-// CHECK: Name: bar1@zed (19)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar3@@zed (37)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar5@@zed (47)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: defined1 (1)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: defined2 (10)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .text (0)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .data (0)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .data (0x3)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .bss (0)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Local
-// CHECK-NEXT: Type: Section
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .bss (0x4)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: g1@@zed (74)
-// CHECK-NEXT: Value: 0x14
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: global1 (66)
-// CHECK-NEXT: Value: 0x14
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .text (0x1)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar2@zed (28)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
-// CHECK-NEXT: }
-// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar6@zed (57)
-// CHECK-NEXT: Value: 0x0
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Binding: Global
-// CHECK-NEXT: Type: None
-// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
-// CHECK-NEXT: }
-// CHECK-NEXT: ]
diff --git a/test/MC/ELF/symver.s b/test/MC/ELF/symver.s
new file mode 100644
index 0000000..6e5825f
--- /dev/null
+++ b/test/MC/ELF/symver.s
@@ -0,0 +1,142 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
+
+defined1:
+defined2:
+defined3:
+ .symver defined1, bar1@zed
+ .symver undefined1, bar2@zed
+
+ .symver defined2, bar3@@zed
+
+ .symver defined3, bar5@@@zed
+ .symver undefined3, bar6@@@zed
+
+ .long defined1
+ .long undefined1
+ .long defined2
+ .long defined3
+ .long undefined3
+
+ .global global1
+ .symver global1, g1@@zed
+global1:
+
+// CHECK: Relocations [
+// CHECK-NEXT: Section (2) .rela.text {
+// CHECK-NEXT: 0x0 R_X86_64_32 .text 0x0
+// CHECK-NEXT: 0x4 R_X86_64_32 bar2@zed 0x0
+// CHECK-NEXT: 0x8 R_X86_64_32 .text 0x0
+// CHECK-NEXT: 0xC R_X86_64_32 .text 0x0
+// CHECK-NEXT: 0x10 R_X86_64_32 bar6@zed 0x0
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+
+// CHECK: Symbol {
+// CHECK: Name: bar1@zed
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: bar3@@zed
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: bar5@@zed
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: defined1
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: defined2
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .text
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .data
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .bss
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .bss
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: g1@@zed
+// CHECK-NEXT: Value: 0x14
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: global1
+// CHECK-NEXT: Value: 0x14
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: bar2@zed
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: bar6@zed
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
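For reading the checks, the three .symver spellings follow the GNU as rules: sym@ver binds a non-default version, sym@@ver a default version, and sym@@@ver resolves to @@ when the target is defined and to @ when it is undefined. That is exactly the split visible above:

    .symver defined3,   bar5@@@zed   # defined   -> symbol bar5@@zed
    .symver undefined3, bar6@@@zed   # undefined -> relocation against bar6@zed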
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 88e96ff..5ee3668 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -18,7 +18,7 @@
.long fooE@INDNTPOFF
// CHECK: Symbol {
-// CHECK: Name: foo1 (1)
+// CHECK: Name: foo1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -27,7 +27,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo2 (6)
+// CHECK-NEXT: Name: foo2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -36,7 +36,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo3 (11)
+// CHECK-NEXT: Name: foo3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -45,7 +45,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo4 (16)
+// CHECK-NEXT: Name: foo4
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -54,7 +54,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo5 (21)
+// CHECK-NEXT: Name: foo5
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -63,7 +63,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo6 (26)
+// CHECK-NEXT: Name: foo6
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -72,7 +72,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo7 (31)
+// CHECK-NEXT: Name: foo7
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -81,7 +81,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo8 (36)
+// CHECK-NEXT: Name: foo8
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -90,7 +90,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo9 (41)
+// CHECK-NEXT: Name: foo9
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -99,7 +99,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: fooA (46)
+// CHECK-NEXT: Name: fooA
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -108,7 +108,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: fooB (51)
+// CHECK-NEXT: Name: fooB
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -117,7 +117,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: fooC (56)
+// CHECK-NEXT: Name: fooC
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -126,7 +126,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: fooD (61)
+// CHECK-NEXT: Name: fooD
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -135,7 +135,7 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: fooE (66)
+// CHECK-NEXT: Name: fooE
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index 6d4b703..79865cd 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -13,7 +13,7 @@ foobar:
.long 43
// CHECK: Symbol {
-// CHECK: Name: foobar (31)
+// CHECK: Name: foobar
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -23,7 +23,7 @@ foobar:
// CHECK-NEXT: }
// CHECK: Symbol {
-// CHECK: Name: foo1 (1)
+// CHECK: Name: foo1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -32,7 +32,7 @@ foobar:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo2 (6)
+// CHECK-NEXT: Name: foo2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -41,7 +41,7 @@ foobar:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo3 (11)
+// CHECK-NEXT: Name: foo3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -50,7 +50,7 @@ foobar:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo4 (16)
+// CHECK-NEXT: Name: foo4
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -59,7 +59,7 @@ foobar:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo5 (21)
+// CHECK-NEXT: Name: foo5
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -68,7 +68,7 @@ foobar:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: foo6 (26)
+// CHECK-NEXT: Name: foo6
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index 638d828..c82d300 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -176,7 +176,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym1 (54)
+// CHECK-NEXT: Name: sym1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -185,7 +185,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym10 (162)
+// CHECK-NEXT: Name: sym10
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -194,7 +194,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym11 (176)
+// CHECK-NEXT: Name: sym11
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -203,7 +203,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym12 (190)
+// CHECK-NEXT: Name: sym12
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -212,7 +212,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym2 (66)
+// CHECK-NEXT: Name: sym2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -221,7 +221,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym3 (78)
+// CHECK-NEXT: Name: sym3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -230,7 +230,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym4 (90)
+// CHECK-NEXT: Name: sym4
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -239,7 +239,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym5 (102)
+// CHECK-NEXT: Name: sym5
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -248,7 +248,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym6 (114)
+// CHECK-NEXT: Name: sym6
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -257,7 +257,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym7 (126)
+// CHECK-NEXT: Name: sym7
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -266,7 +266,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym8 (138)
+// CHECK-NEXT: Name: sym8
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
@@ -275,7 +275,7 @@ alias12:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: sym9 (150)
+// CHECK-NEXT: Name: sym9
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global (0x1)
diff --git a/test/MC/ELF/undef.s b/test/MC/ELF/undef.s
index 7c2a876..245b563 100644
--- a/test/MC/ELF/undef.s
+++ b/test/MC/ELF/undef.s
@@ -19,21 +19,80 @@
.text
movsd .Lsym8(%rip), %xmm1
-// CHECK: Symbols [
-
-// CHECK: Symbol {
-// CHECK: Name: .Lsym8
-
-// CHECK: Symbol {
-// CHECK: Name: .Lsym1
+test2_a = undef
+test2_b = undef + 1
-// CHECK: Symbol {
-// CHECK: Name: sym6
+// CHECK: Symbols [
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: (0)
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .Lsym8
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .rodata.str1.1
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .text
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .text
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .data
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .data
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .bss
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .bss
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .rodata.str1.1
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: Section
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: .rodata.str1.1
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: .Lsym1
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: sym6
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
// CHECK-NEXT: Type: Object
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: Undefined (0x0)
+// CHECK-NEXT: Section: Undefined
// CHECK-NEXT: }
// CHECK-NEXT: ]
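The nameless first entry now spelled out above is the mandatory ELF null symbol at index 0, which every symbol table carries; the fully expanded listing simply stopped eliding it. In readelf terms it is the familiar all-zero row:

    Num:    Value          Size Type    Bind   Vis      Ndx Name
      0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND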
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index cf2228d..2288264 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -80,7 +80,7 @@ bar15:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar6 (21)
+// CHECK-NEXT: Name: bar6
// CHECK-NEXT: Value: 0x18
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -89,7 +89,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar7 (26)
+// CHECK-NEXT: Name: bar7
// CHECK-NEXT: Value: 0x18
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -98,7 +98,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar8 (31)
+// CHECK-NEXT: Name: bar8
// CHECK-NEXT: Value: 0x1C
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -107,7 +107,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar9 (36)
+// CHECK-NEXT: Name: bar9
// CHECK-NEXT: Value: 0x20
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -116,7 +116,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .text (0)
+// CHECK-NEXT: Name: .text
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -125,7 +125,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .data (0)
+// CHECK-NEXT: Name: .data
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -134,7 +134,7 @@ bar15:
// CHECK-NEXT: Section: .data (0x3)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: .bss (0)
+// CHECK-NEXT: Name: .bss
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local
@@ -143,7 +143,7 @@ bar15:
// CHECK-NEXT: Section: .bss (0x4)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar10 (41)
+// CHECK-NEXT: Name: bar10
// CHECK-NEXT: Value: 0x28
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -152,7 +152,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar11 (47)
+// CHECK-NEXT: Name: bar11
// CHECK-NEXT: Value: 0x30
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -161,7 +161,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar12 (53)
+// CHECK-NEXT: Name: bar12
// CHECK-NEXT: Value: 0x30
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -170,7 +170,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar13 (59)
+// CHECK-NEXT: Name: bar13
// CHECK-NEXT: Value: 0x34
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -179,7 +179,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar14 (65)
+// CHECK-NEXT: Name: bar14
// CHECK-NEXT: Value: 0x38
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -188,7 +188,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar15 (71)
+// CHECK-NEXT: Name: bar15
// CHECK-NEXT: Value: 0x40
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -197,7 +197,7 @@ bar15:
// CHECK-NEXT: Section: .text (0x1)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar2 (1)
+// CHECK-NEXT: Name: bar2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -206,7 +206,7 @@ bar15:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar3 (6)
+// CHECK-NEXT: Name: bar3
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Weak
@@ -215,7 +215,7 @@ bar15:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar4 (11)
+// CHECK-NEXT: Name: bar4
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
@@ -224,7 +224,7 @@ bar15:
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: bar5 (16)
+// CHECK-NEXT: Name: bar5
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Global
diff --git a/test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s b/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s
index d98c257..d98c257 100644
--- a/test/MC/MachO/ARM64/darwin-ARM64-local-label-diff.s
+++ b/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s
diff --git a/test/MC/MachO/ARM64/darwin-ARM64-reloc.s b/test/MC/MachO/AArch64/darwin-ARM64-reloc.s
index 7f586ae..7f586ae 100644
--- a/test/MC/MachO/ARM64/darwin-ARM64-reloc.s
+++ b/test/MC/MachO/AArch64/darwin-ARM64-reloc.s
diff --git a/test/MC/MachO/AArch64/lit.local.cfg b/test/MC/MachO/AArch64/lit.local.cfg
new file mode 100644
index 0000000..9a66a00
--- /dev/null
+++ b/test/MC/MachO/AArch64/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/bad-darwin-directives.s b/test/MC/MachO/ARM/bad-darwin-directives.s
index 0499e40..7ac0f6f 100644
--- a/test/MC/MachO/ARM/bad-darwin-directives.s
+++ b/test/MC/MachO/ARM/bad-darwin-directives.s
@@ -1,24 +1,29 @@
-@ RUN: not llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
-@ RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+@ RUN: not llvm-mc -n -triple armv7-apple-darwin10 %s -filetype asm -o /dev/null 2>&1 \
+@ RUN: | FileCheck --check-prefix CHECK-ERROR %s
+
+@ RUN: not llvm-mc -n -triple armv7-apple-darwin10 %s -filetype obj -o /dev/null 2>&1 \
+@ RUN: | FileCheck --check-prefix CHECK-ERROR %s
+
@ rdar://16335232
.eabi_attribute 8, 1
-@ CHECK-ERROR: error: .eabi_attribute directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
.cpu
-@ CHECK-ERROR: error: .cpu directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
.fpu neon
-@ CHECK-ERROR: error: .fpu directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
.arch armv7
-@ CHECK-ERROR: error: .arch directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
.fnstart
-@ CHECK-ERROR: error: .fnstart directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
.tlsdescseq
-@ CHECK-ERROR: error: .tlsdescseq directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
.object_arch armv7
-@ CHECK-ERROR: error: .object_arch directive not valid for Mach-O
+@ CHECK-ERROR: error: unknown directive
+
diff --git a/test/MC/MachO/ARM64/lit.local.cfg b/test/MC/MachO/ARM64/lit.local.cfg
deleted file mode 100644
index a75a42b..0000000
--- a/test/MC/MachO/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
-
diff --git a/test/MC/MachO/bad-darwin-x86_64-reloc-expr.s b/test/MC/MachO/bad-darwin-x86_64-reloc-expr.s
new file mode 100644
index 0000000..2b4271f
--- /dev/null
+++ b/test/MC/MachO/bad-darwin-x86_64-reloc-expr.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+.quad (0x1234 + (4 * SOME_VALUE))
+// CHECK-ERROR: error: expected relocatable expression
+// CHECK-ERROR: ^
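x86_64 Mach-O relocations can express sym + constant and sym1 - sym2 + constant, but not a product involving an undefined symbol, so 4 * SOME_VALUE has no encodable form and the assembler points at the expression. Had SOME_VALUE been an absolute constant, the whole thing would fold:

    SOME_VALUE = 3
    .quad (0x1234 + (4 * SOME_VALUE))   # folds to 0x1240, no relocation needed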
diff --git a/test/MC/MachO/debug_frame.s b/test/MC/MachO/debug_frame.s
index 20bfd8d..247347d 100644
--- a/test/MC/MachO/debug_frame.s
+++ b/test/MC/MachO/debug_frame.s
@@ -3,6 +3,7 @@
// Make sure MC can handle file level .cfi_startproc and .cfi_endproc that create
// an empty frame.
// rdar://10017184
+_proc:
.cfi_startproc
.cfi_endproc
diff --git a/test/MC/MachO/temp-labels.s b/test/MC/MachO/temp-labels.s
index b7382b7..ac0f620 100644
--- a/test/MC/MachO/temp-labels.s
+++ b/test/MC/MachO/temp-labels.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -L -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -save-temp-labels -o - | macho-dump --dump-section-data | FileCheck %s
// CHECK: # Load Command 1
// CHECK: (('command', 2)
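-save-temp-labels is the long spelling the old -L short option was renamed to: it keeps assembler-temporary labels (on Darwin, names starting with 'L') in the symbol table instead of discarding them at the end of assembly. Same input, two behaviors:

    $ llvm-mc -triple x86_64-apple-darwin10 t.s -filetype=obj -o t.o                    # L* labels dropped
    $ llvm-mc -triple x86_64-apple-darwin10 t.s -filetype=obj -save-temp-labels -o t.o  # L* labels kept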
diff --git a/test/MC/Mips/cpload-bad.s b/test/MC/Mips/cpload-bad.s
new file mode 100644
index 0000000..7d186f6
--- /dev/null
+++ b/test/MC/Mips/cpload-bad.s
@@ -0,0 +1,15 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1 -check-prefix=ASM
+
+ .text
+ .option pic2
+ .set reorder
+ .cpload $25
+# ASM: :[[@LINE-1]]:9: warning: .cpload in reorder section
+ .set noreorder
+ .cpload $32
+# ASM: :[[@LINE-1]]:17: error: invalid register
+ .cpload $foo
+# ASM: :[[@LINE-1]]:17: error: expected register containing function address
+ .cpload bar
+# ASM: :[[@LINE-1]]:17: error: expected register containing function address
diff --git a/test/MC/Mips/cpload.s b/test/MC/Mips/cpload.s
new file mode 100644
index 0000000..bc5e797
--- /dev/null
+++ b/test/MC/Mips/cpload.s
@@ -0,0 +1,33 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -filetype=obj -o -| \
+# RUN: llvm-objdump -d -r -arch=mips - | \
+# RUN: FileCheck %s -check-prefix=OBJ
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -filetype=obj -o -| \
+# RUN: llvm-objdump -d -r -arch=mips - | \
+# RUN: FileCheck %s -check-prefix=OBJ64
+
+# ASM: .text
+# ASM: .option pic2
+# ASM: .set noreorder
+# ASM: .cpload $25
+# ASM: .set reorder
+
+# OBJ: .text
+# OBJ: lui $gp, 0
+# OBJ: R_MIPS_HI16 _gp_disp
+# OBJ: addiu $gp, $gp, 0
+# OBJ: R_MIPS_LO16 _gp_disp
+# OBJ: addu $gp, $gp, $25
+
+# OBJ64: .text
+# OBJ64-NOT: lui $gp, 0
+# OBJ64-NOT: addiu $gp, $gp, 0
+# OBJ64-NOT: addu $gp, $gp, $25
+
+ .text
+ .option pic2
+ .set noreorder
+ .cpload $25
+ .set reorder
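The OBJ block is the entire contract of .cpload under O32 PIC: it expands to the canonical three-instruction $gp prologue against the linker-defined _gp_disp,

    lui   $gp, %hi(_gp_disp)
    addiu $gp, $gp, %lo(_gp_disp)
    addu  $gp, $gp, $25          # $25/$t9 holds the callee's own address

while under N64 (the OBJ64-NOT lines) the directive emits nothing, since that ABI establishes $gp with .cpsetup instead.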
diff --git a/test/MC/Mips/cpsetup.s b/test/MC/Mips/cpsetup.s
index dbdcaab..a21a1e3 100644
--- a/test/MC/Mips/cpsetup.s
+++ b/test/MC/Mips/cpsetup.s
@@ -1,36 +1,78 @@
+# RUN: llvm-mc -triple mips64-unknown-unknown -mattr=-n64,+o32 -filetype=obj -o - %s | \
+# RUN: llvm-objdump -d -r -arch=mips64 - | \
+# RUN: FileCheck -check-prefix=O32 %s
+
# RUN: llvm-mc -triple mips64-unknown-unknown -mattr=-n64,+o32 %s | \
-# RUN: FileCheck -check-prefix=ANY -check-prefix=O32 %s
+# RUN: FileCheck -check-prefix=ASM %s
+
+# RUN: llvm-mc -triple mips64-unknown-unknown -mattr=-n64,+n32 -filetype=obj -o - %s | \
+# RUN: llvm-objdump -d -r -arch=mips64 - | \
+# RUN: FileCheck -check-prefix=NXX -check-prefix=N32 %s
+
# RUN: llvm-mc -triple mips64-unknown-unknown -mattr=-n64,+n32 %s | \
-# RUN: FileCheck -check-prefix=ANY -check-prefix=NXX -check-prefix=N32 %s
-# RUN: llvm-mc -triple mips64-unknown-unknown %s | \
-# RUN: FileCheck -check-prefix=ANY -check-prefix=NXX -check-prefix=N64 %s
+# RUN: FileCheck -check-prefix=ASM %s
-# TODO: !PIC -> no output
+# RUN: llvm-mc -triple mips64-unknown-unknown %s -filetype=obj -o - | \
+# RUN: llvm-objdump -d -r -arch=mips64 - | \
+# RUN: FileCheck -check-prefix=NXX -check-prefix=N64 %s
+
+# RUN: llvm-mc -triple mips64-unknown-unknown %s | \
+# RUN: FileCheck -check-prefix=ASM %s
.text
.option pic2
t1:
.cpsetup $25, 8, __cerror
-# ANY-LABEL: t1:
# O32-NOT: __cerror
+# FIXME: Direct object emission for N32 is still under development.
+# N32 doesn't allow 3 operations to be specified in the same relocation
+# record like N64 does.
+
# NXX: sd $gp, 8($sp)
-# NXX: lui $gp, %hi(%neg(%gp_rel(__cerror)))
-# NXX: addiu $gp, $gp, %lo(%neg(%gp_rel(__cerror)))
+# NXX: lui $gp, 0
+# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
+# NXX: addiu $gp, $gp, 0
+# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
# N32: addu $gp, $gp, $25
# N64: daddu $gp, $gp, $25
+# ASM: .cpsetup $25, 8, __cerror
+
t2:
-# ANY-LABEL: t2:
.cpsetup $25, $2, __cerror
# O32-NOT: __cerror
+# FIXME: Direct object emission for N32 is still under development.
+# N32 doesn't allow 3 operations to be specified in the same relocation
+# record like N64 does.
+
# NXX: move $2, $gp
-# NXX: lui $gp, %hi(%neg(%gp_rel(__cerror)))
-# NXX: addiu $gp, $gp, %lo(%neg(%gp_rel(__cerror)))
+# NXX: lui $gp, 0
+# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
+# NXX: addiu $gp, $gp, 0
+# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
# N32: addu $gp, $gp, $25
# N64: daddu $gp, $gp, $25
+
+# ASM: .cpsetup $25, $2, __cerror
+
+t3:
+ .option pic0
+ nop
+ .cpsetup $25, 8, __cerror
+ nop
+
+# Testing that .cpsetup expands to nothing in this case
+# by checking that the next instruction after the first
+# nop is also a 'nop'.
+# NXX: nop
+# NXX-NEXT: nop
+
+# ASM: nop
+# ASM: .cpsetup $25, 8, __cerror
+# ASM: nop
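Read as a whole, .cpsetup $25, save, sym under N32/N64 PIC first preserves the old $gp (to 8($sp) in t1, to $2 in t2) and then rebuilds it from the function address in $25:

    sd    $gp, 8($sp)                             # or: move $2, $gp
    lui   $gp, %hi(%neg(%gp_rel(__cerror)))
    addiu $gp, $gp, %lo(%neg(%gp_rel(__cerror)))
    daddu $gp, $gp, $25                           # addu under N32

and, as t3 now verifies, it expands to nothing once .option pic0 is in effect.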
diff --git a/test/MC/Mips/elf-N64.s b/test/MC/Mips/elf-N64.s
index 3c01803..bf6ebd7 100644
--- a/test/MC/Mips/elf-N64.s
+++ b/test/MC/Mips/elf-N64.s
@@ -1,4 +1,5 @@
// RUN: llvm-mc -filetype=obj -triple=mips64el-pc-linux -mcpu=mips64 %s -o - | llvm-readobj -r | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple=mips64-pc-linux -mcpu=mips64 %s -o - | llvm-readobj -r | FileCheck %s
// Check for N64 relocation production.
// Check that the appropriate relocations were created.
diff --git a/test/MC/Mips/elf-gprel-32-64.s b/test/MC/Mips/elf-gprel-32-64.s
index ae75197..2f5ac66 100644
--- a/test/MC/Mips/elf-gprel-32-64.s
+++ b/test/MC/Mips/elf-gprel-32-64.s
@@ -1,6 +1,9 @@
// RUN: llvm-mc -filetype=obj -triple=mips64el-pc-linux -mcpu=mips64 %s -o - \
// RUN: | llvm-readobj -r \
// RUN: | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple=mips64-pc-linux -mcpu=mips64 %s -o - \
+// RUN: | llvm-readobj -r \
+// RUN: | FileCheck %s
// Check that the appropriate relocations were created.
diff --git a/test/MC/Mips/elf_eflags.s b/test/MC/Mips/elf_eflags.s
index c789428..8cf4960 100644
--- a/test/MC/Mips/elf_eflags.s
+++ b/test/MC/Mips/elf_eflags.s
@@ -4,40 +4,79 @@
# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64r2 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64R2 %s
# MIPSEL-MIPS64R2: Flags [ (0x80001100)
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64r2 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64R2-NAN2008 %s
+# MIPSEL-MIPS64R2-NAN2008: Flags [ (0x80001500)
+
# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64 %s
# MIPSEL-MIPS64: Flags [ (0x60001100)
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips64 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS64-NAN2008 %s
+# MIPSEL-MIPS64-NAN2008: Flags [ (0x60001500)
+
# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32r2 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32R2 %s
# MIPSEL-MIPS32R2: Flags [ (0x70001000)
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32R2-NAN2008 %s
+# MIPSEL-MIPS32R2-NAN2008: Flags [ (0x70001400)
+
# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32 %s
# MIPSEL-MIPS32: Flags [ (0x50001000)
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPSEL-MIPS32-NAN2008 %s
+# MIPSEL-MIPS32-NAN2008: Flags [ (0x50001400)
+
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=-n64,n32 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-N32 %s
# MIPS64EL-MIPS64R2-N32: Flags [ (0x80000020)
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=-n64,n32,+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-N32-NAN2008 %s
+# MIPS64EL-MIPS64R2-N32-NAN2008: Flags [ (0x80000420)
+
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 -mattr=-n64,n32 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-N32 %s
# MIPS64EL-MIPS64-N32: Flags [ (0x60000020)
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 -mattr=-n64,n32,+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-N32-NAN2008 %s
+# MIPS64EL-MIPS64-N32-NAN2008: Flags [ (0x60000420)
+
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=n64 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-N64 %s
# MIPS64EL-MIPS64R2-N64: Flags [ (0x80000000)
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=n64,+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-N64-NAN2008 %s
+# MIPS64EL-MIPS64R2-N64-NAN2008: Flags [ (0x80000400)
+
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 %s -mattr=n64 -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-N64 %s
# MIPS64EL-MIPS64-N64: Flags [ (0x60000000)
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 %s -mattr=n64,+nan2008 -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-N64-NAN2008 %s
+# MIPS64EL-MIPS64-N64-NAN2008: Flags [ (0x60000400)
+
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=-n64,o32 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-O32 %s
# MIPS64EL-MIPS64R2-O32: Flags [ (0x80001100)
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=-n64,o32,+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-O32-NAN2008 %s
+# MIPS64EL-MIPS64R2-O32-NAN2008: Flags [ (0x80001500)
+
# RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips4 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS4 %s
# MIPS4: Flags [ (0x30000000)
+ # RUN: llvm-mc -filetype=obj -triple mips64-unknown-linux -mcpu=mips4 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS4-NAN2008 %s
+# MIPS4-NAN2008: Flags [ (0x30000400)
+
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 %s -mattr=-n64,o32 -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-O32 %s
# MIPS64EL-MIPS64-O32: Flags [ (0x60001100)
-
+
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 %s -mattr=-n64,o32,+nan2008 -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-O32-NAN2008 %s
+# MIPS64EL-MIPS64-O32-NAN2008: Flags [ (0x60001500)
+
# Default ABI for MIPS64 is N64 as opposed to GCC/GAS (N32)
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2 %s
# MIPS64EL-MIPS64R2: Flags [ (0x80000000)
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64r2 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64R2-NAN2008 %s
+# MIPS64EL-MIPS64R2-NAN2008: Flags [ (0x80000400)
+
# Default ABI for MIPS64 is N64 as opposed to GCC/GAS (N32)
# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64 %s
# MIPS64EL-MIPS64: Flags [ (0x60000000)
+
+# RUN: llvm-mc -filetype=obj -triple mips64el-unknown-linux -mcpu=mips64 -mattr=+nan2008 %s -o -| llvm-readobj -h | FileCheck --check-prefix=MIPS64EL-MIPS64-NAN2008 %s
+# MIPS64EL-MIPS64-NAN2008: Flags [ (0x60000400)
diff --git a/test/MC/Mips/elf_eflags_nan2008.s b/test/MC/Mips/elf_eflags_nan2008.s
new file mode 100644
index 0000000..71a22be
--- /dev/null
+++ b/test/MC/Mips/elf_eflags_nan2008.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32 %s -o - | \
+# RUN: llvm-readobj -h | \
+# RUN: FileCheck %s -check-prefix=CHECK-OBJ
+# RUN: llvm-mc -triple mipsel-unknown-linux -mcpu=mips32 %s -o -| \
+# RUN: FileCheck %s -check-prefix=CHECK-ASM
+
+# This *MUST* match the output of gas compiled with the same triple.
+# CHECK-OBJ: Flags [ (0x50001400)
+
+# CHECK-ASM: .nan 2008
+
+.nan 2008
diff --git a/test/MC/Mips/elf_eflags_nanlegacy.s b/test/MC/Mips/elf_eflags_nanlegacy.s
new file mode 100644
index 0000000..6897ad2
--- /dev/null
+++ b/test/MC/Mips/elf_eflags_nanlegacy.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux -mcpu=mips32 %s -o - | \
+# RUN: llvm-readobj -h | \
+# RUN: FileCheck %s -check-prefix=CHECK-OBJ
+# RUN: llvm-mc -triple mipsel-unknown-linux -mcpu=mips32 %s -o -| \
+# RUN: FileCheck %s -check-prefix=CHECK-ASM
+
+# This *MUST* match the output of gas compiled with the same triple.
+# CHECK-OBJ: Flags [ (0x50001000)
+
+# CHECK-ASM: .nan 2008
+# CHECK-ASM: .nan legacy
+
+.nan 2008
+// Let's override the previous directive!
+.nan legacy
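Cross-checking against elf_eflags.s above, the NaN2008 bit is 0x400: mips32 with +nan2008 produces 0x50001400, while here the trailing directive wins and the flags stay at 0x50001000. The last .nan in the file decides the header flag:

    .nan 2008      # would set the 0x400 (NaN2008) flag bit
    .nan legacy    # overrides it -> Flags [ (0x50001000)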
diff --git a/test/MC/Mips/llvm-mc-fixup-endianness.s b/test/MC/Mips/llvm-mc-fixup-endianness.s
new file mode 100644
index 0000000..bc6a5d9
--- /dev/null
+++ b/test/MC/Mips/llvm-mc-fixup-endianness.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -show-encoding -mcpu=mips32 -triple mips-unknown-unknown %s | FileCheck -check-prefix=BE %s
+# RUN: llvm-mc -show-encoding -mcpu=mips32 -triple mipsel-unknown-unknown %s | FileCheck -check-prefix=LE %s
+#
+ .text
+ b foo # BE: b foo # encoding: [0x10,0x00,A,A]
+ # LE: b foo # encoding: [A,A,0x00,0x10]
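The two encodings show what the fixup-endianness handling has to get right: the 16-bit branch offset (the A,A placeholder bytes) sits in the low half of the instruction word, so big-endian mips stores the opcode bytes 0x10,0x00 first while mipsel emits the same 32-bit word least-significant byte first:

    BE: 10 00 AA AA    # opcode halfword, then the fixup field
    LE: AA AA 00 10    # identical word, reversed byte order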
diff --git a/test/MC/Mips/micromips-control-instructions.s b/test/MC/Mips/micromips-control-instructions.s
index 8170a9c..aff84c2 100644
--- a/test/MC/Mips/micromips-control-instructions.s
+++ b/test/MC/Mips/micromips-control-instructions.s
@@ -1,6 +1,6 @@
-# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips \
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mcpu=mips32r2 -mattr=micromips \
# RUN: | FileCheck -check-prefix=CHECK-EL %s
-# RUN: llvm-mc %s -triple=mips -show-encoding -mattr=micromips \
+# RUN: llvm-mc %s -triple=mips -show-encoding -mcpu=mips32r2 -mattr=micromips \
# RUN: | FileCheck -check-prefix=CHECK-EB %s
# Check that the assembler can handle the documented syntax
# for control instructions.
@@ -10,7 +10,7 @@
# Little endian
#------------------------------------------------------------------------------
# CHECK-EL: break # encoding: [0x00,0x00,0x07,0x00]
-# CHECK-EL: break 7, 0 # encoding: [0x07,0x00,0x07,0x00]
+# CHECK-EL: break 7 # encoding: [0x07,0x00,0x07,0x00]
# CHECK-EL: break 7, 5 # encoding: [0x07,0x00,0x47,0x01]
# CHECK-EL: syscall # encoding: [0x00,0x00,0x7c,0x8b]
# CHECK-EL: syscall 396 # encoding: [0x8c,0x01,0x7c,0x8b]
@@ -28,7 +28,7 @@
# Big endian
#------------------------------------------------------------------------------
# CHECK-EB: break # encoding: [0x00,0x00,0x00,0x07]
-# CHECK-EB: break 7, 0 # encoding: [0x00,0x07,0x00,0x07]
+# CHECK-EB: break 7 # encoding: [0x00,0x07,0x00,0x07]
# CHECK-EB: break 7, 5 # encoding: [0x00,0x07,0x01,0x47]
# CHECK-EB: syscall # encoding: [0x00,0x00,0x8b,0x7c]
# CHECK-EB: syscall 396 # encoding: [0x01,0x8c,0x8b,0x7c]
diff --git a/test/MC/Mips/micromips-el-fixup-data.s b/test/MC/Mips/micromips-el-fixup-data.s
index 2293f63..4753835 100644
--- a/test/MC/Mips/micromips-el-fixup-data.s
+++ b/test/MC/Mips/micromips-el-fixup-data.s
@@ -2,7 +2,7 @@
# RUN: -mattr=+micromips 2>&1 -filetype=obj > %t.o
# RUN: llvm-objdump %t.o -triple mipsel -mattr=+micromips -d | FileCheck %s
-# Check that fixup data is writen in the microMIPS specific little endian
+# Check that fixup data is written in the microMIPS specific little endian
# byte order.
.text
diff --git a/test/MC/Mips/mips-control-instructions.s b/test/MC/Mips/mips-control-instructions.s
index 4a16c53..47da8cc 100644
--- a/test/MC/Mips/mips-control-instructions.s
+++ b/test/MC/Mips/mips-control-instructions.s
@@ -4,7 +4,7 @@
# RUN: | FileCheck -check-prefix=CHECK64 %s
# CHECK32: break # encoding: [0x00,0x00,0x00,0x0d]
-# CHECK32: break 7, 0 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK32: break 7 # encoding: [0x00,0x07,0x00,0x0d]
# CHECK32: break 7, 5 # encoding: [0x00,0x07,0x01,0x4d]
# CHECK32: syscall # encoding: [0x00,0x00,0x00,0x0c]
# CHECK32: syscall 13396 # encoding: [0x00,0x0d,0x15,0x0c]
@@ -37,7 +37,7 @@
# CHECK32: tnei $3, 1023 # encoding: [0x04,0x6e,0x03,0xff]
# CHECK64: break # encoding: [0x00,0x00,0x00,0x0d]
-# CHECK64: break 7, 0 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK64: break 7 # encoding: [0x00,0x07,0x00,0x0d]
# CHECK64: break 7, 5 # encoding: [0x00,0x07,0x01,0x4d]
# CHECK64: syscall # encoding: [0x00,0x00,0x00,0x0c]
# CHECK64: syscall 13396 # encoding: [0x00,0x0d,0x15,0x0c]
diff --git a/test/MC/Mips/mips1/invalid-mips2-wrong-error.s b/test/MC/Mips/mips1/invalid-mips2-wrong-error.s
new file mode 100644
index 0000000..8e878fe
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips2-wrong-error.s
@@ -0,0 +1,16 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ldc1 $f11,16391($s0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldc2 $8,-21181($at) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldc3 $29,-28645($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ll $v0,-7321($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sc $t7,18904($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc1 $f31,30574($t5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc2 $20,23157($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc3 $12,5835($t2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips1/invalid-mips2.s b/test/MC/Mips/mips1/invalid-mips2.s
new file mode 100644
index 0000000..6c3e80a
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips2.s
@@ -0,0 +1,23 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ceil.w.d $f11,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.s $f6,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.d $f14,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.s $f8,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.d $f6,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.s $f27,$f28 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.d $f17,$f22 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.s $f0,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $t6,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $t4,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.d $f22,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.s $f28,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips3-wrong-error.s b/test/MC/Mips/mips1/invalid-mips3-wrong-error.s
new file mode 100644
index 0000000..2016e70
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips3-wrong-error.s
@@ -0,0 +1,23 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ld $sp,-28645($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldc1 $f11,16391($s0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldc2 $8,-21181($at) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldl $24,-4167($24) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldr $14,-30358($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ll $v0,-7321($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lld $zero,-14736($ra) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwu $s3,-24086($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sc $15,18904($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ scd $15,-8243($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sd $12,5835($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc1 $f31,30574($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc2 $20,23157($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdl $a3,-20961($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdr $11,-20423($12) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips1/invalid-mips3.s b/test/MC/Mips/mips1/invalid-mips3.s
new file mode 100644
index 0000000..d1b0eec
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips3.s
@@ -0,0 +1,65 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ dmult $s7,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsub $a3,$s6,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.d $f1,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.s $f18,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.d $f11,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.s $f6,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.d.l $f4,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.s.l $f15,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddi $sp,$s4,-27705 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddiu $k0,$s6,-4586 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddu $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddiv $zero,$k0,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddivu $zero,$s0,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmfc1 $12,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmtc1 $s0,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmultu $a1,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,$zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsllv $zero,$s4,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrav $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrlv $s3,$14,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsubu $a1,$a1,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.d $f26,$f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.s $f12,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.d $f14,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.s $f8,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.d $f12,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.s $f25,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.d $f6,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.s $f27,$f28 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.d $f17,$f22 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.s $f0,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.d $f23,$f23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.s $f28,$f31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.d $f22,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.s $f28,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
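These invalid-mipsN.s files pin down the preferred diagnostic for instructions from later ISAs: the mnemonic is recognised, but the -mcpu=mips1 feature predicate rejects it, so the expected message is "instruction requires a CPU feature not currently enabled". Instructions that are rejected for the wrong reason (for example as an operand mismatch) live in the companion -wrong-error files and are meant to migrate here as the diagnostics improve. For contrast, a sketch (not part of this commit, assuming MIPS-III support is selected) of the same instruction assembling cleanly at a matching ISA level:

# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 | FileCheck %s
        .set noat
        dadd $s3,$at,$ra # CHECK: dadd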
diff --git a/test/MC/Mips/mips1/invalid-mips4-wrong-error.s b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
new file mode 100644
index 0000000..2016e70
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
@@ -0,0 +1,23 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ld $sp,-28645($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldc1 $f11,16391($s0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldc2 $8,-21181($at) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldl $24,-4167($24) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldr $14,-30358($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ll $v0,-7321($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lld $zero,-14736($ra) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwu $s3,-24086($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sc $15,18904($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ scd $15,-8243($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sd $12,5835($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc1 $f31,30574($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdc2 $20,23157($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdl $a3,-20961($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdr $11,-20423($12) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips1/invalid-mips4.s b/test/MC/Mips/mips1/invalid-mips4.s
new file mode 100644
index 0000000..61aaf58
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips4.s
@@ -0,0 +1,82 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ceil.l.d $f1,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.s $f18,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.d $f11,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.s $f6,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.d.l $f4,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.s.l $f15,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddi $sp,$s4,-27705 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddiu $k0,$s6,-4586 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddu $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddiv $zero,$k0,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddivu $zero,$s0,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmfc1 $12,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmtc1 $s0,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmult $s7,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmultu $a1,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,$zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsllv $zero,$s4,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrav $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrlv $s3,$14,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsub $a3,$s6,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsubu $a1,$a1,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.d $f26,$f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.s $f12,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.d $f14,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.s $f8,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ldxc1 $f8,$s7($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$8,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f10,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f26,$f20,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.d $f12,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.s $f25,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.d $f6,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.s $f27,$f28 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$10($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.d $f17,$f22 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.s $f0,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swxc1 $f19,$12($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.d $f23,$f23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.s $f28,$f31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.d $f22,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.s $f28,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips5-wrong-error.s b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
new file mode 100644
index 0000000..74473a3
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
@@ -0,0 +1,46 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ alnv.ps $f12,$f18,$f30,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.eq.ps $fcc5,$f0,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.f.ps $fcc6,$f11,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.le.ps $fcc1,$f7,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.lt.ps $f19,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.nge.ps $f1,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngl.ps $f21,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngle.ps $fcc7,$f12,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngt.ps $fcc5,$f30,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ole.ps $fcc7,$f21,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.olt.ps $fcc3,$f7,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.seq.ps $fcc6,$f31,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.sf.ps $fcc6,$f4,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ueq.ps $fcc1,$f5,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ule.ps $fcc6,$f17,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ult.ps $fcc7,$f14,$f0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.un.ps $fcc4,$f2,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.ps.s $f3,$f18,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pl $f30,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pu $f14,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ madd.ps $f22,$f3,$f14,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mov.ps $f22,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movf.ps $f10,$f28,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movn.ps $f31,$f31,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movt.ps $f20,$f25,$fcc2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movz.ps $f18,$f17,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ msub.ps $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mul.ps $f14,$f0,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ neg.ps $f19,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmadd.ps $f27,$f4,$f9,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmsub.ps $f6,$f12,$f14,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pll.ps $f25,$f9,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ plu.ps $f1,$f26,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pul.ps $f9,$f30,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ puu.ps $f24,$f9,$f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sub.ps $f5,$f14,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips1/invalid-mips5.s b/test/MC/Mips/mips1/invalid-mips5.s
new file mode 100644
index 0000000..1eddf02
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips5.s
@@ -0,0 +1,82 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ceil.l.d $f1,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.s $f18,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.d $f11,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.w.s $f6,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.d.l $f4,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.s.l $f15,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddi $sp,$s4,-27705 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddiu $k0,$s6,-4586 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddu $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddiv $zero,$k0,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddivu $zero,$s0,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmfc1 $t0,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmtc1 $s0,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmultu $a1,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,$zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsllv $zero,$s4,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrav $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrlv $s3,$t2,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsubu $a1,$a1,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.d $f26,$f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.s $f12,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.d $f14,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.w.s $f8,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ldxc1 $f8,$s7($t3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$a0,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.d $f12,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.s $f25,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.d $f6,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.w.s $f27,$f28 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.d $f17,$f22 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sqrt.s $f0,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swxc1 $f19,$t0($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $t2,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $t0,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.d $f23,$f23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.s $f28,$f31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.d $f22,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.w.s $f28,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$a2($t2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/valid-xfail.s b/test/MC/Mips/mips1/valid-xfail.s
index 2ffeaa9..7696c9e 100644
--- a/test/MC/Mips/mips1/valid-xfail.s
+++ b/test/MC/Mips/mips1/valid-xfail.s
@@ -2,16 +2,10 @@
# they aren't implemented yet).
# This test is set up to XPASS if any instruction generates an encoding.
#
-# FIXME: Test MIPS-I instead of MIPS32
-# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | not FileCheck %s
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 | not FileCheck %s
# CHECK-NOT: encoding
# XFAIL: *
- .set noat
- tlbp
- tlbr
- tlbwi
- tlbwr
- lwc0 c0_entrylo,-7321($s2)
- lwc3 $10,-32265($k0)
- swc0 c0_prid,18904($s3)
+ .set noat
+ lwc0 c0_entrylo,-7321($s2)
+ swc0 c0_prid,18904($s3)
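The double negation in the updated RUN line makes the XFAIL marking self-maintaining. While none of the listed instructions assemble, stdout carries no "encoding" string, the CHECK-NOT passes, the trailing `not` turns FileCheck's success into a failure, and lit records the expected failure. The moment any instruction gains an encoding, CHECK-NOT fires, the pipeline exits successfully, and the test XPASSes, signalling that the line should move to valid.s. The skeleton of the idiom:

# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 | not FileCheck %s
# CHECK-NOT: encoding
# XFAIL: *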
diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s
index 7fc866a..473e6b9 100644
--- a/test/MC/Mips/mips1/valid.s
+++ b/test/MC/Mips/mips1/valid.s
@@ -1,85 +1,102 @@
# Instructions that are valid
#
-# FIXME: Test MIPS-I instead of MIPS32
-# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 | FileCheck %s
- .set noat
- abs.d $f7,$f25 # CHECK: encoding:
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- cfc1 $s1,$21
- ctc1 $a2,$26
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.s.d $f26,$f8
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $s8,$a0
- move $t9,$a2
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- sb $s6,-19857($t6)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- xor $s2,$a0,$s8
+ .set noat
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ cfc1 $s1,$21
+ ctc1 $a2,$26
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.s.d $f26,$f8
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwc3 $10,-32265($k0)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $s8,$a0
+ move $25,$a2
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ sb $s6,-19857($14)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swc3 $10,-32265($k0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ xor $s2,$a0,$s8
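Several of the lines added to valid.s double as alias-expansion checks: `negu` is emitted as a `subu` from $zero (note the 0x23 SUBU function field in its encoding), and shift mnemonics given a register in the shift-amount position are matched to their variable forms (`sll` to `sllv`, `sra` to `srav`, `srl` to `srlv`). A standalone sketch (assumed behaviour; the encoding is taken from the negu CHECK line above) showing the alias and its canonical form produce identical bytes:

# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 | FileCheck %s
        negu $2,$3       # CHECK: encoding: [0x00,0x03,0x10,0x23]
        subu $2,$zero,$3 # CHECK: encoding: [0x00,0x03,0x10,0x23]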
diff --git a/test/MC/Mips/mips2/invalid-mips3-wrong-error.s b/test/MC/Mips/mips2/invalid-mips3-wrong-error.s
new file mode 100644
index 0000000..a3f829b
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips3-wrong-error.s
@@ -0,0 +1,19 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ dmult $s7,$a5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dsub $a3,$s6,$a4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ld $sp,-28645($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldl $t8,-4167($t8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldr $t2,-30358($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lld $zero,-14736($ra) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwu $s3,-24086($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ scd $t3,-8243($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sd $t0,5835($a6) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdl $a3,-20961($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdr $a7,-20423($t0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips2/invalid-mips3.s b/test/MC/Mips/mips2/invalid-mips3.s
new file mode 100644
index 0000000..ef498d7
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips3.s
@@ -0,0 +1,48 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ceil.l.d $f1,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.s $f18,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.d.l $f4,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.s.l $f15,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddi $sp,$s4,-27705 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddiu $k0,$s6,-4586 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddu $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddiv $zero,$k0,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddivu $zero,$s0,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmfc1 $t0,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmtc1 $s0,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmultu $a1,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,$zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsllv $zero,$s4,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrav $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrlv $s3,$t2,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsubu $a1,$a1,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.d $f26,$f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.s $f12,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.d $f12,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.s $f25,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.d $f23,$f23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.s $f28,$f31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips32.s b/test/MC/Mips/mips2/invalid-mips32.s
new file mode 100644
index 0000000..2975c68
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips32.s
@@ -0,0 +1,32 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ clo $11,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ deret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $s6,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $zero,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $24,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfc0 $a2,$14,1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$8,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub $s7,$k1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msubu $15,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtc0 $9,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips32r2-xfail.s b/test/MC/Mips/mips2/invalid-mips32r2-xfail.s
new file mode 100644
index 0000000..073f777
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips32r2-xfail.s
@@ -0,0 +1,11 @@
+# Instructions that are supposed to be invalid but currently aren't.
+# This test will XPASS if any insn stops assembling.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2> %t1
+# RUN: not FileCheck %s < %t1
+# XFAIL: *
+
+# CHECK-NOT: error
+ .set noat
+ rdhwr $sp,$11
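This file tracks the opposite defect: rdhwr was introduced in MIPS32r2, yet it currently still assembles under -mcpu=mips2, so FileCheck is inverted and the test is marked XFAIL; it will XPASS once the instruction starts being rejected. The eventual expectation would presumably read like the other invalid tests (hypothetical, not part of this commit):

        rdhwr $sp,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled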
diff --git a/test/MC/Mips/mips2/invalid-mips32r2.s b/test/MC/Mips/mips2/invalid-mips32r2.s
new file mode 100644
index 0000000..37f2eed
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips32r2.s
@@ -0,0 +1,59 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ clo $t3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ deret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ di $s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ei $t6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ldxc1 $f8,$s7($t7) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $s6,$t5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $zero,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd.d $f18,$f19,$f26,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd.s $f1,$f31,$f19,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $t8,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfc0 $a2,$14,1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhc1 $s8,$f24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$t0,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub $s7,$k1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub.d $f10,$f1,$f31,$f18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub.s $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msubu $t7,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtc0 $t1,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthc1 $zero,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmadd.d $f18,$f9,$f14,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmadd.s $f0,$f5,$f25,$f12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmsub.d $f30,$f8,$f16,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmsub.s $f1,$f24,$f19,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pause # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotrv $1,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$t2($t6) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ seb $t9,$t7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ seh $v1,$t4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swxc1 $f19,$t4($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ wsbh $k1,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips4-wrong-error.s b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
new file mode 100644
index 0000000..193f6d7
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
@@ -0,0 +1,14 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ld $sp,-28645($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwu $s3,-24086($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ scd $15,-8243($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sd $12,5835($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdl $a3,-20961($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdr $11,-20423($12) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips2/invalid-mips4.s b/test/MC/Mips/mips2/invalid-mips4.s
new file mode 100644
index 0000000..e2eb672
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips4.s
@@ -0,0 +1,65 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ceil.l.d $f1,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.s $f18,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.d.l $f4,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.s.l $f15,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddi $sp,$s4,-27705 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddiu $k0,$s6,-4586 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddu $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddiv $zero,$k0,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddivu $zero,$s0,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmfc1 $12,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmtc1 $s0,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmult $s7,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmultu $a1,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,$zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsllv $zero,$s4,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrav $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrlv $s3,$14,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsub $a3,$s6,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsubu $a1,$a1,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.d $f26,$f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.s $f12,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ldxc1 $f8,$s7($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$8,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.d $f12,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.s $f25,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$10($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.d $f23,$f23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.s $f28,$f31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
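The invalid-instruction tests above all lean on FileCheck's [[@LINE]] substitution: the pattern expands to the number of the line it is written on, so each assertion pins the diagnostic to its own source line, while {{[0-9]+}} leaves the column unconstrained. A minimal standalone sketch of the idiom, reusing a line from the test above; the "2>&1 |" form is just a compressed variant of the "2>%t1" redirection the real tests use:

  # RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 2>&1 | FileCheck %s
          dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled

Here "not" inverts llvm-mc's exit status, since the assembler is expected to fail on every line.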
diff --git a/test/MC/Mips/mips2/invalid-mips5-wrong-error.s b/test/MC/Mips/mips2/invalid-mips5-wrong-error.s
new file mode 100644
index 0000000..0c58c6c
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips5-wrong-error.s
@@ -0,0 +1,46 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ alnv.ps $f12,$f18,$f30,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.eq.ps $fcc5,$f0,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.f.ps $fcc6,$f11,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.le.ps $fcc1,$f7,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.lt.ps $f19,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.nge.ps $f1,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngl.ps $f21,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngle.ps $fcc7,$f12,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngt.ps $fcc5,$f30,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ole.ps $fcc7,$f21,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.olt.ps $fcc3,$f7,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.seq.ps $fcc6,$f31,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.sf.ps $fcc6,$f4,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ueq.ps $fcc1,$f5,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ule.ps $fcc6,$f17,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ult.ps $fcc7,$f14,$f0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.un.ps $fcc4,$f2,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.ps.s $f3,$f18,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pl $f30,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pu $f14,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ madd.ps $f22,$f3,$f14,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mov.ps $f22,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movf.ps $f10,$f28,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movn.ps $f31,$f31,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movt.ps $f20,$f25,$fcc2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movz.ps $f18,$f17,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ msub.ps $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mul.ps $f14,$f0,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ neg.ps $f19,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmadd.ps $f27,$f4,$f9,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmsub.ps $f6,$f12,$f14,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pll.ps $f25,$f9,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ plu.ps $f1,$f26,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pul.ps $f9,$f30,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ puu.ps $f24,$f9,$f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sub.ps $f5,$f14,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
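The *-wrong-error.s companions quarantine diagnostics that are known to be wrong: the paired-single (.ps) instructions above do require a missing CPU feature, but the mips2 matcher currently has no pattern for them at all, so it falls back to the generic, oddly capitalized "Unknown instruction". Keeping these assertions in a separate file means the test breaks loudly, and can be folded into invalid-mips5.s, once the assembler reports the intended message. A sketch of the expected migration for one line; the target message is an assumption based on the sibling tests, not something this commit implements:

          add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
  # ...which should eventually tighten to:
          add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled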
diff --git a/test/MC/Mips/mips2/invalid-mips5.s b/test/MC/Mips/mips2/invalid-mips5.s
new file mode 100644
index 0000000..f777ffe
--- /dev/null
+++ b/test/MC/Mips/mips2/invalid-mips5.s
@@ -0,0 +1,66 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ceil.l.d $f1,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ceil.l.s $f18,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.d.l $f4,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.s.l $f15,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dadd $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddi $sp,$s4,-27705 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddiu $k0,$s6,-4586 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ daddu $s3,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddiv $zero,$k0,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ddivu $zero,$s0,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmfc1 $t0,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmtc1 $s0,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dmultu $a1,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll $zero,$s4,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsll32 $zero,$zero,18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsllv $zero,$s4,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsra32 $gp,$s2,10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrav $gp,$s2,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl $s3,$6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrl32 $s3,$6,23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsrlv $s3,$t2,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsubu $a1,$a1,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ eret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.d $f26,$f7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ floor.l.s $f12,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ldxc1 $f8,$s7($t3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$a0,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.d $f12,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ round.l.s $f25,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.d $f23,$f23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ trunc.l.s $f28,$f31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$a2($t2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swxc1 $f19,$t0($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/valid-xfail.s b/test/MC/Mips/mips2/valid-xfail.s
deleted file mode 100644
index 2f82f5c..0000000
--- a/test/MC/Mips/mips2/valid-xfail.s
+++ /dev/null
@@ -1,17 +0,0 @@
-# Instructions that should be valid but currently fail for known reasons (e.g.
-# they aren't implemented yet).
-# This test is set up to XPASS if any instruction generates an encoding.
-#
-# FIXME: Test MIPS-II instead of MIPS32
-# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | not FileCheck %s
-# CHECK-NOT: encoding
-# XFAIL: *
-
- .set noat
- ldc3 $29,-28645($s1)
- lwc3 $10,-32265($k0)
- sdc3 $12,5835($t2)
- tlbp
- tlbr
- tlbwi
- tlbwr
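The deleted valid-xfail.s relied on a double negation: "not llvm-mc ... | not FileCheck %s" together with "CHECK-NOT: encoding" passes only while none of the listed instructions assembles, and "XFAIL: *" turns that pass into an expected failure; per the file's own header, the test flips to XPASS, forcing the list to be pruned, as soon as any instruction starts producing an encoding. A skeleton of the idiom for reference (a sketch, not a real test):

  # RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 | not FileCheck %s
  # CHECK-NOT: encoding
  # XFAIL: *
          tlbwr              # would flip the test to XPASS the moment it gains an encoding

It can be deleted here because everything it tracked (ldc3/lwc3/sdc3 and the tlb ops) now assembles under -mcpu=mips2 and reappears in valid.s below, the tlb ops with pinned encodings.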
diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s
index 1a05040..e3effde 100644
--- a/test/MC/Mips/mips2/valid.s
+++ b/test/MC/Mips/mips2/valid.s
@@ -1,107 +1,126 @@
# Instructions that are valid
#
-# FIXME: Test MIPS-II instead of MIPS32
-# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips2 | FileCheck %s
- .set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- ctc1 $a2,$26
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.s.d $f26,$f8
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $s8,$a0
- move $t9,$a2
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
+ .set noat
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ ctc1 $a2,$26
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.s.d $f26,$f8
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldc3 $29,-28645($s1)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwc3 $10,-32265($k0)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $s8,$a0
+ move $25,$a2
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdc3 $12,5835($10)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swc3 $10,-32265($k0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
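The refreshed valid.s also pins down several assembler conveniences: the two-operand shift aliases reuse the destination as source (sll $a3,18 becomes sll $7, $7, 18), a register where the shift amount belongs silently selects the variable form (sll to sllv, sra to srav, srl to srlv), sltu with an immediate is rewritten to sltiu, and bare negu $2 expands to negu $2, $2. A field-level reading of one of the new encodings, assuming the standard MIPS R-type layout (op rs rt rd shamt funct):

          sll $a3,$zero,$9   # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
  # sllv $7, $zero, $9  ->  000000 01001 00000 00111 00000 000100 = 0x01203804
  # i.e. rs carries the shift-amount register ($9) in the variable-shift forms.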
diff --git a/test/MC/Mips/mips3/invalid-mips4.s b/test/MC/Mips/mips3/invalid-mips4.s
new file mode 100644
index 0000000..6e15d79
--- /dev/null
+++ b/test/MC/Mips/mips3/invalid-mips4.s
@@ -0,0 +1,23 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips3 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ldxc1 $f8,$s7($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$8,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$10($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swxc1 $f19,$12($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips3/invalid-mips5-wrong-error.s b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
new file mode 100644
index 0000000..2c0246a
--- /dev/null
+++ b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
@@ -0,0 +1,46 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ alnv.ps $f12,$f18,$f30,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.eq.ps $fcc5,$f0,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.f.ps $fcc6,$f11,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.le.ps $fcc1,$f7,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.lt.ps $f19,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.nge.ps $f1,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngl.ps $f21,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngle.ps $fcc7,$f12,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngt.ps $fcc5,$f30,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ole.ps $fcc7,$f21,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.olt.ps $fcc3,$f7,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.seq.ps $fcc6,$f31,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.sf.ps $fcc6,$f4,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ueq.ps $fcc1,$f5,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ule.ps $fcc6,$f17,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ult.ps $fcc7,$f14,$f0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.un.ps $fcc4,$f2,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.ps.s $f3,$f18,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pl $f30,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pu $f14,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ madd.ps $f22,$f3,$f14,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mov.ps $f22,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movf.ps $f10,$f28,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movn.ps $f31,$f31,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movt.ps $f20,$f25,$fcc2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movz.ps $f18,$f17,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ msub.ps $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mul.ps $f14,$f0,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ neg.ps $f19,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmadd.ps $f27,$f4,$f9,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmsub.ps $f6,$f12,$f14,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pll.ps $f25,$f9,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ plu.ps $f1,$f26,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pul.ps $f9,$f30,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ puu.ps $f24,$f9,$f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sub.ps $f5,$f14,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips3/invalid-mips5.s b/test/MC/Mips/mips3/invalid-mips5.s
new file mode 100644
index 0000000..d25621b
--- /dev/null
+++ b/test/MC/Mips/mips3/invalid-mips5.s
@@ -0,0 +1,25 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips3 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ldxc1 $f8,$s7($t3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf $gp,$a4,$fcc7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.d $f6,$f11,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movf.s $f23,$f5,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn $v1,$s1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.d $f27,$f21,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movn.s $f12,$f0,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt $zero,$s4,$fcc5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.d $f0,$f2,$fcc0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movt.s $f30,$f2,$fcc1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz $a1,$s6,$a5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.d $f12,$f29,$a5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ movz.s $f25,$f7,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdxc1 $f11,$a6($t2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swxc1 $f19,$t0($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips3/valid-xfail.s b/test/MC/Mips/mips3/valid-xfail.s
deleted file mode 100644
index 740663e..0000000
--- a/test/MC/Mips/mips3/valid-xfail.s
+++ /dev/null
@@ -1,15 +0,0 @@
-# Instructions that should be valid but currently fail for known reasons (e.g.
-# they aren't implemented yet).
-# This test is set up to XPASS if any instruction generates an encoding.
-#
-# FIXME: Test MIPS-III instead of MIPS64
-# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64 | not FileCheck %s
-# CHECK-NOT: encoding
-# XFAIL: *
-
- .set noat
- lwc3 $10,-32265($k0)
- tlbp
- tlbr
- tlbwi
- tlbwr
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index dc9b48c..2067666 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -1,145 +1,176 @@
# Instructions that are valid
#
-# FIXME: Test MIPS-III instead of MIPS64
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips3 | FileCheck %s
- .set noat
- abs.d $f7,$f25 # CHECK:encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.l.d $f1,$f3
- ceil.l.s $f18,$f13
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- ctc1 $a2,$26
- cvt.d.l $f4,$f16
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- cvt.s.d $f26,$f8
- cvt.s.l $f15,$f30
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- dadd $s3,$at,$ra
- daddi $sp,$s4,-27705
- daddiu $k0,$s6,-4586
- ddiv $zero,$k0,$s3
- ddivu $zero,$s0,$s1
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- dmfc1 $t4,$f13
- dmtc1 $s0,$f14
- dmult $s7,$t1
- dmultu $a1,$a2
- dsllv $zero,$s4,$t4
- dsrav $gp,$s2,$s3
- dsrlv $s3,$t6,$s4
- dsub $a3,$s6,$t0
- dsubu $a1,$a1,$k0
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- eret
- floor.l.d $f26,$f7
- floor.l.s $f12,$f5
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ld $sp,-28645($s1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- ldl $t8,-4167($t8)
- ldr $t6,-30358($s4)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lld $zero,-14736($ra)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- lwu $s3,-24086($v1)
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $t9,$a2
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- round.l.d $f12,$f1
- round.l.s $f25,$f5
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- scd $t7,-8243($sp)
- sd $t4,5835($t2)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sdl $a3,-20961($s8)
- sdr $t3,-20423($t4)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.l.d $f23,$f23
- trunc.l.s $f28,$f31
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
+ .set noat
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.l.d $f1,$f3
+ ceil.l.s $f18,$f13
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ ctc1 $a2,$26
+ cvt.d.l $f4,$f16
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.l.d $f24,$f15
+ cvt.l.s $f11,$f29
+ cvt.s.d $f26,$f8
+ cvt.s.l $f15,$f30
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ dadd $s3,$at,$ra
+ daddi $sp,$s4,-27705
+ daddiu $k0,$s6,-4586
+ daddu $s3,$at,$ra
+ ddiv $zero,$k0,$s3
+ ddivu $zero,$s0,$s1
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ dmfc1 $12,$f13
+ dmtc1 $s0,$f14
+ dmult $s7,$9
+ dmultu $a1,$a2
+ dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
+ dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
+ dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsra $gp,10 # CHECK: dsra $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbb]
+ dsra $gp,$s2,10 # CHECK: dsra $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbb]
+ dsra $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsra32 $gp,10 # CHECK: dsra32 $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbf]
+ dsra32 $gp,$s2,10 # CHECK: dsra32 $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbf]
+ dsrav $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
+ dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
+ dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
+ dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
+ dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ eret
+ floor.l.d $f26,$f7
+ floor.l.s $f12,$f5
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ld $sp,-28645($s1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldl $24,-4167($24)
+ ldr $14,-30358($s4)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lld $zero,-14736($ra)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ lwu $s3,-24086($v1)
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $a0,$a3
+ move $s5,$a0
+ move $s8,$a0
+ move $25,$a2
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ round.l.d $f12,$f1
+ round.l.s $f25,$f5
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ scd $15,-8243($sp)
+ sd $12,5835($10)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdl $a3,-20961($s8)
+ sdr $11,-20423($12)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.l.d $f23,$f23
+ trunc.l.s $f28,$f31
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
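The MIPS-III doubleword shifts in this file follow the same R-type pattern with one wrinkle: dsll and dsll32 share the shamt field and differ only in funct (binary 111000 vs 111100), the 32 variant implicitly adding 32 to the shift amount. The checked bytes above decode accordingly:

          dsll   $zero,$s4,18    # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
  # 000000 00000 10100 00000 10010 111000 = 0x001404b8: rt=$20, rd=$zero, shamt=18
          dsll32 $zero,$zero,18  # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
  # 000000 00000 00000 00000 10010 111100 = 0x000004bc: same fields, funct+4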
diff --git a/test/MC/Mips/mips32/invalid-mips32r2-xfail.s b/test/MC/Mips/mips32/invalid-mips32r2-xfail.s
index 73fba94..604ddbf 100644
--- a/test/MC/Mips/mips32/invalid-mips32r2-xfail.s
+++ b/test/MC/Mips/mips32/invalid-mips32r2-xfail.s
@@ -8,12 +8,4 @@
# CHECK-NOT: error
.set noat
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- di $s8
- ei $t6
- luxc1 $f19,$s6($s5)
- mfhc1 $s8,$f24
- mthc1 $zero,$f16
rdhwr $sp,$11
- suxc1 $f12,$k1($t5)
diff --git a/test/MC/Mips/mips32/invalid-mips32r2.s b/test/MC/Mips/mips32/invalid-mips32r2.s
index 881f7f1..fa6fe32 100644
--- a/test/MC/Mips/mips32/invalid-mips32r2.s
+++ b/test/MC/Mips/mips32/invalid-mips32r2.s
@@ -4,20 +4,31 @@
# RUN: 2>%t1
# RUN: FileCheck %s < %t1
- .set noat
+ .set noat
+ cvt.l.d $f24,$f15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cvt.l.s $f11,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ di $s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ei $t6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
ldxc1 $f8,$s7($t7) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
madd.d $f18,$f19,$f26,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
madd.s $f1,$f31,$f19,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhc1 $s8,$f24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
msub.d $f10,$f1,$f31,$f18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
msub.s $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthc1 $zero,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
nmadd.d $f18,$f9,$f14,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
nmadd.s $f0,$f5,$f25,$f12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
nmsub.d $f30,$f8,$f16,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
nmsub.s $f1,$f24,$f19,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
pause # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotrv $1,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
sdxc1 $f11,$t2($t6) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
seb $t9,$t7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
seh $v1,$t4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
swxc1 $f19,$t4($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
wsbh $k1,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32/invalid-mips64.s b/test/MC/Mips/mips32/invalid-mips64.s
new file mode 100644
index 0000000..41040ed
--- /dev/null
+++ b/test/MC/Mips/mips32/invalid-mips64.s
@@ -0,0 +1,9 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclz $s0,$t9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32/valid-xfail.s b/test/MC/Mips/mips32/valid-xfail.s
index 65cebd3..d680740 100644
--- a/test/MC/Mips/mips32/valid-xfail.s
+++ b/test/MC/Mips/mips32/valid-xfail.s
@@ -2,43 +2,37 @@
# they aren't implemented yet).
# This test is set up to XPASS if any instruction generates an encoding.
#
-# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | not FileCheck %s
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | not FileCheck %s
# CHECK-NOT: encoding
# XFAIL: *
- .set noat
- c.eq.d $fcc1,$f15,$f15
- c.eq.s $fcc5,$f24,$f17
- c.f.d $fcc4,$f11,$f21
- c.f.s $fcc4,$f30,$f7
- c.le.d $fcc4,$f18,$f1
- c.le.s $fcc6,$f24,$f4
- c.lt.d $fcc3,$f9,$f3
- c.lt.s $fcc2,$f17,$f14
- c.nge.d $fcc5,$f21,$f16
- c.nge.s $fcc3,$f11,$f8
- c.ngl.s $fcc2,$f31,$f23
- c.ngle.s $fcc2,$f18,$f23
- c.ngt.d $fcc4,$f24,$f7
- c.ngt.s $fcc5,$f8,$f13
- c.ole.d $fcc2,$f16,$f31
- c.ole.s $fcc3,$f7,$f20
- c.olt.d $fcc4,$f19,$f28
- c.olt.s $fcc6,$f20,$f7
- c.seq.d $fcc4,$f31,$f7
- c.seq.s $fcc7,$f1,$f25
- c.ueq.d $fcc4,$f13,$f25
- c.ueq.s $fcc6,$f3,$f30
- c.ule.d $fcc7,$f25,$f18
- c.ule.s $fcc7,$f21,$f30
- c.ult.d $fcc6,$f6,$f17
- c.ult.s $fcc7,$f24,$f10
- c.un.d $fcc6,$f23,$f24
- c.un.s $fcc1,$f30,$f4
- ldc3 $29,-28645($s1)
- rorv $t5,$a3,$s5
- sdc3 $12,5835($t2)
- tlbp
- tlbr
- tlbwi
- tlbwr
+ .set noat
+ c.eq.d $fcc1,$f15,$f15
+ c.eq.s $fcc5,$f24,$f17
+ c.f.d $fcc4,$f11,$f21
+ c.f.s $fcc4,$f30,$f7
+ c.le.d $fcc4,$f18,$f1
+ c.le.s $fcc6,$f24,$f4
+ c.lt.d $fcc3,$f9,$f3
+ c.lt.s $fcc2,$f17,$f14
+ c.nge.d $fcc5,$f21,$f16
+ c.nge.s $fcc3,$f11,$f8
+ c.ngl.s $fcc2,$f31,$f23
+ c.ngle.s $fcc2,$f18,$f23
+ c.ngt.d $fcc4,$f24,$f7
+ c.ngt.s $fcc5,$f8,$f13
+ c.ole.d $fcc2,$f16,$f31
+ c.ole.s $fcc3,$f7,$f20
+ c.olt.d $fcc4,$f19,$f28
+ c.olt.s $fcc6,$f20,$f7
+ c.seq.d $fcc4,$f31,$f7
+ c.seq.s $fcc7,$f1,$f25
+ c.ueq.d $fcc4,$f13,$f25
+ c.ueq.s $fcc6,$f3,$f30
+ c.ule.d $fcc7,$f25,$f18
+ c.ule.s $fcc7,$f21,$f30
+ c.ult.d $fcc6,$f6,$f17
+ c.ult.s $fcc7,$f24,$f10
+ c.un.d $fcc6,$f23,$f24
+ c.un.s $fcc1,$f30,$f4
+ rorv $13,$a3,$s5
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index 9e83c0f..bc29bdc 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -1,131 +1,147 @@
# Instructions that are valid
#
-# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 | FileCheck %s
.set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- clo $t3,$a1
- clz $sp,$gp
- ctc1 $a2,$26
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.s.d $f26,$f8
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- deret
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- eret
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- madd $s6,$t5
- madd $zero,$t1
- maddu $s3,$gp
- maddu $t8,$s2
- mfc0 $a2,$14,1
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $s8,$a0
- move $t9,$a2
- movf $gp,$t0,$fcc7
- movf.d $f6,$f11,$fcc5
- movf.s $f23,$f5,$fcc6
- movn $v1,$s1,$s0
- movn.d $f27,$f21,$k0
- movn.s $f12,$f0,$s7
- movt $zero,$s4,$fcc5
- movt.d $f0,$f2,$fcc0
- movt.s $f30,$f2,$fcc1
- movz $a1,$s6,$t1
- movz.d $f12,$f29,$t1
- movz.s $f25,$f7,$v1
- msub $s7,$k1
- msubu $t7,$a1
- mtc0 $t1,$29,3
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul $s0,$s4,$at
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ clo $11,$a1
+ clz $sp,$gp
+ ctc1 $a2,$26
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.s.d $f26,$f8
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ deret
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ eret
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ madd $s6,$13
+ madd $zero,$9
+ maddu $s3,$gp
+ maddu $24,$s2
+ mfc0 $a2,$14,1
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $s8,$a0
+ move $25,$a2
+ movf $gp,$8,$fcc7
+ movf.d $f6,$f11,$fcc5
+ movf.s $f23,$f5,$fcc6
+ movn $v1,$s1,$s0
+ movn.d $f27,$f21,$k0
+ movn.s $f12,$f0,$s7
+ movt $zero,$s4,$fcc5
+ movt.d $f0,$f2,$fcc0
+ movt.s $f30,$f2,$fcc1
+ movz $a1,$s6,$9
+ movz.d $f12,$f29,$9
+ movz.s $f25,$f7,$v1
+ msub $s7,$k1
+ msubu $15,$a1
+ mtc0 $9,$29,3
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul $s0,$s4,$at
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
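One more family graduates from valid-xfail.s into the valid tests at all three ISA levels: the TLB maintenance ops. Their encodings are easy to sanity-check by eye, assuming the COP0 format (opcode 010000 with the CO bit set, funct selecting the operation), which puts 0x42 in the top byte and the operation in the low one:

          tlbr  # CHECK: tlbr  # encoding: [0x42,0x00,0x00,0x01]
          tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
          tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
          tlbp  # CHECK: tlbp  # encoding: [0x42,0x00,0x00,0x08]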
diff --git a/test/MC/Mips/mips32r2/invalid-mips64r2.s b/test/MC/Mips/mips32r2/invalid-mips64r2.s
new file mode 100644
index 0000000..293e58e
--- /dev/null
+++ b/test/MC/Mips/mips32r2/invalid-mips64r2.s
@@ -0,0 +1,10 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding \
+# RUN: -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ dsbh $v1,$t6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dshd $v0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+
diff --git a/test/MC/Mips/mips32r2/valid-xfail.s b/test/MC/Mips/mips32r2/valid-xfail.s
index 623c7f6..ef02d51 100644
--- a/test/MC/Mips/mips32r2/valid-xfail.s
+++ b/test/MC/Mips/mips32r2/valid-xfail.s
@@ -6,310 +6,304 @@
# CHECK-NOT: encoding
# XFAIL: *
- .set noat
- abs.ps $f22,$f8
- absq_s.ph $t0,$a0
- absq_s.qb $t7,$s1
- absq_s.w $s3,$ra
- add.ps $f25,$f27,$f13
- addq.ph $s1,$t7,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$t0,$at
- addqh.ph $s4,$t6,$s1
- addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$t9,$s8
- addqh_r.w $t0,$v1,$zero
- addsc $s8,$t7,$t4
- addu.ph $a2,$t6,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$t1,$t4
- addwc $k0,$s6,$s7
- alnv.ps $f12,$f18,$f30,$t4
- and.v $w10,$w25,$w29
- bitrev $t6,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
- c.eq.d $fcc1,$f15,$f15
- c.eq.ps $fcc5,$f0,$f9
- c.eq.s $fcc5,$f24,$f17
- c.f.d $fcc4,$f11,$f21
- c.f.ps $fcc6,$f11,$f11
- c.f.s $fcc4,$f30,$f7
- c.le.d $fcc4,$f18,$f1
- c.le.ps $fcc1,$f7,$f20
- c.le.s $fcc6,$f24,$f4
- c.lt.d $fcc3,$f9,$f3
- c.lt.ps $f19,$f5
- c.lt.s $fcc2,$f17,$f14
- c.nge.d $fcc5,$f21,$f16
- c.nge.ps $f1,$f26
- c.nge.s $fcc3,$f11,$f8
- c.ngl.ps $f21,$f30
- c.ngl.s $fcc2,$f31,$f23
- c.ngle.ps $fcc7,$f12,$f20
- c.ngle.s $fcc2,$f18,$f23
- c.ngt.d $fcc4,$f24,$f7
- c.ngt.ps $fcc5,$f30,$f6
- c.ngt.s $fcc5,$f8,$f13
- c.ole.d $fcc2,$f16,$f31
- c.ole.ps $fcc7,$f21,$f8
- c.ole.s $fcc3,$f7,$f20
- c.olt.d $fcc4,$f19,$f28
- c.olt.ps $fcc3,$f7,$f16
- c.olt.s $fcc6,$f20,$f7
- c.seq.d $fcc4,$f31,$f7
- c.seq.ps $fcc6,$f31,$f14
- c.seq.s $fcc7,$f1,$f25
- c.sf.ps $fcc6,$f4,$f6
- c.ueq.d $fcc4,$f13,$f25
- c.ueq.ps $fcc1,$f5,$f29
- c.ueq.s $fcc6,$f3,$f30
- c.ule.d $fcc7,$f25,$f18
- c.ule.ps $fcc6,$f17,$f3
- c.ule.s $fcc7,$f21,$f30
- c.ult.d $fcc6,$f6,$f17
- c.ult.ps $fcc7,$f14,$f0
- c.ult.s $fcc7,$f24,$f10
- c.un.d $fcc6,$f23,$f24
- c.un.ps $fcc4,$f2,$f26
- c.un.s $fcc1,$f30,$f4
- ceil.l.d $f1,$f3
- ceil.l.s $f18,$f13
- cfcmsa $s6,$19
- cmp.eq.ph $s7,$t6
- cmp.le.ph $t0,$t6
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$t7,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $t6,$s6,$s8
- cmpgu.le.qb $t1,$a3,$s4
- cmpgu.lt.qb $sp,$at,$t0
- cmpu.eq.qb $v0,$t8
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
- ctcmsa $31,$s7
- cvt.d.l $f4,$f16
- cvt.ps.s $f3,$f18,$f19
- cvt.s.l $f15,$f30
- cvt.s.pl $f30,$f1
- cvt.s.pu $f14,$f25
- dmt $k0
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$t5
- dpaq_sa.l.w $ac0,$a2,$t6
- dpaqx_s.w.ph $ac3,$a0,$t8
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$t2,$t8
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$t7
- dpsqx_s.w.ph $ac3,$t5,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$t6,$t2
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
- dvpe $s6
- emt $t0
- evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $t5,$ac0,$t6
- extrv.w $t0,$ac3,$at
- extrv_r.w $t0,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$t6
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.h $w9,$t0
- fill.w $w31,$t7
- flog2.d $w12,$w16
- flog2.w $w19,$w23
- floor.l.d $f26,$f7
- floor.l.s $f12,$f5
- fork $s2,$t0,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
- iret
- lbe $t6,122($t1)
- lbue $t3,-108($t2)
- lbux $t1,$t6($v0)
- ldc3 $29,-28645($s1)
- lhe $s6,219($v1)
- lhue $gp,118($t3)
- lhx $sp,$k0($t7)
- lle $gp,-237($ra)
- lwe $ra,-145($t6)
- lwle $t3,-42($t3)
- lwre $sp,-152($t8)
- lwx $t4,$t4($s4)
- madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$t9,$t3
- maq_s.w.phr $ac0,$t2,$t9
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$t2
- mfgc0 $s6,c0_datahi1
- mflo $t1,$ac2
- modsub $a3,$t4,$a3
- mov.ps $f22,$f17
- move.v $w8,$w17
- movf.ps $f10,$f28,$fcc6
- movn.ps $f31,$f31,$s3
- movt.ps $f20,$f25,$fcc2
- movz.ps $f18,$f17,$ra
- msub $ac2,$sp,$t6
- msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$t8
- mtc0 $t1,c0_datahi1
- mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$t8,$s0
- mul.ps $f14,$f0,$f16
- mul_s.ph $t2,$t6,$t7
- muleq_s.w.phl $t3,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$t6,$t0
- muleu_s.ph.qbr $a1,$ra,$t1
- mulq_rs.ph $s2,$t6,$t7
- mulq_rs.w $at,$s4,$t9
- mulq_s.ph $s0,$k1,$t7
- mulq_s.w $t1,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
- neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
- nmadd.ps $f27,$f4,$f9,$f25
- nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$t8,$t6
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $t3,$a0,$gp
- pll.ps $f25,$f9,$f30
- plu.ps $f1,$f26,$f29
- preceq.w.phl $s8,$gp
- preceq.w.phr $s5,$t7
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$t1
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $t8,$t0
- preceu.ph.qbl $sp,$t0
- preceu.ph.qbla $s6,$t3
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$t4,$s8
- precrq.ph.w $t6,$s8,$t8
- precrq.qb.ph $a2,$t4,$t4
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
- pul.ps $f9,$f30,$f26
- puu.ps $f24,$f9,$f2
- raddu.w.qb $t9,$s3
- rdpgpr $s3,$t1
- recip.d $f19,$f6
- recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $t9,$t4
- rorv $t5,$a3,$s5
- round.l.d $f12,$f1
- round.l.s $f25,$f5
- rsqrt.d $f3,$f28
- rsqrt.s $f4,$f8
- sbe $s7,33($s1)
- sce $sp,189($t2)
- sdc3 $12,5835($t2)
- she $t8,105($v0)
- shilo $ac1,26
- shilov $ac2,$t2
- shllv.ph $t2,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$t5
- shllv_s.w $s1,$ra,$k0
- shrav.ph $t9,$s2,$s1
- shrav.qb $zero,$t8,$t3
- shrav_r.ph $s3,$t3,$t9
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $t6,$t2,$t1
- shrlv.qb $a2,$s2,$t3
- sub.ps $f5,$f14,$f26
- subq.ph $ra,$t1,$s8
- subq_s.ph $t5,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $t2,$at,$t1
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$t4,$s6
- subqh_r.w $t2,$a2,$gp
- subu.ph $t1,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
- swe $t8,94($k0)
- swle $v1,-209($gp)
- swre $k0,-202($s2)
- synci 20023($s0)
- tlbginv
- tlbginvf
- tlbgp
- tlbgr
- tlbgwi
- tlbgwr
- tlbinv
- tlbinvf
- tlbp
- tlbr
- tlbwi
- tlbwr
- trunc.l.d $f23,$f23
- trunc.l.s $f28,$f31
- wrpgpr $zero,$t5
- xor.v $w20,$w21,$w30
- yield $v1,$s0
+ .set noat
+ abs.ps $f22,$f8
+ absq_s.ph $8,$a0
+ absq_s.qb $15,$s1
+ absq_s.w $s3,$ra
+ add.ps $f25,$f27,$f13
+ addq.ph $s1,$15,$at
+ addq_s.ph $s3,$s6,$s2
+ addq_s.w $a2,$8,$at
+ addqh.ph $s4,$14,$s1
+ addqh.w $s7,$s7,$k1
+ addqh_r.ph $sp,$25,$s8
+ addqh_r.w $8,$v1,$zero
+ addsc $s8,$15,$12
+ addu.ph $a2,$14,$s3
+ addu.qb $s6,$v1,$v1
+ addu_s.ph $a3,$s3,$gp
+ addu_s.qb $s4,$s8,$s1
+ adduh.qb $a1,$a1,$at
+ adduh_r.qb $a0,$9,$12
+ addwc $k0,$s6,$s7
+ alnv.ps $f12,$f18,$f30,$12
+ and.v $w10,$w25,$w29
+ bitrev $14,$at
+ bmnz.v $w15,$w2,$w28
+ bmz.v $w13,$w11,$w21
+ bsel.v $w28,$w7,$w0
+ c.eq.d $fcc1,$f15,$f15
+ c.eq.ps $fcc5,$f0,$f9
+ c.eq.s $fcc5,$f24,$f17
+ c.f.d $fcc4,$f11,$f21
+ c.f.ps $fcc6,$f11,$f11
+ c.f.s $fcc4,$f30,$f7
+ c.le.d $fcc4,$f18,$f1
+ c.le.ps $fcc1,$f7,$f20
+ c.le.s $fcc6,$f24,$f4
+ c.lt.d $fcc3,$f9,$f3
+ c.lt.ps $f19,$f5
+ c.lt.s $fcc2,$f17,$f14
+ c.nge.d $fcc5,$f21,$f16
+ c.nge.ps $f1,$f26
+ c.nge.s $fcc3,$f11,$f8
+ c.ngl.ps $f21,$f30
+ c.ngl.s $fcc2,$f31,$f23
+ c.ngle.ps $fcc7,$f12,$f20
+ c.ngle.s $fcc2,$f18,$f23
+ c.ngt.d $fcc4,$f24,$f7
+ c.ngt.ps $fcc5,$f30,$f6
+ c.ngt.s $fcc5,$f8,$f13
+ c.ole.d $fcc2,$f16,$f31
+ c.ole.ps $fcc7,$f21,$f8
+ c.ole.s $fcc3,$f7,$f20
+ c.olt.d $fcc4,$f19,$f28
+ c.olt.ps $fcc3,$f7,$f16
+ c.olt.s $fcc6,$f20,$f7
+ c.seq.d $fcc4,$f31,$f7
+ c.seq.ps $fcc6,$f31,$f14
+ c.seq.s $fcc7,$f1,$f25
+ c.sf.ps $fcc6,$f4,$f6
+ c.ueq.d $fcc4,$f13,$f25
+ c.ueq.ps $fcc1,$f5,$f29
+ c.ueq.s $fcc6,$f3,$f30
+ c.ule.d $fcc7,$f25,$f18
+ c.ule.ps $fcc6,$f17,$f3
+ c.ule.s $fcc7,$f21,$f30
+ c.ult.d $fcc6,$f6,$f17
+ c.ult.ps $fcc7,$f14,$f0
+ c.ult.s $fcc7,$f24,$f10
+ c.un.d $fcc6,$f23,$f24
+ c.un.ps $fcc4,$f2,$f26
+ c.un.s $fcc1,$f30,$f4
+ ceil.l.d $f1,$f3
+ ceil.l.s $f18,$f13
+ cfcmsa $s6,$19
+ cmp.eq.ph $s7,$14
+ cmp.le.ph $8,$14
+ cmp.lt.ph $k0,$sp
+ cmpgdu.eq.qb $s3,$zero,$k0
+ cmpgdu.le.qb $v1,$15,$s2
+ cmpgdu.lt.qb $s0,$gp,$sp
+ cmpgu.eq.qb $14,$s6,$s8
+ cmpgu.le.qb $9,$a3,$s4
+ cmpgu.lt.qb $sp,$at,$8
+ cmpu.eq.qb $v0,$24
+ cmpu.le.qb $s1,$a1
+ cmpu.lt.qb $at,$a3
+ ctcmsa $31,$s7
+ cvt.d.l $f4,$f16
+ cvt.ps.s $f3,$f18,$f19
+ cvt.s.l $f15,$f30
+ cvt.s.pl $f30,$f1
+ cvt.s.pu $f14,$f25
+ dmt $k0
+ dpa.w.ph $ac1,$s7,$k0
+ dpaq_s.w.ph $ac2,$a0,$13
+ dpaq_sa.l.w $ac0,$a2,$14
+ dpaqx_s.w.ph $ac3,$a0,$24
+ dpaqx_sa.w.ph $ac1,$zero,$s5
+ dpau.h.qbl $ac1,$10,$24
+ dpau.h.qbr $ac1,$s7,$s6
+ dpax.w.ph $ac3,$a0,$k0
+ dps.w.ph $ac1,$a3,$a1
+ dpsq_s.w.ph $ac0,$gp,$k0
+ dpsq_sa.l.w $ac0,$a3,$15
+ dpsqx_s.w.ph $ac3,$13,$a3
+ dpsqx_sa.w.ph $ac3,$sp,$s2
+ dpsu.h.qbl $ac2,$14,$10
+ dpsu.h.qbr $ac2,$a1,$s6
+ dpsx.w.ph $ac0,$s7,$gp
+ dvpe $s6
+ emt $8
+ evpe $v0
+ extpdpv $s6,$ac0,$s8
+ extpv $13,$ac0,$14
+ extrv.w $8,$ac3,$at
+ extrv_r.w $8,$ac1,$s6
+ extrv_rs.w $gp,$ac1,$s6
+ extrv_s.h $s2,$ac1,$14
+ fclass.d $w14,$w27
+ fclass.w $w19,$w28
+ fexupl.d $w10,$w29
+ fexupl.w $w12,$w27
+ fexupr.d $w31,$w15
+ fexupr.w $w29,$w12
+ ffint_s.d $w1,$w30
+ ffint_s.w $w16,$w14
+ ffint_u.d $w23,$w18
+ ffint_u.w $w19,$w12
+ ffql.d $w2,$w3
+ ffql.w $w9,$w0
+ ffqr.d $w25,$w24
+ ffqr.w $w10,$w6
+ fill.b $w9,$v1
+ fill.h $w9,$8
+ fill.w $w31,$15
+ flog2.d $w12,$w16
+ flog2.w $w19,$w23
+ floor.l.d $f26,$f7
+ floor.l.s $f12,$f5
+ fork $s2,$8,$a0
+ frcp.d $w12,$w4
+ frcp.w $w30,$w8
+ frint.d $w20,$w8
+ frint.w $w11,$w29
+ frsqrt.d $w29,$w2
+ frsqrt.w $w9,$w8
+ fsqrt.d $w3,$w1
+ fsqrt.w $w5,$w15
+ ftint_s.d $w31,$w26
+ ftint_s.w $w27,$w14
+ ftint_u.d $w5,$w31
+ ftint_u.w $w12,$w29
+ ftrunc_s.d $w4,$w22
+ ftrunc_s.w $w24,$w7
+ ftrunc_u.d $w20,$w25
+ ftrunc_u.w $w7,$w26
+ insv $s2,$at
+ iret
+ lbe $14,122($9)
+ lbue $11,-108($10)
+ lbux $9,$14($v0)
+ lhe $s6,219($v1)
+ lhue $gp,118($11)
+ lhx $sp,$k0($15)
+ lle $gp,-237($ra)
+ lwe $ra,-145($14)
+ lwle $11,-42($11)
+ lwre $sp,-152($24)
+ lwx $12,$12($s4)
+ madd.ps $f22,$f3,$f14,$f3
+ maq_s.w.phl $ac2,$25,$11
+ maq_s.w.phr $ac0,$10,$25
+ maq_sa.w.phl $ac3,$a1,$v1
+ maq_sa.w.phr $ac1,$at,$10
+ mfgc0 $s6,c0_datahi1
+ mflo $9,$ac2
+ modsub $a3,$12,$a3
+ mov.ps $f22,$f17
+ move.v $w8,$w17
+ movf.ps $f10,$f28,$fcc6
+ movn.ps $f31,$f31,$s3
+ movt.ps $f20,$f25,$fcc2
+ movz.ps $f18,$f17,$ra
+ msub $ac2,$sp,$14
+ msub.ps $f12,$f14,$f29,$f17
+ msubu $ac2,$a1,$24
+ mtc0 $9,c0_datahi1
+ mtgc0 $s4,$21,7
+ mthi $v0,$ac1
+ mthlip $a3,$ac0
+ mul.ph $s4,$24,$s0
+ mul.ps $f14,$f0,$f16
+ mul_s.ph $10,$14,$15
+ muleq_s.w.phl $11,$s4,$s4
+ muleq_s.w.phr $s6,$a0,$s8
+ muleu_s.ph.qbl $a2,$14,$8
+ muleu_s.ph.qbr $a1,$ra,$9
+ mulq_rs.ph $s2,$14,$15
+ mulq_rs.w $at,$s4,$25
+ mulq_s.ph $s0,$k1,$15
+ mulq_s.w $9,$a3,$s0
+ mulsa.w.ph $ac1,$s4,$s6
+ mulsaq_s.w.ph $ac0,$ra,$s2
+ neg.ps $f19,$f13
+ nloc.b $w12,$w30
+ nloc.d $w16,$w7
+ nloc.h $w21,$w17
+ nloc.w $w17,$w16
+ nlzc.b $w12,$w7
+ nlzc.d $w14,$w14
+ nlzc.h $w24,$w24
+ nlzc.w $w10,$w4
+ nmadd.ps $f27,$f4,$f9,$f25
+ nmsub.ps $f6,$f12,$f14,$f17
+ nor.v $w20,$w20,$w15
+ or.v $w13,$w23,$w12
+ packrl.ph $ra,$24,$14
+ pcnt.b $w30,$w15
+ pcnt.d $w5,$w16
+ pcnt.h $w20,$w24
+ pcnt.w $w22,$w20
+ pick.ph $ra,$a2,$gp
+ pick.qb $11,$a0,$gp
+ pll.ps $f25,$f9,$f30
+ plu.ps $f1,$f26,$f29
+ preceq.w.phl $s8,$gp
+ preceq.w.phr $s5,$15
+ precequ.ph.qbl $s7,$ra
+ precequ.ph.qbla $a0,$9
+ precequ.ph.qbr $ra,$s3
+ precequ.ph.qbra $24,$8
+ preceu.ph.qbl $sp,$8
+ preceu.ph.qbla $s6,$11
+ preceu.ph.qbr $gp,$s1
+ preceu.ph.qbra $k1,$s0
+ precr.qb.ph $v0,$12,$s8
+ precrq.ph.w $14,$s8,$24
+ precrq.qb.ph $a2,$12,$12
+ precrq_rs.ph.w $a1,$k0,$a3
+ precrqu_s.qb.ph $zero,$gp,$s5
+ pul.ps $f9,$f30,$f26
+ puu.ps $f24,$f9,$f2
+ raddu.w.qb $25,$s3
+ rdpgpr $s3,$9
+ recip.d $f19,$f6
+ recip.s $f3,$f30
+ repl.ph $at,-307
+ replv.ph $v1,$s7
+ replv.qb $25,$12
+ rorv $13,$a3,$s5
+ round.l.d $f12,$f1
+ round.l.s $f25,$f5
+ rsqrt.d $f3,$f28
+ rsqrt.s $f4,$f8
+ sbe $s7,33($s1)
+ sce $sp,189($10)
+ she $24,105($v0)
+ shilo $ac1,26
+ shilov $ac2,$10
+ shllv.ph $10,$s0,$s0
+ shllv.qb $gp,$v1,$zero
+ shllv_s.ph $k1,$at,$13
+ shllv_s.w $s1,$ra,$k0
+ shrav.ph $25,$s2,$s1
+ shrav.qb $zero,$24,$11
+ shrav_r.ph $s3,$11,$25
+ shrav_r.qb $a0,$sp,$s5
+ shrav_r.w $s7,$s4,$s6
+ shrlv.ph $14,$10,$9
+ shrlv.qb $a2,$s2,$11
+ sub.ps $f5,$f14,$f26
+ subq.ph $ra,$9,$s8
+ subq_s.ph $13,$s8,$s5
+ subq_s.w $k1,$a2,$a3
+ subqh.ph $10,$at,$9
+ subqh.w $v0,$a2,$zero
+ subqh_r.ph $a0,$12,$s6
+ subqh_r.w $10,$a2,$gp
+ subu.ph $9,$s6,$s4
+ subu.qb $s6,$a2,$s6
+ subu_s.ph $v1,$a1,$s3
+ subu_s.qb $s1,$at,$ra
+ subuh.qb $zero,$gp,$gp
+ subuh_r.qb $s4,$s8,$s6
+ swe $24,94($k0)
+ swle $v1,-209($gp)
+ swre $k0,-202($s2)
+ synci 20023($s0)
+ tlbginv
+ tlbginvf
+ tlbgp
+ tlbgr
+ tlbgwi
+ tlbgwr
+ tlbinv
+ tlbinvf
+ trunc.l.d $f23,$f23
+ trunc.l.s $f28,$f31
+ wrpgpr $zero,$13
+ xor.v $w20,$w21,$w30
+ yield $v1,$s0
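The hunk above is a mechanical rename: every O32 temporary register written by name is replaced by its number ($t0-$t7 become $8-$15, $t8/$t9 become $24/$25) while the saved, argument, and special registers keep their names. The likely reason, stated here as an assumption, is that the $t4-$t7 names are not valid under the N32/N64 ABIs (where $8-$11 serve as extra argument registers), so numeric names keep a single test usable across ABIs. A minimal sketch of the correspondence, reusing an instruction from the hunk:

    # O32 numeric aliases assumed by the rewrite (sketch, not part of the test):
    #   $8..$15 = $t0..$t7        $24,$25 = $t8,$t9
    emt $t0              # named form (old side of the hunk)
    emt $8               # same instruction, numeric form (new side)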
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 3e9a1d3..26f8b6b 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -3,154 +3,173 @@
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
.set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- clo $t3,$a1
- clz $sp,$gp
- ctc1 $a2,$26
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- cvt.s.d $f26,$f8
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- deret
- di $s8
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- ei $t6
- eret
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- ldxc1 $f8,$s7($t7)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- luxc1 $f19,$s6($s5)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- lwxc1 $f12,$s1($s8)
- madd $s6,$t5
- madd $zero,$t1
- madd.d $f18,$f19,$f26,$f20
- madd.s $f1,$f31,$f19,$f25
- maddu $s3,$gp
- maddu $t8,$s2
- mfc0 $a2,$14,1
- mfc1 $a3,$f27
- mfhc1 $s8,$f24
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $s8,$a0
- move $t9,$a2
- movf $gp,$t0,$fcc7
- movf.d $f6,$f11,$fcc5
- movf.s $f23,$f5,$fcc6
- movn $v1,$s1,$s0
- movn.d $f27,$f21,$k0
- movn.s $f12,$f0,$s7
- movt $zero,$s4,$fcc5
- movt.d $f0,$f2,$fcc0
- movt.s $f30,$f2,$fcc1
- movz $a1,$s6,$t1
- movz.d $f12,$f29,$t1
- movz.s $f25,$f7,$v1
- msub $s7,$k1
- msub.d $f10,$f1,$f31,$f18
- msub.s $f12,$f19,$f10,$f16
- msubu $t7,$a1
- mtc0 $t1,$29,3
- mtc1 $s8,$f9
- mthc1 $zero,$f16
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul $s0,$s4,$at
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nmadd.d $f18,$f9,$f14,$f19
- nmadd.s $f0,$f5,$f25,$f12
- nmsub.d $f30,$f8,$f16,$f30
- nmsub.s $f1,$f24,$f19,$f4
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40]
- rdhwr $sp,$11
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sdxc1 $f11,$t2($t6)
- seb $t9,$t7
- seh $v1,$t4
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- suxc1 $f12,$k1($t5)
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- swxc1 $f19,$t4($k0)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- wsbh $k1,$t1
- xor $s2,$a0,$s8
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ clo $11,$a1
+ clz $sp,$gp
+ ctc1 $a2,$26
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.l.d $f24,$f15
+ cvt.l.s $f11,$f29
+ cvt.s.d $f26,$f8
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ deret
+ di $s8
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ ei $14
+ eret
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldxc1 $f8,$s7($15)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ luxc1 $f19,$s6($s5)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ lwxc1 $f12,$s1($s8)
+ madd $s6,$13
+ madd $zero,$9
+ madd.d $f18,$f19,$f26,$f20
+ madd.s $f1,$f31,$f19,$f25
+ maddu $s3,$gp
+ maddu $24,$s2
+ mfc0 $a2,$14,1
+ mfc1 $a3,$f27
+ mfhc1 $s8,$f24
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $s8,$a0
+ move $25,$a2
+ movf $gp,$8,$fcc7
+ movf.d $f6,$f11,$fcc5
+ movf.s $f23,$f5,$fcc6
+ movn $v1,$s1,$s0
+ movn.d $f27,$f21,$k0
+ movn.s $f12,$f0,$s7
+ movt $zero,$s4,$fcc5
+ movt.d $f0,$f2,$fcc0
+ movt.s $f30,$f2,$fcc1
+ movz $a1,$s6,$9
+ movz.d $f12,$f29,$9
+ movz.s $f25,$f7,$v1
+ msub $s7,$k1
+ msub.d $f10,$f1,$f31,$f18
+ msub.s $f12,$f19,$f10,$f16
+ msubu $15,$a1
+ mtc0 $9,$29,3
+ mtc1 $s8,$f9
+ mthc1 $zero,$f16
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul $s0,$s4,$at
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nmadd.d $f18,$f9,$f14,$f19
+ nmadd.s $f0,$f5,$f25,$f12
+ nmsub.d $f30,$f8,$f16,$f30
+ nmsub.s $f1,$f24,$f19,$f4
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40]
+ rdhwr $sp,$11
+ rotr $1,15 # CHECK: rotr $1, $1, 15 # encoding: [0x00,0x21,0x0b,0xc2]
+ rotr $1,$14,15 # CHECK: rotr $1, $14, 15 # encoding: [0x00,0x2e,0x0b,0xc2]
+ rotrv $1,$14,$15 # CHECK: rotrv $1, $14, $15 # encoding: [0x01,0xee,0x08,0x46]
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdxc1 $f11,$10($14)
+ seb $25,$15
+ seh $v1,$12
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ suxc1 $f12,$k1($13)
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ swxc1 $f19,$12($k0)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ wsbh $k1,$9
+ xor $s2,$a0,$s8
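Besides the same register renaming, the new mips32r2/valid.s grows coverage for assembler shorthands: two-operand forms that expand to a canonical three-operand instruction, and shift/rotate spellings whose register amount silently selects the -v variant. The encodings in the CHECK comments pin the expansions down (negu $2 really is subu $2,$zero,$2 per its encoding bytes). Condensed from the lines above:

    negu $2              # expands to: negu $2, $2   (i.e. subu $2, $zero, $2)
    sll  $a3,18          # expands to: sll  $7, $7, 18
    sll  $a3,$zero,$9    # register amount, becomes: sllv $7, $zero, $9
    rotr $1,15           # expands to: rotr $1, $1, 15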
diff --git a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
new file mode 100644
index 0000000..aee068a
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
@@ -0,0 +1,15 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ lwl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
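The two wrong messages in this new file differ in kind: lwl/lwr/swl/swr are still known mnemonics, so the mips32r6 assembler trips over their operands and reports "invalid operand for instruction", while the EVA forms lwle/lwre/swle/swre are not recognized at all and come back as "Unknown instruction". Presumably, and this is an assumption based on the sibling tests in this commit, the intended diagnostic for all eight is the feature-style message, along the lines of:

    lwl $s4,-4231($15)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled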
diff --git a/test/MC/Mips/mips32r6/invalid-mips1.s b/test/MC/Mips/mips32r6/invalid-mips1.s
new file mode 100644
index 0000000..aa7d407
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips1.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
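This one-line file leans on a FileCheck idiom used throughout the invalid-* tests: [[@LINE]] expands to the line number of the CHECK directive itself, and since the directive shares a line with the offending instruction, the pattern only matches if llvm-mc reports the error on exactly that line ({{[0-9]+}} leaves the column free). Sketched:

    addi $13,$9,26322   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: ...
    # the reported line number must equal the line of this very directive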
diff --git a/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s
new file mode 100644
index 0000000..b799c8e
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s
@@ -0,0 +1,20 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ beql $1,$2,4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bgezall $3,8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bgezl $3,8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bgtzl $4,16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ blezl $3,8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bltzall $3,8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bltzl $4,16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bnel $1,$2,4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc1tl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc1fl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc2tl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc2fl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
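Every instruction in this file is a branch-likely form (the -l suffix, including the coprocessor bc1tl/bc2fl family), and these were removed outright in Release 6 in favor of the compact branches, so the file is right that rejection is correct and only the message is at issue. A sketch of the replacement relationship, assuming R6 semantics (not something this test asserts):

    beql $1,$2,4     # pre-R6 branch-likely: delay slot annulled on fall-through
    beqc $1,$2,4     # R6 compact branch: no delay slot at all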
diff --git a/test/MC/Mips/mips32r6/invalid-mips2.s b/test/MC/Mips/mips32r6/invalid-mips2.s
new file mode 100644
index 0000000..0638e78
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips2.s
@@ -0,0 +1,14 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
new file mode 100644
index 0000000..e416a20
--- /dev/null
+++ b/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
@@ -0,0 +1,16 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ bc1tl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc1tl $fcc1,4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc1fl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc1fl $fcc1,4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc2tl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc2tl $fcc1,4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc2fl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ bc2fl $fcc1,4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips32r6/relocations.s b/test/MC/Mips/mips32r6/relocations.s
new file mode 100644
index 0000000..4532e42
--- /dev/null
+++ b/test/MC/Mips/mips32r6/relocations.s
@@ -0,0 +1,55 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN: | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mips-unknown-linux -mcpu=mips32r6 \
+# RUN: | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that the assembler can handle the documented syntax for fixups.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP: beqzc $9, bar # encoding: [0xd9,0b001AAAAA,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP: bnezc $9, bar # encoding: [0xf9,0b001AAAAA,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP: balc bar # encoding: [0b111010AA,A,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP: bc bar # encoding: [0b110010AA,A,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP: aluipc $2, %pcrel_hi(bar) # encoding: [0xec,0x5f,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar@PCREL_HI16,
+# CHECK-FIXUP: kind: fixup_MIPS_PCHI16
+# CHECK-FIXUP: addiu $2, $2, %pcrel_lo(bar) # encoding: [0x24,0x42,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar@PCREL_LO16,
+# CHECK-FIXUP: kind: fixup_MIPS_PCLO16
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF: 0x0 R_MIPS_PC16 bar 0x0
+# CHECK-ELF: 0x4 R_MIPS_PC16 bar 0x0
+# CHECK-ELF: 0x8 R_MIPS_PC21_S2 bar 0x0
+# CHECK-ELF: 0xC R_MIPS_PC21_S2 bar 0x0
+# CHECK-ELF: 0x10 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF: 0x14 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF: 0x18 R_MIPS_PCHI16 bar 0x0
+# CHECK-ELF: 0x1C R_MIPS_PCLO16 bar 0x0
+# CHECK-ELF: ]
+
+ beqc $5, $6, bar
+ bnec $5, $6, bar
+ beqzc $9, bar
+ bnezc $9, bar
+ balc bar
+ bc bar
+ aluipc $2, %pcrel_hi(bar)
+ addiu $2, $2, %pcrel_lo(bar)
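Two conventions in this new file are worth spelling out. In -show-encoding output, A marks bits the fixup will fill in later, which is why the 21-bit beqzc offset shows up as the partial byte 0b001AAAAA; each fixup kind then pairs with the ELF relocation listed under CHECK-ELF (fixup_MIPS_PC21_S2 with R_MIPS_PC21_S2, and so on). And %pcrel_hi/%pcrel_lo split a PC-relative address across two instructions:

    aluipc $2, %pcrel_hi(bar)       # upper half -> R_MIPS_PCHI16
    addiu  $2, $2, %pcrel_lo(bar)   # lower half -> R_MIPS_PCLO16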
diff --git a/test/MC/Mips/mips32r6/valid-xfail.s b/test/MC/Mips/mips32r6/valid-xfail.s
new file mode 100644
index 0000000..0c911d7
--- /dev/null
+++ b/test/MC/Mips/mips32r6/valid-xfail.s
@@ -0,0 +1,19 @@
+# Instructions that should be valid but currently fail for known reasons (e.g.
+# they aren't implemented yet).
+# This test is set up to XPASS if any instruction generates an encoding.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 | not FileCheck %s
+# CHECK-NOT: encoding
+# XFAIL: *
+
+ .set noat
+ bovc $0, $2, 4 # TODO: bovc $0, $2, 4 # encoding: [0x20,0x40,0x00,0x01]
+ bovc $2, $4, 4 # TODO: bovc $2, $4, 4 # encoding: [0x20,0x82,0x00,0x01]
+ bnvc $0, $2, 4 # TODO: bnvc $0, $2, 4 # encoding: [0x60,0x40,0x00,0x01]
+ bnvc $2, $4, 4 # TODO: bnvc $2, $4, 4 # encoding: [0x60,0x82,0x00,0x01]
+ beqc $0, $6, 256 # TODO: beqc $6, $zero, 256 # encoding: [0x20,0xc0,0x00,0x40]
+ beqc $5, $0, 256 # TODO: beqc $5, $zero, 256 # encoding: [0x20,0xa0,0x00,0x40]
+ beqc $6, $5, 256 # TODO: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40]
+ bnec $0, $6, 256 # TODO: bnec $6, $zero, 256 # encoding: [0x60,0xc0,0x00,0x40]
+ bnec $5, $0, 256 # TODO: bnec $5, $zero, 256 # encoding: [0x60,0xa0,0x00,0x40]
+ bnec $6, $5, 256 # TODO: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40]
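The double negation in the RUN line is what makes the XPASS machinery work: with CHECK-NOT: encoding, FileCheck succeeds while nothing assembles, the outer not then fails the RUN line, and XFAIL: * records that failure as expected. As soon as one of the listed branches gains an encoding, FileCheck fails, the RUN line passes, and lit reports an unexpected pass, which is the cue to move that instruction into valid.s. In the file's own terms:

    # RUN: not llvm-mc ... | not FileCheck %s    # passes only once some 'encoding' appears
    # CHECK-NOT: encoding
    # XFAIL: *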
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
new file mode 100644
index 0000000..5b4b928
--- /dev/null
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -0,0 +1,126 @@
+# Instructions that are valid
+#
+# Branches have some unusual encoding rules in MIPS32r6, so we need to test:
+# rs == 0
+# rs != 0
+# rt == 0
+# rt != 0
+# rs < rt
+# rs == rt
+# rs > rt
+# appropriately for each branch instruction
+#
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 | FileCheck %s
+
+ .set noat
+ # FIXME: Add the instructions carried forward from older ISAs
+ addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0xec,0x80,0x00,0x19]
+ align $4, $2, $3, 2 # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0]
+ aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0xec,0x7f,0x00,0x38]
+ aui $3,$2,-23 # CHECK: aui $3, $2, -23 # encoding: [0x3c,0x62,0xff,0xe9]
+ auipc $3, -1 # CHECK: auipc $3, -1 # encoding: [0xec,0x7e,0xff,0xff]
+ balc 14572256 # CHECK: balc 14572256 # encoding: [0xe8,0x37,0x96,0xb8]
+ bc 14572256 # CHECK: bc 14572256 # encoding: [0xc8,0x37,0x96,0xb8]
+ bc1eqz $f0,4 # CHECK: bc1eqz $f0, 4 # encoding: [0x45,0x20,0x00,0x01]
+ bc1eqz $f31,4 # CHECK: bc1eqz $f31, 4 # encoding: [0x45,0x3f,0x00,0x01]
+ bc1nez $f0,4 # CHECK: bc1nez $f0, 4 # encoding: [0x45,0xa0,0x00,0x01]
+ bc1nez $f31,4 # CHECK: bc1nez $f31, 4 # encoding: [0x45,0xbf,0x00,0x01]
+ bc2eqz $0,8 # CHECK: bc2eqz $0, 8 # encoding: [0x49,0x20,0x00,0x02]
+ bc2eqz $31,8 # CHECK: bc2eqz $31, 8 # encoding: [0x49,0x3f,0x00,0x02]
+ bc2nez $0,8 # CHECK: bc2nez $0, 8 # encoding: [0x49,0xa0,0x00,0x02]
+ bc2nez $31,8 # CHECK: bc2nez $31, 8 # encoding: [0x49,0xbf,0x00,0x02]
+ # beqc requires rs < rt && rs != 0, but we also accept operands that violate this. See also bovc
+ # FIXME: Testcases are in valid-xfail.s at the moment
+ beqc $5, $6, 256 # CHECK: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40]
+ beqzalc $2, 1332 # CHECK: beqzalc $2, 1332 # encoding: [0x20,0x02,0x01,0x4d]
+ # bnec requires rs < rt && rs != 0, but we also accept operands that violate this. See also bnvc
+ # FIXME: Testcases are in valid-xfail.s at the moment
+ bnec $5, $6, 256 # CHECK: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40]
+ bnezalc $2, 1332 # CHECK: bnezalc $2, 1332 # encoding: [0x60,0x02,0x01,0x4d]
+ beqzc $5, 72256 # CHECK: beqzc $5, 72256 # encoding: [0xd8,0xa0,0x46,0x90]
+ bgezalc $2, 1332 # CHECK: bgezalc $2, 1332 # encoding: [0x18,0x42,0x01,0x4d]
+ bnezc $5, 72256 # CHECK: bnezc $5, 72256 # encoding: [0xf8,0xa0,0x46,0x90]
+ bltzc $5, 256 # CHECK: bltzc $5, 256 # encoding: [0x5c,0xa5,0x00,0x40]
+ bgezc $5, 256 # CHECK: bgezc $5, 256 # encoding: [0x58,0xa5,0x00,0x40]
+ bgtzalc $2, 1332 # CHECK: bgtzalc $2, 1332 # encoding: [0x1c,0x02,0x01,0x4d]
+ blezc $5, 256 # CHECK: blezc $5, 256 # encoding: [0x58,0x05,0x00,0x40]
+ bltzalc $2, 1332 # CHECK: bltzalc $2, 1332 # encoding: [0x1c,0x42,0x01,0x4d]
+ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40]
+ bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20]
+ blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d]
+ # bnvc requires rs >= rt, but we accept both orderings. See also bnec
+ bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01]
+ bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01]
+ bnvc $4, $2, 4 # CHECK: bnvc $4, $2, 4 # encoding: [0x60,0x82,0x00,0x01]
+ # bovc requires rs >= rt, but we accept both orderings. See also beqc
+ bovc $0, $0, 4 # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01]
+ bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01]
+ bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01]
+ cmp.f.s $f2,$f3,$f4 # CHECK: cmp.f.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80]
+ cmp.f.d $f2,$f3,$f4 # CHECK: cmp.f.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80]
+ cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81]
+ cmp.un.d $f2,$f3,$f4 # CHECK: cmp.un.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x81]
+ cmp.eq.s $f2,$f3,$f4 # CHECK: cmp.eq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x82]
+ cmp.eq.d $f2,$f3,$f4 # CHECK: cmp.eq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x82]
+ cmp.ueq.s $f2,$f3,$f4 # CHECK: cmp.ueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x83]
+ cmp.ueq.d $f2,$f3,$f4 # CHECK: cmp.ueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x83]
+ cmp.olt.s $f2,$f3,$f4 # CHECK: cmp.olt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x84]
+ cmp.olt.d $f2,$f3,$f4 # CHECK: cmp.olt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x84]
+ cmp.ult.s $f2,$f3,$f4 # CHECK: cmp.ult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x85]
+ cmp.ult.d $f2,$f3,$f4 # CHECK: cmp.ult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x85]
+ cmp.ole.s $f2,$f3,$f4 # CHECK: cmp.ole.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x86]
+ cmp.ole.d $f2,$f3,$f4 # CHECK: cmp.ole.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x86]
+ cmp.ule.s $f2,$f3,$f4 # CHECK: cmp.ule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x87]
+ cmp.ule.d $f2,$f3,$f4 # CHECK: cmp.ule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x87]
+ cmp.sf.s $f2,$f3,$f4 # CHECK: cmp.sf.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x88]
+ cmp.sf.d $f2,$f3,$f4 # CHECK: cmp.sf.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x88]
+ cmp.ngle.s $f2,$f3,$f4 # CHECK: cmp.ngle.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x89]
+ cmp.ngle.d $f2,$f3,$f4 # CHECK: cmp.ngle.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x89]
+ cmp.seq.s $f2,$f3,$f4 # CHECK: cmp.seq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8a]
+ cmp.seq.d $f2,$f3,$f4 # CHECK: cmp.seq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8a]
+ cmp.ngl.s $f2,$f3,$f4 # CHECK: cmp.ngl.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b]
+ cmp.ngl.d $f2,$f3,$f4 # CHECK: cmp.ngl.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b]
+ cmp.lt.s $f2,$f3,$f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8c]
+ cmp.lt.d $f2,$f3,$f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8c]
+ cmp.nge.s $f2,$f3,$f4 # CHECK: cmp.nge.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d]
+ cmp.nge.d $f2,$f3,$f4 # CHECK: cmp.nge.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d]
+ cmp.le.s $f2,$f3,$f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8e]
+ cmp.le.d $f2,$f3,$f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8e]
+ cmp.ngt.s $f2,$f3,$f4 # CHECK: cmp.ngt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f]
+ cmp.ngt.d $f2,$f3,$f4 # CHECK: cmp.ngt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f]
+ div $2,$3,$4 # CHECK: div $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9a]
+ divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b]
+ jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00]
+ jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xd8,0x05,0x01,0x00]
+ lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43]
+ lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43]
+ mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda]
+ modu $2,$3,$4 # CHECK: modu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdb]
+# mul $2,$3,$4 # CHECK-TODO: mul $2, $3, $4 # encoding: [0x00,0x64,0x10,0x98]
+ muh $2,$3,$4 # CHECK: muh $2, $3, $4 # encoding: [0x00,0x64,0x10,0xd8]
+ mulu $2,$3,$4 # CHECK: mulu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x99]
+ muhu $2,$3,$4 # CHECK: muhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xd9]
+ maddf.s $f2,$f3,$f4 # CHECK: maddf.s $f2, $f3, $f4 # encoding: [0x46,0x04,0x18,0x98]
+ maddf.d $f2,$f3,$f4 # CHECK: maddf.d $f2, $f3, $f4 # encoding: [0x46,0x24,0x18,0x98]
+ msubf.s $f2,$f3,$f4 # CHECK: msubf.s $f2, $f3, $f4 # encoding: [0x46,0x04,0x18,0x99]
+ msubf.d $f2,$f3,$f4 # CHECK: msubf.d $f2, $f3, $f4 # encoding: [0x46,0x24,0x18,0x99]
+ sel.d $f0,$f1,$f2 # CHECK: sel.d $f0, $f1, $f2 # encoding: [0x46,0x22,0x08,0x10]
+ sel.s $f0,$f1,$f2 # CHECK: sel.s $f0, $f1, $f2 # encoding: [0x46,0x02,0x08,0x10]
+ seleqz $2,$3,$4 # CHECK: seleqz $2, $3, $4 # encoding: [0x00,0x64,0x10,0x35]
+ selnez $2,$3,$4 # CHECK: selnez $2, $3, $4 # encoding: [0x00,0x64,0x10,0x37]
+ max.s $f0, $f2, $f4 # CHECK: max.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1d]
+ max.d $f0, $f2, $f4 # CHECK: max.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1d]
+ min.s $f0, $f2, $f4 # CHECK: min.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1c]
+ min.d $f0, $f2, $f4 # CHECK: min.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1c]
+ maxa.s $f0, $f2, $f4 # CHECK: maxa.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1f]
+ maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f]
+ mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e]
+ mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e]
+ seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14]
+ seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14]
+ selnez.s $f0, $f2, $f4 # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17]
+ selnez.d $f0, $f2, $f4 # CHECK: selnez.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x17]
+ rint.s $f2, $f4 # CHECK: rint.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9a]
+ rint.d $f2, $f4 # CHECK: rint.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9a]
+ class.s $f2, $f4 # CHECK: class.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9b]
+ class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9b]
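The rs/rt matrix in this file's header exists because several R6 branches share one major opcode and are distinguished only by their register fields. Assuming the Release 6 opcode tables (background knowledge, not something the test asserts): the old addi slot is reused so that rs==0 selects beqzalc, nonzero rs < rt selects beqc, and rs >= rt selects bovc, with the old daddi slot split the same way between bnezalc, bnec, and bnvc. That is why the comments above say beqc "requires" rs < rt, and why the deliberately out-of-order operands sit in valid-xfail.s for now. The selection rule, sketched:

    # former addi opcode, 16-bit offset in the low half:
    #   rs == 0, rt != 0    -> beqzalc $rt, offset
    #   rs != 0, rs < rt    -> beqc    $rs, $rt, offset
    #   rs >= rt            -> bovc    $rs, $rt, offset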
diff --git a/test/MC/Mips/mips4/invalid-mips5-wrong-error.s b/test/MC/Mips/mips4/invalid-mips5-wrong-error.s
new file mode 100644
index 0000000..c6c8968
--- /dev/null
+++ b/test/MC/Mips/mips4/invalid-mips5-wrong-error.s
@@ -0,0 +1,46 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips4 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ alnv.ps $f12,$f18,$f30,$t0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.eq.ps $fcc5,$f0,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.f.ps $fcc6,$f11,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.le.ps $fcc1,$f7,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.lt.ps $f19,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.nge.ps $f1,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngl.ps $f21,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngle.ps $fcc7,$f12,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngt.ps $fcc5,$f30,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ole.ps $fcc7,$f21,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.olt.ps $fcc3,$f7,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.seq.ps $fcc6,$f31,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.sf.ps $fcc6,$f4,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ueq.ps $fcc1,$f5,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ule.ps $fcc6,$f17,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ult.ps $fcc7,$f14,$f0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.un.ps $fcc4,$f2,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.ps.s $f3,$f18,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pl $f30,$f1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.s.pu $f14,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ madd.ps $f22,$f3,$f14,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mov.ps $f22,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movf.ps $f10,$f28,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movn.ps $f31,$f31,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movt.ps $f20,$f25,$fcc2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movz.ps $f18,$f17,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ msub.ps $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mul.ps $f14,$f0,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ neg.ps $f19,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmadd.ps $f27,$f4,$f9,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmsub.ps $f6,$f12,$f14,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pll.ps $f25,$f9,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ plu.ps $f1,$f26,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pul.ps $f9,$f30,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ puu.ps $f24,$f9,$f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sub.ps $f5,$f14,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
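Everything this file feeds the MIPS IV assembler is a paired-single (.ps) operation, a format that postdates MIPS IV (it arrived with MIPS V and was carried into MIPS64), so rejection is correct and only the "Unknown instruction" wording is suspect; the feature-style message used by invalid-mips5.s just below is presumably (an assumption) the intended one. For contrast, in the same CHECK style:

    abs.ps $f22,$f8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled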
diff --git a/test/MC/Mips/mips4/invalid-mips5.s b/test/MC/Mips/mips4/invalid-mips5.s
new file mode 100644
index 0000000..8c0db00
--- /dev/null
+++ b/test/MC/Mips/mips4/invalid-mips5.s
@@ -0,0 +1,9 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips4/invalid-mips64-xfail.s b/test/MC/Mips/mips4/invalid-mips64-xfail.s
deleted file mode 100644
index d8ebcd3..0000000
--- a/test/MC/Mips/mips4/invalid-mips64-xfail.s
+++ /dev/null
@@ -1,22 +0,0 @@
-# Instructions that are supposed to be invalid but currently aren't
-# This test will XPASS if any insn stops assembling.
-#
-# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 \
-# RUN: 2> %t1
-# RUN: not FileCheck %s < %t1
-# XFAIL: *
-
-# CHECK-NOT: error
- .set noat
- deret
- luxc1 $f19,$s6($s5)
- madd $s6,$t5
- madd $zero,$t1
- maddu $s3,$gp
- maddu $t8,$s2
- mfc0 $a2,$14,1
- msub $s7,$k1
- msubu $t7,$a1
- mtc0 $t1,$29,3
- mul $s0,$s4,$at
- suxc1 $f12,$k1($t5)
diff --git a/test/MC/Mips/mips4/invalid-mips64.s b/test/MC/Mips/mips4/invalid-mips64.s
index e0b69f2..c6245cc 100644
--- a/test/MC/Mips/mips4/invalid-mips64.s
+++ b/test/MC/Mips/mips4/invalid-mips64.s
@@ -6,7 +6,19 @@
# RUN: FileCheck %s < %t1
.set noat
- clo $t3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- dclz $s0,$t9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clo $t3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclz $s0,$t9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ deret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $s6,$t5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $zero,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $t8,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfc0 $a2,$14,1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub $s7,$k1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msubu $t7,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtc0 $t1,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
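This hunk folds in the file deleted just above (invalid-mips64-xfail.s): the MIPS64 instructions that used to assemble under -mcpu=mips4 by mistake are now rejected, so each one migrates from the expected-to-XPASS list into the real invalid test, pinned to the standard diagnostic. The same line in its old and new guises, as a sketch:

    madd $s6,$t5      # before: in the xfail file, wrongly accepted
    madd $s6,$t5      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled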
diff --git a/test/MC/Mips/mips4/invalid-mips64r2-xfail.s b/test/MC/Mips/mips4/invalid-mips64r2-xfail.s
index 63edb60..a5581fd 100644
--- a/test/MC/Mips/mips4/invalid-mips64r2-xfail.s
+++ b/test/MC/Mips/mips4/invalid-mips64r2-xfail.s
@@ -8,20 +8,4 @@
# CHECK-NOT: error
.set noat
- deret
- di $s8
- ei $t6
- luxc1 $f19,$s6($s5)
- madd $s6,$t5
- madd $zero,$t1
- maddu $s3,$gp
- maddu $t8,$s2
- mfc0 $a2,$14,1
- mfhc1 $s8,$f24
- msub $s7,$k1
- msubu $t7,$a1
- mtc0 $t1,$29,3
- mthc1 $zero,$f16
- mul $s0,$s4,$at
rdhwr $sp,$11
- suxc1 $f12,$k1($t5)
diff --git a/test/MC/Mips/mips4/invalid-mips64r2.s b/test/MC/Mips/mips4/invalid-mips64r2.s
index ed2dff8..b259706 100644
--- a/test/MC/Mips/mips4/invalid-mips64r2.s
+++ b/test/MC/Mips/mips4/invalid-mips64r2.s
@@ -1,22 +1,37 @@
# Instructions that are invalid
#
-# FIXME: This test should be moved to the mips5 directory when mips5 is supported
# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 \
# RUN: 2>%t1
# RUN: FileCheck %s < %t1
.set noat
- clo $t3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- dclz $s0,$t9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clo $t3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclz $s0,$t9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ deret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ di $s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
dsbh $v1,$t6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
dshd $v0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ei $t6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ luxc1 $f19,$s6($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $s6,$t5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $zero,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
madd.s $f1,$f31,$f19,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $t8,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfc0 $a2,$14,1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhc1 $s8,$f24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub $s7,$k1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
msub.s $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msubu $t7,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtc0 $t1,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthc1 $zero,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
nmadd.s $f0,$f5,$f25,$f12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
nmsub.s $f1,$f24,$f19,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
pause # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
seb $t9,$t7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
seh $v1,$t4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ suxc1 $f12,$k1($t5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
wsbh $k1,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips4/valid-xfail.s b/test/MC/Mips/mips4/valid-xfail.s
index baf5c53..ff6f457 100644
--- a/test/MC/Mips/mips4/valid-xfail.s
+++ b/test/MC/Mips/mips4/valid-xfail.s
@@ -2,53 +2,48 @@
# they aren't implemented yet).
# This test is set up to XPASS if any instruction generates an encoding.
#
-# FIXME: Test MIPS-IV instead of MIPS64
-# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | not FileCheck %s
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 | not FileCheck %s
# CHECK-NOT: encoding
# XFAIL: *
- .set noat
- c.eq.d $fcc1,$f15,$f15
- c.eq.s $fcc5,$f24,$f17
- c.f.d $fcc4,$f11,$f21
- c.f.s $fcc4,$f30,$f7
- c.le.d $fcc4,$f18,$f1
- c.le.s $fcc6,$f24,$f4
- c.lt.d $fcc3,$f9,$f3
- c.lt.s $fcc2,$f17,$f14
- c.nge.d $fcc5,$f21,$f16
- c.nge.s $fcc3,$f11,$f8
- c.ngl.s $fcc2,$f31,$f23
- c.ngle.s $fcc2,$f18,$f23
- c.ngt.d $fcc4,$f24,$f7
- c.ngt.s $fcc5,$f8,$f13
- c.ole.d $fcc2,$f16,$f31
- c.ole.s $fcc3,$f7,$f20
- c.olt.d $fcc4,$f19,$f28
- c.olt.s $fcc6,$f20,$f7
- c.seq.d $fcc4,$f31,$f7
- c.seq.s $fcc7,$f1,$f25
- c.ueq.d $fcc4,$f13,$f25
- c.ueq.s $fcc6,$f3,$f30
- c.ule.d $fcc7,$f25,$f18
- c.ule.s $fcc7,$f21,$f30
- c.ult.d $fcc6,$f6,$f17
- c.ult.s $fcc7,$f24,$f10
- c.un.d $fcc6,$f23,$f24
- c.un.s $fcc1,$f30,$f4
- madd.d $f18,$f19,$f26,$f20
- madd.s $f1,$f31,$f19,$f25
- msub.d $f10,$f1,$f31,$f18
- msub.s $f12,$f19,$f10,$f16
- nmadd.d $f18,$f9,$f14,$f19
- nmadd.s $f0,$f5,$f25,$f12
- nmsub.d $f30,$f8,$f16,$f30
- nmsub.s $f1,$f24,$f19,$f4
- recip.d $f19,$f6
- recip.s $f3,$f30
- rsqrt.d $f3,$f28
- rsqrt.s $f4,$f8
- tlbp
- tlbr
- tlbwi
- tlbwr
+ .set noat
+ c.eq.d $fcc1,$f15,$f15
+ c.eq.s $fcc5,$f24,$f17
+ c.f.d $fcc4,$f11,$f21
+ c.f.s $fcc4,$f30,$f7
+ c.le.d $fcc4,$f18,$f1
+ c.le.s $fcc6,$f24,$f4
+ c.lt.d $fcc3,$f9,$f3
+ c.lt.s $fcc2,$f17,$f14
+ c.nge.d $fcc5,$f21,$f16
+ c.nge.s $fcc3,$f11,$f8
+ c.ngl.s $fcc2,$f31,$f23
+ c.ngle.s $fcc2,$f18,$f23
+ c.ngt.d $fcc4,$f24,$f7
+ c.ngt.s $fcc5,$f8,$f13
+ c.ole.d $fcc2,$f16,$f31
+ c.ole.s $fcc3,$f7,$f20
+ c.olt.d $fcc4,$f19,$f28
+ c.olt.s $fcc6,$f20,$f7
+ c.seq.d $fcc4,$f31,$f7
+ c.seq.s $fcc7,$f1,$f25
+ c.ueq.d $fcc4,$f13,$f25
+ c.ueq.s $fcc6,$f3,$f30
+ c.ule.d $fcc7,$f25,$f18
+ c.ule.s $fcc7,$f21,$f30
+ c.ult.d $fcc6,$f6,$f17
+ c.ult.s $fcc7,$f24,$f10
+ c.un.d $fcc6,$f23,$f24
+ c.un.s $fcc1,$f30,$f4
+ madd.d $f18,$f19,$f26,$f20
+ madd.s $f1,$f31,$f19,$f25
+ msub.d $f10,$f1,$f31,$f18
+ msub.s $f12,$f19,$f10,$f16
+ nmadd.d $f18,$f9,$f14,$f19
+ nmadd.s $f0,$f5,$f25,$f12
+ nmsub.d $f30,$f8,$f16,$f30
+ nmsub.s $f1,$f24,$f19,$f4
+ recip.d $f19,$f6
+ recip.s $f3,$f30
+ rsqrt.d $f3,$f28
+ rsqrt.s $f4,$f8
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 8dc2a23..811584e 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -1,161 +1,194 @@
# Instructions that are valid
#
-# FIXME: Test MIPS-IV instead of MIPS64
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 | FileCheck %s
- .set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.l.d $f1,$f3
- ceil.l.s $f18,$f13
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- ctc1 $a2,$26
- cvt.d.l $f4,$f16
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- cvt.s.d $f26,$f8
- cvt.s.l $f15,$f30
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- dadd $s3,$at,$ra
- daddi $sp,$s4,-27705
- daddiu $k0,$s6,-4586
- ddiv $zero,$k0,$s3
- ddivu $zero,$s0,$s1
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- dmfc1 $t4,$f13
- dmtc1 $s0,$f14
- dmult $s7,$t1
- dmultu $a1,$a2
- dsllv $zero,$s4,$t4
- dsrav $gp,$s2,$s3
- dsrlv $s3,$t6,$s4
- dsub $a3,$s6,$t0
- dsubu $a1,$a1,$k0
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- eret
- floor.l.d $f26,$f7
- floor.l.s $f12,$f5
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ld $sp,-28645($s1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- ldl $t8,-4167($t8)
- ldr $t6,-30358($s4)
- ldxc1 $f8,$s7($t7)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lld $zero,-14736($ra)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- lwu $s3,-24086($v1)
- lwxc1 $f12,$s1($s8)
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $t9,$a2
- movf $gp,$t0,$fcc7
- movf.d $f6,$f11,$fcc5
- movf.s $f23,$f5,$fcc6
- movn $v1,$s1,$s0
- movn.d $f27,$f21,$k0
- movn.s $f12,$f0,$s7
- movt $zero,$s4,$fcc5
- movt.d $f0,$f2,$fcc0
- movt.s $f30,$f2,$fcc1
- movz $a1,$s6,$t1
- movz.d $f12,$f29,$t1
- movz.s $f25,$f7,$v1
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- round.l.d $f12,$f1
- round.l.s $f25,$f5
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- scd $t7,-8243($sp)
- sd $t4,5835($t2)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sdl $a3,-20961($s8)
- sdr $t3,-20423($t4)
- sdxc1 $f11,$t2($t6)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- swxc1 $f19,$t4($k0)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.l.d $f23,$f23
- trunc.l.s $f28,$f31
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
+ .set noat
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.l.d $f1,$f3
+ ceil.l.s $f18,$f13
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ ctc1 $a2,$26
+ cvt.d.l $f4,$f16
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.l.d $f24,$f15
+ cvt.l.s $f11,$f29
+ cvt.s.d $f26,$f8
+ cvt.s.l $f15,$f30
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ dadd $s3,$at,$ra
+ daddi $sp,$s4,-27705
+ daddiu $k0,$s6,-4586
+ daddu $s3,$at,$ra
+ ddiv $zero,$k0,$s3
+ ddivu $zero,$s0,$s1
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ dmfc1 $12,$f13
+ dmtc1 $s0,$f14
+ dmult $s7,$9
+ dmultu $a1,$a2
+ dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
+ dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
+ dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsra $gp,10 # CHECK: dsra $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbb]
+ dsra $gp,$s2,10 # CHECK: dsra $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbb]
+ dsra $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsra32 $gp,10 # CHECK: dsra32 $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbf]
+ dsra32 $gp,$s2,10 # CHECK: dsra32 $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbf]
+ dsrav $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
+ dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
+ dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
+ dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
+ dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ eret
+ floor.l.d $f26,$f7
+ floor.l.s $f12,$f5
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ld $sp,-28645($s1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldl $24,-4167($24)
+ ldr $14,-30358($s4)
+ ldxc1 $f8,$s7($15)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lld $zero,-14736($ra)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ lwu $s3,-24086($v1)
+ lwxc1 $f12,$s1($s8)
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $a0,$a3
+ move $s5,$a0
+ move $s8,$a0
+ move $25,$a2
+ movf $gp,$8,$fcc7
+ movf.d $f6,$f11,$fcc5
+ movf.s $f23,$f5,$fcc6
+ movn $v1,$s1,$s0
+ movn.d $f27,$f21,$k0
+ movn.s $f12,$f0,$s7
+ movt $zero,$s4,$fcc5
+ movt.d $f0,$f2,$fcc0
+ movt.s $f30,$f2,$fcc1
+ movz $a1,$s6,$9
+ movz.d $f12,$f29,$9
+ movz.s $f25,$f7,$v1
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ round.l.d $f12,$f1
+ round.l.s $f25,$f5
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ scd $15,-8243($sp)
+ sd $12,5835($10)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdl $a3,-20961($s8)
+ sdr $11,-20423($12)
+ sdxc1 $f11,$10($14)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ swxc1 $f19,$12($k0)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.l.d $f23,$f23
+ trunc.l.s $f28,$f31
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
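A note on the two-operand shift forms added above: the assembler treats "dsll $rd,imm" as shorthand for "dsll $rd,$rd,imm", and when the shift amount is a register it canonicalizes the mnemonic to the variable-shift opcode (dsll to dsllv, dsra to dsrav, dsrl to dsrlv). Both effects are visible in the CHECK lines of this hunk, e.g. (copied verbatim from the patch):

    dsll $zero,18        # prints as: dsll  $zero, $zero, 18   encoding: [0x00,0x00,0x04,0xb8]
    dsll $zero,$s4,$12   # prints as: dsllv $zero, $20, $12    encoding: [0x01,0x94,0x00,0x14]

Note also that llvm-mc prints GPRs numerically in its expected output, which is why $s4 comes back as $20.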
diff --git a/test/MC/Mips/mips5/invalid-mips64.s b/test/MC/Mips/mips5/invalid-mips64.s
new file mode 100644
index 0000000..19d64dc
--- /dev/null
+++ b/test/MC/Mips/mips5/invalid-mips64.s
@@ -0,0 +1,21 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips5 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ clo $11,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclz $s0,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ deret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $s6,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $zero,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $24,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfc0 $a2,$14,1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub $s7,$k1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msubu $15,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtc0 $9,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
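For readers who don't live in LLVM's lit harness: RUN lines are shell templates in which lit textually substitutes %s with the path of the test file and %t with a per-test scratch path (%t1 is simply %t with a literal "1" appended) before executing them, and the leading "not" inverts llvm-mc's exit status, since the assembler is expected to fail here. A rough expansion of the two RUN lines above, with illustrative paths (the real ones are chosen by lit):

    not llvm-mc invalid-mips64.s -triple=mips64-unknown-linux \
        -show-encoding -mcpu=mips5 2> Output/invalid-mips64.s.tmp1
    FileCheck invalid-mips64.s < Output/invalid-mips64.s.tmp1

The test therefore passes only if llvm-mc rejects the input and its stderr satisfies every CHECK pattern in the file.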
diff --git a/test/MC/Mips/mips5/invalid-mips64r2-xfail.s b/test/MC/Mips/mips5/invalid-mips64r2-xfail.s
new file mode 100644
index 0000000..b2b612d
--- /dev/null
+++ b/test/MC/Mips/mips5/invalid-mips64r2-xfail.s
@@ -0,0 +1,11 @@
+# Instructions that are supposed to be invalid but currently aren't
+# This test will XPASS if any insn stops assembling.
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips4 \
+# RUN: 2> %t1
+# RUN: not FileCheck %s < %t1
+# XFAIL: *
+
+# CHECK-NOT: error
+ .set noat
+ rdhwr $sp,$11
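The polarity of this xfail test is inverted twice, which is easy to misread. "CHECK-NOT: error" makes FileCheck fail as soon as an "error" line appears on llvm-mc's stderr, and the "not" in front of FileCheck flips that again, so the RUN pipeline succeeds only once rdhwr is actually rejected. A sketch of the two states, reasoned from the directives above rather than from a test run:

    # today: rdhwr assembles -> no "error" in %t1 -> FileCheck passes -> "not FileCheck" fails -> lit reports XFAIL
    # fixed: rdhwr rejected  -> "error" in %t1    -> FileCheck fails  -> "not FileCheck" passes -> lit reports XPASS

The XPASS is the signal that the stray instruction finally errors out and the test can graduate out of the xfail bucket.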
diff --git a/test/MC/Mips/mips5/invalid-mips64r2.s b/test/MC/Mips/mips5/invalid-mips64r2.s
new file mode 100644
index 0000000..b91e520
--- /dev/null
+++ b/test/MC/Mips/mips5/invalid-mips64r2.s
@@ -0,0 +1,43 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips5 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ clo $11,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ clz $sp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclo $s2,$a2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dclz $s0,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ deret # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ di $s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr32 $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr32 $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotrv $1,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsbh $v1,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dshd $v0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ei $14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $s6,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $zero,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd.s $f1,$f31,$f19,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $24,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfc0 $a2,$14,1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhc1 $s8,$f24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub $s7,$k1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub.s $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msubu $15,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtc0 $9,$29,3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthc1 $zero,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul $s0,$s4,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmadd.s $f0,$f5,$f25,$f12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmsub.s $f1,$f24,$f19,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pause # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotrv $1,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ seb $25,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ seh $v1,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ wsbh $k1,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
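The ":[[@LINE]]:{{[0-9]+}}:" pattern used throughout these files pins each diagnostic to the offending source line: FileCheck expands [[@LINE]] to the line number of the CHECK directive itself, which sits on the same line as the instruction, while the {{[0-9]+}} regex absorbs whatever column the parser reports. The diagnostics being matched have the usual file:line:col shape; an illustrative one, with hypothetical line and column numbers:

    invalid-mips64r2.s:8:9: error: instruction requires a CPU feature not currently enabled
            clo $11,$a1
                ^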
diff --git a/test/MC/Mips/mips5/valid-xfail.s b/test/MC/Mips/mips5/valid-xfail.s
index 85d961b..8d1d0d7 100644
--- a/test/MC/Mips/mips5/valid-xfail.s
+++ b/test/MC/Mips/mips5/valid-xfail.s
@@ -2,91 +2,86 @@
# they aren't implemented yet).
# This test is set up to XPASS if any instruction generates an encoding.
#
-# FIXME: Test MIPS-V instead of MIPS64
-# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | not FileCheck %s
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips5 | not FileCheck %s
# CHECK-NOT: encoding
# XFAIL: *
.set noat
- abs.ps $f22,$f8
- add.ps $f25,$f27,$f13
- alnv.ps $f12,$f18,$f30,$t4
- c.eq.d $fcc1,$f15,$f15
- c.eq.ps $fcc5,$f0,$f9
- c.eq.s $fcc5,$f24,$f17
- c.f.d $fcc4,$f11,$f21
- c.f.ps $fcc6,$f11,$f11
- c.f.s $fcc4,$f30,$f7
- c.le.d $fcc4,$f18,$f1
- c.le.ps $fcc1,$f7,$f20
- c.le.s $fcc6,$f24,$f4
- c.lt.d $fcc3,$f9,$f3
- c.lt.ps $f19,$f5
- c.lt.s $fcc2,$f17,$f14
- c.nge.d $fcc5,$f21,$f16
- c.nge.ps $f1,$f26
- c.nge.s $fcc3,$f11,$f8
- c.ngl.ps $f21,$f30
- c.ngl.s $fcc2,$f31,$f23
- c.ngle.ps $fcc7,$f12,$f20
- c.ngle.s $fcc2,$f18,$f23
- c.ngt.d $fcc4,$f24,$f7
- c.ngt.ps $fcc5,$f30,$f6
- c.ngt.s $fcc5,$f8,$f13
- c.ole.d $fcc2,$f16,$f31
- c.ole.ps $fcc7,$f21,$f8
- c.ole.s $fcc3,$f7,$f20
- c.olt.d $fcc4,$f19,$f28
- c.olt.ps $fcc3,$f7,$f16
- c.olt.s $fcc6,$f20,$f7
- c.seq.d $fcc4,$f31,$f7
- c.seq.ps $fcc6,$f31,$f14
- c.seq.s $fcc7,$f1,$f25
- c.sf.ps $fcc6,$f4,$f6
- c.ueq.d $fcc4,$f13,$f25
- c.ueq.ps $fcc1,$f5,$f29
- c.ueq.s $fcc6,$f3,$f30
- c.ule.d $fcc7,$f25,$f18
- c.ule.ps $fcc6,$f17,$f3
- c.ule.s $fcc7,$f21,$f30
- c.ult.d $fcc6,$f6,$f17
- c.ult.ps $fcc7,$f14,$f0
- c.ult.s $fcc7,$f24,$f10
- c.un.d $fcc6,$f23,$f24
- c.un.ps $fcc4,$f2,$f26
- c.un.s $fcc1,$f30,$f4
- cvt.ps.s $f3,$f18,$f19
- cvt.s.pl $f30,$f1
- cvt.s.pu $f14,$f25
- madd.d $f18,$f19,$f26,$f20
- madd.ps $f22,$f3,$f14,$f3
- madd.s $f1,$f31,$f19,$f25
- mov.ps $f22,$f17
- movf.ps $f10,$f28,$fcc6
- movn.ps $f31,$f31,$s3
- movt.ps $f20,$f25,$fcc2
- movz.ps $f18,$f17,$ra
- msub.d $f10,$f1,$f31,$f18
- msub.ps $f12,$f14,$f29,$f17
- msub.s $f12,$f19,$f10,$f16
- mul.ps $f14,$f0,$f16
- neg.ps $f19,$f13
- nmadd.d $f18,$f9,$f14,$f19
- nmadd.ps $f27,$f4,$f9,$f25
- nmadd.s $f0,$f5,$f25,$f12
- nmsub.d $f30,$f8,$f16,$f30
- nmsub.ps $f6,$f12,$f14,$f17
- nmsub.s $f1,$f24,$f19,$f4
- pll.ps $f25,$f9,$f30
- plu.ps $f1,$f26,$f29
- pul.ps $f9,$f30,$f26
- puu.ps $f24,$f9,$f2
- recip.d $f19,$f6
- recip.s $f3,$f30
- rsqrt.d $f3,$f28
- rsqrt.s $f4,$f8
- sub.ps $f5,$f14,$f26
- tlbp
- tlbr
- tlbwi
- tlbwr
+ abs.ps $f22,$f8
+ add.ps $f25,$f27,$f13
+ alnv.ps $f12,$f18,$f30,$12
+ c.eq.d $fcc1,$f15,$f15
+ c.eq.ps $fcc5,$f0,$f9
+ c.eq.s $fcc5,$f24,$f17
+ c.f.d $fcc4,$f11,$f21
+ c.f.ps $fcc6,$f11,$f11
+ c.f.s $fcc4,$f30,$f7
+ c.le.d $fcc4,$f18,$f1
+ c.le.ps $fcc1,$f7,$f20
+ c.le.s $fcc6,$f24,$f4
+ c.lt.d $fcc3,$f9,$f3
+ c.lt.ps $f19,$f5
+ c.lt.s $fcc2,$f17,$f14
+ c.nge.d $fcc5,$f21,$f16
+ c.nge.ps $f1,$f26
+ c.nge.s $fcc3,$f11,$f8
+ c.ngl.ps $f21,$f30
+ c.ngl.s $fcc2,$f31,$f23
+ c.ngle.ps $fcc7,$f12,$f20
+ c.ngle.s $fcc2,$f18,$f23
+ c.ngt.d $fcc4,$f24,$f7
+ c.ngt.ps $fcc5,$f30,$f6
+ c.ngt.s $fcc5,$f8,$f13
+ c.ole.d $fcc2,$f16,$f31
+ c.ole.ps $fcc7,$f21,$f8
+ c.ole.s $fcc3,$f7,$f20
+ c.olt.d $fcc4,$f19,$f28
+ c.olt.ps $fcc3,$f7,$f16
+ c.olt.s $fcc6,$f20,$f7
+ c.seq.d $fcc4,$f31,$f7
+ c.seq.ps $fcc6,$f31,$f14
+ c.seq.s $fcc7,$f1,$f25
+ c.sf.ps $fcc6,$f4,$f6
+ c.ueq.d $fcc4,$f13,$f25
+ c.ueq.ps $fcc1,$f5,$f29
+ c.ueq.s $fcc6,$f3,$f30
+ c.ule.d $fcc7,$f25,$f18
+ c.ule.ps $fcc6,$f17,$f3
+ c.ule.s $fcc7,$f21,$f30
+ c.ult.d $fcc6,$f6,$f17
+ c.ult.ps $fcc7,$f14,$f0
+ c.ult.s $fcc7,$f24,$f10
+ c.un.d $fcc6,$f23,$f24
+ c.un.ps $fcc4,$f2,$f26
+ c.un.s $fcc1,$f30,$f4
+ cvt.ps.s $f3,$f18,$f19
+ cvt.s.pl $f30,$f1
+ cvt.s.pu $f14,$f25
+ madd.d $f18,$f19,$f26,$f20
+ madd.ps $f22,$f3,$f14,$f3
+ madd.s $f1,$f31,$f19,$f25
+ mov.ps $f22,$f17
+ movf.ps $f10,$f28,$fcc6
+ movn.ps $f31,$f31,$s3
+ movt.ps $f20,$f25,$fcc2
+ movz.ps $f18,$f17,$ra
+ msub.d $f10,$f1,$f31,$f18
+ msub.ps $f12,$f14,$f29,$f17
+ msub.s $f12,$f19,$f10,$f16
+ mul.ps $f14,$f0,$f16
+ neg.ps $f19,$f13
+ nmadd.d $f18,$f9,$f14,$f19
+ nmadd.ps $f27,$f4,$f9,$f25
+ nmadd.s $f0,$f5,$f25,$f12
+ nmsub.d $f30,$f8,$f16,$f30
+ nmsub.ps $f6,$f12,$f14,$f17
+ nmsub.s $f1,$f24,$f19,$f4
+ pll.ps $f25,$f9,$f30
+ plu.ps $f1,$f26,$f29
+ pul.ps $f9,$f30,$f26
+ puu.ps $f24,$f9,$f2
+ recip.d $f19,$f6
+ recip.s $f3,$f30
+ rsqrt.d $f3,$f28
+ rsqrt.s $f4,$f8
+ sub.ps $f5,$f14,$f26
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index ebe2f70..19aad05 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -1,163 +1,196 @@
# Instructions that are valid
#
-# FIXME: Test MIPS-V instead of MIPS64
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | FileCheck %s
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips5 | FileCheck %s
.set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.l.d $f1,$f3
- ceil.l.s $f18,$f13
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- ctc1 $a2,$26
- cvt.d.l $f4,$f16
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- cvt.s.d $f26,$f8
- cvt.s.l $f15,$f30
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- dadd $s3,$at,$ra
- daddi $sp,$s4,-27705
- daddiu $k0,$s6,-4586
- ddiv $zero,$k0,$s3
- ddivu $zero,$s0,$s1
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- dmfc1 $t4,$f13
- dmtc1 $s0,$f14
- dmult $s7,$t1
- dmultu $a1,$a2
- dsllv $zero,$s4,$t4
- dsrav $gp,$s2,$s3
- dsrlv $s3,$t6,$s4
- dsub $a3,$s6,$t0
- dsubu $a1,$a1,$k0
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- eret
- floor.l.d $f26,$f7
- floor.l.s $f12,$f5
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ld $sp,-28645($s1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- ldl $t8,-4167($t8)
- ldr $t6,-30358($s4)
- ldxc1 $f8,$s7($t7)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lld $zero,-14736($ra)
- luxc1 $f19,$s6($s5)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- lwu $s3,-24086($v1)
- lwxc1 $f12,$s1($s8)
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $t9,$a2
- movf $gp,$t0,$fcc7
- movf.d $f6,$f11,$fcc5
- movf.s $f23,$f5,$fcc6
- movn $v1,$s1,$s0
- movn.d $f27,$f21,$k0
- movn.s $f12,$f0,$s7
- movt $zero,$s4,$fcc5
- movt.d $f0,$f2,$fcc0
- movt.s $f30,$f2,$fcc1
- movz $a1,$s6,$t1
- movz.d $f12,$f29,$t1
- movz.s $f25,$f7,$v1
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- round.l.d $f12,$f1
- round.l.s $f25,$f5
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- scd $t7,-8243($sp)
- sd $t4,5835($t2)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sdl $a3,-20961($s8)
- sdr $t3,-20423($t4)
- sdxc1 $f11,$t2($t6)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- suxc1 $f12,$k1($t5)
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- swxc1 $f19,$t4($k0)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.l.d $f23,$f23
- trunc.l.s $f28,$f31
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.l.d $f1,$f3
+ ceil.l.s $f18,$f13
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ ctc1 $a2,$26
+ cvt.d.l $f4,$f16
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.l.d $f24,$f15
+ cvt.l.s $f11,$f29
+ cvt.s.d $f26,$f8
+ cvt.s.l $f15,$f30
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ dadd $s3,$at,$ra
+ daddi $sp,$s4,-27705
+ daddiu $k0,$s6,-4586
+ daddu $s3,$at,$ra
+ ddiv $zero,$k0,$s3
+ ddivu $zero,$s0,$s1
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ dmfc1 $12,$f13
+ dmtc1 $s0,$f14
+ dmult $s7,$9
+ dmultu $a1,$a2
+ dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
+ dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
+ dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsra $gp,10 # CHECK: dsra $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbb]
+ dsra $gp,$s2,10 # CHECK: dsra $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbb]
+ dsra $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsra32 $gp,10 # CHECK: dsra32 $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbf]
+ dsra32 $gp,$s2,10 # CHECK: dsra32 $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbf]
+ dsrav $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
+ dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
+ dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
+ dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
+ dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ eret
+ floor.l.d $f26,$f7
+ floor.l.s $f12,$f5
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ld $sp,-28645($s1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldl $24,-4167($24)
+ ldr $14,-30358($s4)
+ ldxc1 $f8,$s7($15)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lld $zero,-14736($ra)
+ luxc1 $f19,$s6($s5)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ lwu $s3,-24086($v1)
+ lwxc1 $f12,$s1($s8)
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $a0,$a3
+ move $s5,$a0
+ move $s8,$a0
+ move $25,$a2
+ movf $gp,$8,$fcc7
+ movf.d $f6,$f11,$fcc5
+ movf.s $f23,$f5,$fcc6
+ movn $v1,$s1,$s0
+ movn.d $f27,$f21,$k0
+ movn.s $f12,$f0,$s7
+ movt $zero,$s4,$fcc5
+ movt.d $f0,$f2,$fcc0
+ movt.s $f30,$f2,$fcc1
+ movz $a1,$s6,$9
+ movz.d $f12,$f29,$9
+ movz.s $f25,$f7,$v1
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ round.l.d $f12,$f1
+ round.l.s $f25,$f5
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ scd $15,-8243($sp)
+ sd $12,5835($10)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdl $a3,-20961($s8)
+ sdr $11,-20423($12)
+ sdxc1 $f11,$10($14)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ suxc1 $f12,$k1($13)
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ swxc1 $f19,$12($k0)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.l.d $f23,$f23
+ trunc.l.s $f28,$f31
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
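Two strengths of CHECK are mixed in this file: the bare "# CHECK: encoding:" on the first instruction (abs.d) only asserts that llvm-mc produced some encoding, while the fully spelled-out checks (the shift aliases, negu, ssnop, ehb, the tlb* ops) pin exact output text and bytes. Under -show-encoding, llvm-mc echoes each assembled instruction followed by an encoding comment; for the ssnop line above the emitted text is:

    ssnop                          # encoding: [0x00,0x00,0x00,0x40]

so any line that assembles successfully will satisfy a CHECK that names only "encoding:".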
diff --git a/test/MC/Mips/mips64/invalid-mips64r2-xfail.s b/test/MC/Mips/mips64/invalid-mips64r2-xfail.s
index 4baf26b..b2b612d 100644
--- a/test/MC/Mips/mips64/invalid-mips64r2-xfail.s
+++ b/test/MC/Mips/mips64/invalid-mips64r2-xfail.s
@@ -8,8 +8,4 @@
# CHECK-NOT: error
.set noat
- di $s8
- ei $t6
- mfhc1 $s8,$f24
- mthc1 $zero,$f16
rdhwr $sp,$11
diff --git a/test/MC/Mips/mips64/invalid-mips64r2.s b/test/MC/Mips/mips64/invalid-mips64r2.s
index 41aa8ae..1a5abb6 100644
--- a/test/MC/Mips/mips64/invalid-mips64r2.s
+++ b/test/MC/Mips/mips64/invalid-mips64r2.s
@@ -5,13 +5,25 @@
# RUN: FileCheck %s < %t1
.set noat
- dsbh $v1,$t6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- dshd $v0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- madd.s $f1,$f31,$f19,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- msub.s $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- nmadd.s $f0,$f5,$f25,$f12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- nmsub.s $f1,$f24,$f19,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- pause # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- seb $t9,$t7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- seh $v1,$t4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- wsbh $k1,$t1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ di $s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr32 $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotr32 $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drotrv $1,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dsbh $v1,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dshd $v0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ei $14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd.s $f1,$f31,$f19,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhc1 $s8,$f24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ msub.s $f12,$f19,$f10,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthc1 $zero,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmadd.s $f0,$f5,$f25,$f12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nmsub.s $f1,$f24,$f19,$f4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pause # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotr $1,$14,15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ rotrv $1,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ seb $25,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ seh $v1,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ wsbh $k1,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64/valid-xfail.s b/test/MC/Mips/mips64/valid-xfail.s
index 61bf060..e5455f5 100644
--- a/test/MC/Mips/mips64/valid-xfail.s
+++ b/test/MC/Mips/mips64/valid-xfail.s
@@ -6,93 +6,89 @@
# CHECK-NOT: encoding
# XFAIL: *
- .set noat
- abs.ps $f22,$f8
- add.ps $f25,$f27,$f13
- alnv.ob $v22,$v19,$v30,$v1
- alnv.ob $v31,$v23,$v30,$at
- alnv.ob $v8,$v17,$v30,$a1
- alnv.ps $f12,$f18,$f30,$t4
- c.eq.d $fcc1,$f15,$f15
- c.eq.ps $fcc5,$f0,$f9
- c.eq.s $fcc5,$f24,$f17
- c.f.d $fcc4,$f11,$f21
- c.f.ps $fcc6,$f11,$f11
- c.f.s $fcc4,$f30,$f7
- c.le.d $fcc4,$f18,$f1
- c.le.ps $fcc1,$f7,$f20
- c.le.s $fcc6,$f24,$f4
- c.lt.d $fcc3,$f9,$f3
- c.lt.ps $f19,$f5
- c.lt.s $fcc2,$f17,$f14
- c.nge.d $fcc5,$f21,$f16
- c.nge.ps $f1,$f26
- c.nge.s $fcc3,$f11,$f8
- c.ngl.ps $f21,$f30
- c.ngl.s $fcc2,$f31,$f23
- c.ngle.ps $fcc7,$f12,$f20
- c.ngle.s $fcc2,$f18,$f23
- c.ngt.d $fcc4,$f24,$f7
- c.ngt.ps $fcc5,$f30,$f6
- c.ngt.s $fcc5,$f8,$f13
- c.ole.d $fcc2,$f16,$f31
- c.ole.ps $fcc7,$f21,$f8
- c.ole.s $fcc3,$f7,$f20
- c.olt.d $fcc4,$f19,$f28
- c.olt.ps $fcc3,$f7,$f16
- c.olt.s $fcc6,$f20,$f7
- c.seq.d $fcc4,$f31,$f7
- c.seq.ps $fcc6,$f31,$f14
- c.seq.s $fcc7,$f1,$f25
- c.sf.ps $fcc6,$f4,$f6
- c.ueq.d $fcc4,$f13,$f25
- c.ueq.ps $fcc1,$f5,$f29
- c.ueq.s $fcc6,$f3,$f30
- c.ule.d $fcc7,$f25,$f18
- c.ule.ps $fcc6,$f17,$f3
- c.ule.s $fcc7,$f21,$f30
- c.ult.d $fcc6,$f6,$f17
- c.ult.ps $fcc7,$f14,$f0
- c.ult.s $fcc7,$f24,$f10
- c.un.d $fcc6,$f23,$f24
- c.un.ps $fcc4,$f2,$f26
- c.un.s $fcc1,$f30,$f4
- cvt.ps.s $f3,$f18,$f19
- cvt.s.pl $f30,$f1
- cvt.s.pu $f14,$f25
- dmfc0 $t2,c0_watchhi,2
- dmtc0 $t7,c0_datalo
- madd.d $f18,$f19,$f26,$f20
- madd.ps $f22,$f3,$f14,$f3
- madd.s $f1,$f31,$f19,$f25
- mov.ps $f22,$f17
- movf.ps $f10,$f28,$fcc6
- movn.ps $f31,$f31,$s3
- movt.ps $f20,$f25,$fcc2
- movz.ps $f18,$f17,$ra
- msgn.qh $v0,$v24,$v20
- msgn.qh $v12,$v21,$v0[1]
- msub.d $f10,$f1,$f31,$f18
- msub.ps $f12,$f14,$f29,$f17
- msub.s $f12,$f19,$f10,$f16
- mul.ps $f14,$f0,$f16
- neg.ps $f19,$f13
- nmadd.d $f18,$f9,$f14,$f19
- nmadd.ps $f27,$f4,$f9,$f25
- nmadd.s $f0,$f5,$f25,$f12
- nmsub.d $f30,$f8,$f16,$f30
- nmsub.ps $f6,$f12,$f14,$f17
- nmsub.s $f1,$f24,$f19,$f4
- pll.ps $f25,$f9,$f30
- plu.ps $f1,$f26,$f29
- pul.ps $f9,$f30,$f26
- puu.ps $f24,$f9,$f2
- recip.d $f19,$f6
- recip.s $f3,$f30
- rsqrt.d $f3,$f28
- rsqrt.s $f4,$f8
- sub.ps $f5,$f14,$f26
- tlbp
- tlbr
- tlbwi
- tlbwr
+ .set noat
+ abs.ps $f22,$f8
+ add.ps $f25,$f27,$f13
+ alnv.ob $v22,$v19,$v30,$v1
+ alnv.ob $v31,$v23,$v30,$at
+ alnv.ob $v8,$v17,$v30,$a1
+ alnv.ps $f12,$f18,$f30,$12
+ c.eq.d $fcc1,$f15,$f15
+ c.eq.ps $fcc5,$f0,$f9
+ c.eq.s $fcc5,$f24,$f17
+ c.f.d $fcc4,$f11,$f21
+ c.f.ps $fcc6,$f11,$f11
+ c.f.s $fcc4,$f30,$f7
+ c.le.d $fcc4,$f18,$f1
+ c.le.ps $fcc1,$f7,$f20
+ c.le.s $fcc6,$f24,$f4
+ c.lt.d $fcc3,$f9,$f3
+ c.lt.ps $f19,$f5
+ c.lt.s $fcc2,$f17,$f14
+ c.nge.d $fcc5,$f21,$f16
+ c.nge.ps $f1,$f26
+ c.nge.s $fcc3,$f11,$f8
+ c.ngl.ps $f21,$f30
+ c.ngl.s $fcc2,$f31,$f23
+ c.ngle.ps $fcc7,$f12,$f20
+ c.ngle.s $fcc2,$f18,$f23
+ c.ngt.d $fcc4,$f24,$f7
+ c.ngt.ps $fcc5,$f30,$f6
+ c.ngt.s $fcc5,$f8,$f13
+ c.ole.d $fcc2,$f16,$f31
+ c.ole.ps $fcc7,$f21,$f8
+ c.ole.s $fcc3,$f7,$f20
+ c.olt.d $fcc4,$f19,$f28
+ c.olt.ps $fcc3,$f7,$f16
+ c.olt.s $fcc6,$f20,$f7
+ c.seq.d $fcc4,$f31,$f7
+ c.seq.ps $fcc6,$f31,$f14
+ c.seq.s $fcc7,$f1,$f25
+ c.sf.ps $fcc6,$f4,$f6
+ c.ueq.d $fcc4,$f13,$f25
+ c.ueq.ps $fcc1,$f5,$f29
+ c.ueq.s $fcc6,$f3,$f30
+ c.ule.d $fcc7,$f25,$f18
+ c.ule.ps $fcc6,$f17,$f3
+ c.ule.s $fcc7,$f21,$f30
+ c.ult.d $fcc6,$f6,$f17
+ c.ult.ps $fcc7,$f14,$f0
+ c.ult.s $fcc7,$f24,$f10
+ c.un.d $fcc6,$f23,$f24
+ c.un.ps $fcc4,$f2,$f26
+ c.un.s $fcc1,$f30,$f4
+ cvt.ps.s $f3,$f18,$f19
+ cvt.s.pl $f30,$f1
+ cvt.s.pu $f14,$f25
+ dmfc0 $10,c0_watchhi,2
+ dmtc0 $15,c0_datalo
+ madd.d $f18,$f19,$f26,$f20
+ madd.ps $f22,$f3,$f14,$f3
+ madd.s $f1,$f31,$f19,$f25
+ mov.ps $f22,$f17
+ movf.ps $f10,$f28,$fcc6
+ movn.ps $f31,$f31,$s3
+ movt.ps $f20,$f25,$fcc2
+ movz.ps $f18,$f17,$ra
+ msgn.qh $v0,$v24,$v20
+ msgn.qh $v12,$v21,$v0[1]
+ msub.d $f10,$f1,$f31,$f18
+ msub.ps $f12,$f14,$f29,$f17
+ msub.s $f12,$f19,$f10,$f16
+ mul.ps $f14,$f0,$f16
+ neg.ps $f19,$f13
+ nmadd.d $f18,$f9,$f14,$f19
+ nmadd.ps $f27,$f4,$f9,$f25
+ nmadd.s $f0,$f5,$f25,$f12
+ nmsub.d $f30,$f8,$f16,$f30
+ nmsub.ps $f6,$f12,$f14,$f17
+ nmsub.s $f1,$f24,$f19,$f4
+ pll.ps $f25,$f9,$f30
+ plu.ps $f1,$f26,$f29
+ pul.ps $f9,$f30,$f26
+ puu.ps $f24,$f9,$f2
+ recip.d $f19,$f6
+ recip.s $f3,$f30
+ rsqrt.d $f3,$f28
+ rsqrt.s $f4,$f8
+ sub.ps $f5,$f14,$f26
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index 9ccb2ff..b9e1002 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -3,174 +3,208 @@
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64 | FileCheck %s
.set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.l.d $f1,$f3
- ceil.l.s $f18,$f13
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- clo $t3,$a1
- clz $sp,$gp
- ctc1 $a2,$26
- cvt.d.l $f4,$f16
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- cvt.s.d $f26,$f8
- cvt.s.l $f15,$f30
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- dadd $s3,$at,$ra
- daddi $sp,$s4,-27705
- daddiu $k0,$s6,-4586
- dclo $s2,$a2
- dclz $s0,$t9
- deret
- ddiv $zero,$k0,$s3
- ddivu $zero,$s0,$s1
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- dmfc1 $t4,$f13
- dmtc1 $s0,$f14
- dmult $s7,$t1
- dmultu $a1,$a2
- dsllv $zero,$s4,$t4
- dsrav $gp,$s2,$s3
- dsrlv $s3,$t6,$s4
- dsub $a3,$s6,$t0
- dsubu $a1,$a1,$k0
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- eret
- floor.l.d $f26,$f7
- floor.l.s $f12,$f5
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ld $sp,-28645($s1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- ldl $t8,-4167($t8)
- ldr $t6,-30358($s4)
- ldxc1 $f8,$s7($t7)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lld $zero,-14736($ra)
- luxc1 $f19,$s6($s5)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- lwu $s3,-24086($v1)
- lwxc1 $f12,$s1($s8)
- madd $s6,$t5
- madd $zero,$t1
- maddu $s3,$gp
- maddu $t8,$s2
- mfc0 $a2,$14,1
- mfc1 $a3,$f27
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $t9,$a2
- movf $gp,$t0,$fcc7
- movf.d $f6,$f11,$fcc5
- movf.s $f23,$f5,$fcc6
- movn $v1,$s1,$s0
- movn.d $f27,$f21,$k0
- movn.s $f12,$f0,$s7
- movt $zero,$s4,$fcc5
- movt.d $f0,$f2,$fcc0
- movt.s $f30,$f2,$fcc1
- movz $a1,$s6,$t1
- movz.d $f12,$f29,$t1
- movz.s $f25,$f7,$v1
- msub $s7,$k1
- msubu $t7,$a1
- mtc0 $t1,$29,3
- mtc1 $s8,$f9
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul $s0,$s4,$at
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- round.l.d $f12,$f1
- round.l.s $f25,$f5
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- scd $t7,-8243($sp)
- sd $t4,5835($t2)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sdl $a3,-20961($s8)
- sdr $t3,-20423($t4)
- sdxc1 $f11,$t2($t6)
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- suxc1 $f12,$k1($t5)
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- swxc1 $f19,$t4($k0)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.l.d $f23,$f23
- trunc.l.s $f28,$f31
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.l.d $f1,$f3
+ ceil.l.s $f18,$f13
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ clo $11,$a1
+ clz $sp,$gp
+ ctc1 $a2,$26
+ cvt.d.l $f4,$f16
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.l.d $f24,$f15
+ cvt.l.s $f11,$f29
+ cvt.s.d $f26,$f8
+ cvt.s.l $f15,$f30
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ dadd $s3,$at,$ra
+ daddi $sp,$s4,-27705
+ daddiu $k0,$s6,-4586
+ daddu $s3,$at,$ra
+ dclo $s2,$a2
+ dclz $s0,$25
+ deret
+ ddiv $zero,$k0,$s3
+ ddivu $zero,$s0,$s1
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ dmfc1 $12,$f13
+ dmtc1 $s0,$f14
+ dmult $s7,$9
+ dmultu $a1,$a2
+ dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
+ dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
+ dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsra $gp,10 # CHECK: dsra $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbb]
+ dsra $gp,$s2,10 # CHECK: dsra $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbb]
+ dsra $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsra32 $gp,10 # CHECK: dsra32 $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbf]
+ dsra32 $gp,$s2,10 # CHECK: dsra32 $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbf]
+ dsrav $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
+ dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
+ dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
+ dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
+ dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ eret
+ floor.l.d $f26,$f7
+ floor.l.s $f12,$f5
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ld $sp,-28645($s1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldl $24,-4167($24)
+ ldr $14,-30358($s4)
+ ldxc1 $f8,$s7($15)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lld $zero,-14736($ra)
+ luxc1 $f19,$s6($s5)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ lwu $s3,-24086($v1)
+ lwxc1 $f12,$s1($s8)
+ madd $s6,$13
+ madd $zero,$9
+ maddu $s3,$gp
+ maddu $24,$s2
+ mfc0 $a2,$14,1
+ mfc1 $a3,$f27
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $a0,$a3
+ move $s5,$a0
+ move $s8,$a0
+ move $25,$a2
+ movf $gp,$8,$fcc7
+ movf.d $f6,$f11,$fcc5
+ movf.s $f23,$f5,$fcc6
+ movn $v1,$s1,$s0
+ movn.d $f27,$f21,$k0
+ movn.s $f12,$f0,$s7
+ movt $zero,$s4,$fcc5
+ movt.d $f0,$f2,$fcc0
+ movt.s $f30,$f2,$fcc1
+ movz $a1,$s6,$9
+ movz.d $f12,$f29,$9
+ movz.s $f25,$f7,$v1
+ msub $s7,$k1
+ msubu $15,$a1
+ mtc0 $9,$29,3
+ mtc1 $s8,$f9
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul $s0,$s4,$at
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ round.l.d $f12,$f1
+ round.l.s $f25,$f5
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ scd $15,-8243($sp)
+ sd $12,5835($10)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdl $a3,-20961($s8)
+ sdr $11,-20423($12)
+ sdxc1 $f11,$10($14)
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ suxc1 $f12,$k1($13)
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ swxc1 $f19,$12($k0)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.l.d $f23,$f23
+ trunc.l.s $f28,$f31
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
diff --git a/test/MC/Mips/mips64r2/valid-xfail.s b/test/MC/Mips/mips64r2/valid-xfail.s
index 9d9d6cd..9ac47f6 100644
--- a/test/MC/Mips/mips64r2/valid-xfail.s
+++ b/test/MC/Mips/mips64r2/valid-xfail.s
@@ -5,312 +5,307 @@
# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 | not FileCheck %s
# CHECK-NOT: encoding
# XFAIL: *
-# REQUIRES: asserts
- .set noat
- abs.ps $f22,$f8
- absq_s.ph $t0,$a0
- absq_s.qb $t7,$s1
- absq_s.w $s3,$ra
- add.ps $f25,$f27,$f13
- addq.ph $s1,$t7,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$t0,$at
- addqh.ph $s4,$t6,$s1
- addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$t9,$s8
- addqh_r.w $t0,$v1,$zero
- addsc $s8,$t7,$t4
- addu.ph $a2,$t6,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$t1,$t4
- addwc $k0,$s6,$s7
- alnv.ob $v22,$v19,$v30,$v1
- alnv.ob $v31,$v23,$v30,$at
- alnv.ob $v8,$v17,$v30,$a1
- alnv.ps $f12,$f18,$f30,$t4
- and.v $w10,$w25,$w29
- bitrev $t6,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
- c.eq.d $fcc1,$f15,$f15
- c.eq.ps $fcc5,$f0,$f9
- c.eq.s $fcc5,$f24,$f17
- c.f.d $fcc4,$f11,$f21
- c.f.ps $fcc6,$f11,$f11
- c.f.s $fcc4,$f30,$f7
- c.le.d $fcc4,$f18,$f1
- c.le.ps $fcc1,$f7,$f20
- c.le.s $fcc6,$f24,$f4
- c.lt.d $fcc3,$f9,$f3
- c.lt.ps $f19,$f5
- c.lt.s $fcc2,$f17,$f14
- c.nge.d $fcc5,$f21,$f16
- c.nge.ps $f1,$f26
- c.nge.s $fcc3,$f11,$f8
- c.ngl.ps $f21,$f30
- c.ngl.s $fcc2,$f31,$f23
- c.ngle.ps $fcc7,$f12,$f20
- c.ngle.s $fcc2,$f18,$f23
- c.ngt.d $fcc4,$f24,$f7
- c.ngt.ps $fcc5,$f30,$f6
- c.ngt.s $fcc5,$f8,$f13
- c.ole.d $fcc2,$f16,$f31
- c.ole.ps $fcc7,$f21,$f8
- c.ole.s $fcc3,$f7,$f20
- c.olt.d $fcc4,$f19,$f28
- c.olt.ps $fcc3,$f7,$f16
- c.olt.s $fcc6,$f20,$f7
- c.seq.d $fcc4,$f31,$f7
- c.seq.ps $fcc6,$f31,$f14
- c.seq.s $fcc7,$f1,$f25
- c.sf.ps $fcc6,$f4,$f6
- c.ueq.d $fcc4,$f13,$f25
- c.ueq.ps $fcc1,$f5,$f29
- c.ueq.s $fcc6,$f3,$f30
- c.ule.d $fcc7,$f25,$f18
- c.ule.ps $fcc6,$f17,$f3
- c.ule.s $fcc7,$f21,$f30
- c.ult.d $fcc6,$f6,$f17
- c.ult.ps $fcc7,$f14,$f0
- c.ult.s $fcc7,$f24,$f10
- c.un.d $fcc6,$f23,$f24
- c.un.ps $fcc4,$f2,$f26
- c.un.s $fcc1,$f30,$f4
- cvt.ps.s $f3,$f18,$f19
- cmp.eq.ph $s7,$t6
- cmp.le.ph $t0,$t6
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$t7,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $t6,$s6,$s8
- cmpgu.le.qb $t1,$a3,$s4
- cmpgu.lt.qb $sp,$at,$t0
- cmpu.eq.qb $v0,$t8
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
- cvt.s.pl $f30,$f1
- cvt.s.pu $f14,$f25
- dmfc0 $t2,c0_watchhi,2
- dmfgc0 $gp,c0_perfcnt,6
- dmt $k0
- dmtc0 $t7,c0_datalo
- dmtgc0 $a2,c0_watchlo,2
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$t5
- dpaq_sa.l.w $ac0,$a2,$t6
- dpaqx_s.w.ph $ac3,$a0,$t8
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$t2,$t8
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$t7
- dpsqx_s.w.ph $ac3,$t5,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$t6,$t2
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
- drorv $at,$a1,$s7
- dvpe $s6
- emt $t0
- evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $t5,$ac0,$t6
- extrv.w $t0,$ac3,$at
- extrv_r.w $t0,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$t6
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.d $w28,$t0
- fill.h $w9,$t0
- fill.w $w31,$t7
- flog2.d $w12,$w16
- flog2.w $w19,$w23
- fork $s2,$t0,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
- iret
- lbe $t6,122($t1)
- lbue $t3,-108($t2)
- lbux $t1,$t6($v0)
- lhe $s6,219($v1)
- lhue $gp,118($t3)
- lhx $sp,$k0($t7)
- lle $gp,-237($ra)
- lwe $ra,-145($t6)
- lwle $t3,-42($t3)
- lwre $sp,-152($t8)
- lwx $t4,$t4($s4)
- madd.d $f18,$f19,$f26,$f20
- madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$t9,$t3
- maq_s.w.phr $ac0,$t2,$t9
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$t2
- mfgc0 $s6,c0_datahi1
- mflo $t1,$ac2
- modsub $a3,$t4,$a3
- mov.ps $f22,$f17
- movf.ps $f10,$f28,$fcc6
- movn.ps $f31,$f31,$s3
- movt.ps $f20,$f25,$fcc2
- movz.ps $f18,$f17,$ra
- msgn.qh $v0,$v24,$v20
- msgn.qh $v12,$v21,$v0[1]
- msub $ac2,$sp,$t6
- msub.d $f10,$f1,$f31,$f18
- msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$t8
- mtc0 $t1,c0_datahi1
- mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$t8,$s0
- mul.ps $f14,$f0,$f16
- mul_s.ph $t2,$t6,$t7
- muleq_s.w.phl $t3,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$t6,$t0
- muleu_s.ph.qbr $a1,$ra,$t1
- mulq_rs.ph $s2,$t6,$t7
- mulq_rs.w $at,$s4,$t9
- mulq_s.ph $s0,$k1,$t7
- mulq_s.w $t1,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
- neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
- nmadd.d $f18,$f9,$f14,$f19
- nmadd.ps $f27,$f4,$f9,$f25
- nmsub.d $f30,$f8,$f16,$f30
- nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$t8,$t6
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $t3,$a0,$gp
- pll.ps $f25,$f9,$f30
- plu.ps $f1,$f26,$f29
- preceq.w.phl $s8,$gp
- preceq.w.phr $s5,$t7
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$t1
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $t8,$t0
- preceu.ph.qbl $sp,$t0
- preceu.ph.qbla $s6,$t3
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$t4,$s8
- precrq.ph.w $t6,$s8,$t8
- precrq.qb.ph $a2,$t4,$t4
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
- pul.ps $f9,$f30,$f26
- puu.ps $f24,$f9,$f2
- raddu.w.qb $t9,$s3
- rdpgpr $s3,$t1
- recip.d $f19,$f6
- recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $t9,$t4
- rorv $t5,$a3,$s5
- rsqrt.d $f3,$f28
- rsqrt.s $f4,$f8
- sbe $s7,33($s1)
- sce $sp,189($t2)
- she $t8,105($v0)
- shilo $ac1,26
- shilov $ac2,$t2
- shllv.ph $t2,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$t5
- shllv_s.w $s1,$ra,$k0
- shrav.ph $t9,$s2,$s1
- shrav.qb $zero,$t8,$t3
- shrav_r.ph $s3,$t3,$t9
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $t6,$t2,$t1
- shrlv.qb $a2,$s2,$t3
- sub.ps $f5,$f14,$f26
- subq.ph $ra,$t1,$s8
- subq_s.ph $t5,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $t2,$at,$t1
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$t4,$s6
- subqh_r.w $t2,$a2,$gp
- subu.ph $t1,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
- swe $t8,94($k0)
- swle $v1,-209($gp)
- swre $k0,-202($s2)
- synci 20023($s0)
- tlbginv
- tlbginvf
- tlbgp
- tlbgr
- tlbgwi
- tlbgwr
- tlbinv
- tlbinvf
- tlbp
- tlbr
- tlbwi
- tlbwr
- wrpgpr $zero,$t5
- xor.v $w20,$w21,$w30
- yield $v1,$s0
+ .set noat
+ abs.ps $f22,$f8
+ absq_s.ph $8,$a0
+ absq_s.qb $15,$s1
+ absq_s.w $s3,$ra
+ add.ps $f25,$f27,$f13
+ addq.ph $s1,$15,$at
+ addq_s.ph $s3,$s6,$s2
+ addq_s.w $a2,$8,$at
+ addqh.ph $s4,$14,$s1
+ addqh.w $s7,$s7,$k1
+ addqh_r.ph $sp,$25,$s8
+ addqh_r.w $8,$v1,$zero
+ addsc $s8,$15,$12
+ addu.ph $a2,$14,$s3
+ addu.qb $s6,$v1,$v1
+ addu_s.ph $a3,$s3,$gp
+ addu_s.qb $s4,$s8,$s1
+ adduh.qb $a1,$a1,$at
+ adduh_r.qb $a0,$9,$12
+ addwc $k0,$s6,$s7
+ alnv.ob $v22,$v19,$v30,$v1
+ alnv.ob $v31,$v23,$v30,$at
+ alnv.ob $v8,$v17,$v30,$a1
+ alnv.ps $f12,$f18,$f30,$12
+ and.v $w10,$w25,$w29
+ bitrev $14,$at
+ bmnz.v $w15,$w2,$w28
+ bmz.v $w13,$w11,$w21
+ bsel.v $w28,$w7,$w0
+ c.eq.d $fcc1,$f15,$f15
+ c.eq.ps $fcc5,$f0,$f9
+ c.eq.s $fcc5,$f24,$f17
+ c.f.d $fcc4,$f11,$f21
+ c.f.ps $fcc6,$f11,$f11
+ c.f.s $fcc4,$f30,$f7
+ c.le.d $fcc4,$f18,$f1
+ c.le.ps $fcc1,$f7,$f20
+ c.le.s $fcc6,$f24,$f4
+ c.lt.d $fcc3,$f9,$f3
+ c.lt.ps $f19,$f5
+ c.lt.s $fcc2,$f17,$f14
+ c.nge.d $fcc5,$f21,$f16
+ c.nge.ps $f1,$f26
+ c.nge.s $fcc3,$f11,$f8
+ c.ngl.ps $f21,$f30
+ c.ngl.s $fcc2,$f31,$f23
+ c.ngle.ps $fcc7,$f12,$f20
+ c.ngle.s $fcc2,$f18,$f23
+ c.ngt.d $fcc4,$f24,$f7
+ c.ngt.ps $fcc5,$f30,$f6
+ c.ngt.s $fcc5,$f8,$f13
+ c.ole.d $fcc2,$f16,$f31
+ c.ole.ps $fcc7,$f21,$f8
+ c.ole.s $fcc3,$f7,$f20
+ c.olt.d $fcc4,$f19,$f28
+ c.olt.ps $fcc3,$f7,$f16
+ c.olt.s $fcc6,$f20,$f7
+ c.seq.d $fcc4,$f31,$f7
+ c.seq.ps $fcc6,$f31,$f14
+ c.seq.s $fcc7,$f1,$f25
+ c.sf.ps $fcc6,$f4,$f6
+ c.ueq.d $fcc4,$f13,$f25
+ c.ueq.ps $fcc1,$f5,$f29
+ c.ueq.s $fcc6,$f3,$f30
+ c.ule.d $fcc7,$f25,$f18
+ c.ule.ps $fcc6,$f17,$f3
+ c.ule.s $fcc7,$f21,$f30
+ c.ult.d $fcc6,$f6,$f17
+ c.ult.ps $fcc7,$f14,$f0
+ c.ult.s $fcc7,$f24,$f10
+ c.un.d $fcc6,$f23,$f24
+ c.un.ps $fcc4,$f2,$f26
+ c.un.s $fcc1,$f30,$f4
+ cvt.ps.s $f3,$f18,$f19
+ cmp.eq.ph $s7,$14
+ cmp.le.ph $8,$14
+ cmp.lt.ph $k0,$sp
+ cmpgdu.eq.qb $s3,$zero,$k0
+ cmpgdu.le.qb $v1,$15,$s2
+ cmpgdu.lt.qb $s0,$gp,$sp
+ cmpgu.eq.qb $14,$s6,$s8
+ cmpgu.le.qb $9,$a3,$s4
+ cmpgu.lt.qb $sp,$at,$8
+ cmpu.eq.qb $v0,$24
+ cmpu.le.qb $s1,$a1
+ cmpu.lt.qb $at,$a3
+ cvt.s.pl $f30,$f1
+ cvt.s.pu $f14,$f25
+ dmfc0 $10,c0_watchhi,2
+ dmfgc0 $gp,c0_perfcnt,6
+ dmt $k0
+ dmtc0 $15,c0_datalo
+ dmtgc0 $a2,c0_watchlo,2
+ dpa.w.ph $ac1,$s7,$k0
+ dpaq_s.w.ph $ac2,$a0,$13
+ dpaq_sa.l.w $ac0,$a2,$14
+ dpaqx_s.w.ph $ac3,$a0,$24
+ dpaqx_sa.w.ph $ac1,$zero,$s5
+ dpau.h.qbl $ac1,$10,$24
+ dpau.h.qbr $ac1,$s7,$s6
+ dpax.w.ph $ac3,$a0,$k0
+ dps.w.ph $ac1,$a3,$a1
+ dpsq_s.w.ph $ac0,$gp,$k0
+ dpsq_sa.l.w $ac0,$a3,$15
+ dpsqx_s.w.ph $ac3,$13,$a3
+ dpsqx_sa.w.ph $ac3,$sp,$s2
+ dpsu.h.qbl $ac2,$14,$10
+ dpsu.h.qbr $ac2,$a1,$s6
+ dpsx.w.ph $ac0,$s7,$gp
+ drorv $at,$a1,$s7
+ dvpe $s6
+ emt $8
+ evpe $v0
+ extpdpv $s6,$ac0,$s8
+ extpv $13,$ac0,$14
+ extrv.w $8,$ac3,$at
+ extrv_r.w $8,$ac1,$s6
+ extrv_rs.w $gp,$ac1,$s6
+ extrv_s.h $s2,$ac1,$14
+ fclass.d $w14,$w27
+ fclass.w $w19,$w28
+ fexupl.d $w10,$w29
+ fexupl.w $w12,$w27
+ fexupr.d $w31,$w15
+ fexupr.w $w29,$w12
+ ffint_s.d $w1,$w30
+ ffint_s.w $w16,$w14
+ ffint_u.d $w23,$w18
+ ffint_u.w $w19,$w12
+ ffql.d $w2,$w3
+ ffql.w $w9,$w0
+ ffqr.d $w25,$w24
+ ffqr.w $w10,$w6
+ fill.b $w9,$v1
+ fill.d $w28,$8
+ fill.h $w9,$8
+ fill.w $w31,$15
+ flog2.d $w12,$w16
+ flog2.w $w19,$w23
+ fork $s2,$8,$a0
+ frcp.d $w12,$w4
+ frcp.w $w30,$w8
+ frint.d $w20,$w8
+ frint.w $w11,$w29
+ frsqrt.d $w29,$w2
+ frsqrt.w $w9,$w8
+ fsqrt.d $w3,$w1
+ fsqrt.w $w5,$w15
+ ftint_s.d $w31,$w26
+ ftint_s.w $w27,$w14
+ ftint_u.d $w5,$w31
+ ftint_u.w $w12,$w29
+ ftrunc_s.d $w4,$w22
+ ftrunc_s.w $w24,$w7
+ ftrunc_u.d $w20,$w25
+ ftrunc_u.w $w7,$w26
+ insv $s2,$at
+ iret
+ lbe $14,122($9)
+ lbue $11,-108($10)
+ lbux $9,$14($v0)
+ lhe $s6,219($v1)
+ lhue $gp,118($11)
+ lhx $sp,$k0($15)
+ lle $gp,-237($ra)
+ lwe $ra,-145($14)
+ lwle $11,-42($11)
+ lwre $sp,-152($24)
+ lwx $12,$12($s4)
+ madd.d $f18,$f19,$f26,$f20
+ madd.ps $f22,$f3,$f14,$f3
+ maq_s.w.phl $ac2,$25,$11
+ maq_s.w.phr $ac0,$10,$25
+ maq_sa.w.phl $ac3,$a1,$v1
+ maq_sa.w.phr $ac1,$at,$10
+ mfgc0 $s6,c0_datahi1
+ mflo $9,$ac2
+ modsub $a3,$12,$a3
+ mov.ps $f22,$f17
+ movf.ps $f10,$f28,$fcc6
+ movn.ps $f31,$f31,$s3
+ movt.ps $f20,$f25,$fcc2
+ movz.ps $f18,$f17,$ra
+ msgn.qh $v0,$v24,$v20
+ msgn.qh $v12,$v21,$v0[1]
+ msub $ac2,$sp,$14
+ msub.d $f10,$f1,$f31,$f18
+ msub.ps $f12,$f14,$f29,$f17
+ msubu $ac2,$a1,$24
+ mtc0 $9,c0_datahi1
+ mtgc0 $s4,$21,7
+ mthi $v0,$ac1
+ mthlip $a3,$ac0
+ mul.ph $s4,$24,$s0
+ mul.ps $f14,$f0,$f16
+ mul_s.ph $10,$14,$15
+ muleq_s.w.phl $11,$s4,$s4
+ muleq_s.w.phr $s6,$a0,$s8
+ muleu_s.ph.qbl $a2,$14,$8
+ muleu_s.ph.qbr $a1,$ra,$9
+ mulq_rs.ph $s2,$14,$15
+ mulq_rs.w $at,$s4,$25
+ mulq_s.ph $s0,$k1,$15
+ mulq_s.w $9,$a3,$s0
+ mulsa.w.ph $ac1,$s4,$s6
+ mulsaq_s.w.ph $ac0,$ra,$s2
+ neg.ps $f19,$f13
+ nloc.b $w12,$w30
+ nloc.d $w16,$w7
+ nloc.h $w21,$w17
+ nloc.w $w17,$w16
+ nlzc.b $w12,$w7
+ nlzc.d $w14,$w14
+ nlzc.h $w24,$w24
+ nlzc.w $w10,$w4
+ nmadd.d $f18,$f9,$f14,$f19
+ nmadd.ps $f27,$f4,$f9,$f25
+ nmsub.d $f30,$f8,$f16,$f30
+ nmsub.ps $f6,$f12,$f14,$f17
+ nor.v $w20,$w20,$w15
+ or.v $w13,$w23,$w12
+ packrl.ph $ra,$24,$14
+ pcnt.b $w30,$w15
+ pcnt.d $w5,$w16
+ pcnt.h $w20,$w24
+ pcnt.w $w22,$w20
+ pick.ph $ra,$a2,$gp
+ pick.qb $11,$a0,$gp
+ pll.ps $f25,$f9,$f30
+ plu.ps $f1,$f26,$f29
+ preceq.w.phl $s8,$gp
+ preceq.w.phr $s5,$15
+ precequ.ph.qbl $s7,$ra
+ precequ.ph.qbla $a0,$9
+ precequ.ph.qbr $ra,$s3
+ precequ.ph.qbra $24,$8
+ preceu.ph.qbl $sp,$8
+ preceu.ph.qbla $s6,$11
+ preceu.ph.qbr $gp,$s1
+ preceu.ph.qbra $k1,$s0
+ precr.qb.ph $v0,$12,$s8
+ precrq.ph.w $14,$s8,$24
+ precrq.qb.ph $a2,$12,$12
+ precrq_rs.ph.w $a1,$k0,$a3
+ precrqu_s.qb.ph $zero,$gp,$s5
+ pul.ps $f9,$f30,$f26
+ puu.ps $f24,$f9,$f2
+ raddu.w.qb $25,$s3
+ rdpgpr $s3,$9
+ recip.d $f19,$f6
+ recip.s $f3,$f30
+ repl.ph $at,-307
+ replv.ph $v1,$s7
+ replv.qb $25,$12
+ rorv $13,$a3,$s5
+ rsqrt.d $f3,$f28
+ rsqrt.s $f4,$f8
+ sbe $s7,33($s1)
+ sce $sp,189($10)
+ she $24,105($v0)
+ shilo $ac1,26
+ shilov $ac2,$10
+ shllv.ph $10,$s0,$s0
+ shllv.qb $gp,$v1,$zero
+ shllv_s.ph $k1,$at,$13
+ shllv_s.w $s1,$ra,$k0
+ shrav.ph $25,$s2,$s1
+ shrav.qb $zero,$24,$11
+ shrav_r.ph $s3,$11,$25
+ shrav_r.qb $a0,$sp,$s5
+ shrav_r.w $s7,$s4,$s6
+ shrlv.ph $14,$10,$9
+ shrlv.qb $a2,$s2,$11
+ sub.ps $f5,$f14,$f26
+ subq.ph $ra,$9,$s8
+ subq_s.ph $13,$s8,$s5
+ subq_s.w $k1,$a2,$a3
+ subqh.ph $10,$at,$9
+ subqh.w $v0,$a2,$zero
+ subqh_r.ph $a0,$12,$s6
+ subqh_r.w $10,$a2,$gp
+ subu.ph $9,$s6,$s4
+ subu.qb $s6,$a2,$s6
+ subu_s.ph $v1,$a1,$s3
+ subu_s.qb $s1,$at,$ra
+ subuh.qb $zero,$gp,$gp
+ subuh_r.qb $s4,$s8,$s6
+ swe $24,94($k0)
+ swle $v1,-209($gp)
+ swre $k0,-202($s2)
+ synci 20023($s0)
+ tlbginv
+ tlbginvf
+ tlbgp
+ tlbgr
+ tlbgwi
+ tlbgwr
+ tlbinv
+ tlbinvf
+ wrpgpr $zero,$13
+ xor.v $w20,$w21,$w30
+ yield $v1,$s0
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 826a6b2..252589d 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -3,189 +3,229 @@
# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
.set noat
- abs.d $f7,$f25 # CHECK: encoding
- abs.s $f9,$f16
- add $s7,$s2,$a1
- add.d $f1,$f7,$f29
- add.s $f8,$f21,$f24
- addi $t5,$t1,26322
- addu $t1,$a0,$a2
- and $s7,$v0,$t4
- c.ngl.d $f29,$f29
- c.ngle.d $f0,$f16
- c.sf.d $f30,$f0
- c.sf.s $f14,$f22
- ceil.l.d $f1,$f3
- ceil.l.s $f18,$f13
- ceil.w.d $f11,$f25
- ceil.w.s $f6,$f20
- cfc1 $s1,$21
- clo $t3,$a1
- clz $sp,$gp
- ctc1 $a2,$26
- cvt.d.l $f4,$f16
- cvt.d.s $f22,$f28
- cvt.d.w $f26,$f11
- cvt.l.d $f24,$f15
- cvt.l.s $f11,$f29
- cvt.s.d $f26,$f8
- cvt.s.l $f15,$f30
- cvt.s.w $f22,$f15
- cvt.w.d $f20,$f14
- cvt.w.s $f20,$f24
- dadd $s3,$at,$ra
- daddi $sp,$s4,-27705
- daddiu $k0,$s6,-4586
- dclo $s2,$a2
- dclz $s0,$t9
- deret
- di $s8
- ddiv $zero,$k0,$s3
- ddivu $zero,$s0,$s1
- div $zero,$t9,$t3
- div.d $f29,$f20,$f27
- div.s $f4,$f5,$f15
- divu $zero,$t9,$t7
- dmfc1 $t4,$f13
- dmtc1 $s0,$f14
- dmult $s7,$t1
- dmultu $a1,$a2
- dsbh $v1,$t6
- dshd $v0,$sp
- dsllv $zero,$s4,$t4
- dsrav $gp,$s2,$s3
- dsrlv $s3,$t6,$s4
- dsub $a3,$s6,$t0
- dsubu $a1,$a1,$k0
- ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
- ei $t6
- eret
- floor.l.d $f26,$f7
- floor.l.s $f12,$f5
- floor.w.d $f14,$f11
- floor.w.s $f8,$f9
- lb $t8,-14515($t2)
- lbu $t0,30195($v1)
- ld $sp,-28645($s1)
- ldc1 $f11,16391($s0)
- ldc2 $8,-21181($at)
- ldl $t8,-4167($t8)
- ldr $t6,-30358($s4)
- ldxc1 $f8,$s7($t7)
- lh $t3,-8556($s5)
- lhu $s3,-22851($v0)
- li $at,-29773
- li $zero,-29889
- ll $v0,-7321($s2)
- lld $zero,-14736($ra)
- luxc1 $f19,$s6($s5)
- lw $t0,5674($a1)
- lwc1 $f16,10225($k0)
- lwc2 $18,-841($a2)
- lwl $s4,-4231($t7)
- lwr $zero,-19147($gp)
- lwu $s3,-24086($v1)
- lwxc1 $f12,$s1($s8)
- madd $s6,$t5
- madd $zero,$t1
- madd.s $f1,$f31,$f19,$f25
- maddu $s3,$gp
- maddu $t8,$s2
- mfc0 $a2,$14,1
- mfc1 $a3,$f27
- mfhc1 $s8,$f24
- mfhi $s3
- mfhi $sp
- mflo $s1
- mov.d $f20,$f14
- mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $t9,$a2
- movf $gp,$t0,$fcc7
- movf.d $f6,$f11,$fcc5
- movf.s $f23,$f5,$fcc6
- movn $v1,$s1,$s0
- movn.d $f27,$f21,$k0
- movn.s $f12,$f0,$s7
- movt $zero,$s4,$fcc5
- movt.d $f0,$f2,$fcc0
- movt.s $f30,$f2,$fcc1
- movz $a1,$s6,$t1
- movz.d $f12,$f29,$t1
- movz.s $f25,$f7,$v1
- msub $s7,$k1
- msub.s $f12,$f19,$f10,$f16
- msubu $t7,$a1
- mtc0 $t1,$29,3
- mtc1 $s8,$f9
- mthc1 $zero,$f16
- mthi $s1
- mtlo $sp
- mtlo $t9
- mul $s0,$s4,$at
- mul.d $f20,$f20,$f16
- mul.s $f30,$f10,$f2
- mult $sp,$s4
- mult $sp,$v0
- multu $gp,$k0
- multu $t1,$s2
- neg.d $f27,$f18
- neg.s $f1,$f15
- nmadd.s $f0,$f5,$f25,$f12
- nmsub.s $f1,$f24,$f19,$f4
- nop
- nor $a3,$zero,$a3
- or $t4,$s0,$sp
- pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40]
- rdhwr $sp,$11
- round.l.d $f12,$f1
- round.l.s $f25,$f5
- round.w.d $f6,$f4
- round.w.s $f27,$f28
- sb $s6,-19857($t6)
- sc $t7,18904($s3)
- scd $t7,-8243($sp)
- sd $t4,5835($t2)
- sdc1 $f31,30574($t5)
- sdc2 $20,23157($s2)
- sdl $a3,-20961($s8)
- sdr $t3,-20423($t4)
- sdxc1 $f11,$t2($t6)
- seb $t9,$t7
- seh $v1,$t4
- sh $t6,-6704($t7)
- sllv $a3,$zero,$t1
- slt $s7,$t3,$k1
- slti $s1,$t2,9489
- sltiu $t9,$t9,-15531
- sltu $s4,$s5,$t3
- sqrt.d $f17,$f22
- sqrt.s $f0,$f1
- srav $s1,$s7,$sp
- srlv $t9,$s4,$a0
- ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
- sub $s6,$s3,$t4
- sub.d $f18,$f3,$f17
- sub.s $f23,$f22,$f22
- subu $sp,$s6,$s6
- suxc1 $f12,$k1($t5)
- sw $ra,-10160($sp)
- swc1 $f6,-8465($t8)
- swc2 $25,24880($s0)
- swl $t7,13694($s3)
- swr $s1,-26590($t6)
- swxc1 $f19,$t4($k0)
- teqi $s5,-17504
- tgei $s1,5025
- tgeiu $sp,-28621
- tlti $t6,-21059
- tltiu $ra,-5076
- tnei $t4,-29647
- trunc.l.d $f23,$f23
- trunc.l.s $f28,$f31
- trunc.w.d $f22,$f15
- trunc.w.s $f28,$f30
- xor $s2,$a0,$s8
- wsbh $k1,$t1
+ abs.d $f7,$f25 # CHECK: encoding:
+ abs.s $f9,$f16
+ add $s7,$s2,$a1
+ add.d $f1,$f7,$f29
+ add.s $f8,$f21,$f24
+ addi $13,$9,26322
+ addu $9,$a0,$a2
+ and $s7,$v0,$12
+ c.ngl.d $f29,$f29
+ c.ngle.d $f0,$f16
+ c.sf.d $f30,$f0
+ c.sf.s $f14,$f22
+ ceil.l.d $f1,$f3
+ ceil.l.s $f18,$f13
+ ceil.w.d $f11,$f25
+ ceil.w.s $f6,$f20
+ cfc1 $s1,$21
+ clo $11,$a1
+ clz $sp,$gp
+ ctc1 $a2,$26
+ cvt.d.l $f4,$f16
+ cvt.d.s $f22,$f28
+ cvt.d.w $f26,$f11
+ cvt.l.d $f24,$f15
+ cvt.l.s $f11,$f29
+ cvt.s.d $f26,$f8
+ cvt.s.l $f15,$f30
+ cvt.s.w $f22,$f15
+ cvt.w.d $f20,$f14
+ cvt.w.s $f20,$f24
+ dadd $s3,$at,$ra
+ daddi $sp,$s4,-27705
+ daddiu $k0,$s6,-4586
+ daddu $s3,$at,$ra
+ dclo $s2,$a2
+ dclz $s0,$25
+ deret
+ di $s8
+ ddiv $zero,$k0,$s3
+ ddivu $zero,$s0,$s1
+ div $zero,$25,$11
+ div.d $f29,$f20,$f27
+ div.s $f4,$f5,$f15
+ divu $zero,$25,$15
+ dmfc1 $12,$f13
+ dmtc1 $s0,$f14
+ dmult $s7,$9
+ dmultu $a1,$a2
+ drotr $1,15 # CHECK: drotr $1, $1, 15 # encoding: [0x00,0x21,0x0b,0xfa]
+ drotr $1,$14,15 # CHECK: drotr $1, $14, 15 # encoding: [0x00,0x2e,0x0b,0xfa]
+ drotr32 $1,15 # CHECK: drotr32 $1, $1, 15 # encoding: [0x00,0x21,0x0b,0xfe]
+ drotr32 $1,$14,15 # CHECK: drotr32 $1, $14, 15 # encoding: [0x00,0x2e,0x0b,0xfe]
+ drotrv $1,$14,$15 # CHECK: drotrv $1, $14, $15 # encoding: [0x01,0xee,0x08,0x56]
+ dsbh $v1,$14
+ dshd $v0,$sp
+ dsll $zero,18 # CHECK: dsll $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xb8]
+ dsll $zero,$s4,18 # CHECK: dsll $zero, $20, 18 # encoding: [0x00,0x14,0x04,0xb8]
+ dsll $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsll32 $zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsll32 $zero,$zero,18 # CHECK: dsll32 $zero, $zero, 18 # encoding: [0x00,0x00,0x04,0xbc]
+ dsllv $zero,$s4,$12 # CHECK: dsllv $zero, $20, $12 # encoding: [0x01,0x94,0x00,0x14]
+ dsra $gp,10 # CHECK: dsra $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbb]
+ dsra $gp,$s2,10 # CHECK: dsra $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbb]
+ dsra $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsra32 $gp,10 # CHECK: dsra32 $gp, $gp, 10 # encoding: [0x00,0x1c,0xe2,0xbf]
+ dsra32 $gp,$s2,10 # CHECK: dsra32 $gp, $18, 10 # encoding: [0x00,0x12,0xe2,0xbf]
+ dsrav $gp,$s2,$s3 # CHECK: dsrav $gp, $18, $19 # encoding: [0x02,0x72,0xe0,0x17]
+ dsrl $s3,23 # CHECK: dsrl $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfa]
+ dsrl $s3,$6,23 # CHECK: dsrl $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfa]
+ dsrl $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsrl32 $s3,23 # CHECK: dsrl32 $19, $19, 23 # encoding: [0x00,0x13,0x9d,0xfe]
+ dsrl32 $s3,$6,23 # CHECK: dsrl32 $19, $6, 23 # encoding: [0x00,0x06,0x9d,0xfe]
+ dsrlv $s3,$6,$s4 # CHECK: dsrlv $19, $6, $20 # encoding: [0x02,0x86,0x98,0x16]
+ dsub $a3,$s6,$8
+ dsubu $a1,$a1,$k0
+ ehb # CHECK: ehb # encoding: [0x00,0x00,0x00,0xc0]
+ ei $14
+ eret
+ floor.l.d $f26,$f7
+ floor.l.s $f12,$f5
+ floor.w.d $f14,$f11
+ floor.w.s $f8,$f9
+ lb $24,-14515($10)
+ lbu $8,30195($v1)
+ ld $sp,-28645($s1)
+ ldc1 $f11,16391($s0)
+ ldc2 $8,-21181($at)
+ ldl $24,-4167($24)
+ ldr $14,-30358($s4)
+ ldxc1 $f8,$s7($15)
+ lh $11,-8556($s5)
+ lhu $s3,-22851($v0)
+ li $at,-29773
+ li $zero,-29889
+ ll $v0,-7321($s2)
+ lld $zero,-14736($ra)
+ luxc1 $f19,$s6($s5)
+ lw $8,5674($a1)
+ lwc1 $f16,10225($k0)
+ lwc2 $18,-841($a2)
+ lwl $s4,-4231($15)
+ lwr $zero,-19147($gp)
+ lwu $s3,-24086($v1)
+ lwxc1 $f12,$s1($s8)
+ madd $s6,$13
+ madd $zero,$9
+ madd.s $f1,$f31,$f19,$f25
+ maddu $s3,$gp
+ maddu $24,$s2
+ mfc0 $a2,$14,1
+ mfc1 $a3,$f27
+ mfhc1 $s8,$f24
+ mfhi $s3
+ mfhi $sp
+ mflo $s1
+ mov.d $f20,$f14
+ mov.s $f2,$f27
+ move $a0,$a3
+ move $s5,$a0
+ move $s8,$a0
+ move $25,$a2
+ movf $gp,$8,$fcc7
+ movf.d $f6,$f11,$fcc5
+ movf.s $f23,$f5,$fcc6
+ movn $v1,$s1,$s0
+ movn.d $f27,$f21,$k0
+ movn.s $f12,$f0,$s7
+ movt $zero,$s4,$fcc5
+ movt.d $f0,$f2,$fcc0
+ movt.s $f30,$f2,$fcc1
+ movz $a1,$s6,$9
+ movz.d $f12,$f29,$9
+ movz.s $f25,$f7,$v1
+ msub $s7,$k1
+ msub.s $f12,$f19,$f10,$f16
+ msubu $15,$a1
+ mtc0 $9,$29,3
+ mtc1 $s8,$f9
+ mthc1 $zero,$f16
+ mthi $s1
+ mtlo $sp
+ mtlo $25
+ mul $s0,$s4,$at
+ mul.d $f20,$f20,$f16
+ mul.s $f30,$f10,$f2
+ mult $sp,$s4
+ mult $sp,$v0
+ multu $gp,$k0
+ multu $9,$s2
+ negu $2 # CHECK: negu $2, $2 # encoding: [0x00,0x02,0x10,0x23]
+ negu $2,$3 # CHECK: negu $2, $3 # encoding: [0x00,0x03,0x10,0x23]
+ neg.d $f27,$f18
+ neg.s $f1,$f15
+ nmadd.s $f0,$f5,$f25,$f12
+ nmsub.s $f1,$f24,$f19,$f4
+ nop
+ nor $a3,$zero,$a3
+ or $12,$s0,$sp
+ pause # CHECK: pause # encoding: [0x00,0x00,0x01,0x40]
+ rdhwr $sp,$11
+ rotr $1,15 # CHECK: rotr $1, $1, 15 # encoding: [0x00,0x21,0x0b,0xc2]
+ rotr $1,$14,15 # CHECK: rotr $1, $14, 15 # encoding: [0x00,0x2e,0x0b,0xc2]
+ rotrv $1,$14,$15 # CHECK: rotrv $1, $14, $15 # encoding: [0x01,0xee,0x08,0x46]
+ round.l.d $f12,$f1
+ round.l.s $f25,$f5
+ round.w.d $f6,$f4
+ round.w.s $f27,$f28
+ sb $s6,-19857($14)
+ sc $15,18904($s3)
+ scd $15,-8243($sp)
+ sd $12,5835($10)
+ sdc1 $f31,30574($13)
+ sdc2 $20,23157($s2)
+ sdl $a3,-20961($s8)
+ sdr $11,-20423($12)
+ sdxc1 $f11,$10($14)
+ seb $25,$15
+ seh $v1,$12
+ sh $14,-6704($15)
+ sll $a3,18 # CHECK: sll $7, $7, 18 # encoding: [0x00,0x07,0x3c,0x80]
+ sll $a3,$zero,18 # CHECK: sll $7, $zero, 18 # encoding: [0x00,0x00,0x3c,0x80]
+ sll $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ sllv $a3,$zero,$9 # CHECK: sllv $7, $zero, $9 # encoding: [0x01,0x20,0x38,0x04]
+ slt $s7,$11,$k1 # CHECK: slt $23, $11, $27 # encoding: [0x01,0x7b,0xb8,0x2a]
+ slti $s1,$10,9489 # CHECK: slti $17, $10, 9489 # encoding: [0x29,0x51,0x25,0x11]
+ sltiu $25,$25,-15531 # CHECK: sltiu $25, $25, -15531 # encoding: [0x2f,0x39,0xc3,0x55]
+ sltu $s4,$s5,$11 # CHECK: sltu $20, $21, $11 # encoding: [0x02,0xab,0xa0,0x2b]
+ sltu $24,$25,-15531 # CHECK: sltiu $24, $25, -15531 # encoding: [0x2f,0x38,0xc3,0x55]
+ sqrt.d $f17,$f22
+ sqrt.s $f0,$f1
+ sra $s1,15 # CHECK: sra $17, $17, 15 # encoding: [0x00,0x11,0x8b,0xc3]
+ sra $s1,$s7,15 # CHECK: sra $17, $23, 15 # encoding: [0x00,0x17,0x8b,0xc3]
+ sra $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srav $s1,$s7,$sp # CHECK: srav $17, $23, $sp # encoding: [0x03,0xb7,0x88,0x07]
+ srl $2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $2,$2,7 # CHECK: srl $2, $2, 7 # encoding: [0x00,0x02,0x11,0xc2]
+ srl $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ srlv $25,$s4,$a0 # CHECK: srlv $25, $20, $4 # encoding: [0x00,0x94,0xc8,0x06]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x00,0x40]
+ sub $s6,$s3,$12
+ sub.d $f18,$f3,$f17
+ sub.s $f23,$f22,$f22
+ subu $sp,$s6,$s6
+ suxc1 $f12,$k1($13)
+ sw $ra,-10160($sp)
+ swc1 $f6,-8465($24)
+ swc2 $25,24880($s0)
+ swl $15,13694($s3)
+ swr $s1,-26590($14)
+ swxc1 $f19,$12($k0)
+ teqi $s5,-17504
+ tgei $s1,5025
+ tgeiu $sp,-28621
+ tlbp # CHECK: tlbp # encoding: [0x42,0x00,0x00,0x08]
+ tlbr # CHECK: tlbr # encoding: [0x42,0x00,0x00,0x01]
+ tlbwi # CHECK: tlbwi # encoding: [0x42,0x00,0x00,0x02]
+ tlbwr # CHECK: tlbwr # encoding: [0x42,0x00,0x00,0x06]
+ tlti $14,-21059
+ tltiu $ra,-5076
+ tnei $12,-29647
+ trunc.l.d $f23,$f23
+ trunc.l.s $f28,$f31
+ trunc.w.d $f22,$f15
+ trunc.w.s $f28,$f30
+ xor $s2,$a0,$s8
+ wsbh $k1,$9
diff --git a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
new file mode 100644
index 0000000..f7949bb
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
@@ -0,0 +1,15 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ lwl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips1.s b/test/MC/Mips/mips64r6/invalid-mips1.s
new file mode 100644
index 0000000..1225005
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips1.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips2.s b/test/MC/Mips/mips64r6/invalid-mips2.s
new file mode 100644
index 0000000..0638e78
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips2.s
@@ -0,0 +1,14 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
new file mode 100644
index 0000000..7424f49
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
@@ -0,0 +1,23 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ ldl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sdr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ldle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ ldre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sdle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sdre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ lwl $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips3.s b/test/MC/Mips/mips64r6/invalid-mips3.s
new file mode 100644
index 0000000..0638e78
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips3.s
@@ -0,0 +1,14 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ addi $13,$9,26322 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ teqi $s5,-17504 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgei $s1,5025 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tgeiu $sp,-28621 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlti $14,-21059 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tltiu $ra,-5076 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tnei $12,-29647 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
new file mode 100644
index 0000000..6b980e6
--- /dev/null
+++ b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
@@ -0,0 +1,44 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ abs.ps $f22,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ add.ps $f25,$f27,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ alnv.ps $f12,$f18,$f30,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.eq.ps $fcc5,$f0,$f9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.f.ps $fcc6,$f11,$f11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.le.ps $fcc1,$f7,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.lt.ps $f19,$f5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.nge.ps $f1,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngl.ps $f21,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngle.ps $fcc7,$f12,$f20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ngt.ps $fcc5,$f30,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ole.ps $fcc7,$f21,$f8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.olt.ps $fcc3,$f7,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.seq.ps $fcc6,$f31,$f14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.sf.ps $fcc6,$f4,$f6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ueq.ps $fcc1,$f5,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ule.ps $fcc6,$f17,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.ult.ps $fcc7,$f14,$f0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ c.un.ps $fcc4,$f2,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.ps.s $f3,$f18,$f19 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ cvt.ps.pw $f3,$f18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ madd.ps $f22,$f3,$f14,$f3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mov.ps $f22,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movf.ps $f10,$f28,$fcc6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movn.ps $f31,$f31,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movt.ps $f20,$f25,$fcc2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ movz.ps $f18,$f17,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ msub.ps $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ mul.ps $f14,$f0,$f16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ neg.ps $f19,$f13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmadd.ps $f27,$f4,$f9,$f25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ nmsub.ps $f6,$f12,$f14,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pll.ps $f25,$f9,$f30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ plu.ps $f1,$f26,$f29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ pul.ps $f9,$f30,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ puu.ps $f24,$f9,$f2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+ sub.ps $f5,$f14,$f26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/test/MC/Mips/mips64r6/relocations.s b/test/MC/Mips/mips64r6/relocations.s
new file mode 100644
index 0000000..db84715
--- /dev/null
+++ b/test/MC/Mips/mips64r6/relocations.s
@@ -0,0 +1,58 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mips-unknown-linux -mcpu=mips64r6 \
+# RUN: | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that the assembler can handle the documented syntax for fixups.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP: beqzc $9, bar # encoding: [0xd9,0b001AAAAA,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP: bnezc $9, bar # encoding: [0xf9,0b001AAAAA,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP: balc bar # encoding: [0b111010AA,A,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP: bc bar # encoding: [0b110010AA,A,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP: aluipc $2, %pcrel_hi(bar) # encoding: [0xec,0x5f,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar@PCREL_HI16,
+# CHECK-FIXUP: kind: fixup_MIPS_PCHI16
+# CHECK-FIXUP: addiu $2, $2, %pcrel_lo(bar) # encoding: [0x24,0x42,A,A]
+# CHECK-FIXUP: # fixup A - offset: 0,
+# CHECK-FIXUP: value: bar@PCREL_LO16,
+# CHECK-FIXUP: kind: fixup_MIPS_PCLO16
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF: 0x0 R_MIPS_PC16 bar 0x0
+# CHECK-ELF: 0x4 R_MIPS_PC16 bar 0x0
+# CHECK-ELF: 0x8 R_MIPS_PC21_S2 bar 0x0
+# CHECK-ELF: 0xC R_MIPS_PC21_S2 bar 0x0
+# CHECK-ELF: 0x10 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF: 0x14 R_MIPS_PC26_S2 bar 0x0
+# CHECK-ELF: 0x18 R_MIPS_PCHI16 bar 0x0
+# CHECK-ELF: 0x1C R_MIPS_PCLO16 bar 0x0
+# CHECK-ELF: ]
+
+ beqc $5, $6, bar
+ bnec $5, $6, bar
+ beqzc $9, bar
+ bnezc $9, bar
+ balc bar
+ bc bar
+ aluipc $2, %pcrel_hi(bar)
+ addiu $2, $2, %pcrel_lo(bar)
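+# (Usage sketch, not asserted by this test: the aluipc/addiu pair above
+# materializes the PC-relative address of bar in $2; the %pcrel_hi/%pcrel_lo
+# halves are patched via the R_MIPS_PCHI16/R_MIPS_PCLO16 relocations listed.)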
diff --git a/test/MC/Mips/mips64r6/valid-xfail.s b/test/MC/Mips/mips64r6/valid-xfail.s
new file mode 100644
index 0000000..a751225
--- /dev/null
+++ b/test/MC/Mips/mips64r6/valid-xfail.s
@@ -0,0 +1,23 @@
+# Instructions that should be valid but currently fail for known reasons (e.g.
+# they aren't implemented yet).
+# This test is set up to XPASS if any instruction generates an encoding.
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 | not FileCheck %s
+# CHECK-NOT: encoding
+# XFAIL: *
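+# (Sketch of the mechanism: while everything here is rejected, llvm-mc fails,
+# the leading "not" inverts that, CHECK-NOT finds no "encoding" string, and
+# "not FileCheck" therefore fails, matching the XFAIL. Once an instruction
+# assembles, CHECK-NOT trips and the run line succeeds, so the test XPASSes.)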
+
+ .set noat
+ bovc $0, $2, 4 # TODO: bovc $0, $2, 4 # encoding: [0x20,0x40,0x00,0x01]
+ bovc $2, $4, 4 # TODO: bovc $2, $4, 4 # encoding: [0x20,0x82,0x00,0x01]
+ bnvc $0, $2, 4 # TODO: bnvc $0, $2, 4 # encoding: [0x60,0x40,0x00,0x01]
+ bnvc $2, $4, 4 # TODO: bnvc $2, $4, 4 # encoding: [0x60,0x82,0x00,0x01]
+ beqc $0, $6, 256 # TODO: beqc $6, $zero, 256 # encoding: [0x20,0xc0,0x00,0x40]
+ beqc $5, $0, 256 # TODO: beqc $5, $zero, 256 # encoding: [0x20,0xa0,0x00,0x40]
+ beqc $6, $5, 256 # TODO: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40]
+ bnec $0, $6, 256 # TODO: bnec $6, $zero, 256 # encoding: [0x60,0xc0,0x00,0x40]
+ bnec $5, $0, 256 # TODO: bnec $5, $zero, 256 # encoding: [0x60,0xa0,0x00,0x40]
+ bnec $6, $5, 256 # TODO: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40]
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
new file mode 100644
index 0000000..efdfc7f
--- /dev/null
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -0,0 +1,143 @@
+# Instructions that are valid
+#
+# Branches have some unusual encoding rules in MIPS32r6/MIPS64r6 so we need to test:
+# rs == 0
+# rs != 0
+# rt == 0
+# rt != 0
+# rs < rt
+# rs == rt
+# rs > rt
+# appropriately for each branch instruction
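+# As a sketch of one such case (taken from the rs > rt entries in
+# valid-xfail.s above): "beqc $6, $5, 256" is expected to canonicalize to
+# "beqc $5, $6, 256" and share its encoding ([0x20,0xa6,0x00,0x40]), so
+# both operand orders need coverage.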
+#
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 | FileCheck %s
+
+ .set noat
+        # FIXME: Add the instructions carried forward from older ISAs
+ addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0xec,0x80,0x00,0x19]
+ align $4, $2, $3, 2 # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0]
+ aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0xec,0x7f,0x00,0x38]
+ aui $3,$2,-23 # CHECK: aui $3, $2, -23 # encoding: [0x3c,0x62,0xff,0xe9]
+ auipc $3, -1 # CHECK: auipc $3, -1 # encoding: [0xec,0x7e,0xff,0xff]
+ balc 14572256 # CHECK: balc 14572256 # encoding: [0xe8,0x37,0x96,0xb8]
+ bc 14572256 # CHECK: bc 14572256 # encoding: [0xc8,0x37,0x96,0xb8]
+ bc1eqz $f0,4 # CHECK: bc1eqz $f0, 4 # encoding: [0x45,0x20,0x00,0x01]
+ bc1eqz $f31,4 # CHECK: bc1eqz $f31, 4 # encoding: [0x45,0x3f,0x00,0x01]
+ bc1nez $f0,4 # CHECK: bc1nez $f0, 4 # encoding: [0x45,0xa0,0x00,0x01]
+ bc1nez $f31,4 # CHECK: bc1nez $f31, 4 # encoding: [0x45,0xbf,0x00,0x01]
+ bc2eqz $0,8 # CHECK: bc2eqz $0, 8 # encoding: [0x49,0x20,0x00,0x02]
+ bc2eqz $31,8 # CHECK: bc2eqz $31, 8 # encoding: [0x49,0x3f,0x00,0x02]
+ bc2nez $0,8 # CHECK: bc2nez $0, 8 # encoding: [0x49,0xa0,0x00,0x02]
+ bc2nez $31,8 # CHECK: bc2nez $31, 8 # encoding: [0x49,0xbf,0x00,0x02]
+        # beqc requires rs < rt && rs != 0 but we also accept cases where this is not true. See also bovc
+ # FIXME: Testcases are in valid-xfail.s at the moment
+ beqc $5, $6, 256 # CHECK: beqc $5, $6, 256 # encoding: [0x20,0xa6,0x00,0x40]
+ beqzalc $2, 1332 # CHECK: beqzalc $2, 1332 # encoding: [0x20,0x02,0x01,0x4d]
+        # bnec requires rs < rt && rs != 0 but we accept cases where this is not true. See also bnvc
+ # FIXME: Testcases are in valid-xfail.s at the moment
+ bnec $5, $6, 256 # CHECK: bnec $5, $6, 256 # encoding: [0x60,0xa6,0x00,0x40]
+ bnezalc $2, 1332 # CHECK: bnezalc $2, 1332 # encoding: [0x60,0x02,0x01,0x4d]
+ beqzc $5, 72256 # CHECK: beqzc $5, 72256 # encoding: [0xd8,0xa0,0x46,0x90]
+ bgezalc $2, 1332 # CHECK: bgezalc $2, 1332 # encoding: [0x18,0x42,0x01,0x4d]
+ bnezc $5, 72256 # CHECK: bnezc $5, 72256 # encoding: [0xf8,0xa0,0x46,0x90]
+ bltzc $5, 256 # CHECK: bltzc $5, 256 # encoding: [0x5c,0xa5,0x00,0x40]
+ bgezc $5, 256 # CHECK: bgezc $5, 256 # encoding: [0x58,0xa5,0x00,0x40]
+ bgtzalc $2, 1332 # CHECK: bgtzalc $2, 1332 # encoding: [0x1c,0x02,0x01,0x4d]
+ blezc $5, 256 # CHECK: blezc $5, 256 # encoding: [0x58,0x05,0x00,0x40]
+ bltzalc $2, 1332 # CHECK: bltzalc $2, 1332 # encoding: [0x1c,0x42,0x01,0x4d]
+ bgtzc $5, 256 # CHECK: bgtzc $5, 256 # encoding: [0x5c,0x05,0x00,0x40]
+ bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x20]
+ blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0x18,0x02,0x01,0x4d]
+        # bnvc requires that rs >= rt but we accept both orders. See also bnec
+ bnvc $0, $0, 4 # CHECK: bnvc $zero, $zero, 4 # encoding: [0x60,0x00,0x00,0x01]
+ bnvc $2, $0, 4 # CHECK: bnvc $2, $zero, 4 # encoding: [0x60,0x40,0x00,0x01]
+ bnvc $4, $2, 4 # CHECK: bnvc $4, $2, 4 # encoding: [0x60,0x82,0x00,0x01]
+        # bovc requires that rs >= rt but we accept both orders. See also beqc
+ bovc $0, $0, 4 # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01]
+ bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01]
+ bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01]
+ cmp.f.s $f2,$f3,$f4 # CHECK: cmp.f.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80]
+ cmp.f.d $f2,$f3,$f4 # CHECK: cmp.f.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80]
+ cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81]
+ cmp.un.d $f2,$f3,$f4 # CHECK: cmp.un.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x81]
+ cmp.eq.s $f2,$f3,$f4 # CHECK: cmp.eq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x82]
+ cmp.eq.d $f2,$f3,$f4 # CHECK: cmp.eq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x82]
+ cmp.ueq.s $f2,$f3,$f4 # CHECK: cmp.ueq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x83]
+ cmp.ueq.d $f2,$f3,$f4 # CHECK: cmp.ueq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x83]
+ cmp.olt.s $f2,$f3,$f4 # CHECK: cmp.olt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x84]
+ cmp.olt.d $f2,$f3,$f4 # CHECK: cmp.olt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x84]
+ cmp.ult.s $f2,$f3,$f4 # CHECK: cmp.ult.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x85]
+ cmp.ult.d $f2,$f3,$f4 # CHECK: cmp.ult.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x85]
+ cmp.ole.s $f2,$f3,$f4 # CHECK: cmp.ole.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x86]
+ cmp.ole.d $f2,$f3,$f4 # CHECK: cmp.ole.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x86]
+ cmp.ule.s $f2,$f3,$f4 # CHECK: cmp.ule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x87]
+ cmp.ule.d $f2,$f3,$f4 # CHECK: cmp.ule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x87]
+ cmp.sf.s $f2,$f3,$f4 # CHECK: cmp.sf.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x88]
+ cmp.sf.d $f2,$f3,$f4 # CHECK: cmp.sf.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x88]
+ cmp.ngle.s $f2,$f3,$f4 # CHECK: cmp.ngle.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x89]
+ cmp.ngle.d $f2,$f3,$f4 # CHECK: cmp.ngle.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x89]
+ cmp.seq.s $f2,$f3,$f4 # CHECK: cmp.seq.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8a]
+ cmp.seq.d $f2,$f3,$f4 # CHECK: cmp.seq.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8a]
+ cmp.ngl.s $f2,$f3,$f4 # CHECK: cmp.ngl.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8b]
+ cmp.ngl.d $f2,$f3,$f4 # CHECK: cmp.ngl.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8b]
+ cmp.lt.s $f2,$f3,$f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8c]
+ cmp.lt.d $f2,$f3,$f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8c]
+ cmp.nge.s $f2,$f3,$f4 # CHECK: cmp.nge.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8d]
+ cmp.nge.d $f2,$f3,$f4 # CHECK: cmp.nge.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8d]
+ cmp.le.s $f2,$f3,$f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8e]
+ cmp.le.d $f2,$f3,$f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8e]
+ cmp.ngt.s $f2,$f3,$f4 # CHECK: cmp.ngt.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f]
+ cmp.ngt.d $f2,$f3,$f4 # CHECK: cmp.ngt.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f]
+ dalign $4,$2,$3,5 # CHECK: dalign $4, $2, $3, 5 # encoding: [0x7c,0x43,0x23,0x64]
+ daui $3,$2,0x1234 # CHECK: daui $3, $2, 4660 # encoding: [0x74,0x62,0x12,0x34]
+ dahi $3,0x5678 # CHECK: dahi $3, 22136 # encoding: [0x04,0x66,0x56,0x78]
+ dati $3,0xabcd # CHECK: dati $3, 43981 # encoding: [0x04,0x7e,0xab,0xcd]
+ dbitswap $4, $2 # CHECK: dbitswap $4, $2 # encoding: [0x7c,0x02,0x20,0x24]
+ div $2,$3,$4 # CHECK: div $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9a]
+ divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b]
+ jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00]
+ jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xd8,0x05,0x01,0x00]
+ mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda]
+ modu $2,$3,$4 # CHECK: modu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdb]
+ ddiv $2,$3,$4 # CHECK: ddiv $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9e]
+ ddivu $2,$3,$4 # CHECK: ddivu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9f]
+ dmod $2,$3,$4 # CHECK: dmod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xde]
+ dmodu $2,$3,$4 # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf]
+ lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43]
+ lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43]
+# mul $2,$3,$4 # CHECK-TODO: mul $2, $3, $4 # encoding: [0x00,0x64,0x10,0x98]
+ muh $2,$3,$4 # CHECK: muh $2, $3, $4 # encoding: [0x00,0x64,0x10,0xd8]
+ mulu $2,$3,$4 # CHECK: mulu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x99]
+ muhu $2,$3,$4 # CHECK: muhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xd9]
+ dmul $2,$3,$4 # CHECK: dmul $2, $3, $4 # encoding: [0x00,0x64,0x10,0xb8]
+ dmuh $2,$3,$4 # CHECK: dmuh $2, $3, $4 # encoding: [0x00,0x64,0x10,0xf8]
+ dmulu $2,$3,$4 # CHECK: dmulu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xb9]
+ dmuhu $2,$3,$4 # CHECK: dmuhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xf9]
+ maddf.s $f2,$f3,$f4 # CHECK: maddf.s $f2, $f3, $f4 # encoding: [0x46,0x04,0x18,0x98]
+ maddf.d $f2,$f3,$f4 # CHECK: maddf.d $f2, $f3, $f4 # encoding: [0x46,0x24,0x18,0x98]
+ msubf.s $f2,$f3,$f4 # CHECK: msubf.s $f2, $f3, $f4 # encoding: [0x46,0x04,0x18,0x99]
+ msubf.d $f2,$f3,$f4 # CHECK: msubf.d $f2, $f3, $f4 # encoding: [0x46,0x24,0x18,0x99]
+ sel.d $f0,$f1,$f2 # CHECK: sel.d $f0, $f1, $f2 # encoding: [0x46,0x22,0x08,0x10]
+ sel.s $f0,$f1,$f2 # CHECK: sel.s $f0, $f1, $f2 # encoding: [0x46,0x02,0x08,0x10]
+ seleqz $2,$3,$4 # CHECK: seleqz $2, $3, $4 # encoding: [0x00,0x64,0x10,0x35]
+ selnez $2,$3,$4 # CHECK: selnez $2, $3, $4 # encoding: [0x00,0x64,0x10,0x37]
+ max.s $f0, $f2, $f4 # CHECK: max.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1d]
+ max.d $f0, $f2, $f4 # CHECK: max.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1d]
+ min.s $f0, $f2, $f4 # CHECK: min.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1c]
+ min.d $f0, $f2, $f4 # CHECK: min.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1c]
+ maxa.s $f0, $f2, $f4 # CHECK: maxa.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1f]
+ maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f]
+ mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e]
+ mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e]
+ seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14]
+ seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14]
+ selnez.s $f0, $f2, $f4 # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17]
+ selnez.d $f0, $f2, $f4 # CHECK: selnez.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x17]
+ rint.s $f2, $f4 # CHECK: rint.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9a]
+ rint.d $f2, $f4 # CHECK: rint.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9a]
+ class.s $f2, $f4 # CHECK: class.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9b]
+ class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9b]
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
index 6780dd0..1a7d61f 100644
--- a/test/MC/Mips/mips_directives.s
+++ b/test/MC/Mips/mips_directives.s
@@ -51,7 +51,7 @@ $BB0_4:
.set $tmp7, $BB0_4-$BB0_2
.set f6,$f6
# CHECK: abs.s $f6, $f7 # encoding: [0x46,0x00,0x39,0x85]
-# CHECK: lui $1, %hi($tmp7) # encoding: [0x3c'A',0x01'A',0x00,0x00]
+# CHECK: lui $1, %hi($tmp7) # encoding: [0x3c,0x01,A,A]
# CHECK: # fixup A - offset: 0, value: ($tmp7)@ABS_HI, kind: fixup_Mips_HI16
abs.s f6,FPU_MASK
lui $1, %hi($tmp7)
diff --git a/test/MC/Mips/mips_gprel16.s b/test/MC/Mips/mips_gprel16.s
index 716c75e..9dd3fa3 100644
--- a/test/MC/Mips/mips_gprel16.s
+++ b/test/MC/Mips/mips_gprel16.s
@@ -6,6 +6,9 @@
// RUN: llvm-mc -mcpu=mips32r2 -triple=mipsel-pc-linux -filetype=obj -relocation-model=static %s -o - \
// RUN: | llvm-objdump -disassemble -mattr +mips32r2 - \
// RUN: | FileCheck %s
+// RUN: llvm-mc -mcpu=mips32r2 -triple=mips-pc-linux -filetype=obj -relocation-model=static %s -o - \
+// RUN: | llvm-objdump -disassemble -mattr +mips32r2 - \
+// RUN: | FileCheck %s
.text
.abicalls
diff --git a/test/MC/Mips/msa/test_2r.s b/test/MC/Mips/msa/test_2r.s
index b657d5f..01bea64 100644
--- a/test/MC/Mips/msa/test_2r.s
+++ b/test/MC/Mips/msa/test_2r.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: fill.b $w30, $9 # encoding: [0x7b,0x00,0x4f,0x9e]
# CHECK: fill.h $w31, $23 # encoding: [0x7b,0x01,0xbf,0xde]
# CHECK: fill.w $w16, $24 # encoding: [0x7b,0x02,0xc4,0x1e]
@@ -20,22 +16,6 @@
# CHECK: pcnt.w $w23, $w9 # encoding: [0x7b,0x06,0x4d,0xde]
# CHECK: pcnt.d $w21, $w24 # encoding: [0x7b,0x07,0xc5,0x5e]
-# CHECKOBJDUMP: fill.b $w30, $9
-# CHECKOBJDUMP: fill.h $w31, $23
-# CHECKOBJDUMP: fill.w $w16, $24
-# CHECKOBJDUMP: nloc.b $w21, $w0
-# CHECKOBJDUMP: nloc.h $w18, $w31
-# CHECKOBJDUMP: nloc.w $w2, $w23
-# CHECKOBJDUMP: nloc.d $w4, $w10
-# CHECKOBJDUMP: nlzc.b $w31, $w2
-# CHECKOBJDUMP: nlzc.h $w27, $w22
-# CHECKOBJDUMP: nlzc.w $w10, $w29
-# CHECKOBJDUMP: nlzc.d $w25, $w9
-# CHECKOBJDUMP: pcnt.b $w20, $w18
-# CHECKOBJDUMP: pcnt.h $w0, $w8
-# CHECKOBJDUMP: pcnt.w $w23, $w9
-# CHECKOBJDUMP: pcnt.d $w21, $w24
-
fill.b $w30, $9
fill.h $w31, $23
fill.w $w16, $24
diff --git a/test/MC/Mips/msa/test_2r_msa64.s b/test/MC/Mips/msa/test_2r_msa64.s
index 743fb88..f6e35c4 100644
--- a/test/MC/Mips/msa/test_2r_msa64.s
+++ b/test/MC/Mips/msa/test_2r_msa64.s
@@ -1,11 +1,5 @@
# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips64 -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: fill.d $w27, $9 # encoding: [0x7b,0x03,0x4e,0xde]
-# CHECKOBJDUMP: fill.d $w27, $9
-
fill.d $w27, $9
diff --git a/test/MC/Mips/msa/test_2rf.s b/test/MC/Mips/msa/test_2rf.s
index 284a7d9..5d41545 100644
--- a/test/MC/Mips/msa/test_2rf.s
+++ b/test/MC/Mips/msa/test_2rf.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: fclass.w $w26, $w12 # encoding: [0x7b,0x20,0x66,0x9e]
# CHECK: fclass.d $w24, $w17 # encoding: [0x7b,0x21,0x8e,0x1e]
# CHECK: fexupl.w $w8, $w0 # encoding: [0x7b,0x30,0x02,0x1e]
@@ -37,39 +33,6 @@
# CHECK: ftrunc_u.w $w17, $w15 # encoding: [0x7b,0x24,0x7c,0x5e]
# CHECK: ftrunc_u.d $w5, $w27 # encoding: [0x7b,0x25,0xd9,0x5e]
-# CHECKOBJDUMP: fclass.w $w26, $w12
-# CHECKOBJDUMP: fclass.d $w24, $w17
-# CHECKOBJDUMP: fexupl.w $w8, $w0
-# CHECKOBJDUMP: fexupl.d $w17, $w29
-# CHECKOBJDUMP: fexupr.w $w13, $w4
-# CHECKOBJDUMP: fexupr.d $w5, $w2
-# CHECKOBJDUMP: ffint_s.w $w20, $w29
-# CHECKOBJDUMP: ffint_s.d $w12, $w15
-# CHECKOBJDUMP: ffint_u.w $w7, $w27
-# CHECKOBJDUMP: ffint_u.d $w19, $w16
-# CHECKOBJDUMP: ffql.w $w31, $w13
-# CHECKOBJDUMP: ffql.d $w12, $w13
-# CHECKOBJDUMP: ffqr.w $w27, $w30
-# CHECKOBJDUMP: ffqr.d $w30, $w15
-# CHECKOBJDUMP: flog2.w $w25, $w31
-# CHECKOBJDUMP: flog2.d $w18, $w10
-# CHECKOBJDUMP: frint.w $w7, $w15
-# CHECKOBJDUMP: frint.d $w21, $w22
-# CHECKOBJDUMP: frcp.w $w19, $w0
-# CHECKOBJDUMP: frcp.d $w4, $w14
-# CHECKOBJDUMP: frsqrt.w $w12, $w17
-# CHECKOBJDUMP: frsqrt.d $w23, $w11
-# CHECKOBJDUMP: fsqrt.w $w0, $w11
-# CHECKOBJDUMP: fsqrt.d $w15, $w12
-# CHECKOBJDUMP: ftint_s.w $w30, $w5
-# CHECKOBJDUMP: ftint_s.d $w5, $w23
-# CHECKOBJDUMP: ftint_u.w $w20, $w14
-# CHECKOBJDUMP: ftint_u.d $w23, $w21
-# CHECKOBJDUMP: ftrunc_s.w $w29, $w17
-# CHECKOBJDUMP: ftrunc_s.d $w12, $w27
-# CHECKOBJDUMP: ftrunc_u.w $w17, $w15
-# CHECKOBJDUMP: ftrunc_u.d $w5, $w27
-
fclass.w $w26, $w12
fclass.d $w24, $w17
fexupl.w $w8, $w0
diff --git a/test/MC/Mips/msa/test_3r.s b/test/MC/Mips/msa/test_3r.s
index d6b33f1..df2e1e1 100644
--- a/test/MC/Mips/msa/test_3r.s
+++ b/test/MC/Mips/msa/test_3r.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: add_a.b $w26, $w9, $w4 # encoding: [0x78,0x04,0x4e,0x90]
# CHECK: add_a.h $w23, $w27, $w31 # encoding: [0x78,0x3f,0xdd,0xd0]
# CHECK: add_a.w $w11, $w6, $w22 # encoding: [0x78,0x56,0x32,0xd0]
@@ -247,249 +243,6 @@
# CHECK: vshf.w $w16, $w30, $w25 # encoding: [0x78,0x59,0xf4,0x15]
# CHECK: vshf.d $w19, $w11, $w15 # encoding: [0x78,0x6f,0x5c,0xd5]
-# CHECKOBJDUMP: add_a.b $w26, $w9, $w4
-# CHECKOBJDUMP: add_a.h $w23, $w27, $w31
-# CHECKOBJDUMP: add_a.w $w11, $w6, $w22
-# CHECKOBJDUMP: add_a.d $w6, $w10, $w0
-# CHECKOBJDUMP: adds_a.b $w19, $w24, $w19
-# CHECKOBJDUMP: adds_a.h $w25, $w6, $w4
-# CHECKOBJDUMP: adds_a.w $w25, $w17, $w27
-# CHECKOBJDUMP: adds_a.d $w15, $w18, $w26
-# CHECKOBJDUMP: adds_s.b $w29, $w11, $w19
-# CHECKOBJDUMP: adds_s.h $w5, $w23, $w26
-# CHECKOBJDUMP: adds_s.w $w16, $w14, $w13
-# CHECKOBJDUMP: adds_s.d $w2, $w14, $w28
-# CHECKOBJDUMP: adds_u.b $w3, $w17, $w14
-# CHECKOBJDUMP: adds_u.h $w10, $w30, $w4
-# CHECKOBJDUMP: adds_u.w $w15, $w18, $w20
-# CHECKOBJDUMP: adds_u.d $w30, $w10, $w9
-# CHECKOBJDUMP: addv.b $w24, $w20, $w21
-# CHECKOBJDUMP: addv.h $w4, $w13, $w27
-# CHECKOBJDUMP: addv.w $w19, $w11, $w14
-# CHECKOBJDUMP: addv.d $w2, $w21, $w31
-# CHECKOBJDUMP: asub_s.b $w23, $w16, $w3
-# CHECKOBJDUMP: asub_s.h $w22, $w17, $w25
-# CHECKOBJDUMP: asub_s.w $w24, $w1, $w9
-# CHECKOBJDUMP: asub_s.d $w13, $w12, $w12
-# CHECKOBJDUMP: asub_u.b $w10, $w29, $w11
-# CHECKOBJDUMP: asub_u.h $w18, $w9, $w15
-# CHECKOBJDUMP: asub_u.w $w10, $w19, $w31
-# CHECKOBJDUMP: asub_u.d $w17, $w10, $w0
-# CHECKOBJDUMP: ave_s.b $w2, $w5, $w1
-# CHECKOBJDUMP: ave_s.h $w16, $w19, $w9
-# CHECKOBJDUMP: ave_s.w $w17, $w31, $w5
-# CHECKOBJDUMP: ave_s.d $w27, $w25, $w10
-# CHECKOBJDUMP: ave_u.b $w16, $w19, $w9
-# CHECKOBJDUMP: ave_u.h $w28, $w28, $w11
-# CHECKOBJDUMP: ave_u.w $w11, $w12, $w11
-# CHECKOBJDUMP: ave_u.d $w30, $w19, $w28
-# CHECKOBJDUMP: aver_s.b $w26, $w16, $w2
-# CHECKOBJDUMP: aver_s.h $w31, $w27, $w27
-# CHECKOBJDUMP: aver_s.w $w28, $w18, $w25
-# CHECKOBJDUMP: aver_s.d $w29, $w21, $w27
-# CHECKOBJDUMP: aver_u.b $w29, $w26, $w3
-# CHECKOBJDUMP: aver_u.h $w18, $w18, $w9
-# CHECKOBJDUMP: aver_u.w $w17, $w25, $w29
-# CHECKOBJDUMP: aver_u.d $w22, $w22, $w19
-# CHECKOBJDUMP: bclr.b $w2, $w15, $w29
-# CHECKOBJDUMP: bclr.h $w16, $w21, $w28
-# CHECKOBJDUMP: bclr.w $w19, $w2, $w9
-# CHECKOBJDUMP: bclr.d $w27, $w31, $w4
-# CHECKOBJDUMP: binsl.b $w5, $w16, $w24
-# CHECKOBJDUMP: binsl.h $w30, $w5, $w10
-# CHECKOBJDUMP: binsl.w $w14, $w15, $w13
-# CHECKOBJDUMP: binsl.d $w23, $w20, $w12
-# CHECKOBJDUMP: binsr.b $w22, $w11, $w2
-# CHECKOBJDUMP: binsr.h $w0, $w26, $w6
-# CHECKOBJDUMP: binsr.w $w26, $w3, $w28
-# CHECKOBJDUMP: binsr.d $w0, $w0, $w21
-# CHECKOBJDUMP: bneg.b $w0, $w11, $w24
-# CHECKOBJDUMP: bneg.h $w28, $w16, $w4
-# CHECKOBJDUMP: bneg.w $w3, $w26, $w19
-# CHECKOBJDUMP: bneg.d $w13, $w29, $w15
-# CHECKOBJDUMP: bset.b $w31, $w5, $w31
-# CHECKOBJDUMP: bset.h $w14, $w12, $w6
-# CHECKOBJDUMP: bset.w $w31, $w9, $w12
-# CHECKOBJDUMP: bset.d $w5, $w22, $w5
-# CHECKOBJDUMP: ceq.b $w31, $w31, $w18
-# CHECKOBJDUMP: ceq.h $w10, $w27, $w9
-# CHECKOBJDUMP: ceq.w $w9, $w5, $w14
-# CHECKOBJDUMP: ceq.d $w5, $w17, $w0
-# CHECKOBJDUMP: cle_s.b $w23, $w4, $w9
-# CHECKOBJDUMP: cle_s.h $w22, $w27, $w19
-# CHECKOBJDUMP: cle_s.w $w30, $w26, $w10
-# CHECKOBJDUMP: cle_s.d $w18, $w5, $w10
-# CHECKOBJDUMP: cle_u.b $w1, $w25, $w0
-# CHECKOBJDUMP: cle_u.h $w7, $w0, $w29
-# CHECKOBJDUMP: cle_u.w $w25, $w18, $w1
-# CHECKOBJDUMP: cle_u.d $w6, $w0, $w30
-# CHECKOBJDUMP: clt_s.b $w25, $w2, $w21
-# CHECKOBJDUMP: clt_s.h $w2, $w19, $w9
-# CHECKOBJDUMP: clt_s.w $w23, $w8, $w16
-# CHECKOBJDUMP: clt_s.d $w7, $w30, $w12
-# CHECKOBJDUMP: clt_u.b $w2, $w31, $w13
-# CHECKOBJDUMP: clt_u.h $w16, $w31, $w23
-# CHECKOBJDUMP: clt_u.w $w3, $w24, $w9
-# CHECKOBJDUMP: clt_u.d $w7, $w0, $w1
-# CHECKOBJDUMP: div_s.b $w29, $w3, $w18
-# CHECKOBJDUMP: div_s.h $w17, $w16, $w13
-# CHECKOBJDUMP: div_s.w $w4, $w25, $w30
-# CHECKOBJDUMP: div_s.d $w31, $w9, $w20
-# CHECKOBJDUMP: div_u.b $w6, $w29, $w10
-# CHECKOBJDUMP: div_u.h $w24, $w21, $w14
-# CHECKOBJDUMP: div_u.w $w29, $w14, $w25
-# CHECKOBJDUMP: div_u.d $w31, $w1, $w21
-# CHECKOBJDUMP: dotp_s.h $w23, $w22, $w25
-# CHECKOBJDUMP: dotp_s.w $w20, $w14, $w5
-# CHECKOBJDUMP: dotp_s.d $w17, $w2, $w22
-# CHECKOBJDUMP: dotp_u.h $w13, $w2, $w6
-# CHECKOBJDUMP: dotp_u.w $w15, $w22, $w21
-# CHECKOBJDUMP: dotp_u.d $w4, $w16, $w26
-# CHECKOBJDUMP: dpadd_s.h $w1, $w28, $w22
-# CHECKOBJDUMP: dpadd_s.w $w10, $w1, $w12
-# CHECKOBJDUMP: dpadd_s.d $w3, $w21, $w27
-# CHECKOBJDUMP: dpadd_u.h $w17, $w5, $w20
-# CHECKOBJDUMP: dpadd_u.w $w24, $w8, $w16
-# CHECKOBJDUMP: dpadd_u.d $w15, $w29, $w16
-# CHECKOBJDUMP: dpsub_s.h $w4, $w11, $w12
-# CHECKOBJDUMP: dpsub_s.w $w4, $w7, $w6
-# CHECKOBJDUMP: dpsub_s.d $w31, $w12, $w28
-# CHECKOBJDUMP: dpsub_u.h $w4, $w25, $w17
-# CHECKOBJDUMP: dpsub_u.w $w19, $w25, $w16
-# CHECKOBJDUMP: dpsub_u.d $w7, $w10, $w26
-# CHECKOBJDUMP: hadd_s.h $w28, $w24, $w2
-# CHECKOBJDUMP: hadd_s.w $w24, $w17, $w11
-# CHECKOBJDUMP: hadd_s.d $w17, $w15, $w20
-# CHECKOBJDUMP: hadd_u.h $w12, $w29, $w17
-# CHECKOBJDUMP: hadd_u.w $w9, $w5, $w6
-# CHECKOBJDUMP: hadd_u.d $w1, $w20, $w6
-# CHECKOBJDUMP: hsub_s.h $w16, $w14, $w29
-# CHECKOBJDUMP: hsub_s.w $w9, $w13, $w11
-# CHECKOBJDUMP: hsub_s.d $w30, $w18, $w14
-# CHECKOBJDUMP: hsub_u.h $w7, $w12, $w14
-# CHECKOBJDUMP: hsub_u.w $w21, $w5, $w5
-# CHECKOBJDUMP: hsub_u.d $w11, $w12, $w31
-# CHECKOBJDUMP: ilvev.b $w18, $w16, $w30
-# CHECKOBJDUMP: ilvev.h $w14, $w0, $w13
-# CHECKOBJDUMP: ilvev.w $w12, $w25, $w22
-# CHECKOBJDUMP: ilvev.d $w30, $w27, $w3
-# CHECKOBJDUMP: ilvl.b $w29, $w3, $w21
-# CHECKOBJDUMP: ilvl.h $w27, $w10, $w17
-# CHECKOBJDUMP: ilvl.w $w6, $w1, $w0
-# CHECKOBJDUMP: ilvl.d $w3, $w16, $w24
-# CHECKOBJDUMP: ilvod.b $w11, $w5, $w20
-# CHECKOBJDUMP: ilvod.h $w18, $w13, $w31
-# CHECKOBJDUMP: ilvod.w $w29, $w16, $w24
-# CHECKOBJDUMP: ilvod.d $w22, $w12, $w29
-# CHECKOBJDUMP: ilvr.b $w4, $w30, $w6
-# CHECKOBJDUMP: ilvr.h $w28, $w19, $w29
-# CHECKOBJDUMP: ilvr.w $w18, $w20, $w21
-# CHECKOBJDUMP: ilvr.d $w23, $w30, $w12
-# CHECKOBJDUMP: maddv.b $w17, $w31, $w29
-# CHECKOBJDUMP: maddv.h $w7, $w24, $w9
-# CHECKOBJDUMP: maddv.w $w22, $w22, $w20
-# CHECKOBJDUMP: maddv.d $w30, $w26, $w20
-# CHECKOBJDUMP: max_a.b $w23, $w11, $w23
-# CHECKOBJDUMP: max_a.h $w20, $w5, $w30
-# CHECKOBJDUMP: max_a.w $w7, $w18, $w30
-# CHECKOBJDUMP: max_a.d $w8, $w8, $w31
-# CHECKOBJDUMP: max_s.b $w10, $w1, $w19
-# CHECKOBJDUMP: max_s.h $w15, $w29, $w17
-# CHECKOBJDUMP: max_s.w $w15, $w29, $w14
-# CHECKOBJDUMP: max_s.d $w25, $w24, $w3
-# CHECKOBJDUMP: max_u.b $w12, $w24, $w5
-# CHECKOBJDUMP: max_u.h $w5, $w6, $w7
-# CHECKOBJDUMP: max_u.w $w16, $w4, $w7
-# CHECKOBJDUMP: max_u.d $w26, $w12, $w24
-# CHECKOBJDUMP: min_a.b $w4, $w26, $w1
-# CHECKOBJDUMP: min_a.h $w12, $w13, $w31
-# CHECKOBJDUMP: min_a.w $w28, $w20, $w0
-# CHECKOBJDUMP: min_a.d $w12, $w20, $w19
-# CHECKOBJDUMP: min_s.b $w19, $w3, $w14
-# CHECKOBJDUMP: min_s.h $w27, $w21, $w8
-# CHECKOBJDUMP: min_s.w $w0, $w14, $w30
-# CHECKOBJDUMP: min_s.d $w6, $w8, $w21
-# CHECKOBJDUMP: min_u.b $w22, $w26, $w8
-# CHECKOBJDUMP: min_u.h $w7, $w27, $w12
-# CHECKOBJDUMP: min_u.w $w8, $w20, $w14
-# CHECKOBJDUMP: min_u.d $w26, $w14, $w15
-# CHECKOBJDUMP: mod_s.b $w18, $w1, $w26
-# CHECKOBJDUMP: mod_s.h $w31, $w30, $w28
-# CHECKOBJDUMP: mod_s.w $w2, $w6, $w13
-# CHECKOBJDUMP: mod_s.d $w21, $w27, $w22
-# CHECKOBJDUMP: mod_u.b $w16, $w7, $w13
-# CHECKOBJDUMP: mod_u.h $w24, $w8, $w7
-# CHECKOBJDUMP: mod_u.w $w30, $w2, $w17
-# CHECKOBJDUMP: mod_u.d $w31, $w2, $w25
-# CHECKOBJDUMP: msubv.b $w14, $w5, $w12
-# CHECKOBJDUMP: msubv.h $w6, $w7, $w30
-# CHECKOBJDUMP: msubv.w $w13, $w2, $w21
-# CHECKOBJDUMP: msubv.d $w16, $w14, $w27
-# CHECKOBJDUMP: mulv.b $w20, $w3, $w13
-# CHECKOBJDUMP: mulv.h $w27, $w26, $w14
-# CHECKOBJDUMP: mulv.w $w10, $w29, $w3
-# CHECKOBJDUMP: mulv.d $w7, $w19, $w29
-# CHECKOBJDUMP: pckev.b $w5, $w27, $w7
-# CHECKOBJDUMP: pckev.h $w1, $w4, $w27
-# CHECKOBJDUMP: pckev.w $w30, $w20, $w0
-# CHECKOBJDUMP: pckev.d $w6, $w1, $w15
-# CHECKOBJDUMP: pckod.b $w18, $w28, $w30
-# CHECKOBJDUMP: pckod.h $w26, $w5, $w8
-# CHECKOBJDUMP: pckod.w $w9, $w4, $w2
-# CHECKOBJDUMP: pckod.d $w30, $w22, $w20
-# CHECKOBJDUMP: sld.b $w5, $w23[$12]
-# CHECKOBJDUMP: sld.h $w1, $w23[$3]
-# CHECKOBJDUMP: sld.w $w20, $w8[$9]
-# CHECKOBJDUMP: sld.d $w7, $w23[$fp]
-# CHECKOBJDUMP: sll.b $w3, $w0, $w17
-# CHECKOBJDUMP: sll.h $w17, $w27, $w3
-# CHECKOBJDUMP: sll.w $w16, $w7, $w6
-# CHECKOBJDUMP: sll.d $w9, $w0, $w26
-# CHECKOBJDUMP: splat.b $w28, $w1[$1]
-# CHECKOBJDUMP: splat.h $w2, $w11[$11]
-# CHECKOBJDUMP: splat.w $w22, $w0[$11]
-# CHECKOBJDUMP: splat.d $w0, $w0[$2]
-# CHECKOBJDUMP: sra.b $w28, $w4, $w17
-# CHECKOBJDUMP: sra.h $w13, $w9, $w3
-# CHECKOBJDUMP: sra.w $w27, $w21, $w19
-# CHECKOBJDUMP: sra.d $w30, $w8, $w23
-# CHECKOBJDUMP: srar.b $w19, $w18, $w18
-# CHECKOBJDUMP: srar.h $w7, $w23, $w8
-# CHECKOBJDUMP: srar.w $w1, $w12, $w2
-# CHECKOBJDUMP: srar.d $w21, $w7, $w14
-# CHECKOBJDUMP: srl.b $w12, $w3, $w19
-# CHECKOBJDUMP: srl.h $w23, $w31, $w20
-# CHECKOBJDUMP: srl.w $w18, $w27, $w11
-# CHECKOBJDUMP: srl.d $w3, $w12, $w26
-# CHECKOBJDUMP: srlr.b $w15, $w21, $w11
-# CHECKOBJDUMP: srlr.h $w21, $w13, $w19
-# CHECKOBJDUMP: srlr.w $w6, $w30, $w3
-# CHECKOBJDUMP: srlr.d $w1, $w2, $w14
-# CHECKOBJDUMP: subs_s.b $w25, $w15, $w1
-# CHECKOBJDUMP: subs_s.h $w28, $w25, $w22
-# CHECKOBJDUMP: subs_s.w $w10, $w12, $w21
-# CHECKOBJDUMP: subs_s.d $w4, $w20, $w18
-# CHECKOBJDUMP: subs_u.b $w21, $w6, $w25
-# CHECKOBJDUMP: subs_u.h $w3, $w10, $w7
-# CHECKOBJDUMP: subs_u.w $w9, $w15, $w10
-# CHECKOBJDUMP: subs_u.d $w7, $w19, $w10
-# CHECKOBJDUMP: subsus_u.b $w6, $w7, $w12
-# CHECKOBJDUMP: subsus_u.h $w6, $w29, $w19
-# CHECKOBJDUMP: subsus_u.w $w7, $w15, $w7
-# CHECKOBJDUMP: subsus_u.d $w9, $w3, $w15
-# CHECKOBJDUMP: subsuu_s.b $w22, $w3, $w31
-# CHECKOBJDUMP: subsuu_s.h $w19, $w23, $w22
-# CHECKOBJDUMP: subsuu_s.w $w9, $w10, $w13
-# CHECKOBJDUMP: subsuu_s.d $w5, $w6, $w0
-# CHECKOBJDUMP: subv.b $w6, $w13, $w19
-# CHECKOBJDUMP: subv.h $w4, $w25, $w12
-# CHECKOBJDUMP: subv.w $w27, $w27, $w11
-# CHECKOBJDUMP: subv.d $w9, $w24, $w10
-# CHECKOBJDUMP: vshf.b $w3, $w16, $w5
-# CHECKOBJDUMP: vshf.h $w20, $w19, $w8
-# CHECKOBJDUMP: vshf.w $w16, $w30, $w25
-# CHECKOBJDUMP: vshf.d $w19, $w11, $w15
-
add_a.b $w26, $w9, $w4
add_a.h $w23, $w27, $w31
add_a.w $w11, $w6, $w22
diff --git a/test/MC/Mips/msa/test_3rf.s b/test/MC/Mips/msa/test_3rf.s
index 6787d85..c5896d7 100644
--- a/test/MC/Mips/msa/test_3rf.s
+++ b/test/MC/Mips/msa/test_3rf.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: fadd.w $w28, $w19, $w28 # encoding: [0x78,0x1c,0x9f,0x1b]
# CHECK: fadd.d $w13, $w2, $w29 # encoding: [0x78,0x3d,0x13,0x5b]
# CHECK: fcaf.w $w14, $w11, $w25 # encoding: [0x78,0x19,0x5b,0x9a]
@@ -87,89 +83,6 @@
# CHECK: mulr_q.h $w6, $w20, $w19 # encoding: [0x7b,0x13,0xa1,0x9c]
# CHECK: mulr_q.w $w27, $w1, $w20 # encoding: [0x7b,0x34,0x0e,0xdc]
-# CHECKOBJDUMP: fadd.w $w28, $w19, $w28
-# CHECKOBJDUMP: fadd.d $w13, $w2, $w29
-# CHECKOBJDUMP: fcaf.w $w14, $w11, $w25
-# CHECKOBJDUMP: fcaf.d $w1, $w1, $w19
-# CHECKOBJDUMP: fceq.w $w1, $w23, $w16
-# CHECKOBJDUMP: fceq.d $w0, $w8, $w16
-# CHECKOBJDUMP: fcle.w $w16, $w9, $w24
-# CHECKOBJDUMP: fcle.d $w27, $w14, $w1
-# CHECKOBJDUMP: fclt.w $w28, $w8, $w8
-# CHECKOBJDUMP: fclt.d $w30, $w25, $w11
-# CHECKOBJDUMP: fcne.w $w2, $w18, $w23
-# CHECKOBJDUMP: fcne.d $w14, $w20, $w15
-# CHECKOBJDUMP: fcor.w $w10, $w18, $w25
-# CHECKOBJDUMP: fcor.d $w17, $w25, $w11
-# CHECKOBJDUMP: fcueq.w $w14, $w2, $w21
-# CHECKOBJDUMP: fcueq.d $w29, $w3, $w7
-# CHECKOBJDUMP: fcule.w $w17, $w5, $w3
-# CHECKOBJDUMP: fcule.d $w31, $w1, $w30
-# CHECKOBJDUMP: fcult.w $w6, $w25, $w9
-# CHECKOBJDUMP: fcult.d $w27, $w8, $w17
-# CHECKOBJDUMP: fcun.w $w4, $w20, $w8
-# CHECKOBJDUMP: fcun.d $w29, $w11, $w3
-# CHECKOBJDUMP: fcune.w $w13, $w18, $w19
-# CHECKOBJDUMP: fcune.d $w16, $w26, $w21
-# CHECKOBJDUMP: fdiv.w $w13, $w24, $w2
-# CHECKOBJDUMP: fdiv.d $w19, $w4, $w25
-# CHECKOBJDUMP: fexdo.h $w8, $w0, $w16
-# CHECKOBJDUMP: fexdo.w $w0, $w13, $w27
-# CHECKOBJDUMP: fexp2.w $w17, $w0, $w3
-# CHECKOBJDUMP: fexp2.d $w22, $w0, $w10
-# CHECKOBJDUMP: fmadd.w $w29, $w6, $w23
-# CHECKOBJDUMP: fmadd.d $w11, $w28, $w21
-# CHECKOBJDUMP: fmax.w $w0, $w23, $w13
-# CHECKOBJDUMP: fmax.d $w26, $w18, $w8
-# CHECKOBJDUMP: fmax_a.w $w10, $w16, $w10
-# CHECKOBJDUMP: fmax_a.d $w30, $w9, $w22
-# CHECKOBJDUMP: fmin.w $w24, $w1, $w30
-# CHECKOBJDUMP: fmin.d $w27, $w27, $w10
-# CHECKOBJDUMP: fmin_a.w $w10, $w29, $w20
-# CHECKOBJDUMP: fmin_a.d $w13, $w30, $w24
-# CHECKOBJDUMP: fmsub.w $w17, $w25, $w0
-# CHECKOBJDUMP: fmsub.d $w8, $w18, $w16
-# CHECKOBJDUMP: fmul.w $w3, $w15, $w15
-# CHECKOBJDUMP: fmul.d $w9, $w30, $w10
-# CHECKOBJDUMP: fsaf.w $w25, $w5, $w10
-# CHECKOBJDUMP: fsaf.d $w25, $w3, $w29
-# CHECKOBJDUMP: fseq.w $w11, $w17, $w13
-# CHECKOBJDUMP: fseq.d $w29, $w0, $w31
-# CHECKOBJDUMP: fsle.w $w30, $w31, $w31
-# CHECKOBJDUMP: fsle.d $w18, $w23, $w24
-# CHECKOBJDUMP: fslt.w $w12, $w5, $w6
-# CHECKOBJDUMP: fslt.d $w16, $w26, $w21
-# CHECKOBJDUMP: fsne.w $w30, $w1, $w12
-# CHECKOBJDUMP: fsne.d $w14, $w13, $w23
-# CHECKOBJDUMP: fsor.w $w27, $w13, $w27
-# CHECKOBJDUMP: fsor.d $w12, $w24, $w11
-# CHECKOBJDUMP: fsub.w $w31, $w26, $w1
-# CHECKOBJDUMP: fsub.d $w19, $w17, $w27
-# CHECKOBJDUMP: fsueq.w $w16, $w24, $w25
-# CHECKOBJDUMP: fsueq.d $w18, $w14, $w14
-# CHECKOBJDUMP: fsule.w $w23, $w30, $w13
-# CHECKOBJDUMP: fsule.d $w2, $w11, $w26
-# CHECKOBJDUMP: fsult.w $w11, $w26, $w22
-# CHECKOBJDUMP: fsult.d $w6, $w23, $w30
-# CHECKOBJDUMP: fsun.w $w3, $w18, $w28
-# CHECKOBJDUMP: fsun.d $w18, $w11, $w19
-# CHECKOBJDUMP: fsune.w $w16, $w31, $w2
-# CHECKOBJDUMP: fsune.d $w3, $w26, $w17
-# CHECKOBJDUMP: ftq.h $w16, $w4, $w24
-# CHECKOBJDUMP: ftq.w $w5, $w5, $w25
-# CHECKOBJDUMP: madd_q.h $w16, $w20, $w10
-# CHECKOBJDUMP: madd_q.w $w28, $w2, $w9
-# CHECKOBJDUMP: maddr_q.h $w8, $w18, $w9
-# CHECKOBJDUMP: maddr_q.w $w29, $w12, $w16
-# CHECKOBJDUMP: msub_q.h $w24, $w26, $w10
-# CHECKOBJDUMP: msub_q.w $w13, $w30, $w28
-# CHECKOBJDUMP: msubr_q.h $w12, $w21, $w11
-# CHECKOBJDUMP: msubr_q.w $w1, $w14, $w20
-# CHECKOBJDUMP: mul_q.h $w6, $w16, $w30
-# CHECKOBJDUMP: mul_q.w $w16, $w1, $w4
-# CHECKOBJDUMP: mulr_q.h $w6, $w20, $w19
-# CHECKOBJDUMP: mulr_q.w $w27, $w1, $w20
-
fadd.w $w28, $w19, $w28
fadd.d $w13, $w2, $w29
fcaf.w $w14, $w11, $w25
diff --git a/test/MC/Mips/msa/test_bit.s b/test/MC/Mips/msa/test_bit.s
index 2e5a6a5..85ebe54 100644
--- a/test/MC/Mips/msa/test_bit.s
+++ b/test/MC/Mips/msa/test_bit.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: bclri.b $w21, $w30, 2 # encoding: [0x79,0xf2,0xf5,0x49]
# CHECK: bclri.h $w24, $w21, 0 # encoding: [0x79,0xe0,0xae,0x09]
# CHECK: bclri.w $w23, $w30, 3 # encoding: [0x79,0xc3,0xf5,0xc9]
@@ -53,55 +49,6 @@
# CHECK: srlri.w $w11, $w22, 2 # encoding: [0x79,0xc2,0xb2,0xca]
# CHECK: srlri.d $w24, $w10, 6 # encoding: [0x79,0x86,0x56,0x0a]
-# CHECKOBJDUMP: bclri.b $w21, $w30, 2
-# CHECKOBJDUMP: bclri.h $w24, $w21, 0
-# CHECKOBJDUMP: bclri.w $w23, $w30, 3
-# CHECKOBJDUMP: bclri.d $w9, $w11, 0
-# CHECKOBJDUMP: binsli.b $w25, $w12, 1
-# CHECKOBJDUMP: binsli.h $w21, $w22, 0
-# CHECKOBJDUMP: binsli.w $w22, $w4, 0
-# CHECKOBJDUMP: binsli.d $w6, $w2, 6
-# CHECKOBJDUMP: binsri.b $w15, $w19, 0
-# CHECKOBJDUMP: binsri.h $w8, $w30, 1
-# CHECKOBJDUMP: binsri.w $w2, $w19, 5
-# CHECKOBJDUMP: binsri.d $w18, $w20, 1
-# CHECKOBJDUMP: bnegi.b $w24, $w19, 0
-# CHECKOBJDUMP: bnegi.h $w28, $w11, 3
-# CHECKOBJDUMP: bnegi.w $w1, $w27, 5
-# CHECKOBJDUMP: bnegi.d $w4, $w21, 1
-# CHECKOBJDUMP: bseti.b $w18, $w8, 0
-# CHECKOBJDUMP: bseti.h $w24, $w14, 2
-# CHECKOBJDUMP: bseti.w $w9, $w18, 4
-# CHECKOBJDUMP: bseti.d $w7, $w15, 1
-# CHECKOBJDUMP: sat_s.b $w31, $w31, 2
-# CHECKOBJDUMP: sat_s.h $w19, $w19, 0
-# CHECKOBJDUMP: sat_s.w $w19, $w29, 0
-# CHECKOBJDUMP: sat_s.d $w11, $w22, 0
-# CHECKOBJDUMP: sat_u.b $w1, $w13, 3
-# CHECKOBJDUMP: sat_u.h $w30, $w24, 4
-# CHECKOBJDUMP: sat_u.w $w31, $w13, 0
-# CHECKOBJDUMP: sat_u.d $w29, $w16, 5
-# CHECKOBJDUMP: slli.b $w23, $w10, 1
-# CHECKOBJDUMP: slli.h $w9, $w18, 1
-# CHECKOBJDUMP: slli.w $w11, $w29, 4
-# CHECKOBJDUMP: slli.d $w25, $w20, 1
-# CHECKOBJDUMP: srai.b $w24, $w29, 1
-# CHECKOBJDUMP: srai.h $w1, $w6, 0
-# CHECKOBJDUMP: srai.w $w7, $w26, 1
-# CHECKOBJDUMP: srai.d $w20, $w25, 3
-# CHECKOBJDUMP: srari.b $w5, $w25, 0
-# CHECKOBJDUMP: srari.h $w7, $w6, 4
-# CHECKOBJDUMP: srari.w $w17, $w11, 5
-# CHECKOBJDUMP: srari.d $w21, $w25, 5
-# CHECKOBJDUMP: srli.b $w2, $w0, 2
-# CHECKOBJDUMP: srli.h $w31, $w31, 2
-# CHECKOBJDUMP: srli.w $w5, $w9, 4
-# CHECKOBJDUMP: srli.d $w27, $w26, 5
-# CHECKOBJDUMP: srlri.b $w18, $w3, 0
-# CHECKOBJDUMP: srlri.h $w1, $w2, 3
-# CHECKOBJDUMP: srlri.w $w11, $w22, 2
-# CHECKOBJDUMP: srlri.d $w24, $w10, 6
-
bclri.b $w21, $w30, 2
bclri.h $w24, $w21, 0
bclri.w $w23, $w30, 3
diff --git a/test/MC/Mips/msa/test_cbranch.s b/test/MC/Mips/msa/test_cbranch.s
index 37b8872..aa6779b 100644
--- a/test/MC/Mips/msa/test_cbranch.s
+++ b/test/MC/Mips/msa/test_cbranch.s
@@ -7,22 +7,22 @@
#CHECK: bnz.w $w2, 128 # encoding: [0x47,0xc2,0x00,0x20]
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
#CHECK: bnz.d $w3, -128 # encoding: [0x47,0xe3,0xff,0xe0]
-#CHECK: bnz.b $w0, SYMBOL0 # encoding: [0x47'A',0x80'A',0x00,0x00]
+#CHECK: bnz.b $w0, SYMBOL0 # encoding: [0x47,0x80,A,A]
# fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bnz.h $w1, SYMBOL1 # encoding: [0x47'A',0xa1'A',0x00,0x00]
+#CHECK: bnz.h $w1, SYMBOL1 # encoding: [0x47,0xa1,A,A]
# fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bnz.w $w2, SYMBOL2 # encoding: [0x47'A',0xc2'A',0x00,0x00]
+#CHECK: bnz.w $w2, SYMBOL2 # encoding: [0x47,0xc2,A,A]
# fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bnz.d $w3, SYMBOL3 # encoding: [0x47'A',0xe3'A',0x00,0x00]
+#CHECK: bnz.d $w3, SYMBOL3 # encoding: [0x47,0xe3,A,A]
# fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
#CHECK: bnz.v $w0, 4 # encoding: [0x45,0xe0,0x00,0x01]
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bnz.v $w0, SYMBOL0 # encoding: [0x45'A',0xe0'A',0x00,0x00]
+#CHECK: bnz.v $w0, SYMBOL0 # encoding: [0x45,0xe0,A,A]
# fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
@@ -34,22 +34,22 @@
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
#CHECK: bz.d $w3, -1024 # encoding: [0x47,0x63,0xff,0x00]
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bz.b $w0, SYMBOL0 # encoding: [0x47'A',A,0x00,0x00]
+#CHECK: bz.b $w0, SYMBOL0 # encoding: [0x47,0x00,A,A]
# fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bz.h $w1, SYMBOL1 # encoding: [0x47'A',0x21'A',0x00,0x00]
+#CHECK: bz.h $w1, SYMBOL1 # encoding: [0x47,0x21,A,A]
# fixup A - offset: 0, value: SYMBOL1, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bz.w $w2, SYMBOL2 # encoding: [0x47'A',0x42'A',0x00,0x00]
+#CHECK: bz.w $w2, SYMBOL2 # encoding: [0x47,0x42,A,A]
# fixup A - offset: 0, value: SYMBOL2, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bz.d $w3, SYMBOL3 # encoding: [0x47'A',0x63'A',0x00,0x00]
+#CHECK: bz.d $w3, SYMBOL3 # encoding: [0x47,0x63,A,A]
# fixup A - offset: 0, value: SYMBOL3, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
#CHECK: bz.v $w0, 4 # encoding: [0x45,0x60,0x00,0x01]
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-#CHECK: bz.v $w0, SYMBOL0 # encoding: [0x45'A',0x60'A',0x00,0x00]
+#CHECK: bz.v $w0, SYMBOL0 # encoding: [0x45,0x60,A,A]
# fixup A - offset: 0, value: SYMBOL0, kind: fixup_Mips_PC16
#CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
diff --git a/test/MC/Mips/msa/test_ctrlregs.s b/test/MC/Mips/msa/test_ctrlregs.s
index a014c03..3329072b 100644
--- a/test/MC/Mips/msa/test_ctrlregs.s
+++ b/test/MC/Mips/msa/test_ctrlregs.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
#CHECK: cfcmsa $1, $0 # encoding: [0x78,0x7e,0x00,0x59]
#CHECK: cfcmsa $1, $0 # encoding: [0x78,0x7e,0x00,0x59]
#CHECK: cfcmsa $2, $1 # encoding: [0x78,0x7e,0x08,0x99]
@@ -38,40 +34,6 @@
#CHECK: ctcmsa $7, $8 # encoding: [0x78,0x3e,0x41,0xd9]
#CHECK: ctcmsa $7, $8 # encoding: [0x78,0x3e,0x41,0xd9]
-#CHECKOBJDUMP: cfcmsa $1, $0
-#CHECKOBJDUMP: cfcmsa $1, $0
-#CHECKOBJDUMP: cfcmsa $2, $1
-#CHECKOBJDUMP: cfcmsa $2, $1
-#CHECKOBJDUMP: cfcmsa $3, $2
-#CHECKOBJDUMP: cfcmsa $3, $2
-#CHECKOBJDUMP: cfcmsa $4, $3
-#CHECKOBJDUMP: cfcmsa $4, $3
-#CHECKOBJDUMP: cfcmsa $5, $4
-#CHECKOBJDUMP: cfcmsa $5, $4
-#CHECKOBJDUMP: cfcmsa $6, $5
-#CHECKOBJDUMP: cfcmsa $6, $5
-#CHECKOBJDUMP: cfcmsa $7, $6
-#CHECKOBJDUMP: cfcmsa $7, $6
-#CHECKOBJDUMP: cfcmsa $8, $7
-#CHECKOBJDUMP: cfcmsa $8, $7
-
-#CHECKOBJDUMP: ctcmsa $0, $1
-#CHECKOBJDUMP: ctcmsa $0, $1
-#CHECKOBJDUMP: ctcmsa $1, $2
-#CHECKOBJDUMP: ctcmsa $1, $2
-#CHECKOBJDUMP: ctcmsa $2, $3
-#CHECKOBJDUMP: ctcmsa $2, $3
-#CHECKOBJDUMP: ctcmsa $3, $4
-#CHECKOBJDUMP: ctcmsa $3, $4
-#CHECKOBJDUMP: ctcmsa $4, $5
-#CHECKOBJDUMP: ctcmsa $4, $5
-#CHECKOBJDUMP: ctcmsa $5, $6
-#CHECKOBJDUMP: ctcmsa $5, $6
-#CHECKOBJDUMP: ctcmsa $6, $7
-#CHECKOBJDUMP: ctcmsa $6, $7
-#CHECKOBJDUMP: ctcmsa $7, $8
-#CHECKOBJDUMP: ctcmsa $7, $8
-
cfcmsa $1, $msair
cfcmsa $1, $0
cfcmsa $2, $msacsr
diff --git a/test/MC/Mips/msa/test_dlsa.s b/test/MC/Mips/msa/test_dlsa.s
index a70999d..5e14571 100644
--- a/test/MC/Mips/msa/test_dlsa.s
+++ b/test/MC/Mips/msa/test_dlsa.s
@@ -1,20 +1,11 @@
# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -show-encoding | \
# RUN: FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips64r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips64 -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: dlsa $8, $9, $10, 1 # encoding: [0x01,0x2a,0x40,0x15]
# CHECK: dlsa $8, $9, $10, 2 # encoding: [0x01,0x2a,0x40,0x55]
# CHECK: dlsa $8, $9, $10, 3 # encoding: [0x01,0x2a,0x40,0x95]
# CHECK: dlsa $8, $9, $10, 4 # encoding: [0x01,0x2a,0x40,0xd5]
-# CHECKOBJDUMP: dlsa $8, $9, $10, 1
-# CHECKOBJDUMP: dlsa $8, $9, $10, 2
-# CHECKOBJDUMP: dlsa $8, $9, $10, 3
-# CHECKOBJDUMP: dlsa $8, $9, $10, 4
-
dlsa $8, $9, $10, 1
dlsa $8, $9, $10, 2
dlsa $8, $9, $10, 3
diff --git a/test/MC/Mips/msa/test_elm.s b/test/MC/Mips/msa/test_elm.s
index 1e45fd4..dbe6d5c 100644
--- a/test/MC/Mips/msa/test_elm.s
+++ b/test/MC/Mips/msa/test_elm.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: copy_s.b $13, $w8[2] # encoding: [0x78,0x82,0x43,0x59]
# CHECK: copy_s.h $1, $w25[0] # encoding: [0x78,0xa0,0xc8,0x59]
# CHECK: copy_s.w $22, $w5[1] # encoding: [0x78,0xb1,0x2d,0x99]
@@ -20,22 +16,6 @@
# CHECK: splati.d $w28, $w1[0] # encoding: [0x78,0x78,0x0f,0x19]
# CHECK: move.v $w23, $w24 # encoding: [0x78,0xbe,0xc5,0xd9]
-# CHECKOBJDUMP: copy_s.b $13, $w8[2]
-# CHECKOBJDUMP: copy_s.h $1, $w25[0]
-# CHECKOBJDUMP: copy_s.w $22, $w5[1]
-# CHECKOBJDUMP: copy_u.b $22, $w20[4]
-# CHECKOBJDUMP: copy_u.h $20, $w4[0]
-# CHECKOBJDUMP: copy_u.w $fp, $w13[2]
-# CHECKOBJDUMP: sldi.b $w0, $w29[4]
-# CHECKOBJDUMP: sldi.h $w8, $w17[0]
-# CHECKOBJDUMP: sldi.w $w20, $w27[2]
-# CHECKOBJDUMP: sldi.d $w4, $w12[0]
-# CHECKOBJDUMP: splati.b $w25, $w3[2]
-# CHECKOBJDUMP: splati.h $w24, $w28[1]
-# CHECKOBJDUMP: splati.w $w13, $w18[0]
-# CHECKOBJDUMP: splati.d $w28, $w1[0]
-# CHECKOBJDUMP: move.v $w23, $w24
-
copy_s.b $13, $w8[2]
copy_s.h $1, $w25[0]
copy_s.w $22, $w5[1]
diff --git a/test/MC/Mips/msa/test_elm_insert.s b/test/MC/Mips/msa/test_elm_insert.s
index f66b26c..d58a4e0 100644
--- a/test/MC/Mips/msa/test_elm_insert.s
+++ b/test/MC/Mips/msa/test_elm_insert.s
@@ -1,17 +1,9 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: insert.b $w23[3], $sp # encoding: [0x79,0x03,0xed,0xd9]
# CHECK: insert.h $w20[2], $5 # encoding: [0x79,0x22,0x2d,0x19]
# CHECK: insert.w $w8[2], $15 # encoding: [0x79,0x32,0x7a,0x19]
-# CHECKOBJDUMP: insert.b $w23[3], $sp
-# CHECKOBJDUMP: insert.h $w20[2], $5
-# CHECKOBJDUMP: insert.w $w8[2], $15
-
insert.b $w23[3], $sp
insert.h $w20[2], $5
insert.w $w8[2], $15
diff --git a/test/MC/Mips/msa/test_elm_insert_msa64.s b/test/MC/Mips/msa/test_elm_insert_msa64.s
index 8196fd0..4e99bdb 100644
--- a/test/MC/Mips/msa/test_elm_insert_msa64.s
+++ b/test/MC/Mips/msa/test_elm_insert_msa64.s
@@ -1,11 +1,5 @@
# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips64 -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: insert.d $w1[1], $sp # encoding: [0x79,0x39,0xe8,0x59]
-# CHECKOBJDUMP: insert.d $w1[1], $sp
-
insert.d $w1[1], $sp
diff --git a/test/MC/Mips/msa/test_elm_insve.s b/test/MC/Mips/msa/test_elm_insve.s
index efdf88f..0053322 100644
--- a/test/MC/Mips/msa/test_elm_insve.s
+++ b/test/MC/Mips/msa/test_elm_insve.s
@@ -1,19 +1,10 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: insve.b $w25[3], $w9[0] # encoding: [0x79,0x43,0x4e,0x59]
# CHECK: insve.h $w24[2], $w2[0] # encoding: [0x79,0x62,0x16,0x19]
# CHECK: insve.w $w0[2], $w13[0] # encoding: [0x79,0x72,0x68,0x19]
# CHECK: insve.d $w3[0], $w18[0] # encoding: [0x79,0x78,0x90,0xd9]
-# CHECKOBJDUMP: insve.b $w25[3], $w9[0]
-# CHECKOBJDUMP: insve.h $w24[2], $w2[0]
-# CHECKOBJDUMP: insve.w $w0[2], $w13[0]
-# CHECKOBJDUMP: insve.d $w3[0], $w18[0]
-
insve.b $w25[3], $w9[0]
insve.h $w24[2], $w2[0]
insve.w $w0[2], $w13[0]
diff --git a/test/MC/Mips/msa/test_elm_msa64.s b/test/MC/Mips/msa/test_elm_msa64.s
index 15bfcca..5cc9147 100644
--- a/test/MC/Mips/msa/test_elm_msa64.s
+++ b/test/MC/Mips/msa/test_elm_msa64.s
@@ -1,14 +1,7 @@
# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips64 -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: copy_s.d $19, $w31[0] # encoding: [0x78,0xb8,0xfc,0xd9]
# CHECK: copy_u.d $18, $w29[1] # encoding: [0x78,0xf9,0xec,0x99]
-# CHECKOBJDUMP: copy_s.d $19, $w31[0]
-# CHECKOBJDUMP: copy_u.d $18, $w29[1]
-
copy_s.d $19, $w31[0]
copy_u.d $18, $w29[1]
diff --git a/test/MC/Mips/msa/test_i10.s b/test/MC/Mips/msa/test_i10.s
index e029dfd..d89218a 100644
--- a/test/MC/Mips/msa/test_i10.s
+++ b/test/MC/Mips/msa/test_i10.s
@@ -1,20 +1,10 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
-
# CHECK: ldi.b $w8, 198 # encoding: [0x7b,0x06,0x32,0x07]
# CHECK: ldi.h $w20, 313 # encoding: [0x7b,0x29,0xcd,0x07]
# CHECK: ldi.w $w24, 492 # encoding: [0x7b,0x4f,0x66,0x07]
# CHECK: ldi.d $w27, -180 # encoding: [0x7b,0x7a,0x66,0xc7]
-# CHECKOBJDUMP: ldi.b $w8, 198
-# CHECKOBJDUMP: ldi.h $w20, 313
-# CHECKOBJDUMP: ldi.w $w24, 492
-# CHECKOBJDUMP: ldi.d $w27, 844
-
ldi.b $w8, 198
ldi.h $w20, 313
ldi.w $w24, 492
diff --git a/test/MC/Mips/msa/test_i5.s b/test/MC/Mips/msa/test_i5.s
index 56c4811..d923787 100644
--- a/test/MC/Mips/msa/test_i5.s
+++ b/test/MC/Mips/msa/test_i5.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: addvi.b $w3, $w31, 30 # encoding: [0x78,0x1e,0xf8,0xc6]
# CHECK: addvi.h $w24, $w13, 26 # encoding: [0x78,0x3a,0x6e,0x06]
# CHECK: addvi.w $w26, $w20, 26 # encoding: [0x78,0x5a,0xa6,0x86]
@@ -49,51 +45,6 @@
# CHECK: subvi.w $w12, $w10, 11 # encoding: [0x78,0xcb,0x53,0x06]
# CHECK: subvi.d $w19, $w16, 7 # encoding: [0x78,0xe7,0x84,0xc6]
-# CHECKOBJDUMP: addvi.b $w3, $w31, 30
-# CHECKOBJDUMP: addvi.h $w24, $w13, 26
-# CHECKOBJDUMP: addvi.w $w26, $w20, 26
-# CHECKOBJDUMP: addvi.d $w16, $w1, 21
-# CHECKOBJDUMP: ceqi.b $w24, $w21, 24
-# CHECKOBJDUMP: ceqi.h $w31, $w15, 2
-# CHECKOBJDUMP: ceqi.w $w12, $w1, 31
-# CHECKOBJDUMP: ceqi.d $w24, $w22, 7
-# CHECKOBJDUMP: clei_s.b $w12, $w16, 1
-# CHECKOBJDUMP: clei_s.h $w2, $w10, 23
-# CHECKOBJDUMP: clei_s.w $w4, $w11, 22
-# CHECKOBJDUMP: clei_s.d $w0, $w29, 22
-# CHECKOBJDUMP: clei_u.b $w21, $w17, 3
-# CHECKOBJDUMP: clei_u.h $w29, $w7, 17
-# CHECKOBJDUMP: clei_u.w $w1, $w1, 2
-# CHECKOBJDUMP: clei_u.d $w27, $w27, 29
-# CHECKOBJDUMP: clti_s.b $w19, $w13, 25
-# CHECKOBJDUMP: clti_s.h $w15, $w10, 20
-# CHECKOBJDUMP: clti_s.w $w12, $w12, 11
-# CHECKOBJDUMP: clti_s.d $w29, $w20, 17
-# CHECKOBJDUMP: clti_u.b $w14, $w9, 29
-# CHECKOBJDUMP: clti_u.h $w24, $w25, 25
-# CHECKOBJDUMP: clti_u.w $w1, $w1, 22
-# CHECKOBJDUMP: clti_u.d $w21, $w25, 1
-# CHECKOBJDUMP: maxi_s.b $w22, $w21, 1
-# CHECKOBJDUMP: maxi_s.h $w29, $w5, 24
-# CHECKOBJDUMP: maxi_s.w $w1, $w10, 20
-# CHECKOBJDUMP: maxi_s.d $w13, $w29, 16
-# CHECKOBJDUMP: maxi_u.b $w20, $w0, 12
-# CHECKOBJDUMP: maxi_u.h $w1, $w14, 3
-# CHECKOBJDUMP: maxi_u.w $w27, $w22, 11
-# CHECKOBJDUMP: maxi_u.d $w26, $w6, 4
-# CHECKOBJDUMP: mini_s.b $w4, $w1, 1
-# CHECKOBJDUMP: mini_s.h $w27, $w27, 23
-# CHECKOBJDUMP: mini_s.w $w28, $w11, 9
-# CHECKOBJDUMP: mini_s.d $w11, $w10, 10
-# CHECKOBJDUMP: mini_u.b $w18, $w23, 27
-# CHECKOBJDUMP: mini_u.h $w7, $w26, 18
-# CHECKOBJDUMP: mini_u.w $w11, $w12, 26
-# CHECKOBJDUMP: mini_u.d $w11, $w15, 2
-# CHECKOBJDUMP: subvi.b $w24, $w20, 19
-# CHECKOBJDUMP: subvi.h $w11, $w19, 4
-# CHECKOBJDUMP: subvi.w $w12, $w10, 11
-# CHECKOBJDUMP: subvi.d $w19, $w16, 7
-
addvi.b $w3, $w31, 30
addvi.h $w24, $w13, 26
addvi.w $w26, $w20, 26
diff --git a/test/MC/Mips/msa/test_i8.s b/test/MC/Mips/msa/test_i8.s
index d4138a1..b520bb4 100644
--- a/test/MC/Mips/msa/test_i8.s
+++ b/test/MC/Mips/msa/test_i8.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: andi.b $w2, $w29, 48 # encoding: [0x78,0x30,0xe8,0x80]
# CHECK: bmnzi.b $w6, $w22, 126 # encoding: [0x78,0x7e,0xb1,0x81]
# CHECK: bmzi.b $w27, $w1, 88 # encoding: [0x79,0x58,0x0e,0xc1]
@@ -15,17 +11,6 @@
# CHECK: shf.w $w14, $w3, 93 # encoding: [0x7a,0x5d,0x1b,0x82]
# CHECK: xori.b $w16, $w10, 20 # encoding: [0x7b,0x14,0x54,0x00]
-# CHECKOBJDUMP: andi.b $w2, $w29, 48
-# CHECKOBJDUMP: bmnzi.b $w6, $w22, 126
-# CHECKOBJDUMP: bmzi.b $w27, $w1, 88
-# CHECKOBJDUMP: bseli.b $w29, $w3, 189
-# CHECKOBJDUMP: nori.b $w1, $w17, 56
-# CHECKOBJDUMP: ori.b $w26, $w20, 135
-# CHECKOBJDUMP: shf.b $w19, $w30, 105
-# CHECKOBJDUMP: shf.h $w17, $w8, 76
-# CHECKOBJDUMP: shf.w $w14, $w3, 93
-# CHECKOBJDUMP: xori.b $w16, $w10, 20
-
andi.b $w2, $w29, 48
bmnzi.b $w6, $w22, 126
bmzi.b $w27, $w1, 88
diff --git a/test/MC/Mips/msa/test_lsa.s b/test/MC/Mips/msa/test_lsa.s
index 9ea76f6..22fd0b3 100644
--- a/test/MC/Mips/msa/test_lsa.s
+++ b/test/MC/Mips/msa/test_lsa.s
@@ -1,19 +1,10 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: lsa $8, $9, $10, 1 # encoding: [0x01,0x2a,0x40,0x05]
# CHECK: lsa $8, $9, $10, 2 # encoding: [0x01,0x2a,0x40,0x45]
# CHECK: lsa $8, $9, $10, 3 # encoding: [0x01,0x2a,0x40,0x85]
# CHECK: lsa $8, $9, $10, 4 # encoding: [0x01,0x2a,0x40,0xc5]
-# CHECKOBJDUMP: lsa $8, $9, $10, 1
-# CHECKOBJDUMP: lsa $8, $9, $10, 2
-# CHECKOBJDUMP: lsa $8, $9, $10, 3
-# CHECKOBJDUMP: lsa $8, $9, $10, 4
-
lsa $8, $9, $10, 1
lsa $8, $9, $10, 2
lsa $8, $9, $10, 3
diff --git a/test/MC/Mips/msa/test_mi10.s b/test/MC/Mips/msa/test_mi10.s
index 90baeba..7269960 100644
--- a/test/MC/Mips/msa/test_mi10.s
+++ b/test/MC/Mips/msa/test_mi10.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: ld.b $w0, -512($1) # encoding: [0x7a,0x00,0x08,0x20]
# CHECK: ld.b $w1, 0($2) # encoding: [0x78,0x00,0x10,0x60]
# CHECK: ld.b $w2, 511($3) # encoding: [0x79,0xff,0x18,0xa0]
@@ -31,33 +27,6 @@
# CHECK: ld.d $w21, 2048($22) # encoding: [0x79,0x00,0xb5,0x63]
# CHECK: ld.d $w22, 4088($23) # encoding: [0x79,0xff,0xbd,0xa3]
-# CHECKOBJDUMP: ld.b $w0, -512($1)
-# CHECKOBJDUMP: ld.b $w1, 0($2)
-# CHECKOBJDUMP: ld.b $w2, 511($3)
-
-# CHECKOBJDUMP: ld.h $w3, -1024($4)
-# CHECKOBJDUMP: ld.h $w4, -512($5)
-# CHECKOBJDUMP: ld.h $w5, 0($6)
-# CHECKOBJDUMP: ld.h $w6, 512($7)
-# CHECKOBJDUMP: ld.h $w7, 1022($8)
-
-# CHECKOBJDUMP: ld.w $w8, -2048($9)
-# CHECKOBJDUMP: ld.w $w9, -1024($10)
-# CHECKOBJDUMP: ld.w $w10, -512($11)
-# CHECKOBJDUMP: ld.w $w11, 512($12)
-# CHECKOBJDUMP: ld.w $w12, 1024($13)
-# CHECKOBJDUMP: ld.w $w13, 2044($14)
-
-# CHECKOBJDUMP: ld.d $w14, -4096($15)
-# CHECKOBJDUMP: ld.d $w15, -2048($16)
-# CHECKOBJDUMP: ld.d $w16, -1024($17)
-# CHECKOBJDUMP: ld.d $w17, -512($18)
-# CHECKOBJDUMP: ld.d $w18, 0($19)
-# CHECKOBJDUMP: ld.d $w19, 512($20)
-# CHECKOBJDUMP: ld.d $w20, 1024($21)
-# CHECKOBJDUMP: ld.d $w21, 2048($22)
-# CHECKOBJDUMP: ld.d $w22, 4088($23)
-
ld.b $w0, -512($1)
ld.b $w1, 0($2)
ld.b $w2, 511($3)
diff --git a/test/MC/Mips/msa/test_vec.s b/test/MC/Mips/msa/test_vec.s
index b62da70..3f989d3 100644
--- a/test/MC/Mips/msa/test_vec.s
+++ b/test/MC/Mips/msa/test_vec.s
@@ -1,9 +1,5 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
#
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -filetype=obj -o - | \
-# RUN: llvm-objdump -d -arch=mips -mattr=+msa - | \
-# RUN: FileCheck %s -check-prefix=CHECKOBJDUMP
-#
# CHECK: and.v $w25, $w20, $w27 # encoding: [0x78,0x1b,0xa6,0x5e]
# CHECK: bmnz.v $w17, $w6, $w7 # encoding: [0x78,0x87,0x34,0x5e]
# CHECK: bmz.v $w3, $w17, $w9 # encoding: [0x78,0xa9,0x88,0xde]
@@ -12,14 +8,6 @@
# CHECK: or.v $w24, $w26, $w30 # encoding: [0x78,0x3e,0xd6,0x1e]
# CHECK: xor.v $w7, $w27, $w15 # encoding: [0x78,0x6f,0xd9,0xde]
-# CHECKOBJDUMP: and.v $w25, $w20, $w27
-# CHECKOBJDUMP: bmnz.v $w17, $w6, $w7
-# CHECKOBJDUMP: bmz.v $w3, $w17, $w9
-# CHECKOBJDUMP: bsel.v $w8, $w0, $w14
-# CHECKOBJDUMP: nor.v $w7, $w31, $w0
-# CHECKOBJDUMP: or.v $w24, $w26, $w30
-# CHECKOBJDUMP: xor.v $w7, $w27, $w15
-
and.v $w25, $w20, $w27
bmnz.v $w17, $w6, $w7
bmz.v $w3, $w17, $w9
diff --git a/test/MC/Mips/octeon-instructions.s b/test/MC/Mips/octeon-instructions.s
index 0244e19..b7c89b4 100644
--- a/test/MC/Mips/octeon-instructions.s
+++ b/test/MC/Mips/octeon-instructions.s
@@ -29,8 +29,18 @@
# CHECK: pop $2, $2 # encoding: [0x70,0x40,0x10,0x2c]
# CHECK: seq $25, $23, $24 # encoding: [0x72,0xf8,0xc8,0x2a]
# CHECK: seq $6, $6, $24 # encoding: [0x70,0xd8,0x30,0x2a]
+# CHECK: seqi $17, $15, -512 # encoding: [0x71,0xf1,0x80,0x2e]
+# CHECK: seqi $16, $16, 38 # encoding: [0x72,0x10,0x09,0xae]
# CHECK: sne $25, $23, $24 # encoding: [0x72,0xf8,0xc8,0x2b]
# CHECK: sne $23, $23, $20 # encoding: [0x72,0xf4,0xb8,0x2b]
+# CHECK: snei $4, $16, -313 # encoding: [0x72,0x04,0xb1,0xef]
+# CHECK: snei $26, $26, 511 # encoding: [0x73,0x5a,0x7f,0xef]
+# CHECK: v3mulu $21, $10, $21 # encoding: [0x71,0x55,0xa8,0x11]
+# CHECK: v3mulu $20, $20, $10 # encoding: [0x72,0x8a,0xa0,0x11]
+# CHECK: vmm0 $3, $19, $16 # encoding: [0x72,0x70,0x18,0x10]
+# CHECK: vmm0 $ra, $ra, $9 # encoding: [0x73,0xe9,0xf8,0x10]
+# CHECK: vmulu $sp, $10, $17 # encoding: [0x71,0x51,0xe8,0x0f]
+# CHECK: vmulu $27, $27, $6 # encoding: [0x73,0x66,0xd8,0x0f]
baddu $9, $6, $7
baddu $17, $18, $19
@@ -61,5 +71,15 @@
pop $2
seq $25, $23, $24
seq $6, $24
+ seqi $17, $15, -512
+ seqi $16, 38
sne $25, $23, $24
sne $23, $20
+ snei $4, $16, -313
+ snei $26, 511
+ v3mulu $21, $10, $21
+ v3mulu $20, $10
+ vmm0 $3, $19, $16
+ vmm0 $31, $9
+ vmulu $29, $10, $17
+ vmulu $27, $6
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.s b/test/MC/PowerPC/ppc64-initial-cfa.s
index b890b30..ca97e1b 100644
--- a/test/MC/PowerPC/ppc64-initial-cfa.s
+++ b/test/MC/PowerPC/ppc64-initial-cfa.s
@@ -7,6 +7,7 @@
# RUN: llvm-mc -triple=powerpc64le-unknown-linux-gnu -filetype=obj -relocation-model=pic %s | \
# RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=PIC -check-prefix=PIC-LE
+_proc:
.cfi_startproc
nop
.cfi_endproc
diff --git a/test/MC/Sparc/sparc-alu-instructions.s b/test/MC/Sparc/sparc-alu-instructions.s
index afebf64..e2e5ef8 100644
--- a/test/MC/Sparc/sparc-alu-instructions.s
+++ b/test/MC/Sparc/sparc-alu-instructions.s
@@ -70,10 +70,10 @@
! CHECK: subxcc %g1, %g2, %g3 ! encoding: [0x86,0xe0,0x40,0x02]
subxcc %g1, %g2, %g3
- ! CHECK: or %g0, %g1, %g3 ! encoding: [0x86,0x10,0x00,0x01]
+ ! CHECK: mov %g1, %g3 ! encoding: [0x86,0x10,0x00,0x01]
mov %g1, %g3
- ! CHECK: or %g0, 255, %g3 ! encoding: [0x86,0x10,0x20,0xff]
+ ! CHECK: mov 255, %g3 ! encoding: [0x86,0x10,0x20,0xff]
mov 0xff, %g3
! CHECK: restore ! encoding: [0x81,0xe8,0x00,0x00]
diff --git a/test/MC/Sparc/sparc-fp-instructions.s b/test/MC/Sparc/sparc-fp-instructions.s
index fdeaa8c..f8c130f 100644
--- a/test/MC/Sparc/sparc-fp-instructions.s
+++ b/test/MC/Sparc/sparc-fp-instructions.s
@@ -96,16 +96,16 @@
fdivd %f0, %f4, %f8
fdivq %f0, %f4, %f8
- ! CHECK: fcmps %fcc0, %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0x24]
- ! CHECK: fcmpd %fcc0, %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0x44]
- ! CHECK: fcmpq %fcc0, %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0x64]
+ ! CHECK: fcmps %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0x24]
+ ! CHECK: fcmpd %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0x44]
+ ! CHECK: fcmpq %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0x64]
fcmps %f0, %f4
fcmpd %f0, %f4
fcmpq %f0, %f4
- ! CHECK: fcmpes %fcc0, %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0xa4]
- ! CHECK: fcmped %fcc0, %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0xc4]
- ! CHECK: fcmpeq %fcc0, %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0xe4]
+ ! CHECK: fcmpes %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0xa4]
+ ! CHECK: fcmped %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0xc4]
+ ! CHECK: fcmpeq %f0, %f4 ! encoding: [0x81,0xa8,0x0a,0xe4]
fcmpes %f0, %f4
fcmped %f0, %f4
fcmpeq %f0, %f4
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index 42c50e1..2915b7a 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -1,101 +1,5 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl --show-encoding %s | FileCheck %s
-// CHECK: vaddpd -8192(%rdx), %zmm27, %zmm8
-// CHECK: encoding: [0x62,0x71,0xa5,0x40,0x58,0x42,0x80]
- vaddpd -8192(%rdx), %zmm27, %zmm8
-
-// CHECK: vaddpd -1024(%rdx){1to8}, %zmm27, %zmm8
-// CHECK: encoding: [0x62,0x71,0xa5,0x50,0x58,0x42,0x80]
- vaddpd -1024(%rdx){1to8}, %zmm27, %zmm8
-
-// CHECK: vaddps -8192(%rdx), %zmm13, %zmm18
-// CHECK: encoding: [0x62,0xe1,0x14,0x48,0x58,0x52,0x80]
- vaddps -8192(%rdx), %zmm13, %zmm18
-
-// CHECK: vaddps -512(%rdx){1to16}, %zmm13, %zmm18
-// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x52,0x80]
- vaddps -512(%rdx){1to16}, %zmm13, %zmm18
-
-// CHECK: vdivpd -8192(%rdx), %zmm6, %zmm18
-// CHECK: encoding: [0x62,0xe1,0xcd,0x48,0x5e,0x52,0x80]
- vdivpd -8192(%rdx), %zmm6, %zmm18
-
-// CHECK: vdivpd -1024(%rdx){1to8}, %zmm6, %zmm18
-// CHECK: encoding: [0x62,0xe1,0xcd,0x58,0x5e,0x52,0x80]
- vdivpd -1024(%rdx){1to8}, %zmm6, %zmm18
-
-// CHECK: vdivps -8192(%rdx), %zmm23, %zmm23
-// CHECK: encoding: [0x62,0xe1,0x44,0x40,0x5e,0x7a,0x80]
- vdivps -8192(%rdx), %zmm23, %zmm23
-
-// CHECK: vdivps -512(%rdx){1to16}, %zmm23, %zmm23
-// CHECK: encoding: [0x62,0xe1,0x44,0x50,0x5e,0x7a,0x80]
- vdivps -512(%rdx){1to16}, %zmm23, %zmm23
-
-// CHECK: vmaxpd -8192(%rdx), %zmm28, %zmm30
-// CHECK: encoding: [0x62,0x61,0x9d,0x40,0x5f,0x72,0x80]
- vmaxpd -8192(%rdx), %zmm28, %zmm30
-
-// CHECK: vmaxpd -1024(%rdx){1to8}, %zmm28, %zmm30
-// CHECK: encoding: [0x62,0x61,0x9d,0x50,0x5f,0x72,0x80]
- vmaxpd -1024(%rdx){1to8}, %zmm28, %zmm30
-
-// CHECK: vmaxps -8192(%rdx), %zmm6, %zmm25
-// CHECK: encoding: [0x62,0x61,0x4c,0x48,0x5f,0x4a,0x80]
- vmaxps -8192(%rdx), %zmm6, %zmm25
-
-// CHECK: vmaxps -512(%rdx){1to16}, %zmm6, %zmm25
-// CHECK: encoding: [0x62,0x61,0x4c,0x58,0x5f,0x4a,0x80]
- vmaxps -512(%rdx){1to16}, %zmm6, %zmm25
-
-// CHECK: vminpd -8192(%rdx), %zmm6, %zmm6
-// CHECK: encoding: [0x62,0xf1,0xcd,0x48,0x5d,0x72,0x80]
- vminpd -8192(%rdx), %zmm6, %zmm6
-
-// CHECK: vminpd -1024(%rdx){1to8}, %zmm6, %zmm6
-// CHECK: encoding: [0x62,0xf1,0xcd,0x58,0x5d,0x72,0x80]
- vminpd -1024(%rdx){1to8}, %zmm6, %zmm6
-
-// CHECK: vminps -8192(%rdx), %zmm3, %zmm3
-// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x5d,0x5a,0x80]
- vminps -8192(%rdx), %zmm3, %zmm3
-
-// CHECK: vminps -512(%rdx){1to16}, %zmm3, %zmm3
-// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x5d,0x5a,0x80]
- vminps -512(%rdx){1to16}, %zmm3, %zmm3
-
-// CHECK: vmulpd -8192(%rdx), %zmm4, %zmm24
-// CHECK: encoding: [0x62,0x61,0xdd,0x48,0x59,0x42,0x80]
- vmulpd -8192(%rdx), %zmm4, %zmm24
-
-// CHECK: vmulpd -1024(%rdx){1to8}, %zmm4, %zmm24
-// CHECK: encoding: [0x62,0x61,0xdd,0x58,0x59,0x42,0x80]
- vmulpd -1024(%rdx){1to8}, %zmm4, %zmm24
-
-// CHECK: vmulps -8192(%rdx), %zmm6, %zmm3
-// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0x59,0x5a,0x80]
- vmulps -8192(%rdx), %zmm6, %zmm3
-
-// CHECK: vmulps -512(%rdx){1to16}, %zmm6, %zmm3
-// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0x59,0x5a,0x80]
- vmulps -512(%rdx){1to16}, %zmm6, %zmm3
-
-// CHECK: vsubpd -8192(%rdx), %zmm12, %zmm9
-// CHECK: encoding: [0x62,0x71,0x9d,0x48,0x5c,0x4a,0x80]
- vsubpd -8192(%rdx), %zmm12, %zmm9
-
-// CHECK: vsubpd -1024(%rdx){1to8}, %zmm12, %zmm9
-// CHECK: encoding: [0x62,0x71,0x9d,0x58,0x5c,0x4a,0x80]
- vsubpd -1024(%rdx){1to8}, %zmm12, %zmm9
-
-// CHECK: vsubps -8192(%rdx), %zmm27, %zmm14
-// CHECK: encoding: [0x62,0x71,0x24,0x40,0x5c,0x72,0x80]
- vsubps -8192(%rdx), %zmm27, %zmm14
-
-// CHECK: vsubps -512(%rdx){1to16}, %zmm27, %zmm14
-// CHECK: encoding: [0x62,0x71,0x24,0x50,0x5c,0x72,0x80]
- vsubps -512(%rdx){1to16}, %zmm27, %zmm14
-
// CHECK: vaddpd %zmm6, %zmm27, %zmm8
// CHECK: encoding: [0x62,0x71,0xa5,0x40,0x58,0xc6]
vaddpd %zmm6, %zmm27, %zmm8
@@ -128,6 +32,10 @@
// CHECK: encoding: [0x62,0x71,0xa5,0x40,0x58,0x82,0x00,0x20,0x00,0x00]
vaddpd 8192(%rdx), %zmm27, %zmm8
+// CHECK: vaddpd -8192(%rdx), %zmm27, %zmm8
+// CHECK: encoding: [0x62,0x71,0xa5,0x40,0x58,0x42,0x80]
+ vaddpd -8192(%rdx), %zmm27, %zmm8
+
// CHECK: vaddpd -8256(%rdx), %zmm27, %zmm8
// CHECK: encoding: [0x62,0x71,0xa5,0x40,0x58,0x82,0xc0,0xdf,0xff,0xff]
vaddpd -8256(%rdx), %zmm27, %zmm8
@@ -140,6 +48,10 @@
// CHECK: encoding: [0x62,0x71,0xa5,0x50,0x58,0x82,0x00,0x04,0x00,0x00]
vaddpd 1024(%rdx){1to8}, %zmm27, %zmm8
+// CHECK: vaddpd -1024(%rdx){1to8}, %zmm27, %zmm8
+// CHECK: encoding: [0x62,0x71,0xa5,0x50,0x58,0x42,0x80]
+ vaddpd -1024(%rdx){1to8}, %zmm27, %zmm8
+
// CHECK: vaddpd -1032(%rdx){1to8}, %zmm27, %zmm8
// CHECK: encoding: [0x62,0x71,0xa5,0x50,0x58,0x82,0xf8,0xfb,0xff,0xff]
vaddpd -1032(%rdx){1to8}, %zmm27, %zmm8
@@ -176,6 +88,10 @@
// CHECK: encoding: [0x62,0xe1,0x14,0x48,0x58,0x92,0x00,0x20,0x00,0x00]
vaddps 8192(%rdx), %zmm13, %zmm18
+// CHECK: vaddps -8192(%rdx), %zmm13, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x14,0x48,0x58,0x52,0x80]
+ vaddps -8192(%rdx), %zmm13, %zmm18
+
// CHECK: vaddps -8256(%rdx), %zmm13, %zmm18
// CHECK: encoding: [0x62,0xe1,0x14,0x48,0x58,0x92,0xc0,0xdf,0xff,0xff]
vaddps -8256(%rdx), %zmm13, %zmm18
@@ -188,6 +104,10 @@
// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0x00,0x02,0x00,0x00]
vaddps 512(%rdx){1to16}, %zmm13, %zmm18
+// CHECK: vaddps -512(%rdx){1to16}, %zmm13, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x52,0x80]
+ vaddps -512(%rdx){1to16}, %zmm13, %zmm18
+
// CHECK: vaddps -516(%rdx){1to16}, %zmm13, %zmm18
// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0xfc,0xfd,0xff,0xff]
vaddps -516(%rdx){1to16}, %zmm13, %zmm18
@@ -224,6 +144,10 @@
// CHECK: encoding: [0x62,0xe1,0xcd,0x48,0x5e,0x92,0x00,0x20,0x00,0x00]
vdivpd 8192(%rdx), %zmm6, %zmm18
+// CHECK: vdivpd -8192(%rdx), %zmm6, %zmm18
+// CHECK: encoding: [0x62,0xe1,0xcd,0x48,0x5e,0x52,0x80]
+ vdivpd -8192(%rdx), %zmm6, %zmm18
+
// CHECK: vdivpd -8256(%rdx), %zmm6, %zmm18
// CHECK: encoding: [0x62,0xe1,0xcd,0x48,0x5e,0x92,0xc0,0xdf,0xff,0xff]
vdivpd -8256(%rdx), %zmm6, %zmm18
@@ -236,6 +160,10 @@
// CHECK: encoding: [0x62,0xe1,0xcd,0x58,0x5e,0x92,0x00,0x04,0x00,0x00]
vdivpd 1024(%rdx){1to8}, %zmm6, %zmm18
+// CHECK: vdivpd -1024(%rdx){1to8}, %zmm6, %zmm18
+// CHECK: encoding: [0x62,0xe1,0xcd,0x58,0x5e,0x52,0x80]
+ vdivpd -1024(%rdx){1to8}, %zmm6, %zmm18
+
// CHECK: vdivpd -1032(%rdx){1to8}, %zmm6, %zmm18
// CHECK: encoding: [0x62,0xe1,0xcd,0x58,0x5e,0x92,0xf8,0xfb,0xff,0xff]
vdivpd -1032(%rdx){1to8}, %zmm6, %zmm18
@@ -272,6 +200,10 @@
// CHECK: encoding: [0x62,0xe1,0x44,0x40,0x5e,0xba,0x00,0x20,0x00,0x00]
vdivps 8192(%rdx), %zmm23, %zmm23
+// CHECK: vdivps -8192(%rdx), %zmm23, %zmm23
+// CHECK: encoding: [0x62,0xe1,0x44,0x40,0x5e,0x7a,0x80]
+ vdivps -8192(%rdx), %zmm23, %zmm23
+
// CHECK: vdivps -8256(%rdx), %zmm23, %zmm23
// CHECK: encoding: [0x62,0xe1,0x44,0x40,0x5e,0xba,0xc0,0xdf,0xff,0xff]
vdivps -8256(%rdx), %zmm23, %zmm23
@@ -284,6 +216,10 @@
// CHECK: encoding: [0x62,0xe1,0x44,0x50,0x5e,0xba,0x00,0x02,0x00,0x00]
vdivps 512(%rdx){1to16}, %zmm23, %zmm23
+// CHECK: vdivps -512(%rdx){1to16}, %zmm23, %zmm23
+// CHECK: encoding: [0x62,0xe1,0x44,0x50,0x5e,0x7a,0x80]
+ vdivps -512(%rdx){1to16}, %zmm23, %zmm23
+
// CHECK: vdivps -516(%rdx){1to16}, %zmm23, %zmm23
// CHECK: encoding: [0x62,0xe1,0x44,0x50,0x5e,0xba,0xfc,0xfd,0xff,0xff]
vdivps -516(%rdx){1to16}, %zmm23, %zmm23
@@ -320,6 +256,10 @@
// CHECK: encoding: [0x62,0x61,0x9d,0x40,0x5f,0xb2,0x00,0x20,0x00,0x00]
vmaxpd 8192(%rdx), %zmm28, %zmm30
+// CHECK: vmaxpd -8192(%rdx), %zmm28, %zmm30
+// CHECK: encoding: [0x62,0x61,0x9d,0x40,0x5f,0x72,0x80]
+ vmaxpd -8192(%rdx), %zmm28, %zmm30
+
// CHECK: vmaxpd -8256(%rdx), %zmm28, %zmm30
// CHECK: encoding: [0x62,0x61,0x9d,0x40,0x5f,0xb2,0xc0,0xdf,0xff,0xff]
vmaxpd -8256(%rdx), %zmm28, %zmm30
@@ -332,6 +272,10 @@
// CHECK: encoding: [0x62,0x61,0x9d,0x50,0x5f,0xb2,0x00,0x04,0x00,0x00]
vmaxpd 1024(%rdx){1to8}, %zmm28, %zmm30
+// CHECK: vmaxpd -1024(%rdx){1to8}, %zmm28, %zmm30
+// CHECK: encoding: [0x62,0x61,0x9d,0x50,0x5f,0x72,0x80]
+ vmaxpd -1024(%rdx){1to8}, %zmm28, %zmm30
+
// CHECK: vmaxpd -1032(%rdx){1to8}, %zmm28, %zmm30
// CHECK: encoding: [0x62,0x61,0x9d,0x50,0x5f,0xb2,0xf8,0xfb,0xff,0xff]
vmaxpd -1032(%rdx){1to8}, %zmm28, %zmm30
@@ -368,6 +312,10 @@
// CHECK: encoding: [0x62,0x61,0x4c,0x48,0x5f,0x8a,0x00,0x20,0x00,0x00]
vmaxps 8192(%rdx), %zmm6, %zmm25
+// CHECK: vmaxps -8192(%rdx), %zmm6, %zmm25
+// CHECK: encoding: [0x62,0x61,0x4c,0x48,0x5f,0x4a,0x80]
+ vmaxps -8192(%rdx), %zmm6, %zmm25
+
// CHECK: vmaxps -8256(%rdx), %zmm6, %zmm25
// CHECK: encoding: [0x62,0x61,0x4c,0x48,0x5f,0x8a,0xc0,0xdf,0xff,0xff]
vmaxps -8256(%rdx), %zmm6, %zmm25
@@ -380,6 +328,10 @@
// CHECK: encoding: [0x62,0x61,0x4c,0x58,0x5f,0x8a,0x00,0x02,0x00,0x00]
vmaxps 512(%rdx){1to16}, %zmm6, %zmm25
+// CHECK: vmaxps -512(%rdx){1to16}, %zmm6, %zmm25
+// CHECK: encoding: [0x62,0x61,0x4c,0x58,0x5f,0x4a,0x80]
+ vmaxps -512(%rdx){1to16}, %zmm6, %zmm25
+
// CHECK: vmaxps -516(%rdx){1to16}, %zmm6, %zmm25
// CHECK: encoding: [0x62,0x61,0x4c,0x58,0x5f,0x8a,0xfc,0xfd,0xff,0xff]
vmaxps -516(%rdx){1to16}, %zmm6, %zmm25
@@ -416,6 +368,10 @@
// CHECK: encoding: [0x62,0xf1,0xcd,0x48,0x5d,0xb2,0x00,0x20,0x00,0x00]
vminpd 8192(%rdx), %zmm6, %zmm6
+// CHECK: vminpd -8192(%rdx), %zmm6, %zmm6
+// CHECK: encoding: [0x62,0xf1,0xcd,0x48,0x5d,0x72,0x80]
+ vminpd -8192(%rdx), %zmm6, %zmm6
+
// CHECK: vminpd -8256(%rdx), %zmm6, %zmm6
// CHECK: encoding: [0x62,0xf1,0xcd,0x48,0x5d,0xb2,0xc0,0xdf,0xff,0xff]
vminpd -8256(%rdx), %zmm6, %zmm6
@@ -428,6 +384,10 @@
// CHECK: encoding: [0x62,0xf1,0xcd,0x58,0x5d,0xb2,0x00,0x04,0x00,0x00]
vminpd 1024(%rdx){1to8}, %zmm6, %zmm6
+// CHECK: vminpd -1024(%rdx){1to8}, %zmm6, %zmm6
+// CHECK: encoding: [0x62,0xf1,0xcd,0x58,0x5d,0x72,0x80]
+ vminpd -1024(%rdx){1to8}, %zmm6, %zmm6
+
// CHECK: vminpd -1032(%rdx){1to8}, %zmm6, %zmm6
// CHECK: encoding: [0x62,0xf1,0xcd,0x58,0x5d,0xb2,0xf8,0xfb,0xff,0xff]
vminpd -1032(%rdx){1to8}, %zmm6, %zmm6
@@ -464,6 +424,10 @@
// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x5d,0x9a,0x00,0x20,0x00,0x00]
vminps 8192(%rdx), %zmm3, %zmm3
+// CHECK: vminps -8192(%rdx), %zmm3, %zmm3
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x5d,0x5a,0x80]
+ vminps -8192(%rdx), %zmm3, %zmm3
+
// CHECK: vminps -8256(%rdx), %zmm3, %zmm3
// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x5d,0x9a,0xc0,0xdf,0xff,0xff]
vminps -8256(%rdx), %zmm3, %zmm3
@@ -476,6 +440,10 @@
// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x5d,0x9a,0x00,0x02,0x00,0x00]
vminps 512(%rdx){1to16}, %zmm3, %zmm3
+// CHECK: vminps -512(%rdx){1to16}, %zmm3, %zmm3
+// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x5d,0x5a,0x80]
+ vminps -512(%rdx){1to16}, %zmm3, %zmm3
+
// CHECK: vminps -516(%rdx){1to16}, %zmm3, %zmm3
// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x5d,0x9a,0xfc,0xfd,0xff,0xff]
vminps -516(%rdx){1to16}, %zmm3, %zmm3
@@ -512,6 +480,10 @@
// CHECK: encoding: [0x62,0x61,0xdd,0x48,0x59,0x82,0x00,0x20,0x00,0x00]
vmulpd 8192(%rdx), %zmm4, %zmm24
+// CHECK: vmulpd -8192(%rdx), %zmm4, %zmm24
+// CHECK: encoding: [0x62,0x61,0xdd,0x48,0x59,0x42,0x80]
+ vmulpd -8192(%rdx), %zmm4, %zmm24
+
// CHECK: vmulpd -8256(%rdx), %zmm4, %zmm24
// CHECK: encoding: [0x62,0x61,0xdd,0x48,0x59,0x82,0xc0,0xdf,0xff,0xff]
vmulpd -8256(%rdx), %zmm4, %zmm24
@@ -524,6 +496,10 @@
// CHECK: encoding: [0x62,0x61,0xdd,0x58,0x59,0x82,0x00,0x04,0x00,0x00]
vmulpd 1024(%rdx){1to8}, %zmm4, %zmm24
+// CHECK: vmulpd -1024(%rdx){1to8}, %zmm4, %zmm24
+// CHECK: encoding: [0x62,0x61,0xdd,0x58,0x59,0x42,0x80]
+ vmulpd -1024(%rdx){1to8}, %zmm4, %zmm24
+
// CHECK: vmulpd -1032(%rdx){1to8}, %zmm4, %zmm24
// CHECK: encoding: [0x62,0x61,0xdd,0x58,0x59,0x82,0xf8,0xfb,0xff,0xff]
vmulpd -1032(%rdx){1to8}, %zmm4, %zmm24
@@ -560,6 +536,10 @@
// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0x59,0x9a,0x00,0x20,0x00,0x00]
vmulps 8192(%rdx), %zmm6, %zmm3
+// CHECK: vmulps -8192(%rdx), %zmm6, %zmm3
+// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0x59,0x5a,0x80]
+ vmulps -8192(%rdx), %zmm6, %zmm3
+
// CHECK: vmulps -8256(%rdx), %zmm6, %zmm3
// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0x59,0x9a,0xc0,0xdf,0xff,0xff]
vmulps -8256(%rdx), %zmm6, %zmm3
@@ -572,6 +552,10 @@
// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0x59,0x9a,0x00,0x02,0x00,0x00]
vmulps 512(%rdx){1to16}, %zmm6, %zmm3
+// CHECK: vmulps -512(%rdx){1to16}, %zmm6, %zmm3
+// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0x59,0x5a,0x80]
+ vmulps -512(%rdx){1to16}, %zmm6, %zmm3
+
// CHECK: vmulps -516(%rdx){1to16}, %zmm6, %zmm3
// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0x59,0x9a,0xfc,0xfd,0xff,0xff]
vmulps -516(%rdx){1to16}, %zmm6, %zmm3
@@ -1504,6 +1488,374 @@
// CHECK: encoding: [0x62,0x72,0xad,0x50,0x3b,0x9a,0xf8,0xfb,0xff,0xff]
vpminuq -1032(%rdx){1to8}, %zmm26, %zmm11
+// CHECK: vpmovsxbd %xmm7, %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x21,0xdf]
+ vpmovsxbd %xmm7, %zmm27
+
+// CHECK: vpmovsxbd %xmm7, %zmm27 {%k5}
+// CHECK: encoding: [0x62,0x62,0x7d,0x4d,0x21,0xdf]
+ vpmovsxbd %xmm7, %zmm27 {%k5}
+
+// CHECK: vpmovsxbd %xmm7, %zmm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xcd,0x21,0xdf]
+ vpmovsxbd %xmm7, %zmm27 {%k5} {z}
+
+// CHECK: vpmovsxbd (%rcx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x21,0x19]
+ vpmovsxbd (%rcx), %zmm27
+
+// CHECK: vpmovsxbd 291(%rax,%r14,8), %zmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x21,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbd 291(%rax,%r14,8), %zmm27
+
+// CHECK: vpmovsxbd 2032(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x21,0x5a,0x7f]
+ vpmovsxbd 2032(%rdx), %zmm27
+
+// CHECK: vpmovsxbd 2048(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x21,0x9a,0x00,0x08,0x00,0x00]
+ vpmovsxbd 2048(%rdx), %zmm27
+
+// CHECK: vpmovsxbd -2048(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x21,0x5a,0x80]
+ vpmovsxbd -2048(%rdx), %zmm27
+
+// CHECK: vpmovsxbd -2064(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x21,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmovsxbd -2064(%rdx), %zmm27
+
+// CHECK: vpmovsxbd (%rcx), %zmm27 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x49,0x21,0x19]
+ vpmovsxbd (%rcx), %zmm27 {%k1}
+
+// CHECK: vpmovsxbd (%rcx), %zmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xca,0x21,0x19]
+ vpmovsxbd (%rcx), %zmm27 {%k2} {z}
+
+// CHECK: vpmovsxbq %xmm11, %zmm11
+// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x22,0xdb]
+ vpmovsxbq %xmm11, %zmm11
+
+// CHECK: vpmovsxbq %xmm11, %zmm11 {%k5}
+// CHECK: encoding: [0x62,0x52,0x7d,0x4d,0x22,0xdb]
+ vpmovsxbq %xmm11, %zmm11 {%k5}
+
+// CHECK: vpmovsxbq %xmm11, %zmm11 {%k5} {z}
+// CHECK: encoding: [0x62,0x52,0x7d,0xcd,0x22,0xdb]
+ vpmovsxbq %xmm11, %zmm11 {%k5} {z}
+
+// CHECK: vpmovsxbq (%rcx), %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x22,0x19]
+ vpmovsxbq (%rcx), %zmm11
+
+// CHECK: vpmovsxbq 291(%rax,%r14,8), %zmm11
+// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x22,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbq 291(%rax,%r14,8), %zmm11
+
+// CHECK: vpmovsxbq 1016(%rdx), %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x22,0x5a,0x7f]
+ vpmovsxbq 1016(%rdx), %zmm11
+
+// CHECK: vpmovsxbq 1024(%rdx), %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x22,0x9a,0x00,0x04,0x00,0x00]
+ vpmovsxbq 1024(%rdx), %zmm11
+
+// CHECK: vpmovsxbq -1024(%rdx), %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x22,0x5a,0x80]
+ vpmovsxbq -1024(%rdx), %zmm11
+
+// CHECK: vpmovsxbq -1032(%rdx), %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x22,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmovsxbq -1032(%rdx), %zmm11
+
+// CHECK: vpmovsxdq %ymm29, %zmm26
+// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x25,0xd5]
+ vpmovsxdq %ymm29, %zmm26
+
+// CHECK: vpmovsxdq %ymm29, %zmm26 {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x49,0x25,0xd5]
+ vpmovsxdq %ymm29, %zmm26 {%k1}
+
+// CHECK: vpmovsxdq %ymm29, %zmm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xc9,0x25,0xd5]
+ vpmovsxdq %ymm29, %zmm26 {%k1} {z}
+
+// CHECK: vpmovsxdq (%rcx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x25,0x11]
+ vpmovsxdq (%rcx), %zmm26
+
+// CHECK: vpmovsxdq 291(%rax,%r14,8), %zmm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x25,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxdq 291(%rax,%r14,8), %zmm26
+
+// CHECK: vpmovsxdq 4064(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x25,0x52,0x7f]
+ vpmovsxdq 4064(%rdx), %zmm26
+
+// CHECK: vpmovsxdq 4096(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x25,0x92,0x00,0x10,0x00,0x00]
+ vpmovsxdq 4096(%rdx), %zmm26
+
+// CHECK: vpmovsxdq -4096(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x25,0x52,0x80]
+ vpmovsxdq -4096(%rdx), %zmm26
+
+// CHECK: vpmovsxdq -4128(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x25,0x92,0xe0,0xef,0xff,0xff]
+ vpmovsxdq -4128(%rdx), %zmm26
+
+// CHECK: vpmovsxwd %ymm11, %zmm23
+// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x23,0xfb]
+ vpmovsxwd %ymm11, %zmm23
+
+// CHECK: vpmovsxwd %ymm11, %zmm23 {%k2}
+// CHECK: encoding: [0x62,0xc2,0x7d,0x4a,0x23,0xfb]
+ vpmovsxwd %ymm11, %zmm23 {%k2}
+
+// CHECK: vpmovsxwd %ymm11, %zmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xc2,0x7d,0xca,0x23,0xfb]
+ vpmovsxwd %ymm11, %zmm23 {%k2} {z}
+
+// CHECK: vpmovsxwd (%rcx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x23,0x39]
+ vpmovsxwd (%rcx), %zmm23
+
+// CHECK: vpmovsxwd 291(%rax,%r14,8), %zmm23
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x23,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxwd 291(%rax,%r14,8), %zmm23
+
+// CHECK: vpmovsxwd 4064(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x23,0x7a,0x7f]
+ vpmovsxwd 4064(%rdx), %zmm23
+
+// CHECK: vpmovsxwd 4096(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x23,0xba,0x00,0x10,0x00,0x00]
+ vpmovsxwd 4096(%rdx), %zmm23
+
+// CHECK: vpmovsxwd -4096(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x23,0x7a,0x80]
+ vpmovsxwd -4096(%rdx), %zmm23
+
+// CHECK: vpmovsxwd -4128(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x23,0xba,0xe0,0xef,0xff,0xff]
+ vpmovsxwd -4128(%rdx), %zmm23
+
+// CHECK: vpmovsxwq %xmm25, %zmm25
+// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x24,0xc9]
+ vpmovsxwq %xmm25, %zmm25
+
+// CHECK: vpmovsxwq %xmm25, %zmm25 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7d,0x4c,0x24,0xc9]
+ vpmovsxwq %xmm25, %zmm25 {%k4}
+
+// CHECK: vpmovsxwq %xmm25, %zmm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xcc,0x24,0xc9]
+ vpmovsxwq %xmm25, %zmm25 {%k4} {z}
+
+// CHECK: vpmovsxwq (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x24,0x09]
+ vpmovsxwq (%rcx), %zmm25
+
+// CHECK: vpmovsxwq 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x24,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxwq 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpmovsxwq 2032(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x24,0x4a,0x7f]
+ vpmovsxwq 2032(%rdx), %zmm25
+
+// CHECK: vpmovsxwq 2048(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x24,0x8a,0x00,0x08,0x00,0x00]
+ vpmovsxwq 2048(%rdx), %zmm25
+
+// CHECK: vpmovsxwq -2048(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x24,0x4a,0x80]
+ vpmovsxwq -2048(%rdx), %zmm25
+
+// CHECK: vpmovsxwq -2064(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x24,0x8a,0xf0,0xf7,0xff,0xff]
+ vpmovsxwq -2064(%rdx), %zmm25
+
+// CHECK: vpmovzxbd %xmm25, %zmm18
+// CHECK: encoding: [0x62,0x82,0x7d,0x48,0x31,0xd1]
+ vpmovzxbd %xmm25, %zmm18
+
+// CHECK: vpmovzxbd %xmm25, %zmm18 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7d,0x4f,0x31,0xd1]
+ vpmovzxbd %xmm25, %zmm18 {%k7}
+
+// CHECK: vpmovzxbd %xmm25, %zmm18 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xcf,0x31,0xd1]
+ vpmovzxbd %xmm25, %zmm18 {%k7} {z}
+
+// CHECK: vpmovzxbd (%rcx), %zmm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x31,0x11]
+ vpmovzxbd (%rcx), %zmm18
+
+// CHECK: vpmovzxbd 291(%rax,%r14,8), %zmm18
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x31,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbd 291(%rax,%r14,8), %zmm18
+
+// CHECK: vpmovzxbd 2032(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x31,0x52,0x7f]
+ vpmovzxbd 2032(%rdx), %zmm18
+
+// CHECK: vpmovzxbd 2048(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x31,0x92,0x00,0x08,0x00,0x00]
+ vpmovzxbd 2048(%rdx), %zmm18
+
+// CHECK: vpmovzxbd -2048(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x31,0x52,0x80]
+ vpmovzxbd -2048(%rdx), %zmm18
+
+// CHECK: vpmovzxbd -2064(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x31,0x92,0xf0,0xf7,0xff,0xff]
+ vpmovzxbd -2064(%rdx), %zmm18
+
+// CHECK: vpmovzxbq %xmm15, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x7d,0x48,0x32,0xef]
+ vpmovzxbq %xmm15, %zmm5
+
+// CHECK: vpmovzxbq %xmm15, %zmm5 {%k1}
+// CHECK: encoding: [0x62,0xd2,0x7d,0x49,0x32,0xef]
+ vpmovzxbq %xmm15, %zmm5 {%k1}
+
+// CHECK: vpmovzxbq %xmm15, %zmm5 {%k1} {z}
+// CHECK: encoding: [0x62,0xd2,0x7d,0xc9,0x32,0xef]
+ vpmovzxbq %xmm15, %zmm5 {%k1} {z}
+
+// CHECK: vpmovzxbq (%rcx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x32,0x29]
+ vpmovzxbq (%rcx), %zmm5
+
+// CHECK: vpmovzxbq 291(%rax,%r14,8), %zmm5
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x32,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbq 291(%rax,%r14,8), %zmm5
+
+// CHECK: vpmovzxbq 1016(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x32,0x6a,0x7f]
+ vpmovzxbq 1016(%rdx), %zmm5
+
+// CHECK: vpmovzxbq 1024(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x32,0xaa,0x00,0x04,0x00,0x00]
+ vpmovzxbq 1024(%rdx), %zmm5
+
+// CHECK: vpmovzxbq -1024(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x32,0x6a,0x80]
+ vpmovzxbq -1024(%rdx), %zmm5
+
+// CHECK: vpmovzxbq -1032(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x32,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovzxbq -1032(%rdx), %zmm5
+
+// CHECK: vpmovzxdq %ymm4, %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x35,0xe4]
+ vpmovzxdq %ymm4, %zmm20
+
+// CHECK: vpmovzxdq %ymm4, %zmm20 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4b,0x35,0xe4]
+ vpmovzxdq %ymm4, %zmm20 {%k3}
+
+// CHECK: vpmovzxdq %ymm4, %zmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xcb,0x35,0xe4]
+ vpmovzxdq %ymm4, %zmm20 {%k3} {z}
+
+// CHECK: vpmovzxdq (%rcx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x35,0x21]
+ vpmovzxdq (%rcx), %zmm20
+
+// CHECK: vpmovzxdq 291(%rax,%r14,8), %zmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x35,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxdq 291(%rax,%r14,8), %zmm20
+
+// CHECK: vpmovzxdq 4064(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x35,0x62,0x7f]
+ vpmovzxdq 4064(%rdx), %zmm20
+
+// CHECK: vpmovzxdq 4096(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x35,0xa2,0x00,0x10,0x00,0x00]
+ vpmovzxdq 4096(%rdx), %zmm20
+
+// CHECK: vpmovzxdq -4096(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x35,0x62,0x80]
+ vpmovzxdq -4096(%rdx), %zmm20
+
+// CHECK: vpmovzxdq -4128(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x35,0xa2,0xe0,0xef,0xff,0xff]
+ vpmovzxdq -4128(%rdx), %zmm20
+
+// CHECK: vpmovzxwd %ymm6, %zmm8
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x33,0xc6]
+ vpmovzxwd %ymm6, %zmm8
+
+// CHECK: vpmovzxwd %ymm6, %zmm8 {%k7}
+// CHECK: encoding: [0x62,0x72,0x7d,0x4f,0x33,0xc6]
+ vpmovzxwd %ymm6, %zmm8 {%k7}
+
+// CHECK: vpmovzxwd %ymm6, %zmm8 {%k7} {z}
+// CHECK: encoding: [0x62,0x72,0x7d,0xcf,0x33,0xc6]
+ vpmovzxwd %ymm6, %zmm8 {%k7} {z}
+
+// CHECK: vpmovzxwd (%rcx), %zmm8
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x33,0x01]
+ vpmovzxwd (%rcx), %zmm8
+
+// CHECK: vpmovzxwd 291(%rax,%r14,8), %zmm8
+// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x33,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxwd 291(%rax,%r14,8), %zmm8
+
+// CHECK: vpmovzxwd 4064(%rdx), %zmm8
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x33,0x42,0x7f]
+ vpmovzxwd 4064(%rdx), %zmm8
+
+// CHECK: vpmovzxwd 4096(%rdx), %zmm8
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x33,0x82,0x00,0x10,0x00,0x00]
+ vpmovzxwd 4096(%rdx), %zmm8
+
+// CHECK: vpmovzxwd -4096(%rdx), %zmm8
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x33,0x42,0x80]
+ vpmovzxwd -4096(%rdx), %zmm8
+
+// CHECK: vpmovzxwd -4128(%rdx), %zmm8
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x33,0x82,0xe0,0xef,0xff,0xff]
+ vpmovzxwd -4128(%rdx), %zmm8
+
+// CHECK: vpmovzxwq %xmm15, %zmm5
+// CHECK: encoding: [0x62,0xd2,0x7d,0x48,0x34,0xef]
+ vpmovzxwq %xmm15, %zmm5
+
+// CHECK: vpmovzxwq %xmm15, %zmm5 {%k7}
+// CHECK: encoding: [0x62,0xd2,0x7d,0x4f,0x34,0xef]
+ vpmovzxwq %xmm15, %zmm5 {%k7}
+
+// CHECK: vpmovzxwq %xmm15, %zmm5 {%k7} {z}
+// CHECK: encoding: [0x62,0xd2,0x7d,0xcf,0x34,0xef]
+ vpmovzxwq %xmm15, %zmm5 {%k7} {z}
+
+// CHECK: vpmovzxwq (%rcx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x34,0x29]
+ vpmovzxwq (%rcx), %zmm5
+
+// CHECK: vpmovzxwq 291(%rax,%r14,8), %zmm5
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x34,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxwq 291(%rax,%r14,8), %zmm5
+
+// CHECK: vpmovzxwq 2032(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x34,0x6a,0x7f]
+ vpmovzxwq 2032(%rdx), %zmm5
+
+// CHECK: vpmovzxwq 2048(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x34,0xaa,0x00,0x08,0x00,0x00]
+ vpmovzxwq 2048(%rdx), %zmm5
+
+// CHECK: vpmovzxwq -2048(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x34,0x6a,0x80]
+ vpmovzxwq -2048(%rdx), %zmm5
+
+// CHECK: vpmovzxwq -2064(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x34,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovzxwq -2064(%rdx), %zmm5
+
// CHECK: vpmuldq %zmm9, %zmm9, %zmm29
// CHECK: encoding: [0x62,0x42,0xb5,0x48,0x28,0xe9]
vpmuldq %zmm9, %zmm9, %zmm29
@@ -2056,6 +2408,10 @@
// CHECK: encoding: [0x62,0x71,0x9d,0x48,0x5c,0x8a,0x00,0x20,0x00,0x00]
vsubpd 8192(%rdx), %zmm12, %zmm9
+// CHECK: vsubpd -8192(%rdx), %zmm12, %zmm9
+// CHECK: encoding: [0x62,0x71,0x9d,0x48,0x5c,0x4a,0x80]
+ vsubpd -8192(%rdx), %zmm12, %zmm9
+
// CHECK: vsubpd -8256(%rdx), %zmm12, %zmm9
// CHECK: encoding: [0x62,0x71,0x9d,0x48,0x5c,0x8a,0xc0,0xdf,0xff,0xff]
vsubpd -8256(%rdx), %zmm12, %zmm9
@@ -2068,6 +2424,10 @@
// CHECK: encoding: [0x62,0x71,0x9d,0x58,0x5c,0x8a,0x00,0x04,0x00,0x00]
vsubpd 1024(%rdx){1to8}, %zmm12, %zmm9
+// CHECK: vsubpd -1024(%rdx){1to8}, %zmm12, %zmm9
+// CHECK: encoding: [0x62,0x71,0x9d,0x58,0x5c,0x4a,0x80]
+ vsubpd -1024(%rdx){1to8}, %zmm12, %zmm9
+
// CHECK: vsubpd -1032(%rdx){1to8}, %zmm12, %zmm9
// CHECK: encoding: [0x62,0x71,0x9d,0x58,0x5c,0x8a,0xf8,0xfb,0xff,0xff]
vsubpd -1032(%rdx){1to8}, %zmm12, %zmm9
@@ -2104,6 +2464,10 @@
// CHECK: encoding: [0x62,0x71,0x24,0x40,0x5c,0xb2,0x00,0x20,0x00,0x00]
vsubps 8192(%rdx), %zmm27, %zmm14
+// CHECK: vsubps -8192(%rdx), %zmm27, %zmm14
+// CHECK: encoding: [0x62,0x71,0x24,0x40,0x5c,0x72,0x80]
+ vsubps -8192(%rdx), %zmm27, %zmm14
+
// CHECK: vsubps -8256(%rdx), %zmm27, %zmm14
// CHECK: encoding: [0x62,0x71,0x24,0x40,0x5c,0xb2,0xc0,0xdf,0xff,0xff]
vsubps -8256(%rdx), %zmm27, %zmm14
@@ -2116,10 +2480,614 @@
// CHECK: encoding: [0x62,0x71,0x24,0x50,0x5c,0xb2,0x00,0x02,0x00,0x00]
vsubps 512(%rdx){1to16}, %zmm27, %zmm14
+// CHECK: vsubps -512(%rdx){1to16}, %zmm27, %zmm14
+// CHECK: encoding: [0x62,0x71,0x24,0x50,0x5c,0x72,0x80]
+ vsubps -512(%rdx){1to16}, %zmm27, %zmm14
+
// CHECK: vsubps -516(%rdx){1to16}, %zmm27, %zmm14
// CHECK: encoding: [0x62,0x71,0x24,0x50,0x5c,0xb2,0xfc,0xfd,0xff,0xff]
vsubps -516(%rdx){1to16}, %zmm27, %zmm14
+// CHECK: vpmovqb %zmm2, %xmm3
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0xd3]
+ vpmovqb %zmm2, %xmm3
+
+// CHECK: vpmovqb %zmm2, %xmm3 {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x49,0x32,0xd3]
+ vpmovqb %zmm2, %xmm3 {%k1}
+
+// CHECK: vpmovqb %zmm2, %xmm3 {%k1} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x32,0xd3]
+ vpmovqb %zmm2, %xmm3 {%k1} {z}
+
+// CHECK: vpmovsqb %zmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x22,0xee]
+ vpmovsqb %zmm29, %xmm30
+
+// CHECK: vpmovsqb %zmm29, %xmm30 {%k5}
+// CHECK: encoding: [0x62,0x02,0x7e,0x4d,0x22,0xee]
+ vpmovsqb %zmm29, %xmm30 {%k5}
+
+// CHECK: vpmovsqb %zmm29, %xmm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xcd,0x22,0xee]
+ vpmovsqb %zmm29, %xmm30 {%k5} {z}
+
+// CHECK: vpmovusqb %zmm28, %xmm24
+// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x12,0xe0]
+ vpmovusqb %zmm28, %xmm24
+
+// CHECK: vpmovusqb %zmm28, %xmm24 {%k7}
+// CHECK: encoding: [0x62,0x02,0x7e,0x4f,0x12,0xe0]
+ vpmovusqb %zmm28, %xmm24 {%k7}
+
+// CHECK: vpmovusqb %zmm28, %xmm24 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xcf,0x12,0xe0]
+ vpmovusqb %zmm28, %xmm24 {%k7} {z}
+
+// CHECK: vpmovqw %zmm18, %xmm6
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x34,0xd6]
+ vpmovqw %zmm18, %xmm6
+
+// CHECK: vpmovqw %zmm18, %xmm6 {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x49,0x34,0xd6]
+ vpmovqw %zmm18, %xmm6 {%k1}
+
+// CHECK: vpmovqw %zmm18, %xmm6 {%k1} {z}
+// CHECK: encoding: [0x62,0xe2,0x7e,0xc9,0x34,0xd6]
+ vpmovqw %zmm18, %xmm6 {%k1} {z}
+
+// CHECK: vpmovsqw %zmm19, %xmm27
+// CHECK: encoding: [0x62,0x82,0x7e,0x48,0x24,0xdb]
+ vpmovsqw %zmm19, %xmm27
+
+// CHECK: vpmovsqw %zmm19, %xmm27 {%k6}
+// CHECK: encoding: [0x62,0x82,0x7e,0x4e,0x24,0xdb]
+ vpmovsqw %zmm19, %xmm27 {%k6}
+
+// CHECK: vpmovsqw %zmm19, %xmm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xce,0x24,0xdb]
+ vpmovsqw %zmm19, %xmm27 {%k6} {z}
+
+// CHECK: vpmovusqw %zmm10, %xmm28
+// CHECK: encoding: [0x62,0x12,0x7e,0x48,0x14,0xd4]
+ vpmovusqw %zmm10, %xmm28
+
+// CHECK: vpmovusqw %zmm10, %xmm28 {%k7}
+// CHECK: encoding: [0x62,0x12,0x7e,0x4f,0x14,0xd4]
+ vpmovusqw %zmm10, %xmm28 {%k7}
+
+// CHECK: vpmovusqw %zmm10, %xmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x12,0x7e,0xcf,0x14,0xd4]
+ vpmovusqw %zmm10, %xmm28 {%k7} {z}
+
+// CHECK: vpmovqd %zmm25, %ymm6
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x35,0xce]
+ vpmovqd %zmm25, %ymm6
+
+// CHECK: vpmovqd %zmm25, %ymm6 {%k5}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4d,0x35,0xce]
+ vpmovqd %zmm25, %ymm6 {%k5}
+
+// CHECK: vpmovqd %zmm25, %ymm6 {%k5} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0xcd,0x35,0xce]
+ vpmovqd %zmm25, %ymm6 {%k5} {z}
+
+// CHECK: vpmovsqd %zmm2, %ymm15
+// CHECK: encoding: [0x62,0xd2,0x7e,0x48,0x25,0xd7]
+ vpmovsqd %zmm2, %ymm15
+
+// CHECK: vpmovsqd %zmm2, %ymm15 {%k2}
+// CHECK: encoding: [0x62,0xd2,0x7e,0x4a,0x25,0xd7]
+ vpmovsqd %zmm2, %ymm15 {%k2}
+
+// CHECK: vpmovsqd %zmm2, %ymm15 {%k2} {z}
+// CHECK: encoding: [0x62,0xd2,0x7e,0xca,0x25,0xd7]
+ vpmovsqd %zmm2, %ymm15 {%k2} {z}
+
+// CHECK: vpmovusqd %zmm4, %ymm8
+// CHECK: encoding: [0x62,0xd2,0x7e,0x48,0x15,0xe0]
+ vpmovusqd %zmm4, %ymm8
+
+// CHECK: vpmovusqd %zmm4, %ymm8 {%k4}
+// CHECK: encoding: [0x62,0xd2,0x7e,0x4c,0x15,0xe0]
+ vpmovusqd %zmm4, %ymm8 {%k4}
+
+// CHECK: vpmovusqd %zmm4, %ymm8 {%k4} {z}
+// CHECK: encoding: [0x62,0xd2,0x7e,0xcc,0x15,0xe0]
+ vpmovusqd %zmm4, %ymm8 {%k4} {z}
+
+// CHECK: vpmovdb %zmm5, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x31,0xea]
+ vpmovdb %zmm5, %xmm2
+
+// CHECK: vpmovdb %zmm5, %xmm2 {%k5}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4d,0x31,0xea]
+ vpmovdb %zmm5, %xmm2 {%k5}
+
+// CHECK: vpmovdb %zmm5, %xmm2 {%k5} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xcd,0x31,0xea]
+ vpmovdb %zmm5, %xmm2 {%k5} {z}
+
+// CHECK: vpmovsdb %zmm2, %xmm21
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x21,0xd5]
+ vpmovsdb %zmm2, %xmm21
+
+// CHECK: vpmovsdb %zmm2, %xmm21 {%k4}
+// CHECK: encoding: [0x62,0xb2,0x7e,0x4c,0x21,0xd5]
+ vpmovsdb %zmm2, %xmm21 {%k4}
+
+// CHECK: vpmovsdb %zmm2, %xmm21 {%k4} {z}
+// CHECK: encoding: [0x62,0xb2,0x7e,0xcc,0x21,0xd5]
+ vpmovsdb %zmm2, %xmm21 {%k4} {z}
+
+// CHECK: vpmovusdb %zmm2, %xmm20
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x11,0xd4]
+ vpmovusdb %zmm2, %xmm20
+
+// CHECK: vpmovusdb %zmm2, %xmm20 {%k3}
+// CHECK: encoding: [0x62,0xb2,0x7e,0x4b,0x11,0xd4]
+ vpmovusdb %zmm2, %xmm20 {%k3}
+
+// CHECK: vpmovusdb %zmm2, %xmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xb2,0x7e,0xcb,0x11,0xd4]
+ vpmovusdb %zmm2, %xmm20 {%k3} {z}
+
+// CHECK: vpmovdw %zmm29, %ymm22
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x33,0xee]
+ vpmovdw %zmm29, %ymm22
+
+// CHECK: vpmovdw %zmm29, %ymm22 {%k5}
+// CHECK: encoding: [0x62,0x22,0x7e,0x4d,0x33,0xee]
+ vpmovdw %zmm29, %ymm22 {%k5}
+
+// CHECK: vpmovdw %zmm29, %ymm22 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xcd,0x33,0xee]
+ vpmovdw %zmm29, %ymm22 {%k5} {z}
+
+// CHECK: vpmovsdw %zmm14, %ymm25
+// CHECK: encoding: [0x62,0x12,0x7e,0x48,0x23,0xf1]
+ vpmovsdw %zmm14, %ymm25
+
+// CHECK: vpmovsdw %zmm14, %ymm25 {%k4}
+// CHECK: encoding: [0x62,0x12,0x7e,0x4c,0x23,0xf1]
+ vpmovsdw %zmm14, %ymm25 {%k4}
+
+// CHECK: vpmovsdw %zmm14, %ymm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x12,0x7e,0xcc,0x23,0xf1]
+ vpmovsdw %zmm14, %ymm25 {%k4} {z}
+
+// CHECK: vpmovusdw %zmm7, %ymm8
+// CHECK: encoding: [0x62,0xd2,0x7e,0x48,0x13,0xf8]
+ vpmovusdw %zmm7, %ymm8
+
+// CHECK: vpmovusdw %zmm7, %ymm8 {%k1}
+// CHECK: encoding: [0x62,0xd2,0x7e,0x49,0x13,0xf8]
+ vpmovusdw %zmm7, %ymm8 {%k1}
+
+// CHECK: vpmovusdw %zmm7, %ymm8 {%k1} {z}
+// CHECK: encoding: [0x62,0xd2,0x7e,0xc9,0x13,0xf8]
+ vpmovusdw %zmm7, %ymm8 {%k1} {z}
+
+// CHECK: vpmovqb %zmm3, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0x19]
+ vpmovqb %zmm3, (%rcx)
+
+// CHECK: vpmovqb %zmm3, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x32,0x19]
+ vpmovqb %zmm3, (%rcx) {%k7}
+
+// CHECK: vpmovqb %zmm3, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqb %zmm3, 291(%rax,%r14,8)
+
+// CHECK: vpmovqb %zmm3, 1016(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0x5a,0x7f]
+ vpmovqb %zmm3, 1016(%rdx)
+
+// CHECK: vpmovqb %zmm3, 1024(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0x9a,0x00,0x04,0x00,0x00]
+ vpmovqb %zmm3, 1024(%rdx)
+
+// CHECK: vpmovqb %zmm3, -1024(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0x5a,0x80]
+ vpmovqb %zmm3, -1024(%rdx)
+
+// CHECK: vpmovqb %zmm3, -1032(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmovqb %zmm3, -1032(%rdx)
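+// The displacements above exercise EVEX compressed disp8: vpmovqb stores 8
+// bytes, so an 8-bit displacement is scaled by 8 (1016 = 127*8 encodes as
+// 0x7f and -1024 = -128*8 as 0x80), while 1024 and -1032 fall outside that
+// range and take a full disp32. The same pattern repeats below with scale 16
+// for the 16-byte stores (vpmovqw, vpmovdb) and 32 for the 32-byte ones
+// (vpmovqd, vpmovdw).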
+
+// CHECK: vpmovsqb %zmm16, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x22,0x01]
+ vpmovsqb %zmm16, (%rcx)
+
+// CHECK: vpmovsqb %zmm16, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x4a,0x22,0x01]
+ vpmovsqb %zmm16, (%rcx) {%k2}
+
+// CHECK: vpmovsqb %zmm16, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x22,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqb %zmm16, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqb %zmm16, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x22,0x42,0x7f]
+ vpmovsqb %zmm16, 1016(%rdx)
+
+// CHECK: vpmovsqb %zmm16, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x22,0x82,0x00,0x04,0x00,0x00]
+ vpmovsqb %zmm16, 1024(%rdx)
+
+// CHECK: vpmovsqb %zmm16, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x22,0x42,0x80]
+ vpmovsqb %zmm16, -1024(%rdx)
+
+// CHECK: vpmovsqb %zmm16, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x22,0x82,0xf8,0xfb,0xff,0xff]
+ vpmovsqb %zmm16, -1032(%rdx)
+
+// CHECK: vpmovusqb %zmm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x12,0x21]
+ vpmovusqb %zmm28, (%rcx)
+
+// CHECK: vpmovusqb %zmm28, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0x62,0x7e,0x49,0x12,0x21]
+ vpmovusqb %zmm28, (%rcx) {%k1}
+
+// CHECK: vpmovusqb %zmm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x12,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqb %zmm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqb %zmm28, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x12,0x62,0x7f]
+ vpmovusqb %zmm28, 1016(%rdx)
+
+// CHECK: vpmovusqb %zmm28, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x12,0xa2,0x00,0x04,0x00,0x00]
+ vpmovusqb %zmm28, 1024(%rdx)
+
+// CHECK: vpmovusqb %zmm28, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x12,0x62,0x80]
+ vpmovusqb %zmm28, -1024(%rdx)
+
+// CHECK: vpmovusqb %zmm28, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x12,0xa2,0xf8,0xfb,0xff,0xff]
+ vpmovusqb %zmm28, -1032(%rdx)
+
+// CHECK: vpmovqw %zmm7, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x34,0x39]
+ vpmovqw %zmm7, (%rcx)
+
+// CHECK: vpmovqw %zmm7, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4e,0x34,0x39]
+ vpmovqw %zmm7, (%rcx) {%k6}
+
+// CHECK: vpmovqw %zmm7, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x34,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqw %zmm7, 291(%rax,%r14,8)
+
+// CHECK: vpmovqw %zmm7, 2032(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x34,0x7a,0x7f]
+ vpmovqw %zmm7, 2032(%rdx)
+
+// CHECK: vpmovqw %zmm7, 2048(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x34,0xba,0x00,0x08,0x00,0x00]
+ vpmovqw %zmm7, 2048(%rdx)
+
+// CHECK: vpmovqw %zmm7, -2048(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x34,0x7a,0x80]
+ vpmovqw %zmm7, -2048(%rdx)
+
+// CHECK: vpmovqw %zmm7, -2064(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x34,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovqw %zmm7, -2064(%rdx)
+
+// CHECK: vpmovsqw %zmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x24,0x09]
+ vpmovsqw %zmm1, (%rcx)
+
+// CHECK: vpmovsqw %zmm1, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4d,0x24,0x09]
+ vpmovsqw %zmm1, (%rcx) {%k5}
+
+// CHECK: vpmovsqw %zmm1, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x24,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqw %zmm1, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqw %zmm1, 2032(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x24,0x4a,0x7f]
+ vpmovsqw %zmm1, 2032(%rdx)
+
+// CHECK: vpmovsqw %zmm1, 2048(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x24,0x8a,0x00,0x08,0x00,0x00]
+ vpmovsqw %zmm1, 2048(%rdx)
+
+// CHECK: vpmovsqw %zmm1, -2048(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x24,0x4a,0x80]
+ vpmovsqw %zmm1, -2048(%rdx)
+
+// CHECK: vpmovsqw %zmm1, -2064(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x24,0x8a,0xf0,0xf7,0xff,0xff]
+ vpmovsqw %zmm1, -2064(%rdx)
+
+// CHECK: vpmovusqw %zmm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x14,0x09]
+ vpmovusqw %zmm25, (%rcx)
+
+// CHECK: vpmovusqw %zmm25, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4b,0x14,0x09]
+ vpmovusqw %zmm25, (%rcx) {%k3}
+
+// CHECK: vpmovusqw %zmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x14,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqw %zmm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqw %zmm25, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x14,0x4a,0x7f]
+ vpmovusqw %zmm25, 2032(%rdx)
+
+// CHECK: vpmovusqw %zmm25, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x14,0x8a,0x00,0x08,0x00,0x00]
+ vpmovusqw %zmm25, 2048(%rdx)
+
+// CHECK: vpmovusqw %zmm25, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x14,0x4a,0x80]
+ vpmovusqw %zmm25, -2048(%rdx)
+
+// CHECK: vpmovusqw %zmm25, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x14,0x8a,0xf0,0xf7,0xff,0xff]
+ vpmovusqw %zmm25, -2064(%rdx)
+
+// CHECK: vpmovqd %zmm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x35,0x21]
+ vpmovqd %zmm28, (%rcx)
+
+// CHECK: vpmovqd %zmm28, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4d,0x35,0x21]
+ vpmovqd %zmm28, (%rcx) {%k5}
+
+// CHECK: vpmovqd %zmm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x35,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqd %zmm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovqd %zmm28, 4064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x35,0x62,0x7f]
+ vpmovqd %zmm28, 4064(%rdx)
+
+// CHECK: vpmovqd %zmm28, 4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x35,0xa2,0x00,0x10,0x00,0x00]
+ vpmovqd %zmm28, 4096(%rdx)
+
+// CHECK: vpmovqd %zmm28, -4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x35,0x62,0x80]
+ vpmovqd %zmm28, -4096(%rdx)
+
+// CHECK: vpmovqd %zmm28, -4128(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x35,0xa2,0xe0,0xef,0xff,0xff]
+ vpmovqd %zmm28, -4128(%rdx)
+
+// CHECK: vpmovsqd %zmm9, (%rcx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x25,0x09]
+ vpmovsqd %zmm9, (%rcx)
+
+// CHECK: vpmovsqd %zmm9, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x72,0x7e,0x4f,0x25,0x09]
+ vpmovsqd %zmm9, (%rcx) {%k7}
+
+// CHECK: vpmovsqd %zmm9, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x32,0x7e,0x48,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqd %zmm9, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqd %zmm9, 4064(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x25,0x4a,0x7f]
+ vpmovsqd %zmm9, 4064(%rdx)
+
+// CHECK: vpmovsqd %zmm9, 4096(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x25,0x8a,0x00,0x10,0x00,0x00]
+ vpmovsqd %zmm9, 4096(%rdx)
+
+// CHECK: vpmovsqd %zmm9, -4096(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x25,0x4a,0x80]
+ vpmovsqd %zmm9, -4096(%rdx)
+
+// CHECK: vpmovsqd %zmm9, -4128(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x25,0x8a,0xe0,0xef,0xff,0xff]
+ vpmovsqd %zmm9, -4128(%rdx)
+
+// CHECK: vpmovusqd %zmm22, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x15,0x31]
+ vpmovusqd %zmm22, (%rcx)
+
+// CHECK: vpmovusqd %zmm22, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x49,0x15,0x31]
+ vpmovusqd %zmm22, (%rcx) {%k1}
+
+// CHECK: vpmovusqd %zmm22, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x15,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqd %zmm22, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqd %zmm22, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x15,0x72,0x7f]
+ vpmovusqd %zmm22, 4064(%rdx)
+
+// CHECK: vpmovusqd %zmm22, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x15,0xb2,0x00,0x10,0x00,0x00]
+ vpmovusqd %zmm22, 4096(%rdx)
+
+// CHECK: vpmovusqd %zmm22, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x15,0x72,0x80]
+ vpmovusqd %zmm22, -4096(%rdx)
+
+// CHECK: vpmovusqd %zmm22, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x15,0xb2,0xe0,0xef,0xff,0xff]
+ vpmovusqd %zmm22, -4128(%rdx)
+
+// CHECK: vpmovdb %zmm12, (%rcx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x31,0x21]
+ vpmovdb %zmm12, (%rcx)
+
+// CHECK: vpmovdb %zmm12, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x72,0x7e,0x4b,0x31,0x21]
+ vpmovdb %zmm12, (%rcx) {%k3}
+
+// CHECK: vpmovdb %zmm12, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x32,0x7e,0x48,0x31,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdb %zmm12, 291(%rax,%r14,8)
+
+// CHECK: vpmovdb %zmm12, 2032(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x31,0x62,0x7f]
+ vpmovdb %zmm12, 2032(%rdx)
+
+// CHECK: vpmovdb %zmm12, 2048(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x31,0xa2,0x00,0x08,0x00,0x00]
+ vpmovdb %zmm12, 2048(%rdx)
+
+// CHECK: vpmovdb %zmm12, -2048(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x31,0x62,0x80]
+ vpmovdb %zmm12, -2048(%rdx)
+
+// CHECK: vpmovdb %zmm12, -2064(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x31,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmovdb %zmm12, -2064(%rdx)
+
+// CHECK: vpmovsdb %zmm6, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x21,0x31]
+ vpmovsdb %zmm6, (%rcx)
+
+// CHECK: vpmovsdb %zmm6, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x49,0x21,0x31]
+ vpmovsdb %zmm6, (%rcx) {%k1}
+
+// CHECK: vpmovsdb %zmm6, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x21,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdb %zmm6, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdb %zmm6, 2032(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x21,0x72,0x7f]
+ vpmovsdb %zmm6, 2032(%rdx)
+
+// CHECK: vpmovsdb %zmm6, 2048(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x21,0xb2,0x00,0x08,0x00,0x00]
+ vpmovsdb %zmm6, 2048(%rdx)
+
+// CHECK: vpmovsdb %zmm6, -2048(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x21,0x72,0x80]
+ vpmovsdb %zmm6, -2048(%rdx)
+
+// CHECK: vpmovsdb %zmm6, -2064(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x21,0xb2,0xf0,0xf7,0xff,0xff]
+ vpmovsdb %zmm6, -2064(%rdx)
+
+// CHECK: vpmovusdb %zmm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x11,0x39]
+ vpmovusdb %zmm23, (%rcx)
+
+// CHECK: vpmovusdb %zmm23, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x4b,0x11,0x39]
+ vpmovusdb %zmm23, (%rcx) {%k3}
+
+// CHECK: vpmovusdb %zmm23, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x11,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdb %zmm23, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdb %zmm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x11,0x7a,0x7f]
+ vpmovusdb %zmm23, 2032(%rdx)
+
+// CHECK: vpmovusdb %zmm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x11,0xba,0x00,0x08,0x00,0x00]
+ vpmovusdb %zmm23, 2048(%rdx)
+
+// CHECK: vpmovusdb %zmm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x11,0x7a,0x80]
+ vpmovusdb %zmm23, -2048(%rdx)
+
+// CHECK: vpmovusdb %zmm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x11,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovusdb %zmm23, -2064(%rdx)
+
+// CHECK: vpmovdw %zmm7, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x33,0x39]
+ vpmovdw %zmm7, (%rcx)
+
+// CHECK: vpmovdw %zmm7, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x33,0x39]
+ vpmovdw %zmm7, (%rcx) {%k7}
+
+// CHECK: vpmovdw %zmm7, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x33,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdw %zmm7, 291(%rax,%r14,8)
+
+// CHECK: vpmovdw %zmm7, 4064(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x33,0x7a,0x7f]
+ vpmovdw %zmm7, 4064(%rdx)
+
+// CHECK: vpmovdw %zmm7, 4096(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x33,0xba,0x00,0x10,0x00,0x00]
+ vpmovdw %zmm7, 4096(%rdx)
+
+// CHECK: vpmovdw %zmm7, -4096(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x33,0x7a,0x80]
+ vpmovdw %zmm7, -4096(%rdx)
+
+// CHECK: vpmovdw %zmm7, -4128(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x33,0xba,0xe0,0xef,0xff,0xff]
+ vpmovdw %zmm7, -4128(%rdx)
+
+// CHECK: vpmovsdw %zmm14, (%rcx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x23,0x31]
+ vpmovsdw %zmm14, (%rcx)
+
+// CHECK: vpmovsdw %zmm14, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x72,0x7e,0x4e,0x23,0x31]
+ vpmovsdw %zmm14, (%rcx) {%k6}
+
+// CHECK: vpmovsdw %zmm14, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x32,0x7e,0x48,0x23,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdw %zmm14, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdw %zmm14, 4064(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x23,0x72,0x7f]
+ vpmovsdw %zmm14, 4064(%rdx)
+
+// CHECK: vpmovsdw %zmm14, 4096(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x23,0xb2,0x00,0x10,0x00,0x00]
+ vpmovsdw %zmm14, 4096(%rdx)
+
+// CHECK: vpmovsdw %zmm14, -4096(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x23,0x72,0x80]
+ vpmovsdw %zmm14, -4096(%rdx)
+
+// CHECK: vpmovsdw %zmm14, -4128(%rdx)
+// CHECK: encoding: [0x62,0x72,0x7e,0x48,0x23,0xb2,0xe0,0xef,0xff,0xff]
+ vpmovsdw %zmm14, -4128(%rdx)
+
+// CHECK: vpmovusdw %zmm5, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x13,0x29]
+ vpmovusdw %zmm5, (%rcx)
+
+// CHECK: vpmovusdw %zmm5, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4b,0x13,0x29]
+ vpmovusdw %zmm5, (%rcx) {%k3}
+
+// CHECK: vpmovusdw %zmm5, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7e,0x48,0x13,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdw %zmm5, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdw %zmm5, 4064(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x13,0x6a,0x7f]
+ vpmovusdw %zmm5, 4064(%rdx)
+
+// CHECK: vpmovusdw %zmm5, 4096(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x13,0xaa,0x00,0x10,0x00,0x00]
+ vpmovusdw %zmm5, 4096(%rdx)
+
+// CHECK: vpmovusdw %zmm5, -4096(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x13,0x6a,0x80]
+ vpmovusdw %zmm5, -4096(%rdx)
+
+// CHECK: vpmovusdw %zmm5, -4128(%rdx)
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x13,0xaa,0xe0,0xef,0xff,0xff]
+ vpmovusdw %zmm5, -4128(%rdx)
+
// CHECK: vinserti32x4
// CHECK: encoding: [0x62,0xa3,0x55,0x48,0x38,0xcd,0x01]
vinserti32x4 $1, %xmm21, %zmm5, %zmm17
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 2781ef4..10d420a 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -203,7 +203,7 @@ int $3
// CHECK-STDERR: warning: scale factor without index register is ignored
movaps %xmm3, (%esi, 2)
-// CHECK: imull $12, %eax, %eax
+// CHECK: imull $12, %eax
imul $12, %eax
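+// With a matching source and destination, the immediate multiply is printed
+// using its two-operand alias, so the CHECK above expects "imull $12, %eax".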
// CHECK: imull %ecx, %eax
diff --git a/test/Object/Inputs/COFF/weak-external.yaml b/test/Object/Inputs/COFF/weak-external.yaml
new file mode 100644
index 0000000..064b44a
--- /dev/null
+++ b/test/Object/Inputs/COFF/weak-external.yaml
@@ -0,0 +1,43 @@
+---
+header:
+ Machine: IMAGE_FILE_MACHINE_I386
+ Characteristics: [ IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_32BIT_MACHINE ]
+sections:
+ - Name: .text
+ Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+ Alignment: 4
+ SectionData: 5589E583E4F0E800000000B800000000C9C39090
+ Relocations:
+ - VirtualAddress: 7
+ SymbolName: ___main
+ Type: IMAGE_REL_I386_REL32
+symbols:
+ - Name: .file
+ Value: 0
+ SectionNumber: 65534
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_FILE
+ File: 'file'
+ - Name: .text
+ Value: 0
+ SectionNumber: 1
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+ SectionDefinition:
+ Length: 18
+ NumberOfRelocations: 1
+ NumberOfLinenumbers: 0
+ CheckSum: 0
+ Number: 0
+ - Name: ___main
+ Value: 0
+ SectionNumber: 0
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_FUNCTION
+ StorageClass: IMAGE_SYM_CLASS_EXTERNAL
+ WeakExternal:
+ TagIndex: 0
+ Characteristics: 0
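+# (TagIndex is the symbol-table index of the default definition to fall back
+# on if ___main stays unresolved; Characteristics selects one of the
+# IMAGE_WEAK_EXTERN_SEARCH_* rules and is left 0 here.)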
+...
diff --git a/test/Object/Inputs/COFF/x86-64.yaml b/test/Object/Inputs/COFF/x86-64.yaml
index 1dc2b10..b8a863a 100644
--- a/test/Object/Inputs/COFF/x86-64.yaml
+++ b/test/Object/Inputs/COFF/x86-64.yaml
@@ -30,6 +30,16 @@ sections:
Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
SectionData: !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+ - !Section
+ Name: '.CRT$XCU'
+ Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+ Alignment: 8
+ SectionData: !hex "0000000000000000"
+ Relocations:
+ - VirtualAddress: 0
+ SymbolName: '??__Ex@@YAXXZ'
+ Type: IMAGE_REL_AMD64_ADDR64
+
symbols:
- !Symbol
Name: .text
@@ -91,3 +101,10 @@ symbols:
ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+ - !Symbol
+ Name: '??__Ex@@YAXXZ'
+ Value: 0
+ SectionNumber: 3
+ SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+ ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2)
+ StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
diff --git a/test/Object/Inputs/macho-text-data-bss.macho-x86_64 b/test/Object/Inputs/macho-text-data-bss.macho-x86_64
new file mode 100644
index 0000000..b7628c8
--- /dev/null
+++ b/test/Object/Inputs/macho-text-data-bss.macho-x86_64
Binary files differ
diff --git a/test/Object/Inputs/macho-universal-archive.x86_64.i386 b/test/Object/Inputs/macho-universal-archive.x86_64.i386
new file mode 100644
index 0000000..1660714
--- /dev/null
+++ b/test/Object/Inputs/macho-universal-archive.x86_64.i386
Binary files differ
diff --git a/test/Object/Inputs/relocation-dynamic.elf-i386 b/test/Object/Inputs/relocation-dynamic.elf-i386
new file mode 100755
index 0000000..1548f13
--- /dev/null
+++ b/test/Object/Inputs/relocation-dynamic.elf-i386
Binary files differ
diff --git a/test/Object/Inputs/relocation-relocatable.elf-i386 b/test/Object/Inputs/relocation-relocatable.elf-i386
new file mode 100644
index 0000000..b8f375b
--- /dev/null
+++ b/test/Object/Inputs/relocation-relocatable.elf-i386
Binary files differ
diff --git a/test/Object/Inputs/trivial-object-test.coff-x86-64 b/test/Object/Inputs/trivial-object-test.coff-x86-64
index 0775914..ed144d1 100644
--- a/test/Object/Inputs/trivial-object-test.coff-x86-64
+++ b/test/Object/Inputs/trivial-object-test.coff-x86-64
Binary files differ
diff --git a/test/Object/X86/objdump-cfg-invalid-opcode.yaml b/test/Object/X86/objdump-cfg-invalid-opcode.yaml
index 56ab1d2..d0a29be 100644
--- a/test/Object/X86/objdump-cfg-invalid-opcode.yaml
+++ b/test/Object/X86/objdump-cfg-invalid-opcode.yaml
@@ -38,7 +38,7 @@ Sections:
#CFG: Type: Data
## 4: 06 (bad)
-#CFG: Content: 06
+#CFG: Content: '06'
#CFG: - StartAddress: 0x0000000000000005
#CFG: Size: 1
diff --git a/test/Object/X86/objdump-disassembly-symbolic.test b/test/Object/X86/objdump-disassembly-symbolic.test
index 858653e..95a5fc8 100644
--- a/test/Object/X86/objdump-disassembly-symbolic.test
+++ b/test/Object/X86/objdump-disassembly-symbolic.test
@@ -46,3 +46,23 @@ MACHO-STUBS-x86-64: 1faa: e8 09 00 00 00
MACHO-STUBS-x86-64: 1faf: 8b 44 24 04 movl 4(%rsp), %eax
MACHO-STUBS-x86-64: 1fb3: 48 83 c4 08 addq $8, %rsp
MACHO-STUBS-x86-64: 1fb7: c3 ret
+
+
+RUN: llvm-objdump -d -symbolize %p/../Inputs/relocation-relocatable.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386-REL
+
+ELF-i386-REL: Disassembly of section .text:
+ELF-i386-REL-NEXT: f:
+ELF-i386-REL-NEXT: 0: e9 fc ff ff ff jmp h
+ELF-i386-REL: g:
+ELF-i386-REL-NEXT: 5: e9 fc ff ff ff jmp f
+
+
+RUN: llvm-objdump -d -symbolize %p/../Inputs/relocation-dynamic.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386-DYN
+
+ELF-i386-DYN: Disassembly of section .text:
+ELF-i386-DYN-NEXT: f:
+ELF-i386-DYN-NEXT: 1a4: e9 fc ff ff ff jmp h
+ELF-i386-DYN: g:
+ELF-i386-DYN-NEXT: 1a9: e9 fc ff ff ff jmp f
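+
+In both dumps the jmp bytes are e9 fc ff ff ff, a rel32 of -4 that is just
+the relocation placeholder; -symbolize substitutes the target symbol
+recovered from the static or dynamic relocation entries.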
diff --git a/test/Object/X86/yaml2obj-elf-x86-rel.yaml b/test/Object/X86/yaml2obj-elf-x86-rel.yaml
new file mode 100644
index 0000000..5ca6614
--- /dev/null
+++ b/test/Object/X86/yaml2obj-elf-x86-rel.yaml
@@ -0,0 +1,41 @@
+# RUN: yaml2obj -format=elf %s > %t
+# RUN: llvm-readobj -r %t | FileCheck %s
+
+# CHECK: Relocations [
+# CHECK-NEXT: Section (2) .rel.text {
+# CHECK-NEXT: 0x0 R_386_32 main 0x0
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+
+FileHeader:
+ Class: ELFCLASS32
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_386
+Sections:
+ - Type: SHT_PROGBITS
+ Name: .text
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x04
+ Content: 0000000000000000
+ - Type: SHT_REL
+ Name: .rel.text
+ Link: .symtab
+ Info: .text
+ AddressAlign: 0x04
+ Relocations:
+ - Offset: 0
+ Symbol: main
+ Type: R_386_32
+
+Symbols:
+ Local:
+ - Name: .text
+ Type: STT_SECTION
+ Section: .text
+
+ Global:
+ - Name: main
+ Type: STT_FUNC
+ Section: .text
+ Size: 0x08
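+
+# For the .rel.text section header, Link names the symbol table used to
+# resolve 'Symbol: main' and Info the section the relocations patch (.text),
+# following the usual ELF sh_link/sh_info convention.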
diff --git a/test/Object/ar-error.test b/test/Object/ar-error.test
new file mode 100644
index 0000000..7add9b4
--- /dev/null
+++ b/test/Object/ar-error.test
@@ -0,0 +1,6 @@
+Test that we get a proper error with a filename that doesn't exist.
+
+RUN: not llvm-ar r %t.out.a sparkle.o %t 2>&1 | FileCheck %s
+
+# Don't check the message "No such file or directory".
+CHECK: llvm-ar{{(.exe|.EXE)?}}: sparkle.o:
diff --git a/test/Object/archive-long-index.test b/test/Object/archive-long-index.test
index bd530ed..f2f4df6 100644
--- a/test/Object/archive-long-index.test
+++ b/test/Object/archive-long-index.test
@@ -17,24 +17,24 @@ CHECKIDX: b in abcdefghijklmnopqrstuvwxyz2.o
CHECKIDX: bda in abcdefghijklmnopqrstuvwxyz2.o
CHECKIDX: b in abcdefghijklmnopq.o
CHECKIDX: 1.o:
-CHECKIDX: 00000000 D abcdefghijklmnopqrstuvwxyz12345678
-CHECKIDX: U bda
-CHECKIDX: 00000000 T main
+CHECKIDX: 0000000000000000 D abcdefghijklmnopqrstuvwxyz12345678
+CHECKIDX: U bda
+CHECKIDX: 0000000000000000 T main
CHECKIDX: 2.o:
-CHECKIDX: 00000000 T fn1
+CHECKIDX: 0000000000000000 T fn1
CHECKIDX: 3.o:
-CHECKIDX: 0000000b T fn1
-CHECKIDX: 00000000 T fn3
+CHECKIDX: 000000000000000b T fn1
+CHECKIDX: 0000000000000000 T fn3
CHECKIDX: 4.o:
-CHECKIDX: C shankar
+CHECKIDX: C shankar
CHECKIDX: 5.o:
-CHECKIDX: C a
+CHECKIDX: C a
CHECKIDX: 6.o:
-CHECKIDX: C b
+CHECKIDX: C b
CHECKIDX: abcdefghijklmnopqrstuvwxyz1.o:
-CHECKIDX: C a
+CHECKIDX: C a
CHECKIDX: abcdefghijklmnopqrstuvwxyz2.o:
-CHECKIDX: C b
-CHECKIDX: 00000000 T bda
+CHECKIDX: C b
+CHECKIDX: 0000000000000000 T bda
CHECKIDX: abcdefghijklmnopq.o:
-CHECKIDX: C b
+CHECKIDX: C b
diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test
index 6379504..88c9c98 100644
--- a/test/Object/archive-symtab.test
+++ b/test/Object/archive-symtab.test
@@ -9,13 +9,13 @@ CHECK-NEXT: main in trivial-object-test2.elf-x86-64
CHECK-NOT: bar
CHECK: trivial-object-test.elf-x86-64:
-CHECK-NEXT: U SomeOtherFunction
-CHECK-NEXT: 00000000 T main
-CHECK-NEXT: U puts
+CHECK-NEXT: U SomeOtherFunction
+CHECK-NEXT: 0000000000000000 T main
+CHECK-NEXT: U puts
CHECK-NEXT: trivial-object-test2.elf-x86-64:
-CHECK-NEXT: 00000000 t bar
-CHECK-NEXT: 00000006 T foo
-CHECK-NEXT: 00000016 T main
+CHECK-NEXT: 0000000000000000 t bar
+CHECK-NEXT: 0000000000000006 T foo
+CHECK-NEXT: 0000000000000016 T main
RUN: rm -f %t.a
RUN: llvm-ar rcS %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
@@ -37,13 +37,13 @@ CORRUPT-NEXT: foo in trivial-object-test2.elf-x86-64
CORRUPT-NEXT: main in trivial-object-test2.elf-x86-64
CORRUPT: trivial-object-test.elf-x86-64:
-CORRUPT-NEXT: U SomeOtherFunction
-CORRUPT-NEXT: 00000000 T main
-CORRUPT-NEXT: U puts
+CORRUPT-NEXT: U SomeOtherFunction
+CORRUPT-NEXT: 0000000000000000 T main
+CORRUPT-NEXT: U puts
CORRUPT-NEXT: trivial-object-test2.elf-x86-64:
-CORRUPT-NEXT: 00000000 t bar
-CORRUPT-NEXT: 00000006 T foo
-CORRUPT-NEXT: 00000016 T main
+CORRUPT-NEXT: 0000000000000000 t bar
+CORRUPT-NEXT: 0000000000000006 T foo
+CORRUPT-NEXT: 0000000000000016 T main
Check that we *don't* update the symbol table.
RUN: llvm-ar s %t.a
diff --git a/test/Object/archive-toc.test b/test/Object/archive-toc.test
index 0a5e72b..4195c40 100644
--- a/test/Object/archive-toc.test
+++ b/test/Object/archive-toc.test
@@ -1,20 +1,20 @@
Test reading an archive created by GNU ar
RUN: env TZ=GMT llvm-ar tv %p/Inputs/GNU.a | FileCheck %s --check-prefix=GNU -strict-whitespace
-GNU: rw-r--r-- 500/500 8 Nov 19 02:57 2004 evenlen
-GNU-NEXT: rw-r--r-- 500/500 7 Nov 19 02:57 2004 oddlen
-GNU-NEXT: rwxr-xr-x 500/500 1465 Nov 19 03:01 2004 very_long_bytecode_file_name.bc
-GNU-NEXT: rw-r--r-- 500/500 2280 Nov 19 03:04 2004 IsNAN.o
+GNU: rw-r--r-- 500/500 8 2004-11-19 02:57:37.000000000 evenlen
+GNU-NEXT: rw-r--r-- 500/500 7 2004-11-19 02:57:21.000000000 oddlen
+GNU-NEXT: rwxr-xr-x 500/500 1465 2004-11-19 03:01:31.000000000 very_long_bytecode_file_name.bc
+GNU-NEXT: rw-r--r-- 500/500 2280 2004-11-19 03:04:30.000000000 IsNAN.o
Test reading an archive created by Mac OS X ar
RUN: env TZ=GMT llvm-ar tv %p/Inputs/MacOSX.a | FileCheck %s --check-prefix=OSX -strict-whitespace
OSX-NOT: __.SYMDEF
-OSX: rw-r--r-- 501/501 8 Nov 19 02:57 2004 evenlen
-OSX-NEXT: rw-r--r-- 501/501 8 Nov 19 02:57 2004 oddlen
-OSX-NEXT: rw-r--r-- 502/502 1465 Feb 4 06:59 2010 very_long_bytecode_file_name.bc
-OSX-NEXT: rw-r--r-- 501/501 2280 Nov 19 04:32 2004 IsNAN.o
+OSX: rw-r--r-- 501/501 8 2004-11-19 02:57:37.000000000 evenlen
+OSX-NEXT: rw-r--r-- 501/501 8 2004-11-19 02:57:21.000000000 oddlen
+OSX-NEXT: rw-r--r-- 502/502 1465 2010-02-04 06:59:14.000000000 very_long_bytecode_file_name.bc
+OSX-NEXT: rw-r--r-- 501/501 2280 2004-11-19 04:32:06.000000000 IsNAN.o
Test reading an archive created on Solaris by /usr/ccs/bin/ar
RUN: env TZ=GMT llvm-ar tv %p/Inputs/SVR4.a | FileCheck %s -strict-whitespace
@@ -22,7 +22,7 @@ RUN: env TZ=GMT llvm-ar tv %p/Inputs/SVR4.a | FileCheck %s -strict-whitespace
Test reading an archive created on Solaris by /usr/xpg4/bin/ar
RUN: env TZ=GMT llvm-ar tv %p/Inputs/xpg4.a | FileCheck %s -strict-whitespace
-CHECK: rw-r--r-- 1002/102 8 Nov 19 03:24 2004 evenlen
-CHECK-NEXT: rw-r--r-- 1002/102 7 Nov 19 03:24 2004 oddlen
-CHECK-NEXT: rwxr-xr-x 1002/102 1465 Nov 19 03:24 2004 very_long_bytecode_file_name.bc
-CHECK-NEXT: rw-r--r-- 1002/102 2280 Nov 19 03:24 2004 IsNAN.o
+CHECK: rw-r--r-- 1002/102 8 2004-11-19 03:24:02.000000000 evenlen
+CHECK-NEXT: rw-r--r-- 1002/102 7 2004-11-19 03:24:02.000000000 oddlen
+CHECK-NEXT: rwxr-xr-x 1002/102 1465 2004-11-19 03:24:02.000000000 very_long_bytecode_file_name.bc
+CHECK-NEXT: rw-r--r-- 1002/102 2280 2004-11-19 03:24:02.000000000 IsNAN.o
diff --git a/test/Object/extract.ll b/test/Object/extract.ll
index 9f93c68..a4e7649 100644
--- a/test/Object/extract.ll
+++ b/test/Object/extract.ll
@@ -44,4 +44,4 @@
; RUN: llvm-ar rc %t.a very_long_bytecode_file_name.bc
; RUN: env TZ=GMT llvm-ar tv %t.a | FileCheck %s
-CHECK: 1465 Nov 19 03:01 2004 very_long_bytecode_file_name.bc
+CHECK: 1465 2004-11-19 03:01:31.000000000 very_long_bytecode_file_name.bc
diff --git a/test/Object/nm-shared-object.test b/test/Object/nm-shared-object.test
index b77b2ce..32ae6a8 100644
--- a/test/Object/nm-shared-object.test
+++ b/test/Object/nm-shared-object.test
@@ -2,25 +2,25 @@ RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF-32
ELF-32-NOT: U
-ELF-32: 0012c8 A __bss_start
-ELF-32: 0012c8 A _edata
-ELF-32: 0012cc A _end
-ELF-32: 0012c8 B common_sym
-ELF-32: 0012c4 D defined_sym
-ELF-32: 0001f0 T global_func
-ELF-32: 000000 D tls_sym
+ELF-32: 000012c8 A __bss_start
+ELF-32: 000012c8 A _edata
+ELF-32: 000012cc A _end
+ELF-32: 000012c8 B common_sym
+ELF-32: 000012c4 D defined_sym
+ELF-32: 000001f0 T global_func
+ELF-32: 00000000 D tls_sym
RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF-64
ELF-64-NOT: U
-ELF-64: 200454 A __bss_start
-ELF-64: 200454 A _edata
-ELF-64: 200458 A _end
-ELF-64: 200454 B common_sym
-ELF-64: 200450 D defined_sym
-ELF-64: 0002f0 T global_func
-ELF-64: 000000 D tls_sym
+ELF-64: 0000000000200454 A __bss_start
+ELF-64: 0000000000200454 A _edata
+ELF-64: 0000000000200458 A _end
+ELF-64: 0000000000200454 B common_sym
+ELF-64: 0000000000200450 D defined_sym
+ELF-64: 00000000000002f0 T global_func
+ELF-64: 0000000000000000 D tls_sym
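+
+The address column is as wide as the object's address size: 8 hex digits for
+the 32-bit file and 16 for the 64-bit one.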
RUN: not llvm-nm -D %p/Inputs/weak-global-symbol.macho-i386 2>&1 \
RUN: | FileCheck %s -check-prefix ERROR
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 4e90f96..20ac662 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -5,11 +5,11 @@ RUN: | FileCheck %s -check-prefix COFF
RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF
RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix ELF
+RUN: | FileCheck %s -check-prefix ELF64
RUN: llvm-nm %p/Inputs/weak.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix WEAK-ELF
+RUN: | FileCheck %s -check-prefix WEAK-ELF64
RUN: llvm-nm %p/Inputs/absolute.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix ABSOLUTE-ELF
+RUN: | FileCheck %s -check-prefix ABSOLUTE-ELF64
RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \
RUN: | FileCheck %s -check-prefix macho
RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \
@@ -17,7 +17,7 @@ RUN: | FileCheck %s -check-prefix macho64
RUN: llvm-nm %p/Inputs/common.coff-i386 \
RUN: | FileCheck %s -check-prefix COFF-COMMON
RUN: llvm-nm %p/Inputs/relocatable-with-section-address.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix ELF-SEC-ADDR
+RUN: | FileCheck %s -check-prefix ELF-SEC-ADDR64
RUN: llvm-nm %p/Inputs/thumb-symbols.elf.arm \
RUN: | FileCheck %s -check-prefix ELF-THUMB
@@ -43,30 +43,34 @@ ELF: U SomeOtherFunction
ELF: 00000000 T main
ELF: U puts
-WEAK-ELF: w f1
-WEAK-ELF: 00000000 W f2
-WEAK-ELF: v x1
-WEAK-ELF: 00000000 V x2
+ELF64: U SomeOtherFunction
+ELF64: 0000000000000000 T main
+ELF64: U puts
+
+WEAK-ELF64: w f1
+WEAK-ELF64: 0000000000000000 W f2
+WEAK-ELF64: v x1
+WEAK-ELF64: 0000000000000000 V x2
-ABSOLUTE-ELF: 00000123 a a1
-ABSOLUTE-ELF: 00000123 A a2
+ABSOLUTE-ELF64: 0000000000000123 a a1
+ABSOLUTE-ELF64: 0000000000000123 A a2
-macho: 00000000 U _SomeOtherFunction
+macho: U _SomeOtherFunction
macho: 00000000 T _main
-macho: 00000000 U _puts
+macho: U _puts
-macho64: 00000028 s L_.str
-macho64: 00000000 U _SomeOtherFunction
-macho64: 00000000 T _main
-macho64: 00000000 U _puts
+macho64: 0000000000000028 s L_.str
+macho64: U _SomeOtherFunction
+macho64: 0000000000000000 T _main
+macho64: U _puts
Test that nm uses addresses even with ELF .o files.
-ELF-SEC-ADDR: 00000058 D a
-ELF-SEC-ADDR-NEXT: 0000005c D b
-ELF-SEC-ADDR-NEXT: 00000040 T f
-ELF-SEC-ADDR-NEXT: 00000050 T g
-ELF-SEC-ADDR-NEXT: 00000060 D p
+ELF-SEC-ADDR64: 0000000000000058 D a
+ELF-SEC-ADDR64-NEXT: 000000000000005c D b
+ELF-SEC-ADDR64-NEXT: 0000000000000040 T f
+ELF-SEC-ADDR64-NEXT: 0000000000000050 T g
+ELF-SEC-ADDR64-NEXT: 0000000000000060 D p
Test that we drop the thumb bit only from function addresses.
diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test
index 8febfdf..c20c733 100644
--- a/test/Object/nm-universal-binary.test
+++ b/test/Object/nm-universal-binary.test
@@ -1,6 +1,19 @@
-RUN: llvm-nm %p/Inputs/macho-universal.x86_64.i386 | FileCheck %s
+RUN: llvm-nm %p/Inputs/macho-universal.x86_64.i386 \
+RUN: | FileCheck %s -check-prefix CHECK-OBJ
+RUN: llvm-nm %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN: | FileCheck %s -check-prefix CHECK-AR
-CHECK: macho-universal.x86_64.i386:x86_64
-CHECK: main
-CHECK: macho-universal.x86_64.i386:i386
-CHECK: main
+CHECK-OBJ: macho-universal.x86_64.i386:x86_64
+CHECK-OBJ: 0000000100000f60 T _main
+CHECK-OBJ: macho-universal.x86_64.i386:i386
+CHECK-OBJ: 00001fa0 T _main
+
+CHECK-AR: macho-universal-archive.x86_64.i386:x86_64:hello.o:
+CHECK-AR: 0000000000000068 s EH_frame0
+CHECK-AR: 000000000000003b s L_.str
+CHECK-AR: 0000000000000000 T _main
+CHECK-AR: 0000000000000080 S _main.eh
+CHECK-AR: U _printf
+CHECK-AR: macho-universal-archive.x86_64.i386:i386:foo.o:
+CHECK-AR: 00000008 S _bar
+CHECK-AR: 00000000 T _foo
diff --git a/test/Object/obj2yaml-coff-weak-external.test b/test/Object/obj2yaml-coff-weak-external.test
new file mode 100644
index 0000000..4ecdc1b
--- /dev/null
+++ b/test/Object/obj2yaml-coff-weak-external.test
@@ -0,0 +1,3 @@
+RUN: yaml2obj %p/Inputs/COFF/weak-external.yaml | obj2yaml | FileCheck %s --check-prefix COFF-I386
+
+COFF-I386: Characteristics: 0
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
index d96275f..1c15263 100644
--- a/test/Object/obj2yaml.test
+++ b/test/Object/obj2yaml.test
@@ -1,6 +1,8 @@
RUN: obj2yaml %p/Inputs/trivial-object-test.coff-i386 | FileCheck %s --check-prefix COFF-I386
RUN: obj2yaml %p/Inputs/trivial-object-test.coff-x86-64 | FileCheck %s --check-prefix COFF-X86-64
-
+RUN: obj2yaml %p/Inputs/trivial-object-test.elf-mipsel | FileCheck %s --check-prefix ELF-MIPSEL
+RUN: obj2yaml %p/Inputs/trivial-object-test.elf-mips64el | FileCheck %s --check-prefix ELF-MIPS64EL
+RUN: obj2yaml %p/Inputs/trivial-object-test.elf-x86-64 | FileCheck %s --check-prefix ELF-X86-64
COFF-I386: header:
COFF-I386-NEXT: Machine: IMAGE_FILE_MACHINE_I386
@@ -112,6 +114,16 @@ COFF-X86-64-NEXT: Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_S
COFF-X86-64-NEXT: Alignment: 1
COFF-X86-64-NEXT: SectionData: 48656C6C6F20576F726C642100
+COFF-X86-64: - Name: '.CRT$XCU'
+COFF-X86-64-NEXT: Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+COFF-X86-64-NEXT: Alignment: 8
+COFF-X86-64-NEXT: SectionData: '0000000000000000'
+
+COFF-X86-64: Relocations:
+COFF-X86-64-NEXT: - VirtualAddress: 0
+COFF-X86-64-NEXT: SymbolName: '??__Ex@@YAXXZ'
+COFF-X86-64-NEXT: Type: IMAGE_REL_AMD64_ADDR64
+
COFF-X86-64: symbols:
COFF-X86-64-NEXT: - Name: .text
COFF-X86-64-NEXT: Value: 0
@@ -143,7 +155,7 @@ COFF-X86-64: - Name: main
COFF-X86-64-NEXT: Value: 0
COFF-X86-64-NEXT: SectionNumber: 1
COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL
-COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_FUNCTION
COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL
COFF-X86-64: - Name: L.str
@@ -166,4 +178,230 @@ COFF-X86-64-NEXT: SectionNumber: 0
COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL
COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL
COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL
-COFF-X86-64-NOT: NumberOfAuxSymbols
+
+COFF-X86-64: - Name: '??__Ex@@YAXXZ'
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 3
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_FUNCTION
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC
+
+ELF-MIPSEL: FileHeader:
+ELF-MIPSEL-NEXT: Class: ELFCLASS32
+ELF-MIPSEL-NEXT: Data: ELFDATA2LSB
+ELF-MIPSEL-NEXT: Type: ET_REL
+ELF-MIPSEL-NEXT: Machine: EM_MIPS
+ELF-MIPSEL-NEXT: Flags: [ EF_MIPS_NOREORDER, EF_MIPS_PIC, EF_MIPS_CPIC, EF_MIPS_ABI_O32, EF_MIPS_ARCH_32 ]
+ELF-MIPSEL-NEXT: Sections:
+ELF-MIPSEL-NEXT: - Name: .text
+ELF-MIPSEL-NEXT: Type: SHT_PROGBITS
+ELF-MIPSEL-NEXT: Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000004
+ELF-MIPSEL-NEXT: Content: 0000023C00004224E8FFBD271400BFAF1000B0AF218059000000018E000024240000198E09F8200321E000020000198E09F8200321E00002000002241000B08F1400BF8F0800E0031800BD27
+ELF-MIPSEL-NEXT: - Name: .rel.text
+ELF-MIPSEL-NEXT: Type: SHT_REL
+ELF-MIPSEL-NEXT: Link: .symtab
+ELF-MIPSEL-NEXT: Info: .text
+ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000004
+ELF-MIPSEL-NEXT: Relocations:
+ELF-MIPSEL-NEXT: - Offset: 0
+ELF-MIPSEL-NEXT: Symbol: _gp_disp
+ELF-MIPSEL-NEXT: Type: R_MIPS_HI16
+ELF-MIPSEL-NEXT: Addend: 0
+ELF-MIPSEL-NEXT: - Offset: 0x0000000000000004
+ELF-MIPSEL-NEXT: Symbol: _gp_disp
+ELF-MIPSEL-NEXT: Type: R_MIPS_LO16
+ELF-MIPSEL-NEXT: Addend: 0
+ELF-MIPSEL-NEXT: - Offset: 0x0000000000000018
+ELF-MIPSEL-NEXT: Symbol: '$.str'
+ELF-MIPSEL-NEXT: Type: R_MIPS_GOT16
+ELF-MIPSEL-NEXT: Addend: 0
+ELF-MIPSEL-NEXT: - Offset: 0x000000000000001C
+ELF-MIPSEL-NEXT: Symbol: '$.str'
+ELF-MIPSEL-NEXT: Type: R_MIPS_LO16
+ELF-MIPSEL-NEXT: Addend: 0
+ELF-MIPSEL-NEXT: - Offset: 0x0000000000000020
+ELF-MIPSEL-NEXT: Symbol: puts
+ELF-MIPSEL-NEXT: Type: R_MIPS_CALL16
+ELF-MIPSEL-NEXT: Addend: 0
+ELF-MIPSEL-NEXT: - Offset: 0x000000000000002C
+ELF-MIPSEL-NEXT: Symbol: SomeOtherFunction
+ELF-MIPSEL-NEXT: Type: R_MIPS_CALL16
+ELF-MIPSEL-NEXT: Addend: 0
+ELF-MIPSEL-NEXT: - Name: .data
+ELF-MIPSEL-NEXT: Type: SHT_PROGBITS
+ELF-MIPSEL-NEXT: Flags: [ SHF_WRITE, SHF_ALLOC ]
+ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000004
+ELF-MIPSEL-NEXT: Content: ''
+ELF-MIPSEL-NEXT: - Name: .bss
+ELF-MIPSEL-NEXT: Type: SHT_NOBITS
+ELF-MIPSEL-NEXT: Flags: [ SHF_WRITE, SHF_ALLOC ]
+ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000004
+ELF-MIPSEL-NEXT: Content: ''
+ELF-MIPSEL-NEXT: - Name: .rodata.str1.1
+ELF-MIPSEL-NEXT: Type: SHT_PROGBITS
+ELF-MIPSEL-NEXT: Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ]
+ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000001
+ELF-MIPSEL-NEXT: Content: 48656C6C6F20576F726C640A00
+ELF-MIPSEL-NEXT: - Name: .reginfo
+ELF-MIPSEL-NEXT: Type: SHT_MIPS_REGINFO
+ELF-MIPSEL-NEXT: Flags: [ SHF_ALLOC ]
+ELF-MIPSEL-NEXT: AddressAlign: 0x0000000000000001
+ELF-MIPSEL-NEXT: Content: '000000000000000000000000000000000000000000000000'
+ELF-MIPSEL-NEXT: Symbols:
+ELF-MIPSEL-NEXT: Local:
+ELF-MIPSEL-NEXT: - Name: trivial.ll
+ELF-MIPSEL-NEXT: Type: STT_FILE
+ELF-MIPSEL-NEXT: - Name: '$.str'
+ELF-MIPSEL-NEXT: Type: STT_OBJECT
+ELF-MIPSEL-NEXT: Section: .rodata.str1.1
+ELF-MIPSEL-NEXT: Size: 0x000000000000000D
+ELF-MIPSEL-NEXT: - Name: .text
+ELF-MIPSEL-NEXT: Type: STT_SECTION
+ELF-MIPSEL-NEXT: Section: .text
+ELF-MIPSEL-NEXT: - Name: .data
+ELF-MIPSEL-NEXT: Type: STT_SECTION
+ELF-MIPSEL-NEXT: Section: .data
+ELF-MIPSEL-NEXT: - Name: .bss
+ELF-MIPSEL-NEXT: Type: STT_SECTION
+ELF-MIPSEL-NEXT: Section: .bss
+ELF-MIPSEL-NEXT: - Name: .rodata.str1.1
+ELF-MIPSEL-NEXT: Type: STT_SECTION
+ELF-MIPSEL-NEXT: Section: .rodata.str1.1
+ELF-MIPSEL-NEXT: - Name: .reginfo
+ELF-MIPSEL-NEXT: Type: STT_SECTION
+ELF-MIPSEL-NEXT: Section: .reginfo
+ELF-MIPSEL-NEXT: Global:
+ELF-MIPSEL-NEXT: - Name: main
+ELF-MIPSEL-NEXT: Type: STT_FUNC
+ELF-MIPSEL-NEXT: Section: .text
+ELF-MIPSEL-NEXT: Size: 0x000000000000004C
+ELF-MIPSEL-NEXT: - Name: SomeOtherFunction
+ELF-MIPSEL-NEXT: - Name: _gp_disp
+ELF-MIPSEL-NEXT: - Name: puts
+
+ELF-MIPS64EL: FileHeader:
+ELF-MIPS64EL-NEXT: Class: ELFCLASS64
+ELF-MIPS64EL-NEXT: Data: ELFDATA2LSB
+ELF-MIPS64EL-NEXT: Type: ET_REL
+ELF-MIPS64EL-NEXT: Machine: EM_MIPS
+ELF-MIPS64EL-NEXT: Flags: [ EF_MIPS_ARCH_3 ]
+ELF-MIPS64EL-NEXT: Sections:
+ELF-MIPS64EL-NEXT: - Name: .text
+ELF-MIPS64EL-NEXT: Type: SHT_PROGBITS
+ELF-MIPS64EL-NEXT: Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000010
+ELF-MIPS64EL-NEXT: Content: ''
+ELF-MIPS64EL-NEXT: - Name: .data
+ELF-MIPS64EL-NEXT: Type: SHT_PROGBITS
+ELF-MIPS64EL-NEXT: Flags: [ SHF_WRITE, SHF_ALLOC ]
+ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000010
+ELF-MIPS64EL-NEXT: Content: '00000000000000000000000000000000'
+ELF-MIPS64EL-NEXT: - Name: .rela.data
+ELF-MIPS64EL-NEXT: Type: SHT_RELA
+ELF-MIPS64EL-NEXT: Link: .symtab
+ELF-MIPS64EL-NEXT: Info: .data
+ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000008
+ELF-MIPS64EL-NEXT: Relocations:
+ELF-MIPS64EL-NEXT: - Offset: 0
+ELF-MIPS64EL-NEXT: Symbol: zed
+ELF-MIPS64EL-NEXT: Type: R_MIPS_64
+ELF-MIPS64EL-NEXT: Addend: 0
+ELF-MIPS64EL-NEXT: - Name: .bss
+ELF-MIPS64EL-NEXT: Type: SHT_NOBITS
+ELF-MIPS64EL-NEXT: Flags: [ SHF_WRITE, SHF_ALLOC ]
+ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000010
+ELF-MIPS64EL-NEXT: Content: ''
+ELF-MIPS64EL-NEXT: - Name: .MIPS.options
+ELF-MIPS64EL-NEXT: Type: SHT_MIPS_OPTIONS
+ELF-MIPS64EL-NEXT: Flags: [ SHF_ALLOC ]
+ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000008
+ELF-MIPS64EL-NEXT: Content: '01280000000000000000000000000000000000000000000000000000000000000000000000000000'
+ELF-MIPS64EL-NEXT: - Name: .pdr
+ELF-MIPS64EL-NEXT: Type: SHT_PROGBITS
+ELF-MIPS64EL-NEXT: AddressAlign: 0x0000000000000004
+ELF-MIPS64EL-NEXT: Content: ''
+ELF-MIPS64EL-NEXT: Symbols:
+ELF-MIPS64EL-NEXT: Local:
+ELF-MIPS64EL-NEXT: - Name: .text
+ELF-MIPS64EL-NEXT: Type: STT_SECTION
+ELF-MIPS64EL-NEXT: Section: .text
+ELF-MIPS64EL-NEXT: - Name: .data
+ELF-MIPS64EL-NEXT: Type: STT_SECTION
+ELF-MIPS64EL-NEXT: Section: .data
+ELF-MIPS64EL-NEXT: - Name: .bss
+ELF-MIPS64EL-NEXT: Type: STT_SECTION
+ELF-MIPS64EL-NEXT: Section: .bss
+ELF-MIPS64EL-NEXT: - Name: bar
+ELF-MIPS64EL-NEXT: Section: .data
+ELF-MIPS64EL-NEXT: - Name: .MIPS.options
+ELF-MIPS64EL-NEXT: Type: STT_SECTION
+ELF-MIPS64EL-NEXT: Section: .MIPS.options
+ELF-MIPS64EL-NEXT: - Name: .pdr
+ELF-MIPS64EL-NEXT: Type: STT_SECTION
+ELF-MIPS64EL-NEXT: Section: .pdr
+ELF-MIPS64EL-NEXT: Global:
+ELF-MIPS64EL-NEXT: - Name: zed
+
+ELF-X86-64: FileHeader:
+ELF-X86-64-NEXT: Class: ELFCLASS64
+ELF-X86-64-NEXT: Data: ELFDATA2LSB
+ELF-X86-64-NEXT: OSABI: ELFOSABI_GNU
+ELF-X86-64-NEXT: Type: ET_REL
+ELF-X86-64-NEXT: Machine: EM_X86_64
+ELF-X86-64-NEXT: Sections:
+ELF-X86-64-NEXT: - Name: .text
+ELF-X86-64-NEXT: Type: SHT_PROGBITS
+ELF-X86-64-NEXT: Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ELF-X86-64-NEXT: AddressAlign: 0x0000000000000010
+ELF-X86-64-NEXT: Content: 4883EC08C744240400000000BF00000000E80000000030C0E8000000008B4424044883C408C3
+ELF-X86-64-NEXT: - Name: .rodata.str1.1
+ELF-X86-64-NEXT: Type: SHT_PROGBITS
+ELF-X86-64-NEXT: Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ]
+ELF-X86-64-NEXT: Address: 0x0000000000000026
+ELF-X86-64-NEXT: AddressAlign: 0x0000000000000001
+ELF-X86-64-NEXT: Content: 48656C6C6F20576F726C642100
+ELF-X86-64-NEXT: - Name: .note.GNU-stack
+ELF-X86-64-NEXT: Type: SHT_PROGBITS
+ELF-X86-64-NEXT: Address: 0x0000000000000033
+ELF-X86-64-NEXT: AddressAlign: 0x0000000000000001
+ELF-X86-64-NEXT: Content: ''
+ELF-X86-64-NEXT: - Name: .rela.text
+ELF-X86-64-NEXT: Type: SHT_RELA
+ELF-X86-64-NEXT: Address: 0x0000000000000038
+ELF-X86-64-NEXT: Link: .symtab
+ELF-X86-64-NEXT: Info: .text
+ELF-X86-64-NEXT: AddressAlign: 0x0000000000000008
+ELF-X86-64-NEXT: Relocations:
+ELF-X86-64-NEXT: - Offset: 0x000000000000000D
+ELF-X86-64-NEXT: Symbol: .rodata.str1.1
+ELF-X86-64-NEXT: Type: R_X86_64_32S
+ELF-X86-64-NEXT: Addend: 0
+ELF-X86-64-NEXT: - Offset: 0x0000000000000012
+ELF-X86-64-NEXT: Symbol: puts
+ELF-X86-64-NEXT: Type: R_X86_64_PC32
+ELF-X86-64-NEXT: Addend: -4
+ELF-X86-64-NEXT: - Offset: 0x0000000000000019
+ELF-X86-64-NEXT: Symbol: SomeOtherFunction
+ELF-X86-64-NEXT: Type: R_X86_64_PC32
+ELF-X86-64-NEXT: Addend: -4
+ELF-X86-64-NEXT: Symbols:
+ELF-X86-64-NEXT: Local:
+ELF-X86-64-NEXT: - Name: trivial-object-test.s
+ELF-X86-64-NEXT: Type: STT_FILE
+ELF-X86-64-NEXT: - Name: .text
+ELF-X86-64-NEXT: Type: STT_SECTION
+ELF-X86-64-NEXT: Section: .text
+ELF-X86-64-NEXT: - Name: .rodata.str1.1
+ELF-X86-64-NEXT: Type: STT_SECTION
+ELF-X86-64-NEXT: Section: .rodata.str1.1
+ELF-X86-64-NEXT: - Name: .note.GNU-stack
+ELF-X86-64-NEXT: Type: STT_SECTION
+ELF-X86-64-NEXT: Section: .note.GNU-stack
+ELF-X86-64-NEXT: Global:
+ELF-X86-64-NEXT: - Name: main
+ELF-X86-64-NEXT: Type: STT_FUNC
+ELF-X86-64-NEXT: Section: .text
+ELF-X86-64-NEXT: Size: 0x0000000000000026
+ELF-X86-64-NEXT: - Name: SomeOtherFunction
+ELF-X86-64-NEXT: - Name: puts
diff --git a/test/Object/size-trivial-macho.test b/test/Object/size-trivial-macho.test
new file mode 100644
index 0000000..6ecdf5c
--- /dev/null
+++ b/test/Object/size-trivial-macho.test
@@ -0,0 +1,15 @@
+RUN: llvm-size -A %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN: | FileCheck %s -check-prefix A
+RUN: llvm-size -B %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN: | FileCheck %s -check-prefix B
+
+A: section size addr
+A: __text 12 0
+A: __data 4 12
+A: __bss 4 112
+A: __compact_unwind 32 16
+A: __eh_frame 64 48
+A: Total 116
+
+B: text data bss dec hex filename
+B: 12 100 4 116 74
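+
+The -A output follows the System V size format (a row per section plus a
+total), while -B is the Berkeley format: text, data and bss sums followed by
+the total in decimal (116) and hex (74); here the unwind sections count
+toward data, giving 12 + (4 + 32 + 64) + 4 = 116.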
diff --git a/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml b/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml
index 19cc91e..7d09807 100644
--- a/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml
+++ b/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml
@@ -5,12 +5,13 @@ FileHeader:
Data: ELFDATA2LSB
Type: ET_REL
Machine: EM_MIPS
- Flags: [ EF_MIPS_NOREORDER, EF_MIPS_ABI_O32 ]
+ Flags: [ EF_MIPS_NOREORDER, EF_MIPS_ABI_O32, EF_MIPS_ARCH_32R2 ]
# CHECK: Format: ELF32-mips
# CHECK: Arch: mipsel
# CHECK: Machine: EM_MIPS
-# CHECK: Flags [ (0x1001)
-# CHECK-NEXT: 0x1
-# CHECK-NEXT: 0x1000
+# CHECK: Flags [ (0x70001001)
+# CHECK-NEXT: EF_MIPS_ABI_O32 (0x1000)
+# CHECK-NEXT: EF_MIPS_ARCH_32R2 (0x70000000)
+# CHECK-NEXT: EF_MIPS_NOREORDER (0x1)
# CHECK-NEXT: ]
diff --git a/test/Object/yaml2obj-elf-rel.yaml b/test/Object/yaml2obj-elf-rel.yaml
new file mode 100644
index 0000000..6a7ed45
--- /dev/null
+++ b/test/Object/yaml2obj-elf-rel.yaml
@@ -0,0 +1,118 @@
+# RUN: yaml2obj -format=elf %s | llvm-readobj -sections -relocations - | FileCheck %s
+
+!ELF
+FileHeader: !FileHeader
+ Class: ELFCLASS32
+ Data: ELFDATA2MSB
+ Type: ET_REL
+ Machine: EM_MIPS
+
+Sections:
+- Name: .text
+ Type: SHT_PROGBITS
+ Content: "0000000000000000"
+ AddressAlign: 16
+ Flags: [SHF_ALLOC]
+
+- Name: .rel.text
+ Type: SHT_REL
+ Info: .text
+ AddressAlign: 4
+ Relocations:
+ - Offset: 0x1
+ Symbol: glob1
+ Type: R_MIPS_32
+ - Offset: 0x1
+ Symbol: glob2
+ Type: R_MIPS_CALL16
+ - Offset: 0x2
+ Symbol: loc1
+ Type: R_MIPS_LO16
+
+- Name: .rela.text
+ Type: SHT_RELA
+ Link: .symtab
+ Info: .text
+ AddressAlign: 4
+ Relocations:
+ - Offset: 0x1
+ Addend: 1
+ Symbol: glob1
+ Type: R_MIPS_32
+ - Offset: 0x1
+ Addend: 2
+ Symbol: glob2
+ Type: R_MIPS_CALL16
+ - Offset: 0x2
+ Addend: 3
+ Symbol: loc1
+ Type: R_MIPS_LO16
+
+Symbols:
+ Local:
+ - Name: loc1
+ - Name: loc2
+ Global:
+ - Name: glob1
+ Section: .text
+ Value: 0x0
+ Size: 4
+ - Name: glob2
+ Weak:
+ - Name: weak1
+
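+# An ELF32 SHT_REL entry is 8 bytes and carries no addend field (implicit
+# addends live in the relocated word), while an SHT_RELA entry is 12 bytes
+# with an explicit addend; the EntrySize and addend checks below reflect that.
+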
+# CHECK: Section {
+# CHECK-NEXT: Index: 0
+# CHECK: }
+# CHECK: Section {
+# CHECK-NEXT: Index: 1
+# CHECK-NEXT: Name: .text (16)
+# CHECK: }
+# CHECK-NEXT: Section {
+# CHECK-NEXT: Index: 2
+# CHECK-NEXT: Name: .rel.text (1)
+# CHECK-NEXT: Type: SHT_REL (0x9)
+# CHECK-NEXT: Flags [ (0x0)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Address: 0x0
+# CHECK-NEXT: Offset: 0x160
+# CHECK-NEXT: Size: 24
+# CHECK-NEXT: Link: 4
+# CHECK-NEXT: Info: 1
+# CHECK-NEXT: AddressAlignment: 4
+# CHECK-NEXT: EntrySize: 8
+# CHECK-NEXT: }
+# CHECK-NEXT: Section {
+# CHECK-NEXT: Index: 3
+# CHECK-NEXT: Name: .rela.text (11)
+# CHECK-NEXT: Type: SHT_RELA (0x4)
+# CHECK-NEXT: Flags [ (0x0)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Address: 0x0
+# CHECK-NEXT: Offset: 0x180
+# CHECK-NEXT: Size: 36
+# CHECK-NEXT: Link: 4
+# CHECK-NEXT: Info: 1
+# CHECK-NEXT: AddressAlignment: 4
+# CHECK-NEXT: EntrySize: 12
+# CHECK-NEXT: }
+# CHECK-NEXT: Section {
+# CHECK-NEXT: Index: 4
+# CHECK-NEXT: Name: .symtab (40)
+# CHECK: }
+# CHECK-NEXT: Section {
+# CHECK-NEXT: Index: 5
+# CHECK-NEXT: Name: .strtab (32)
+# CHECK: }
+# CHECK: Relocations [
+# CHECK-NEXT: Section (2) .rel.text {
+# CHECK-NEXT: 0x1 R_MIPS_32 glob1 0x0
+# CHECK-NEXT: 0x1 R_MIPS_CALL16 glob2 0x0
+# CHECK-NEXT: 0x2 R_MIPS_LO16 loc1 0x0
+# CHECK-NEXT: }
+# CHECK-NEXT: Section (3) .rela.text {
+# CHECK-NEXT: 0x1 R_MIPS_32 glob1 0x1
+# CHECK-NEXT: 0x1 R_MIPS_CALL16 glob2 0x2
+# CHECK-NEXT: 0x2 R_MIPS_LO16 loc1 0x3
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
diff --git a/test/Object/yaml2obj-elf-section-basic.yaml b/test/Object/yaml2obj-elf-section-basic.yaml
index c1f6935..56a3fd6 100644
--- a/test/Object/yaml2obj-elf-section-basic.yaml
+++ b/test/Object/yaml2obj-elf-section-basic.yaml
@@ -1,4 +1,7 @@
# RUN: yaml2obj -format=elf %s | llvm-readobj -sections -section-data - | FileCheck %s
+# RUN: yaml2obj -format=elf -o %t %s
+# RUN: llvm-readobj -sections -section-data %t | FileCheck %s
+
!ELF
FileHeader:
Class: ELFCLASS64
@@ -14,6 +17,14 @@ Sections:
Content: EBFE
AddressAlign: 2
+ - Name: .data
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0xCAFECAFE
+ Content: FEBF
+ Size: 8
+ AddressAlign: 2
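+# Size exceeds the two bytes of Content above, so yaml2obj should zero-pad
+# the section data; the `FEBF0000 00000000` bytes checked below rely on that.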
+
# CHECK: Section {
# CHECK: Index: 0
# CHECK: Type: SHT_NULL (0x0)
@@ -35,14 +46,31 @@ Sections:
# CHECK-NEXT: )
#
# CHECK: Section {
-# CHECK: Name: .symtab (7)
+# CHECK: Name: .data
+# CHECK-NEXT: Type: SHT_PROGBITS (0x1)
+# CHECK-NEXT: Flags [ (0x2)
+# CHECK-NEXT: SHF_ALLOC (0x2)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Address: 0xCAFECAFE
+# CHECK-NEXT: Offset: 0x1D0
+# CHECK-NEXT: Size: 8
+# CHECK-NEXT: Link: 0
+# CHECK-NEXT: Info: 0
+# CHECK-NEXT: AddressAlignment: 2
+# CHECK-NEXT: EntrySize: 0
+# CHECK-NEXT: SectionData (
+# CHECK-NEXT: 0000: FEBF0000 00000000 |........|
+# CHECK-NEXT: )
+#
+# CHECK: Section {
+# CHECK: Name: .symtab (25)
# CHECK: Type: SHT_SYMTAB (0x2)
# CHECK: }
# CHECK: Section {
-# CHECK: Name: .strtab (15)
+# CHECK: Name: .strtab (17)
# CHECK: Type: SHT_STRTAB (0x3)
# CHECK: }
# CHECK: Section {
-# CHECK: Name: .shstrtab (23)
+# CHECK: Name: .shstrtab (7)
# CHECK: Type: SHT_STRTAB (0x3)
# CHECK: }
diff --git a/test/Object/yaml2obj-elf-section-invalid-size.yaml b/test/Object/yaml2obj-elf-section-invalid-size.yaml
new file mode 100644
index 0000000..d0cb370
--- /dev/null
+++ b/test/Object/yaml2obj-elf-section-invalid-size.yaml
@@ -0,0 +1,26 @@
+# RUN: not yaml2obj -format=elf -o %t %s 2>&1 | FileCheck %s
+
+!ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_X86_64
+
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Content: EBFE
+ AddressAlign: 2
+
+ - Name: .data
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Content: 0000000000000000
+ Size: 2
+
+# CHECK: YAML:17:5: error: Section size must be greater or equal to the content size
+# CHECK-NEXT: - Name: .data
+# CHECK-NEXT: ^
+# CHECK-NEXT: yaml2obj: Failed to parse YAML file!
diff --git a/test/Object/yaml2obj-readobj.test b/test/Object/yaml2obj-readobj.test
index 3031f5e..3bd0c6b 100644
--- a/test/Object/yaml2obj-readobj.test
+++ b/test/Object/yaml2obj-readobj.test
@@ -1,4 +1,7 @@
RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-readobj -file-headers -relocations -expand-relocs - | FileCheck %s --check-prefix COFF-I386
+RUN: yaml2obj -o %t %p/Inputs/COFF/i386.yaml
+RUN: llvm-readobj -file-headers -relocations -expand-relocs %t \
+RUN: | FileCheck %s --check-prefix COFF-I386
// COFF-I386: Characteristics [ (0x200)
// COFF-I386-NEXT: IMAGE_FILE_DEBUG_STRIPPED (0x200)
diff --git a/test/Other/extract-alias.ll b/test/Other/extract-alias.ll
index d1e4af5..dbc650e 100644
--- a/test/Other/extract-alias.ll
+++ b/test/Other/extract-alias.ll
@@ -14,7 +14,7 @@
; DELETE: @zed = global i32 0
; DELETE: @zeda0 = alias i32* @zed
; DELETE-NEXT: @a0foo = alias i32* ()* @foo
-; DELETE-NEXT: @a0a0bar = alias void ()* @a0bar
+; DELETE-NEXT: @a0a0bar = alias void ()* @bar
; DELETE-NEXT: @a0bar = alias void ()* @bar
; DELETE: declare i32* @foo()
; DELETE: define void @bar() {
@@ -25,7 +25,7 @@
; ALIAS: @zed = external global i32
; ALIAS: @zeda0 = alias i32* @zed
-; ALIASRE: @a0a0bar = alias void ()* @a0bar
+; ALIASRE: @a0a0bar = alias void ()* @bar
; ALIASRE: @a0bar = alias void ()* @bar
; ALIASRE: declare void @bar()
@@ -39,7 +39,7 @@ define i32* @foo() {
ret i32* @zeda0
}
-@a0a0bar = alias void ()* @a0bar
+@a0a0bar = alias void ()* @bar
@a0bar = alias void ()* @bar
diff --git a/test/Other/optimization-remarks-inline.ll b/test/Other/optimization-remarks-inline.ll
new file mode 100644
index 0000000..566b206
--- /dev/null
+++ b/test/Other/optimization-remarks-inline.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -inline -pass-remarks='inline' -S 2>&1 | FileCheck %s
+; RUN: opt < %s -inline -pass-remarks='inl.*' -S 2>&1 | FileCheck %s
+; RUN: opt < %s -inline -pass-remarks='vector' -pass-remarks='inl' -S 2>&1 | FileCheck %s
+
+; Neither of these two commands should yield an inline remark, and for the
+; same reason: the first asks only for vectorizer remarks, while the second
+; asks for the inliner but then asks for the vectorizer, overriding the
+; first flag.
+; RUN: opt < %s -inline -pass-remarks='vector' -S 2>&1 | FileCheck --check-prefix=REMARKS %s
+; RUN: opt < %s -inline -pass-remarks='inl' -pass-remarks='vector' -S 2>&1 | FileCheck --check-prefix=REMARKS %s
+
+; RUN: opt < %s -inline -S 2>&1 | FileCheck --check-prefix=REMARKS %s
+; RUN: not opt < %s -pass-remarks='(' 2>&1 | FileCheck --check-prefix=BAD-REGEXP %s
+
+define i32 @foo(i32 %x, i32 %y) #0 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32* %x.addr, align 4
+ %1 = load i32* %y.addr, align 4
+ %add = add nsw i32 %0, %1
+ ret i32 %add
+}
+
+define i32 @bar(i32 %j) #0 {
+entry:
+ %j.addr = alloca i32, align 4
+ store i32 %j, i32* %j.addr, align 4
+ %0 = load i32* %j.addr, align 4
+ %1 = load i32* %j.addr, align 4
+ %sub = sub nsw i32 %1, 2
+ %call = call i32 @foo(i32 %0, i32 %sub)
+; CHECK: foo inlined into bar
+; REMARKS-NOT: foo inlined into bar
+ ret i32 %call
+}
+
+; BAD-REGEXP: Invalid regular expression '(' in -pass-remarks:
diff --git a/test/Other/pass-pipeline-parsing.ll b/test/Other/pass-pipeline-parsing.ll
index ba33610..4ec4162 100644
--- a/test/Other/pass-pipeline-parsing.ll
+++ b/test/Other/pass-pipeline-parsing.ll
@@ -105,6 +105,42 @@
; RUN: | FileCheck %s --check-prefix=CHECK-UNBALANCED10
; CHECK-UNBALANCED10: unable to parse pass pipeline description
+; RUN: opt -disable-output -debug-pass-manager -debug-cgscc-pass-manager \
+; RUN: -passes=no-op-cgscc,no-op-cgscc %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-TWO-NOOP-CG
+; CHECK-TWO-NOOP-CG: Starting module pass manager
+; CHECK-TWO-NOOP-CG: Running module pass: ModuleToPostOrderCGSCCPassAdaptor
+; CHECK-TWO-NOOP-CG: Starting CGSCC pass manager
+; CHECK-TWO-NOOP-CG: Running CGSCC pass: NoOpCGSCCPass
+; CHECK-TWO-NOOP-CG: Running CGSCC pass: NoOpCGSCCPass
+; CHECK-TWO-NOOP-CG: Finished CGSCC pass manager
+; CHECK-TWO-NOOP-CG: Finished module pass manager
+
+; RUN: opt -disable-output -debug-pass-manager -debug-cgscc-pass-manager \
+; RUN: -passes='module(function(no-op-function),cgscc(no-op-cgscc,function(no-op-function),no-op-cgscc),function(no-op-function))' %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=CHECK-NESTED-MP-CG-FP
+; CHECK-NESTED-MP-CG-FP: Starting module pass manager
+; CHECK-NESTED-MP-CG-FP: Starting module pass manager
+; CHECK-NESTED-MP-CG-FP: Running module pass: ModuleToFunctionPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting function pass manager
+; CHECK-NESTED-MP-CG-FP: Running function pass: NoOpFunctionPass
+; CHECK-NESTED-MP-CG-FP: Finished function pass manager
+; CHECK-NESTED-MP-CG-FP: Running module pass: ModuleToPostOrderCGSCCPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting CGSCC pass manager
+; CHECK-NESTED-MP-CG-FP: Running CGSCC pass: NoOpCGSCCPass
+; CHECK-NESTED-MP-CG-FP: Running CGSCC pass: CGSCCToFunctionPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting function pass manager
+; CHECK-NESTED-MP-CG-FP: Running function pass: NoOpFunctionPass
+; CHECK-NESTED-MP-CG-FP: Finished function pass manager
+; CHECK-NESTED-MP-CG-FP: Running CGSCC pass: NoOpCGSCCPass
+; CHECK-NESTED-MP-CG-FP: Finished CGSCC pass manager
+; CHECK-NESTED-MP-CG-FP: Running module pass: ModuleToFunctionPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting function pass manager
+; CHECK-NESTED-MP-CG-FP: Running function pass: NoOpFunctionPass
+; CHECK-NESTED-MP-CG-FP: Finished function pass manager
+; CHECK-NESTED-MP-CG-FP: Finished module pass manager
+; CHECK-NESTED-MP-CG-FP: Finished module pass manager
+
define void @f() {
ret void
}
diff --git a/test/TableGen/GeneralList.td b/test/TableGen/GeneralList.td
index 9e0c7df..17cc9a5 100644
--- a/test/TableGen/GeneralList.td
+++ b/test/TableGen/GeneralList.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s
-// XFAIL: vg_leak
//
// Test to make sure that lists work with any data-type
diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td
index 9e58605..d753fbd 100644
--- a/test/TableGen/lisp.td
+++ b/test/TableGen/lisp.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s
-// XFAIL: vg_leak
// CHECK: def One {
// CHECK-NEXT: list<string> names = ["Jeffrey Sinclair"];
diff --git a/test/TableGen/listconcat.td b/test/TableGen/listconcat.td
new file mode 100644
index 0000000..870e649
--- /dev/null
+++ b/test/TableGen/listconcat.td
@@ -0,0 +1,18 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+
+// CHECK: class Y<list<string> Y:S = ?> {
+// CHECK: list<string> T1 = !listconcat(Y:S, ["foo"]);
+// CHECK: list<string> T2 = !listconcat(Y:S, !listconcat(["foo"], !listconcat(Y:S, ["bar", "baz"])));
+// CHECK: }
+
+// CHECK: def Z {
+// CHECK: list<string> T1 = ["fu", "foo"];
+// CHECK: list<string> T2 = ["fu", "foo", "fu", "bar", "baz"];
+// CHECK: }
+
+class Y<list<string> S> {
+ list<string> T1 = !listconcat(S, ["foo"]);
+ list<string> T2 = !listconcat(S, ["foo"], S, ["bar", "baz"]);
+}
+
+def Z : Y<["fu"]>;
diff --git a/test/TableGen/strconcat.td b/test/TableGen/strconcat.td
index dfb1a94..f5d7512 100644
--- a/test/TableGen/strconcat.td
+++ b/test/TableGen/strconcat.td
@@ -1,9 +1,21 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// CHECK: fufoo
+// CHECK: class Y<string Y:S = ?> {
+// CHECK: string T = !strconcat(Y:S, "foo");
+// CHECK: string T2 = !strconcat(Y:S, !strconcat("foo", !strconcat(Y:S, "bar")));
+// CHECK: string S = "foobar";
+// CHECK: }
+
+// CHECK: def Z {
+// CHECK: string T = "fufoo";
+// CHECK: string T2 = "fufoofubar";
+// CHECK: string S = "foobar";
+// CHECK: }
class Y<string S> {
string T = !strconcat(S, "foo");
+  // Passing more than two arguments is equivalent to nesting two-argument calls.
+ string T2 = !strconcat(S, "foo", S, "bar");
// String values concatenate lexically, as in C.
string S = "foo" "bar";
diff --git a/test/Transforms/AddDiscriminators/no-discriminators.ll b/test/Transforms/AddDiscriminators/no-discriminators.ll
new file mode 100644
index 0000000..f7b45e29
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; We should not generate discriminators for DWARF versions prior to 4.
+;
+; Original code:
+;
+; int foo(long i) {
+; if (i < 5) return 2; else return 90;
+; }
+;
+; None of the !dbg nodes associated with the if() statement should be
+; altered. If they are, it means that the discriminators pass added a
+; new lexical scope.
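+; (The "Dwarf Version" module flag below is set to 2, which is what keeps the
+; pass from adding discriminators here.)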
+
+define i32 @foo(i64 %i) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i64, align 8
+ store i64 %i, i64* %i.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i64* %i.addr}, metadata !13), !dbg !14
+ %0 = load i64* %i.addr, align 8, !dbg !15
+; CHECK: %0 = load i64* %i.addr, align 8, !dbg !15
+ %cmp = icmp slt i64 %0, 5, !dbg !15
+; CHECK: %cmp = icmp slt i64 %0, 5, !dbg !15
+ br i1 %cmp, label %if.then, label %if.else, !dbg !15
+; CHECK: br i1 %cmp, label %if.then, label %if.else, !dbg !15
+
+if.then: ; preds = %entry
+ store i32 2, i32* %retval, !dbg !15
+ br label %return, !dbg !15
+
+if.else: ; preds = %entry
+ store i32 90, i32* %retval, !dbg !15
+ br label %return, !dbg !15
+
+return: ; preds = %if.else, %if.then
+ %1 = load i32* %retval, !dbg !17
+ ret i32 %1, !dbg !17
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [./no-discriminators] [DW_LANG_C99]
+!1 = metadata !{metadata !"no-discriminators", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i64)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./no-discriminators]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !9}
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+; CHECK: !10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!12 = metadata !{metadata !"clang version 3.5.0 "}
+!13 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !5, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!14 = metadata !{i32 1, i32 0, metadata !4, null}
+!15 = metadata !{i32 2, i32 0, metadata !16, null}
+; CHECK: !15 = metadata !{i32 2, i32 0, metadata !16, null}
+!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./no-discriminators]
+; CHECK: !16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./no-discriminators]
+!17 = metadata !{i32 3, i32 0, metadata !4, null}
diff --git a/test/Transforms/ArgumentPromotion/inalloca.ll b/test/Transforms/ArgumentPromotion/inalloca.ll
index 513a968..089a78f 100644
--- a/test/Transforms/ArgumentPromotion/inalloca.ll
+++ b/test/Transforms/ArgumentPromotion/inalloca.ll
@@ -20,7 +20,7 @@ entry:
define i32 @main() {
entry:
- %S = alloca %struct.ss
+ %S = alloca inalloca %struct.ss
%f0 = getelementptr %struct.ss* %S, i32 0, i32 0
%f1 = getelementptr %struct.ss* %S, i32 0, i32 1
store i32 1, i32* %f0, align 4
@@ -42,7 +42,7 @@ entry:
define i32 @test() {
entry:
- %S = alloca %struct.ss
+ %S = alloca inalloca %struct.ss
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
ret i32 0
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
new file mode 100644
index 0000000..ac9fc1f
--- /dev/null
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
@@ -0,0 +1,340 @@
+; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-ll-sc %s | FileCheck %s
+
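+; On ARMv7 each atomicrmw/cmpxchg is expanded to an ldrex/strex retry loop,
+; with any acquire/release ordering provided by explicit IR fences around the
+; loop (the "fence" lines checked below).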
+define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
+; CHECK-LABEL: @test_atomic_xchg_i8
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
+ ret i8 %res
+}
+
+define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
+; CHECK-LABEL: @test_atomic_add_i16
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
+; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i16 [[OLDVAL]]
+ %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
+ ret i16 %res
+}
+
+define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
+; CHECK-LABEL: @test_atomic_sub_i32
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
+; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence acquire
+; CHECK: ret i32 [[OLDVAL]]
+ %res = atomicrmw sub i32* %ptr, i32 %subend acquire
+ ret i32 %res
+}
+
+define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
+; CHECK-LABEL: @test_atomic_and_i8
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw and i8* %ptr, i8 %andend release
+ ret i8 %res
+}
+
+define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
+; CHECK-LABEL: @test_atomic_nand_i16
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[NEWVAL_TMP:%.*]] = xor i16 %nandend, -1
+; CHECK: [[NEWVAL:%.*]] = and i16 [[OLDVAL]], [[NEWVAL_TMP]]
+; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i16 [[OLDVAL]]
+ %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
+ ret i16 %res
+}
+
+define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
+; CHECK-LABEL: @test_atomic_or_i64
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
+; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i64 [[OLDVAL]]
+ %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
+ ret i64 %res
+}
+
+define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
+; CHECK-LABEL: @test_atomic_xor_i8
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
+ ret i8 %res
+}
+
+define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
+; CHECK-LABEL: @test_atomic_max_i8
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
+ ret i8 %res
+}
+
+define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
+; CHECK-LABEL: @test_atomic_min_i8
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
+ ret i8 %res
+}
+
+define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
+; CHECK-LABEL: @test_atomic_umax_i8
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
+ ret i8 %res
+}
+
+define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
+; CHECK-LABEL: @test_atomic_umin_i8
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: fence seq_cst
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
+ ret i8 %res
+}
+
+define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i8 [[OLDVAL]]
+
+ %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+ ret i8 %old
+}
+
+define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
+; CHECK: fence release
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK: fence seq_cst
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i16 [[OLDVAL]]
+
+ %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+ ret i16 %old
+}
+
+define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK: fence acquire
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i32 [[OLDVAL]]
+
+ %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+ ret i32 %old
+}
+
+define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i64 [[OLDVAL]]
+
+ %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+ ret i64 %old
+}
\ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
new file mode 100644
index 0000000..bec5bef
--- /dev/null
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
@@ -0,0 +1,202 @@
+; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-ll-sc %s | FileCheck %s
+
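+; On ARMv8 the acquire/release semantics come from the ldaex/stlex intrinsics
+; themselves, so no separate "fence" instructions should be emitted.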
+define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
+; CHECK-LABEL: @test_atomic_xchg_i8
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i8 [[OLDVAL]]
+ %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
+ ret i8 %res
+}
+
+define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
+; CHECK-LABEL: @test_atomic_add_i16
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
+; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i16 [[OLDVAL]]
+ %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
+ ret i16 %res
+}
+
+define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
+; CHECK-LABEL: @test_atomic_sub_i32
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
+; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i32 [[OLDVAL]]
+ %res = atomicrmw sub i32* %ptr, i32 %subend acquire
+ ret i32 %res
+}
+
+define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
+; CHECK-LABEL: @test_atomic_or_i64
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
+; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i64 [[OLDVAL]]
+ %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
+ ret i64 %res
+}
+
+define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i8 [[OLDVAL]]
+
+ %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+ ret i8 %old
+}
+
+define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i16 [[OLDVAL]]
+
+ %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+ ret i16 %old
+}
+
+define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i32 [[OLDVAL]]
+
+ %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+ ret i32 %old
+}
+
+define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
+
+; CHECK: [[BARRIER]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[DONE]]:
+; CHECK: ret i64 [[OLDVAL]]
+
+ %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+ ret i64 %old
+}
\ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
new file mode 100644
index 0000000..8a3ba96
--- /dev/null
+++ b/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index e90900a..e0c1efa 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -5,6 +5,18 @@ declare double @llvm.fma.f64(double, double, double)
declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
+declare double @llvm.round.f64(double)
+declare double @llvm.copysign.f64(double, double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.rint.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.fabs.f64(double)
+declare i64 @llvm.bswap.i64(i64)
+declare i64 @llvm.ctpop.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
; Basic depth-3 chain with fma
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
@@ -124,9 +136,371 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; CHECK: ret double %R
}
+; Basic depth-3 chain with round
+define double @testround(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.round.f64(double %X1)
+ %Y2 = call double @llvm.round.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testround
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.round.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with copysign
+define double @testcopysign(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.copysign.f64(double %X1, double %A1)
+ %Y2 = call double @llvm.copysign.f64(double %X2, double %A1)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testcopysign
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i1.2 = insertelement <2 x double> %X1.v.i0.1, double %A1, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with ceil
+define double @testceil(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.ceil.f64(double %X1)
+ %Y2 = call double @llvm.ceil.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testceil
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with nearbyint
+define double @testnearbyint(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.nearbyint.f64(double %X1)
+ %Y2 = call double @llvm.nearbyint.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testnearbyint
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with rint
+define double @testrint(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.rint.f64(double %X1)
+ %Y2 = call double @llvm.rint.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testrint
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with trunc
+define double @testtrunc(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.trunc.f64(double %X1)
+ %Y2 = call double @llvm.trunc.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testtrunc
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.trunc.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with floor
+define double @testfloor(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.floor.f64(double %X1)
+ %Y2 = call double @llvm.floor.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testfloor
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.floor.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with fabs
+define double @testfabs(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fabs.f64(double %X1)
+ %Y2 = call double @llvm.fabs.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @testfabs
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+
+}
+
+; Basic depth-3 chain with bswap
+define i64 @testbswap(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+ %X1 = sub i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+ %Y1 = call i64 @llvm.bswap.i64(i64 %X1)
+ %Y2 = call i64 @llvm.bswap.i64(i64 %X2)
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+ %R = mul i64 %Z1, %Z2
+ ret i64 %R
+
+; CHECK: @testbswap
+; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
+; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %X1)
+; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
+; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
+; CHECK: ret i64 %R
+
+}
+
+; Basic depth-3 chain with ctpop
+define i64 @testctpop(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+ %X1 = sub i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+ %Y1 = call i64 @llvm.ctpop.i64(i64 %X1)
+ %Y2 = call i64 @llvm.ctpop.i64(i64 %X2)
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+ %R = mul i64 %Z1, %Z2
+ ret i64 %R
+
+; CHECK: @testctpop
+; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
+; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %X1)
+; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
+; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
+; CHECK: ret i64 %R
+
+}
+
+; Basic depth-3 chain with ctlz
+define i64 @testctlz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+ %X1 = sub i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+ %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
+ %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 true)
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+ %R = mul i64 %Z1, %Z2
+ ret i64 %R
+
+; CHECK: @testctlz
+; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
+; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %X1, i1 true)
+; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
+; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
+; CHECK: ret i64 %R
+
+}
+
+; Basic depth-3 chain with ctlz, but with mismatched i1 arguments (not vectorized)
+define i64 @testctlzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+ %X1 = sub i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+ %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
+ %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+ %R = mul i64 %Z1, %Z2
+ ret i64 %R
+
+; CHECK: @testctlzneg
+; CHECK: %X1 = sub i64 %A1, %B1
+; CHECK: %X2 = sub i64 %A2, %B2
+; CHECK: %Y1 = call i64 @llvm.ctlz.i64(i64 %X1, i1 true)
+; CHECK: %Y2 = call i64 @llvm.ctlz.i64(i64 %X2, i1 false)
+; CHECK: %Z1 = add i64 %Y1, %B1
+; CHECK: %Z2 = add i64 %Y2, %B2
+; CHECK: %R = mul i64 %Z1, %Z2
+; CHECK: ret i64 %R
+}
+
+; Basic depth-3 chain with cttz
+define i64 @testcttz(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+ %X1 = sub i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+ %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
+ %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 true)
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+ %R = mul i64 %Z1, %Z2
+ ret i64 %R
+
+; CHECK: @testcttz
+; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
+; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %X1, i1 true)
+; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
+; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
+; CHECK: ret i64 %R
+
+}
+
+; Basic depth-3 chain with cttz, but with mismatched i1 arguments (not vectorized)
+define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+ %X1 = sub i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+ %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
+ %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+ %R = mul i64 %Z1, %Z2
+ ret i64 %R
+
+; CHECK: @testcttzneg
+; CHECK: %X1 = sub i64 %A1, %B1
+; CHECK: %X2 = sub i64 %A2, %B2
+; CHECK: %Y1 = call i64 @llvm.cttz.i64(i64 %X1, i1 true)
+; CHECK: %Y2 = call i64 @llvm.cttz.i64(i64 %X2, i1 false)
+; CHECK: %Z1 = add i64 %Y1, %B1
+; CHECK: %Z2 = add i64 %Y2, %B2
+; CHECK: %R = mul i64 %Z1, %Z2
+; CHECK: ret i64 %R
+}
+
; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #0
-
+; CHECK: declare <2 x double> @llvm.round.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
+; CHECK: declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #0
+; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
+; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
+; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
; CHECK: attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
new file mode 100644
index 0000000..a985c36
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
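+; The addrspacecast should not stop the address computation from being sunk
+; into the block that uses it; both tests check for the resulting %sunkaddr.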
+; CHECK-LABEL: @load_cast_gep
+; CHECK: add i64 %sunkaddr, 40
+define void @load_cast_gep(i1 %cond, i64* %base) {
+entry:
+ %addr = getelementptr inbounds i64* %base, i64 5
+ %casted = addrspacecast i64* %addr to i32 addrspace(1)*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i32 addrspace(1)* %casted, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+; CHECK-LABEL: @store_gep_cast
+; CHECK: add i64 %sunkaddr, 20
+define void @store_gep_cast(i1 %cond, i64* %base) {
+entry:
+ %casted = addrspacecast i64* %base to i32 addrspace(1)*
+ %addr = getelementptr inbounds i32 addrspace(1)* %casted, i64 5
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ store i32 0, i32 addrspace(1)* %addr, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index d05db47..0ea9c47 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -219,3 +219,37 @@ entry:
; BE-LABEL: @test15(
; BE: ret i64 2
}
+
+@gv7 = constant [4 x i8*] [i8* null, i8* inttoptr (i64 -14 to i8*), i8* null, i8* null]
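+; Loads from this constant array fold away: the null elements read back as 0
+; and the inttoptr element reads back as -14.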
+define i64 @test16.1() {
+ %v = load i64* bitcast ([4 x i8*]* @gv7 to i64*), align 8
+ ret i64 %v
+
+; LE-LABEL: @test16.1(
+; LE: ret i64 0
+
+; BE-LABEL: @test16.1(
+; BE: ret i64 0
+}
+
+define i64 @test16.2() {
+ %v = load i64* bitcast (i8** getelementptr inbounds ([4 x i8*]* @gv7, i64 0, i64 1) to i64*), align 8
+ ret i64 %v
+
+; LE-LABEL: @test16.2(
+; LE: ret i64 -14
+
+; BE-LABEL: @test16.2(
+; BE: ret i64 -14
+}
+
+define i64 @test16.3() {
+ %v = load i64* bitcast (i8** getelementptr inbounds ([4 x i8*]* @gv7, i64 0, i64 2) to i64*), align 8
+ ret i64 %v
+
+; LE-LABEL: @test16.3(
+; LE: ret i64 0
+
+; BE-LABEL: @test16.3(
+; BE: ret i64 0
+}
diff --git a/test/Transforms/ConstantHoisting/AArch64/const-addr.ll b/test/Transforms/ConstantHoisting/AArch64/const-addr.ll
new file mode 100644
index 0000000..89d5960
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/AArch64/const-addr.ll
@@ -0,0 +1,23 @@
+; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+
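+; The absolute address is expensive to materialize, so it should be hoisted
+; once and the field addresses rewritten as GEPs off the hoisted constant.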
+define i32 @test1() nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i64 68141056 to i64
+; CHECK: %1 = inttoptr i64 %const to %T*
+; CHECK: %o1 = getelementptr %T* %1, i32 0, i32 1
+; CHECK: %o2 = getelementptr %T* %1, i32 0, i32 2
+; CHECK: %o3 = getelementptr %T* %1, i32 0, i32 3
+ %at = inttoptr i64 68141056 to %T*
+ %o1 = getelementptr %T* %at, i32 0, i32 1
+ %t1 = load i32* %o1
+ %o2 = getelementptr %T* %at, i32 0, i32 2
+ %t2 = load i32* %o2
+ %a1 = add i32 %t1, %t2
+ %o3 = getelementptr %T* %at, i32 0, i32 3
+ %t3 = load i32* %o3
+ %a2 = add i32 %a1, %t3
+ ret i32 %a2
+}
+
diff --git a/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll b/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll
new file mode 100644
index 0000000..575be79
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll
@@ -0,0 +1,27 @@
+; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
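+; Check that the expensive repeated immediate is hoisted and materialized only once.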
+define i128 @test1(i128 %a) nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i128 12297829382473034410122878 to i128
+ %1 = add i128 %a, 12297829382473034410122878
+ %2 = add i128 %1, 12297829382473034410122878
+ ret i128 %2
+}
+
+; Check that we don't hoist large but cheap constants.
+define i512 @test2(i512 %a) nounwind {
+; CHECK-LABEL: test2
+; CHECK-NOT: %const = bitcast i512 7 to i512
+ %1 = and i512 %a, 7
+ %2 = or i512 %1, 7
+ ret i512 %2
+}
+
+; Check that we don't hoist the shift value of a shift instruction.
+define i512 @test3(i512 %a) nounwind {
+; CHECK-LABEL: test3
+; CHECK-NOT: %const = bitcast i512 504 to i512
+ %1 = shl i512 %a, 504
+ %2 = ashr i512 %1, 504
+ ret i512 %2
+}
diff --git a/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
new file mode 100644
index 0000000..c420349
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll b/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
new file mode 100644
index 0000000..b4337ee
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%T = type { i32, i32, i32, i32 }
+
+; Check that even cheap base addresses are hoisted.
+define i32 @test1() nounwind {
+; CHECK-LABEL: @test1
+; CHECK: %const = bitcast i32 12345678 to i32
+; CHECK: %1 = inttoptr i32 %const to %T*
+; CHECK: %addr1 = getelementptr %T* %1, i32 0, i32 1
+ %addr1 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
+ %tmp1 = load i32* %addr1
+ %addr2 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
+ %tmp2 = load i32* %addr2
+ %addr3 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
+ %tmp3 = load i32* %addr3
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+}
+
diff --git a/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..2e46300
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if 'PowerPC' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/ConstantHoisting/PowerPC/masks.ll b/test/Transforms/ConstantHoisting/PowerPC/masks.ll
new file mode 100644
index 0000000..d553182
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/PowerPC/masks.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Here the masks are all contiguous, and should not be hoisted.
+define i32 @test1() nounwind {
+entry:
+; CHECK-LABEL: @test1
+; CHECK-NOT: bitcast i32 65535 to i32
+; CHECK: and i32 undef, 65535
+ %conv121 = and i32 undef, 65535
+ br i1 undef, label %if.then152, label %if.end167
+
+if.then152:
+; CHECK: and i32 undef, 65535
+ %conv153 = and i32 undef, 65535
+ br i1 undef, label %if.end167, label %end2
+
+if.end167:
+; CHECK: and i32 {{.*}}, 32768
+ %shl161 = shl nuw nsw i32 %conv121, 15
+ %0 = load i8* undef, align 1
+ %conv169 = zext i8 %0 to i32
+ %shl170 = shl nuw nsw i32 %conv169, 7
+ %shl161.masked = and i32 %shl161, 32768
+ %conv174 = or i32 %shl170, %shl161.masked
+ %cmp178 = icmp ugt i32 %conv174, 32767
+ br i1 %cmp178, label %end1, label %end2
+
+end1:
+ unreachable
+
+end2:
+ unreachable
+}
+
+; Here the masks are not contiguous, and should be hoisted.
+define i32 @test2() nounwind {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: bitcast i32 65531 to i32
+ %conv121 = and i32 undef, 65531
+ br i1 undef, label %if.then152, label %if.end167
+
+if.then152:
+ %conv153 = and i32 undef, 65531
+ br i1 undef, label %if.end167, label %end2
+
+if.end167:
+; CHECK: add i32 {{.*}}, -32758
+ %shl161 = shl nuw nsw i32 %conv121, 15
+ %0 = load i8* undef, align 1
+ %conv169 = zext i8 %0 to i32
+ %shl170 = shl nuw nsw i32 %conv169, 7
+ %shl161.masked = and i32 %shl161, 32773
+ %conv174 = or i32 %shl170, %shl161.masked
+ %cmp178 = icmp ugt i32 %conv174, 32767
+ br i1 %cmp178, label %end1, label %end2
+
+end1:
+ unreachable
+
+end2:
+ unreachable
+}
+
diff --git a/test/Transforms/ConstantHoisting/X86/cast-inst.ll b/test/Transforms/ConstantHoisting/X86/cast-inst.ll
new file mode 100644
index 0000000..f490f4a
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/cast-inst.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Check that the constant materialization and the cast instruction are
+; inserted in the correct order.
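+; The neighbouring addresses should be rebased as %const + 16 and %const + 32.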
+define i32 @cast_inst_test() {
+; CHECK-LABEL: @cast_inst_test
+; CHECK: %const = bitcast i64 4646526064 to i64
+; CHECK: %1 = inttoptr i64 %const to i32*
+; CHECK: %v0 = load i32* %1, align 16
+; CHECK: %const_mat = add i64 %const, 16
+; CHECK-NEXT: %2 = inttoptr i64 %const_mat to i32*
+; CHECK-NEXT: %v1 = load i32* %2, align 16
+; CHECK: %const_mat1 = add i64 %const, 32
+; CHECK-NEXT: %3 = inttoptr i64 %const_mat1 to i32*
+; CHECK-NEXT: %v2 = load i32* %3, align 16
+ %a0 = inttoptr i64 4646526064 to i32*
+ %v0 = load i32* %a0, align 16
+ %a1 = inttoptr i64 4646526080 to i32*
+ %v1 = load i32* %a1, align 16
+ %a2 = inttoptr i64 4646526096 to i32*
+ %v2 = load i32* %a2, align 16
+ %r0 = add i32 %v0, %v1
+ %r1 = add i32 %r0, %v2
+ ret i32 %r1
+}
+
diff --git a/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll b/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll
index f8e478e..d352386 100644
--- a/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll
+++ b/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll
@@ -1,5 +1,4 @@
-; Test if this compiles without assertions.
-; RUN: opt -S -consthoist < %s
+; RUN: opt -S -consthoist < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -7,6 +6,13 @@ target triple = "x86_64-apple-macosx10.9.0"
%T = type { i32, i32, i32, i32 }
define i32 @test1() nounwind {
+; CHECK-LABEL: @test1
+; CHECK: %const = bitcast i32 12345678 to i32
+; CHECK-NOT: %base = inttoptr i32 12345678 to %T*
+; CHECK-NEXT: %1 = inttoptr i32 %const to %T*
+; CHECK-NEXT: %addr1 = getelementptr %T* %1, i32 0, i32 1
+; CHECK-NEXT: %addr2 = getelementptr %T* %1, i32 0, i32 2
+; CHECK-NEXT: %addr3 = getelementptr %T* %1, i32 0, i32 3
%base = inttoptr i32 12345678 to %T*
%addr1 = getelementptr %T* %base, i32 0, i32 1
%addr2 = getelementptr %T* %base, i32 0, i32 2
diff --git a/test/Transforms/ConstantHoisting/X86/large-immediate.ll b/test/Transforms/ConstantHoisting/X86/large-immediate.ll
new file mode 100644
index 0000000..e0af9c9
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/X86/large-immediate.ll
@@ -0,0 +1,27 @@
+; RUN: opt -mtriple=x86_64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
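+; Check that the expensive repeated immediate is hoisted and materialized only once.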
+define i128 @test1(i128 %a) nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i128 12297829382473034410122878 to i128
+ %1 = add i128 %a, 12297829382473034410122878
+ %2 = add i128 %1, 12297829382473034410122878
+ ret i128 %2
+}
+
+; Check that we don't hoist the shift value of a shift instruction.
+define i512 @test2(i512 %a) nounwind {
+; CHECK-LABEL: test2
+; CHECK-NOT: %const = bitcast i512 504 to i512
+ %1 = shl i512 %a, 504
+ %2 = ashr i512 %1, 504
+ ret i512 %2
+}
+
+; Check that we don't hoist constants with a type larger than i128.
+define i196 @test3(i196 %a) nounwind {
+; CHECK-LABEL: test3
+; CHECK-NOT: %const = bitcast i196 2 to i196
+ %1 = mul i196 %a, 2
+ %2 = mul i196 %1, 2
+ ret i196 %2
+}
diff --git a/test/Transforms/GVN/load-pre-nonlocal.ll b/test/Transforms/GVN/load-pre-nonlocal.ll
new file mode 100644
index 0000000..7bac1b7
--- /dev/null
+++ b/test/Transforms/GVN/load-pre-nonlocal.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -o - -basicaa -domtree -gvn %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.S1 = type { i32, i32 }
+
+@a2 = common global i32* null, align 8
+@a = common global i32* null, align 8
+@s1 = common global %struct.S1 zeroinitializer, align 8
+
+; Check that GVN doesn't determine that %2 is partially redundant.
+
+; CHECK-LABEL: define i32 @volatile_load
+; CHECK: for.body:
+; CHECK: %2 = load i32*
+; CHECK: %3 = load volatile i32*
+; CHECK: for.cond.for.end_crit_edge:
+
+define i32 @volatile_load(i32 %n) {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %0 = load i32** @a2, align 8, !tbaa !1
+ %1 = load i32** @a, align 8, !tbaa !1
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %s.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %p.08 = phi i32* [ %0, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+ %2 = load i32* %p.08, align 4, !tbaa !5
+ %arrayidx = getelementptr inbounds i32* %1, i64 %indvars.iv
+ store i32 %2, i32* %arrayidx, align 4, !tbaa !5
+ %3 = load volatile i32* %p.08, align 4, !tbaa !5
+ %add = add nsw i32 %3, %s.09
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %incdec.ptr = getelementptr inbounds i32* %p.08, i64 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ br label %for.end
+
+for.end:
+ %s.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+ ret i32 %s.0.lcssa
+}
+
+; %1 is partially redundant if %0 can be widened to a 64-bit load.
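+; @s1 is 8-byte aligned, so the i32 load of its first field can be widened to
+; an i64 that also covers the field loaded through %add.ptr.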
+
+; CHECK-LABEL: define i32 @overaligned_load
+; CHECK: if.end:
+; CHECK-NOT: %1 = load i32*
+
+define i32 @overaligned_load(i32 %a, i32* nocapture %b) {
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %0 = load i32* getelementptr inbounds (%struct.S1* @s1, i64 0, i32 0), align 8, !tbaa !5
+ br label %if.end
+
+if.else:
+ %arrayidx = getelementptr inbounds i32* %b, i64 2
+ store i32 10, i32* %arrayidx, align 4, !tbaa !5
+ br label %if.end
+
+if.end:
+ %i.0 = phi i32 [ %0, %if.then ], [ 0, %if.else ]
+ %p.0 = phi i32* [ getelementptr inbounds (%struct.S1* @s1, i64 0, i32 0), %if.then ], [ %b, %if.else ]
+ %add.ptr = getelementptr inbounds i32* %p.0, i64 1
+ %1 = load i32* %add.ptr, align 4, !tbaa !5
+ %add1 = add nsw i32 %1, %i.0
+ ret i32 %add1
+}
+
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!5 = metadata !{metadata !6, metadata !6, i64 0}
+!6 = metadata !{metadata !"int", metadata !3, i64 0}
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 6658cee..4b96799 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -1,8 +1,18 @@
-; RUN: opt < %s -globaldce -S | not grep @D
-; RUN: opt < %s -globaldce -S | grep @L | count 3
+; RUN: opt < %s -globaldce -S > %t
+; RUN: FileCheck %s < %t
+; RUN: FileCheck --check-prefix=DEAD %s < %t
@A = global i32 0
+; CHECK: @A = global i32 0
+
@D = alias internal i32* @A
+; DEAD-NOT: @D
+
@L1 = alias i32* @A
-@L2 = alias internal i32* @L1
-@L3 = alias i32* @L2
+; CHECK: @L1 = alias i32* @A
+
+@L2 = alias internal i32* @A
+; DEAD-NOT: @L2
+
+@L3 = alias i32* @A
+; CHECK: @L3 = alias i32* @A
diff --git a/test/Transforms/GlobalDCE/global_ctors.ll b/test/Transforms/GlobalDCE/global_ctors.ll
new file mode 100644
index 0000000..91bb9ab
--- /dev/null
+++ b/test/Transforms/GlobalDCE/global_ctors.ll
@@ -0,0 +1,14 @@
+; RUN: opt -S -globaldce < %s | FileCheck %s
+
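+; @_GLOBAL__I_a is an empty constructor, so globaldce should drop it from
+; @llvm.global_ctors and then delete the function itself.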
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_notremovable }]
+; CHECK-NOT: @_GLOBAL__I_a
+
+declare void @_notremovable()
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }, { i32, void ()* } { i32 65535, void ()* @_notremovable }]
+
+; Function Attrs: nounwind readnone
+define internal void @_GLOBAL__I_a() #1 section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+ ret void
+}
diff --git a/test/Transforms/GlobalDCE/global_ctors_integration.ll b/test/Transforms/GlobalDCE/global_ctors_integration.ll
new file mode 100644
index 0000000..5e6cc79
--- /dev/null
+++ b/test/Transforms/GlobalDCE/global_ctors_integration.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -O2 < %s | FileCheck %s
+
+; This test checks that -O2 is able to delete constructors that become empty
+; only after some optimization passes have run, even if the pass structure
+; changes.
+; CHECK-NOT: @_GLOBAL__I_a
+
+%class.Foo = type { i32 }
+
+@foo = global %class.Foo zeroinitializer, align 4
+@_ZN3Bar18LINKER_INITIALIZEDE = external constant i32
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+ %1 = load i32* @_ZN3Bar18LINKER_INITIALIZEDE, align 4
+ call void @_ZN3FooC1E17LinkerInitialized(%class.Foo* @foo, i32 %1)
+ ret void
+}
+
+; Function Attrs: ssp uwtable
+define linkonce_odr void @_ZN3FooC1E17LinkerInitialized(%class.Foo* %this, i32) unnamed_addr #0 align 2 {
+ %2 = alloca %class.Foo*, align 8
+ %3 = alloca i32, align 4
+ store %class.Foo* %this, %class.Foo** %2, align 8
+ store i32 %0, i32* %3, align 4
+ %4 = load %class.Foo** %2
+ %5 = load i32* %3, align 4
+ call void @_ZN3FooC2E17LinkerInitialized(%class.Foo* %4, i32 %5)
+ ret void
+}
+
+; Function Attrs: nounwind ssp uwtable
+define linkonce_odr void @_ZN3FooC2E17LinkerInitialized(%class.Foo* %this, i32) unnamed_addr #1 align 2 {
+ %2 = alloca %class.Foo*, align 8
+ %3 = alloca i32, align 4
+ store %class.Foo* %this, %class.Foo** %2, align 8
+ store i32 %0, i32* %3, align 4
+ %4 = load %class.Foo** %2
+ ret void
+}
+
+define internal void @_GLOBAL__I_a() section "__TEXT,__StaticInit,regular,pure_instructions" {
+ call void @__cxx_global_var_init()
+ ret void
+}
diff --git a/test/Transforms/GlobalMerge/ARM64/arm64.ll b/test/Transforms/GlobalMerge/AArch64/arm64.ll
index eea474a..eea474a 100644
--- a/test/Transforms/GlobalMerge/ARM64/arm64.ll
+++ b/test/Transforms/GlobalMerge/AArch64/arm64.ll
diff --git a/test/Transforms/GlobalMerge/AArch64/lit.local.cfg b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg
new file mode 100644
index 0000000..9a66a00
--- /dev/null
+++ b/test/Transforms/GlobalMerge/AArch64/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg b/test/Transforms/GlobalMerge/ARM64/lit.local.cfg
deleted file mode 100644
index a75a42b..0000000
--- a/test/Transforms/GlobalMerge/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
-
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index d6a565a..03d6ee4 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -2,7 +2,7 @@
@g = global i32 0
-@a = alias bitcast (i32* @g to i8*)
+@a = alias i8, i32* @g
define void @f() {
%tmp = load i8* @a
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 2d5a956..bd07b31 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,9 +1,9 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
-@foo1 = alias void ()* @foo2
+@foo1 = alias void ()* @bar2
; CHECK: @foo1 = alias void ()* @bar2
-@foo2 = alias void()* @bar1
+@foo2 = alias void()* @bar2
; CHECK: @foo2 = alias void ()* @bar2
@bar1 = alias void ()* @bar2
diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll
index 987c4a4..1217937 100644
--- a/test/Transforms/GlobalOpt/alias-used-section.ll
+++ b/test/Transforms/GlobalOpt/alias-used-section.ll
@@ -1,8 +1,8 @@
; RUN: opt -S -globalopt < %s | FileCheck %s
@_Z17in_custom_section = internal global i8 42, section "CUSTOM"
-@in_custom_section = protected dllexport alias internal i8* @_Z17in_custom_section
+@in_custom_section = dllexport alias internal i8* @_Z17in_custom_section
-; CHECK: @in_custom_section = internal protected dllexport global i8 42, section "CUSTOM"
+; CHECK: @in_custom_section = internal dllexport global i8 42, section "CUSTOM"
@llvm.used = appending global [1 x i8*] [i8* @in_custom_section], section "llvm.metadata"
diff --git a/test/Transforms/GlobalOpt/atexit.ll b/test/Transforms/GlobalOpt/atexit.ll
index dbcd0d7..55c2dab 100644
--- a/test/Transforms/GlobalOpt/atexit.ll
+++ b/test/Transforms/GlobalOpt/atexit.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
; CHECK: ModuleID
-define internal hidden i32 @__cxa_atexit(void (i8*)* nocapture %func, i8* nocapture %arg, i8* nocapture %dso_handle) nounwind readnone optsize noimplicitfloat {
+define internal i32 @__cxa_atexit(void (i8*)* nocapture %func, i8* nocapture %arg, i8* nocapture %dso_handle) nounwind readnone optsize noimplicitfloat {
unreachable
}
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt.ll b/test/Transforms/GlobalOpt/ctor-list-opt.ll
index 542c786..450bdb8 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt.ll
@@ -1,5 +1,20 @@
-; RUN: opt < %s -globalopt -S | not grep CTOR
-@llvm.global_ctors = appending global [11 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR1 }, { i32, void ()* } { i32 65535, void ()* @CTOR2 }, { i32, void ()* } { i32 65535, void ()* @CTOR3 }, { i32, void ()* } { i32 65535, void ()* @CTOR4 }, { i32, void ()* } { i32 65535, void ()* @CTOR5 }, { i32, void ()* } { i32 65535, void ()* @CTOR6 }, { i32, void ()* } { i32 65535, void ()* @CTOR7 }, { i32, void ()* } { i32 65535, void ()* @CTOR8 }, { i32, void ()* } { i32 65535, void ()* @CTOR9 }, { i32, void ()* } { i32 2147483647, void ()* null } ] ; <[10 x { i32, void ()* }]*> [#uses=0]
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: CTOR
+%ini = type { i32, void()*, i8* }
+@llvm.global_ctors = appending global [11 x %ini] [
+ %ini { i32 65535, void ()* @CTOR1, i8* null },
+ %ini { i32 65535, void ()* @CTOR1, i8* null },
+ %ini { i32 65535, void ()* @CTOR2, i8* null },
+ %ini { i32 65535, void ()* @CTOR3, i8* null },
+ %ini { i32 65535, void ()* @CTOR4, i8* null },
+ %ini { i32 65535, void ()* @CTOR5, i8* null },
+ %ini { i32 65535, void ()* @CTOR6, i8* null },
+ %ini { i32 65535, void ()* @CTOR7, i8* null },
+ %ini { i32 65535, void ()* @CTOR8, i8* null },
+ %ini { i32 65535, void ()* @CTOR9, i8* null },
+ %ini { i32 2147483647, void ()* null, i8* null }
+]
+
@G = global i32 0 ; <i32*> [#uses=1]
@G2 = global i32 0 ; <i32*> [#uses=1]
@G3 = global i32 -123 ; <i32*> [#uses=2]
diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
index bd174a8..4ea0b88 100644
--- a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as <%s | opt -ipsccp | llvm-dis | FileCheck %s
+; RUN: opt < %s -ipsccp -S | FileCheck %s
; Don't constant-propagate byval pointers, since they are not pointers!
; PR5038
%struct.MYstr = type { i8, i32 }
diff --git a/test/Transforms/IndVarSimplify/pr18223.ll b/test/Transforms/IndVarSimplify/pr18223.ll
new file mode 100644
index 0000000..738f75c
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr18223.ll
@@ -0,0 +1,30 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+; indvars should simplify the phi node pair coming out of the for-loop so
+; that %conv folds to 0 in the exit phi
+; CHECK-LABEL: @main(
+; CHECK: ret = phi i32 [ 0, %entry ], [ 0, {{.*}} ]
+
+@c = common global i32 0, align 4
+
+define i32 @main() #0 {
+entry:
+ %0 = load i32* @c, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.body, label %exit
+
+for.body:
+ %inc2 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %sub = add i32 %inc2, -1
+ %cmp1 = icmp uge i32 %sub, %inc2
+ %conv = zext i1 %cmp1 to i32
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %inc2, 1
+ %cmp = icmp slt i32 %inc, 5
+ br i1 %cmp, label %for.body, label %exit
+
+exit:
+ %ret = phi i32 [ 0, %entry ], [ %conv, %for.inc ]
+ ret i32 %ret
+}
diff --git a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
deleted file mode 100644
index b37b9f2..0000000
--- a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -tailcallelim -inline -instcombine -dse -S | FileCheck %s
-; PR7272
-
-; When inlining through a byval call site, the inliner creates allocas which may
-; be used by inlined calls, so any inlined calls need to have their 'tail' flags
-; cleared. If not then you can get nastiness like with this testcase, where the
-; (inlined) call to 'ext' in 'foo' was being passed an uninitialized value.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
-
-declare void @ext(i32*)
-
-define void @bar(i32* byval %x) {
- call void @ext(i32* %x)
- ret void
-}
-
-define void @foo(i32* %x) {
-; CHECK-LABEL: define void @foo(
-; CHECK: store i32 %1, i32* %x
- call void @bar(i32* byval %x)
- ret void
-}
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
index a8703b8..5ad1bde 100644
--- a/test/Transforms/Inline/always-inline.ll
+++ b/test/Transforms/Inline/always-inline.ll
@@ -122,3 +122,14 @@ entry:
ret void
}
+define i32 @inner7() {
+ ret i32 1
+}
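+; The alwaysinline attribute below sits on the call site rather than on
+; @inner7; the inliner must honor it all the same.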
+define i32 @outer7() {
+; CHECK-LABEL: @outer7(
+; CHECK-NOT: call
+; CHECK: ret
+
+ %r = call i32 @inner7() alwaysinline
+ ret i32 %r
+}
diff --git a/test/Transforms/Inline/byval-tail-call.ll b/test/Transforms/Inline/byval-tail-call.ll
new file mode 100644
index 0000000..3a8906a
--- /dev/null
+++ b/test/Transforms/Inline/byval-tail-call.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -tailcallelim -inline -instcombine -dse -S | FileCheck %s
+; PR7272
+
+; Calls that capture byval parameters cannot be marked as tail calls. Other
+; calls that don't capture byval parameters can remain tail calls.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+declare void @ext(i32*)
+
+define void @bar(i32* byval %x) {
+ call void @ext(i32* %x)
+ ret void
+}
+
+define void @foo(i32* %x) {
+; CHECK-LABEL: define void @foo(
+; CHECK: llvm.lifetime.start
+; CHECK: store i32 %2, i32* %x
+ call void @bar(i32* byval %x)
+ ret void
+}
+
+define internal void @qux(i32* byval %x) {
+ call void @ext(i32* %x)
+ tail call void @ext(i32* null)
+ ret void
+}
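+; After inlining @qux, the byval copy becomes an alloca in @frob: the call to
+; @ext that captures it loses its tail marker, while the tail call passing
+; null keeps it.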
+define void @frob(i32* %x) {
+; CHECK-LABEL: define void @frob(
+; CHECK: alloca i32
+; CHECK: {{^ *}}call void @ext(
+; CHECK: tail call void @ext(i32* null)
+; CHECK: ret void
+ tail call void @qux(i32* byval %x)
+ ret void
+}
diff --git a/test/Transforms/Inline/byval_lifetime.ll b/test/Transforms/Inline/byval_lifetime.ll
new file mode 100644
index 0000000..e8dff2a
--- /dev/null
+++ b/test/Transforms/Inline/byval_lifetime.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+; END.
+
+; By inlining foo, an alloca is created in main to hold the byval argument, so
+; a lifetime marker should be generated as well by default.
+
+%struct.foo = type { i32, [16 x i32] }
+
+@gFoo = global %struct.foo zeroinitializer, align 8
+
+define i32 @foo(%struct.foo* byval align 8 %f, i32 %a) {
+entry:
+ %a1 = getelementptr inbounds %struct.foo* %f, i32 0, i32 1
+ %arrayidx = getelementptr inbounds [16 x i32]* %a1, i32 0, i32 %a
+ %tmp2 = load i32* %arrayidx, align 1
+ ret i32 %tmp2
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+; CHECK-LABEL: @main
+; CHECK: llvm.lifetime.start
+; CHECK: memcpy
+entry:
+ %call = call i32 @foo(%struct.foo* byval align 8 @gFoo, i32 %argc)
+ ret i32 %call
+}
diff --git a/test/Transforms/Inline/inline-cold.ll b/test/Transforms/Inline/inline-cold.ll
index bb8c008..5743377 100644
--- a/test/Transforms/Inline/inline-cold.ll
+++ b/test/Transforms/Inline/inline-cold.ll
@@ -1,8 +1,15 @@
; RUN: opt < %s -inline -S -inlinecold-threshold=75 | FileCheck %s
-
; Test that functions with attribute Cold are not inlined while the
; same function without attribute Cold will be inlined.
+; RUN: opt < %s -inline -S -inline-threshold=600 | FileCheck %s -check-prefix=OVERRIDE
+; The inline-threshold command line argument should override
+; the default cold threshold, so a cold function whose size exceeds
+; the default cold threshold (225) will still be inlined.
+
+; RUN: opt < %s -inline -S | FileCheck %s -check-prefix=DEFAULT
+; The same cold function will not be inlined with the default behavior.
+
@a = global i32 4
; This function should be larger than the cold threshold (75), but smaller
@@ -42,6 +49,10 @@ entry:
define i32 @ColdFunction(i32 %a) #1 {
; CHECK-LABEL: @ColdFunction
; CHECK: ret
+; OVERRIDE-LABEL: @ColdFunction
+; OVERRIDE: ret
+; DEFAULT-LABEL: @ColdFunction
+; DEFAULT: ret
entry:
%a1 = load volatile i32* @a
%x1 = add i32 %a1, %a1
@@ -71,16 +82,117 @@ entry:
ret i32 %add
}
+; This function should be larger than the default cold threshold (225).
+define i32 @ColdFunction2(i32 %a) #1 {
+; CHECK-LABEL: @ColdFunction2
+; CHECK: ret
+; OVERRIDE-LABEL: @ColdFunction2
+; OVERRIDE: ret
+; DEFAULT-LABEL: @ColdFunction2
+; DEFAULT: ret
+entry:
+ %a1 = load volatile i32* @a
+ %x1 = add i32 %a1, %a1
+ %a2 = load volatile i32* @a
+ %x2 = add i32 %x1, %a2
+ %a3 = load volatile i32* @a
+ %x3 = add i32 %x2, %a3
+ %a4 = load volatile i32* @a
+ %x4 = add i32 %x3, %a4
+ %a5 = load volatile i32* @a
+ %x5 = add i32 %x4, %a5
+ %a6 = load volatile i32* @a
+ %x6 = add i32 %x5, %a6
+ %a7 = load volatile i32* @a
+ %x7 = add i32 %x6, %a7
+ %a8 = load volatile i32* @a
+ %x8 = add i32 %x7, %a8
+ %a9 = load volatile i32* @a
+ %x9 = add i32 %x8, %a9
+ %a10 = load volatile i32* @a
+ %x10 = add i32 %x9, %a10
+ %a11 = load volatile i32* @a
+ %x11 = add i32 %x10, %a11
+ %a12 = load volatile i32* @a
+ %x12 = add i32 %x11, %a12
+
+ %a21 = load volatile i32* @a
+ %x21 = add i32 %x12, %a21
+ %a22 = load volatile i32* @a
+ %x22 = add i32 %x21, %a22
+ %a23 = load volatile i32* @a
+ %x23 = add i32 %x22, %a23
+ %a24 = load volatile i32* @a
+ %x24 = add i32 %x23, %a24
+ %a25 = load volatile i32* @a
+ %x25 = add i32 %x24, %a25
+ %a26 = load volatile i32* @a
+ %x26 = add i32 %x25, %a26
+ %a27 = load volatile i32* @a
+ %x27 = add i32 %x26, %a27
+ %a28 = load volatile i32* @a
+ %x28 = add i32 %x27, %a28
+ %a29 = load volatile i32* @a
+ %x29 = add i32 %x28, %a29
+ %a30 = load volatile i32* @a
+ %x30 = add i32 %x29, %a30
+ %a31 = load volatile i32* @a
+ %x31 = add i32 %x30, %a31
+ %a32 = load volatile i32* @a
+ %x32 = add i32 %x31, %a32
+
+ %a41 = load volatile i32* @a
+ %x41 = add i32 %x32, %a41
+ %a42 = load volatile i32* @a
+ %x42 = add i32 %x41, %a42
+ %a43 = load volatile i32* @a
+ %x43 = add i32 %x42, %a43
+ %a44 = load volatile i32* @a
+ %x44 = add i32 %x43, %a44
+ %a45 = load volatile i32* @a
+ %x45 = add i32 %x44, %a45
+ %a46 = load volatile i32* @a
+ %x46 = add i32 %x45, %a46
+ %a47 = load volatile i32* @a
+ %x47 = add i32 %x46, %a47
+ %a48 = load volatile i32* @a
+ %x48 = add i32 %x47, %a48
+ %a49 = load volatile i32* @a
+ %x49 = add i32 %x48, %a49
+ %a50 = load volatile i32* @a
+ %x50 = add i32 %x49, %a50
+ %a51 = load volatile i32* @a
+ %x51 = add i32 %x50, %a51
+ %a52 = load volatile i32* @a
+ %x52 = add i32 %x51, %a52
+
+ %add = add i32 %x52, %a
+ ret i32 %add
+}
+
; Function Attrs: nounwind readnone uwtable
define i32 @bar(i32 %a) #0 {
; CHECK-LABEL: @bar
; CHECK: call i32 @ColdFunction(i32 5)
; CHECK-NOT: call i32 @simpleFunction(i32 6)
+; CHECK: call i32 @ColdFunction2(i32 5)
; CHECK: ret
+; OVERRIDE-LABEL: @bar
+; OVERRIDE-NOT: call i32 @ColdFunction(i32 5)
+; OVERRIDE-NOT: call i32 @simpleFunction(i32 6)
+; OVERRIDE-NOT: call i32 @ColdFunction2(i32 5)
+; OVERRIDE: ret
+; DEFAULT-LABEL: @bar
+; DEFAULT-NOT: call i32 @ColdFunction(i32 5)
+; DEFAULT-NOT: call i32 @simpleFunction(i32 6)
+; DEFAULT: call i32 @ColdFunction2(i32 5)
+; DEFAULT: ret
entry:
%0 = tail call i32 @ColdFunction(i32 5)
%1 = tail call i32 @simpleFunction(i32 6)
- %add = add i32 %0, %1
+ %2 = tail call i32 @ColdFunction2(i32 5)
+ %3 = add i32 %0, %1
+ %add = add i32 %2, %3
ret i32 %add
}
diff --git a/test/Transforms/Inline/inline-tail.ll b/test/Transforms/Inline/inline-tail.ll
index 8bb059d..b40328e 100644
--- a/test/Transforms/Inline/inline-tail.ll
+++ b/test/Transforms/Inline/inline-tail.ll
@@ -1,15 +1,182 @@
-; RUN: opt < %s -inline -S | not grep tail
+; RUN: opt < %s -inline -S | FileCheck %s
-declare void @bar(i32*)
+; We have to apply the less restrictive TailCallKind of the call site being
+; inlined and any call sites cloned into the caller.
-define internal void @foo(i32* %P) {
- tail call void @bar( i32* %P )
- ret void
+; No tail marker after inlining, since test_capture_c captures an alloca.
+; CHECK: define void @test_capture_a(
+; CHECK-NOT: tail
+; CHECK: call void @test_capture_c(
+
+declare void @test_capture_c(i32*)
+define internal void @test_capture_b(i32* %P) {
+ tail call void @test_capture_c(i32* %P)
+ ret void
+}
+define void @test_capture_a() {
+ %A = alloca i32 ; captured by test_capture_b
+ call void @test_capture_b(i32* %A)
+ ret void
+}
+
+; No musttail marker after inlining, since the prototypes don't match.
+; CHECK: define void @test_proto_mismatch_a(
+; CHECK-NOT: musttail
+; CHECK: call void @test_proto_mismatch_c(
+
+declare void @test_proto_mismatch_c(i32*)
+define internal void @test_proto_mismatch_b(i32* %p) {
+ musttail call void @test_proto_mismatch_c(i32* %p)
+ ret void
+}
+define void @test_proto_mismatch_a() {
+ call void @test_proto_mismatch_b(i32* null)
+ ret void
+}
+
+; After inlining through a musttail call site, we need to keep musttail markers
+; to prevent unbounded stack growth.
+; CHECK: define void @test_musttail_basic_a(
+; CHECK: musttail call void @test_musttail_basic_c(
+
+declare void @test_musttail_basic_c(i32* %p)
+define internal void @test_musttail_basic_b(i32* %p) {
+ musttail call void @test_musttail_basic_c(i32* %p)
+ ret void
+}
+define void @test_musttail_basic_a(i32* %p) {
+ musttail call void @test_musttail_basic_b(i32* %p)
+ ret void
+}
+
+; Don't insert lifetime end markers here; the lifetime is trivially over due
+; to the return.
+; CHECK: define void @test_byval_a(
+; CHECK: musttail call void @test_byval_c(
+; CHECK-NEXT: ret void
+
+declare void @test_byval_c(i32* byval %p)
+define internal void @test_byval_b(i32* byval %p) {
+ musttail call void @test_byval_c(i32* byval %p)
+ ret void
+}
+define void @test_byval_a(i32* byval %p) {
+ musttail call void @test_byval_b(i32* byval %p)
+ ret void
}
-define void @caller() {
- %A = alloca i32 ; <i32*> [#uses=1]
- call void @foo( i32* %A )
- ret void
+; Don't insert a stack restore; we're about to return.
+; CHECK: define void @test_dynalloca_a(
+; CHECK: call i8* @llvm.stacksave(
+; CHECK: alloca i8, i32 %n
+; CHECK: musttail call void @test_dynalloca_c(
+; CHECK-NEXT: ret void
+
+declare void @escape(i8* %buf)
+declare void @test_dynalloca_c(i32* byval %p, i32 %n)
+define internal void @test_dynalloca_b(i32* byval %p, i32 %n) alwaysinline {
+ %buf = alloca i8, i32 %n ; dynamic alloca
+ call void @escape(i8* %buf) ; escape it
+ musttail call void @test_dynalloca_c(i32* byval %p, i32 %n)
+ ret void
+}
+define void @test_dynalloca_a(i32* byval %p, i32 %n) {
+ musttail call void @test_dynalloca_b(i32* byval %p, i32 %n)
+ ret void
}
+; We can't merge the returns.
+; CHECK: define void @test_multiret_a(
+; CHECK: musttail call void @test_multiret_c(
+; CHECK-NEXT: ret void
+; CHECK: musttail call void @test_multiret_d(
+; CHECK-NEXT: ret void
+
+declare void @test_multiret_c(i1 zeroext %b)
+declare void @test_multiret_d(i1 zeroext %b)
+define internal void @test_multiret_b(i1 zeroext %b) {
+ br i1 %b, label %c, label %d
+c:
+ musttail call void @test_multiret_c(i1 zeroext %b)
+ ret void
+d:
+ musttail call void @test_multiret_d(i1 zeroext %b)
+ ret void
+}
+define void @test_multiret_a(i1 zeroext %b) {
+ musttail call void @test_multiret_b(i1 zeroext %b)
+ ret void
+}
+
+; When the return type changes across the inlined call, emit a single bitcast
+; rather than a chain of bitcasts.
+; CHECK: define i32* @test_retptr_a(
+; CHECK: musttail call i8* @test_retptr_c(
+; CHECK-NEXT: bitcast i8* {{.*}} to i32*
+; CHECK-NEXT: ret i32*
+
+declare i8* @test_retptr_c()
+define internal i16* @test_retptr_b() {
+ %rv = musttail call i8* @test_retptr_c()
+ %v = bitcast i8* %rv to i16*
+ ret i16* %v
+}
+define i32* @test_retptr_a() {
+ %rv = musttail call i16* @test_retptr_b()
+ %v = bitcast i16* %rv to i32*
+ ret i32* %v
+}
+
+; Combine the last two cases: multiple returns with pointer bitcasts.
+; CHECK: define i32* @test_multiptrret_a(
+; CHECK: musttail call i8* @test_multiptrret_c(
+; CHECK-NEXT: bitcast i8* {{.*}} to i32*
+; CHECK-NEXT: ret i32*
+; CHECK: musttail call i8* @test_multiptrret_d(
+; CHECK-NEXT: bitcast i8* {{.*}} to i32*
+; CHECK-NEXT: ret i32*
+
+declare i8* @test_multiptrret_c(i1 zeroext %b)
+declare i8* @test_multiptrret_d(i1 zeroext %b)
+define internal i16* @test_multiptrret_b(i1 zeroext %b) {
+ br i1 %b, label %c, label %d
+c:
+ %c_rv = musttail call i8* @test_multiptrret_c(i1 zeroext %b)
+ %c_v = bitcast i8* %c_rv to i16*
+ ret i16* %c_v
+d:
+ %d_rv = musttail call i8* @test_multiptrret_d(i1 zeroext %b)
+ %d_v = bitcast i8* %d_rv to i16*
+ ret i16* %d_v
+}
+define i32* @test_multiptrret_a(i1 zeroext %b) {
+ %rv = musttail call i16* @test_multiptrret_b(i1 zeroext %b)
+ %v = bitcast i16* %rv to i32*
+ ret i32* %v
+}
+
+; Inline a musttail call site which contains a normal return and a musttail call.
+; CHECK: define i32 @test_mixedret_a(
+; CHECK: br i1 %b
+; CHECK: musttail call i32 @test_mixedret_c(
+; CHECK-NEXT: ret i32
+; CHECK: call i32 @test_mixedret_d(i1 zeroext %b)
+; CHECK: add i32 1,
+; CHECK-NOT: br
+; CHECK: ret i32
+
+declare i32 @test_mixedret_c(i1 zeroext %b)
+declare i32 @test_mixedret_d(i1 zeroext %b)
+define internal i32 @test_mixedret_b(i1 zeroext %b) {
+ br i1 %b, label %c, label %d
+c:
+ %c_rv = musttail call i32 @test_mixedret_c(i1 zeroext %b)
+ ret i32 %c_rv
+d:
+ %d_rv = call i32 @test_mixedret_d(i1 zeroext %b)
+ %d_rv1 = add i32 1, %d_rv
+ ret i32 %d_rv1
+}
+define i32 @test_mixedret_a(i1 zeroext %b) {
+ %rv = musttail call i32 @test_mixedret_b(i1 zeroext %b)
+ ret i32 %rv
+}
diff --git a/test/Transforms/Inline/inline-vla.ll b/test/Transforms/Inline/inline-vla.ll
new file mode 100644
index 0000000..dc9deaa
--- /dev/null
+++ b/test/Transforms/Inline/inline-vla.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -inline %s -o - | FileCheck %s
+
+; Check that memcpy2 is completely inlined away.
+; CHECK-NOT: memcpy2
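+; memcpy2 allocates a VLA, so the inliner is expected to wrap the inlined body
+; in stacksave/stackrestore while still inlining both calls.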
+
+@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str1 = private unnamed_addr constant [3 x i8] c"ab\00", align 1
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+entry:
+ %data = alloca [2 x i8], align 1
+ %arraydecay = getelementptr inbounds [2 x i8]* %data, i64 0, i64 0
+ call fastcc void @memcpy2(i8* %arraydecay, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0), i64 1)
+ call fastcc void @memcpy2(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8]* @.str1, i64 0, i64 0), i64 2)
+ ret i32 0
+}
+
+; Function Attrs: inlinehint nounwind ssp uwtable
+define internal fastcc void @memcpy2(i8* nocapture %dst, i8* nocapture readonly %src, i64 %size) #1 {
+entry:
+ %vla = alloca i64, i64 %size, align 16
+ %0 = bitcast i64* %vla to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %src, i64 %size, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %0, i64 %size, i32 1, i1 false)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inlinehint nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5.0 (trunk 205695) (llvm/trunk 205706)"}
diff --git a/test/Transforms/Inline/optimization-remarks.ll b/test/Transforms/Inline/optimization-remarks.ll
new file mode 100644
index 0000000..9108f3a
--- /dev/null
+++ b/test/Transforms/Inline/optimization-remarks.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline -pass-remarks-analysis=inline -S 2>&1 | FileCheck %s
+
+; CHECK: foo should always be inlined (cost=always)
+; CHECK: foo inlined into bar
+; CHECK: foz should never be inlined (cost=never)
+; CHECK: foz will not be inlined into bar
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @foo(i32 %x, i32 %y) #0 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32* %x.addr, align 4
+ %1 = load i32* %y.addr, align 4
+ %add = add nsw i32 %0, %1
+ ret i32 %add
+}
+
+; Function Attrs: noinline nounwind uwtable
+define float @foz(i32 %x, i32 %y) #1 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32* %x.addr, align 4
+ %1 = load i32* %y.addr, align 4
+ %mul = mul nsw i32 %0, %1
+ %conv = sitofp i32 %mul to float
+ ret float %conv
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %j) #2 {
+entry:
+ %j.addr = alloca i32, align 4
+ store i32 %j, i32* %j.addr, align 4
+ %0 = load i32* %j.addr, align 4
+ %1 = load i32* %j.addr, align 4
+ %sub = sub nsw i32 %1, 2
+ %call = call i32 @foo(i32 %0, i32 %sub)
+ %conv = sitofp i32 %call to float
+ %2 = load i32* %j.addr, align 4
+ %sub1 = sub nsw i32 %2, 2
+ %3 = load i32* %j.addr, align 4
+ %call2 = call float @foz(i32 %sub1, i32 %3)
+ %mul = fmul float %conv, %call2
+ %conv3 = fptosi float %mul to i32
+ ret i32 %conv3
+}
+
+attributes #0 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5.0 "}
diff --git a/test/Transforms/Inline/switch.ll b/test/Transforms/Inline/switch.ll
new file mode 100644
index 0000000..c5dab53
--- /dev/null
+++ b/test/Transforms/Inline/switch.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s
+
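+; With an inline threshold of 20, the large switch makes @callee too
+; expensive, so the call below must remain.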
+define i32 @callee(i32 %a) {
+ switch i32 %a, label %sw.default [
+ i32 0, label %sw.bb0
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb4
+ i32 5, label %sw.bb5
+ i32 6, label %sw.bb6
+ i32 7, label %sw.bb7
+ i32 8, label %sw.bb8
+ i32 9, label %sw.bb9
+ ]
+
+sw.default:
+ br label %return
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.bb3:
+ br label %return
+
+sw.bb4:
+ br label %return
+
+sw.bb5:
+ br label %return
+
+sw.bb6:
+ br label %return
+
+sw.bb7:
+ br label %return
+
+sw.bb8:
+ br label %return
+
+sw.bb9:
+ br label %return
+
+return:
+ ret i32 42
+}
+
+define i32 @caller(i32 %a) {
+; CHECK-LABEL: @caller(
+; CHECK: call i32 @callee(
+
+ %result = call i32 @callee(i32 %a)
+ ret i32 %result
+}
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
index 1883a8f..39408a2 100644
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -68,7 +68,7 @@ declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind rea
define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
entry:
- %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
@@ -76,7 +76,7 @@ entry:
define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
entry:
- %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
@@ -85,7 +85,7 @@ entry:
define <4 x i32> @constantMulARM64() nounwind readnone ssp {
entry:
- %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
@@ -93,7 +93,7 @@ entry:
define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
entry:
- %b = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -101,7 +101,7 @@ entry:
define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
entry:
- %b = tail call <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -109,17 +109,17 @@ entry:
define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
entry:
- %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
%b = add <4 x i32> zeroinitializer, %a
ret <4 x i32> %b
; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
}
define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
entry:
- %a = tail call <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
%b = add <4 x i32> %x, %a
ret <4 x i32> %b
; CHECK: entry:
@@ -127,8 +127,8 @@ entry:
; CHECK-NEXT: ret <4 x i32> %b
}
-declare <4 x i32> @llvm.arm64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
; CHECK: attributes #0 = { nounwind readnone ssp }
; CHECK: attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/OverlappingInsertvalues.ll b/test/Transforms/InstCombine/OverlappingInsertvalues.ll
new file mode 100644
index 0000000..9248aec
--- /dev/null
+++ b/test/Transforms/InstCombine/OverlappingInsertvalues.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that we can find and remove redundant insertvalues
+; CHECK-LABEL: foo_simple
+; CHECK-NOT: i8* %x, 0
+define { i8*, i64, i32 } @foo_simple(i8* %x, i8* %y) nounwind {
+entry:
+ %0 = insertvalue { i8*, i64, i32 } undef, i8* %x, 0
+ %1 = insertvalue { i8*, i64, i32 } %0, i8* %y, 0
+ ret { i8*, i64, i32 } %1
+}
+; Check that we can find and remove redundant nodes in an insertvalues chain
+; CHECK-LABEL: foo_ovwrt_chain
+; CHECK-NOT: i64 %y, 1
+; CHECK-NOT: i32 555, 2
+define { i8*, i64, i32 } @foo_ovwrt_chain(i8* %x, i64 %y, i64 %z) nounwind {
+entry:
+ %0 = insertvalue { i8*, i64, i32 } undef, i8* %x, 0
+ %1 = insertvalue { i8*, i64, i32 } %0, i64 %y, 1
+ %2 = insertvalue { i8*, i64, i32 } %1, i32 555, 2
+ %3 = insertvalue { i8*, i64, i32 } %2, i64 %z, 1
+ %4 = insertvalue { i8*, i64, i32 } %3, i32 777, 2
+ ret { i8*, i64, i32 } %4
+}
+; Check that we propagate insertvalues only if they are used as the first
+; operand (as the initial value of the aggregate)
+; CHECK-LABEL: foo_use_as_second_operand
+; CHECK: i16 %x, 0
+; CHECK: %0, 1
+define { i8, {i16, i32} } @foo_use_as_second_operand(i16 %x) nounwind {
+entry:
+ %0 = insertvalue { i16, i32 } undef, i16 %x, 0
+ %1 = insertvalue { i8, {i16, i32} } undef, { i16, i32 } %0, 1
+ ret { i8, {i16, i32} } %1
+}
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index ae1cfa1..6d0c131 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -129,3 +129,24 @@ define void @test8() {
call void (...)* @use(i32* %x)
ret void
}
+
+; PR19569
+%struct_type = type { i32, i32 }
+declare void @test9_aux(<{ %struct_type }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define void @test9(%struct_type* %a) {
+; CHECK-LABEL: @test9(
+entry:
+ %inalloca.save = call i8* @llvm.stacksave()
+ %argmem = alloca inalloca <{ %struct_type }>
+; CHECK: alloca inalloca i64, align 8
+ %0 = getelementptr inbounds <{ %struct_type }>* %argmem, i32 0, i32 0
+ %1 = bitcast %struct_type* %0 to i8*
+ %2 = bitcast %struct_type* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 8, i32 4, i1 false)
+ call void @test9_aux(<{ %struct_type }>* inalloca %argmem)
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index a6b56f9..284960b 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -6,46 +6,46 @@ target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16
; Cases that should be bitcast
; Test cast between scalars with same bit sizes
-@alias_i32_to_f32 = alias bitcast (i32 (i32)* @func_i32 to float (float)*)
+@alias_i32_to_f32 = alias float (float), i32 (i32)* @func_i32
; Test cast between vectors with same number of elements and bit sizes
-@alias_v2i32_to_v2f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
+@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), <2 x i32> (<2 x i32>)* @func_v2i32
; Test cast from vector to scalar with same number of bits
-@alias_v2f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
+@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), i64 (i64)* @func_i64
; Test cast from scalar to vector with same number of bits
-@alias_i64_to_v2f32 = alias bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
+@alias_i64_to_v2f32 = alias i64 (i64), <2 x float> (<2 x float>)* @func_v2f32
; Test cast between vectors of pointers
-@alias_v2i32p_to_v2i64p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
+@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), <2 x i32*> (<2 x i32*>)* @func_v2i32p
; Cases that should be invalid and unchanged
; Test cast between scalars with different bit sizes
-@alias_i64_to_f32 = alias bitcast (i64 (i64)* @func_i64 to float (float)*)
+@alias_i64_to_f32 = alias float (float), i64 (i64)* @func_i64
; Test cast between vectors with different bit sizes but the
; same number of elements
-@alias_v2i64_to_v2f32 = alias bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
+@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), <2 x i64> (<2 x i64>)* @func_v2i64
; Test cast between vectors with same number of bits and different
; numbers of elements
-@alias_v2i32_to_v4f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
+@alias_v2i32_to_v4f32 = alias <4 x float> (<4 x float>), <2 x i32> (<2 x i32>)* @func_v2i32
; Test cast between scalar and vector with different number of bits
-@alias_i64_to_v4f32 = alias bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
+@alias_i64_to_v4f32 = alias i64 (i64), <4 x float> (<4 x float>)* @func_v4f32
; Test cast between vector and scalar with different number of bits
-@alias_v4f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
+@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), i64 (i64)* @func_i64
; Test cast from scalar to vector of pointers with same number of bits
; We don't know the pointer size at this point, so this can't be done
-@alias_i64_to_v2i32p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
+@alias_i64_to_v2i32p = alias i64 (i64), <2 x i32*> (<2 x i32*>)* @func_v2i32p
; Test cast between vector of pointers and scalar with different number of bits
-@alias_v4i32p_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
+@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), i64 (i64)* @func_i64
diff --git a/test/Transforms/InstCombine/blend_x86.ll b/test/Transforms/InstCombine/blend_x86.ll
new file mode 100644
index 0000000..778d44b
--- /dev/null
+++ b/test/Transforms/InstCombine/blend_x86.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s
+
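+; The blendv family selects each lane by the sign bit of the mask element, so
+; a constant mask folds to a plain IR select.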
+define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd
+; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
+ ret <2 x double> %1
+}
+
+define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps
+; CHECK: select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
+ ret <4 x float> %1
+}
+
+define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb
+; CHECK: select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
+ ret <16 x i8> %1
+}
+
+define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx
+; CHECK: select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
+ ret <4 x double> %1
+}
+
+define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx
+; CHECK: select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
+ ret <8 x float> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2
+; CHECK: select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
+ <32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
+ i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
+ i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
+ i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
+ ret <32 x i8> %1
+}
+
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
+
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
diff --git a/test/Transforms/InstCombine/call-cast-target-inalloca.ll b/test/Transforms/InstCombine/call-cast-target-inalloca.ll
index baf97e0..90289e2 100644
--- a/test/Transforms/InstCombine/call-cast-target-inalloca.ll
+++ b/test/Transforms/InstCombine/call-cast-target-inalloca.ll
@@ -8,7 +8,7 @@ declare void @takes_i32_inalloca(i32* inalloca)
define void @f() {
; CHECK-LABEL: define void @f()
- %args = alloca i32
+ %args = alloca inalloca i32
call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args)
; CHECK: call void bitcast
ret void
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 1bf486f..9c7ba9b 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -156,3 +156,22 @@ define <2 x i64> @test18(<2 x i64> %x) nounwind {
; CHECK-NEXT: sub <2 x i64> zeroinitializer, %x
; CHECK-NEXT: ret <2 x i64>
}
+
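+; udiv 1, %x is 1 only when %x == 1 and 0 otherwise, hence the zext'd compare.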
+define i32 @test19(i32 %x) {
+ %A = udiv i32 1, %x
+ ret i32 %A
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: icmp eq i32 %x, 1
+; CHECK-NEXT: zext i1 %{{.*}} to i32
+; CHECK-NEXT: ret i32
+}
+
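+; sdiv 1, %x is nonzero only for %x == 1 or %x == -1; the add/icmp pair below
+; range-checks %x in [-1, 1] before the select.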
+define i32 @test20(i32 %x) {
+ %A = sdiv i32 1, %x
+ ret i32 %A
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: add i32 %x, 1
+; CHECK-NEXT: icmp ult i32 %{{.*}}, 3
+; CHECK-NEXT: select i1 %{{.*}}, i32 %x, i32 {{.*}}
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/gep-addrspace.ll b/test/Transforms/InstCombine/gep-addrspace.ll
index 24c355d..29511a3 100644
--- a/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/test/Transforms/InstCombine/gep-addrspace.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-win32"
@@ -17,3 +17,18 @@ ST:
ret void
}
+@array = internal addrspace(3) global [256 x float] zeroinitializer, align 4
+@scalar = internal addrspace(3) global float 0.000000e+00, align 4
+
+define void @keep_necessary_addrspacecast(i64 %i, float** %out0, float** %out1) {
+entry:
+; CHECK-LABEL: @keep_necessary_addrspacecast
+ %0 = getelementptr [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
+; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
+ %1 = getelementptr [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
+; CHECK: addrspacecast float addrspace(3)* %{{[0-9]+}} to float*
+ store float* %0, float** %out0, align 4
+ store float* %1, float** %out1, align 4
+ ret void
+}
+
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 12a4744..f45897c 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1356,3 +1356,12 @@ define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) nounwind {
%z = icmp ne i32 %x, %y
ret i1 %z
}
+
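+; With nsw, 0 - %a < -10 is equivalent to %a > 10, so the sub folds away.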
+; CHECK-LABEL: @icmp_neg_cst_slt
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %a, 10
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_neg_cst_slt(i32 %a) {
+ %1 = sub nsw i32 0, %a
+ %2 = icmp slt i32 %1, -10
+ ret i1 %2
+}
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 58793ab..b5a0ab8 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -7,11 +7,11 @@ entry:
%lookupTable = alloca [128 x float], align 16 ; <[128 x float]*> [#uses=5]
%lookupTable1 = bitcast [128 x float]* %lookupTable to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false)
-
+
; CHECK-LABEL: @test1(
; CHECK-NOT: alloca
; CHECK-NOT: call{{.*}}@llvm.memcpy
-
+
%tmp3 = shl i32 %hash, 2 ; <i32> [#uses=1]
%tmp5 = and i32 %tmp3, 124 ; <i32> [#uses=4]
%tmp753 = getelementptr [128 x float]* %lookupTable, i32 0, i32 %tmp5 ; <float*> [#uses=1]
@@ -37,6 +37,9 @@ entry:
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
%T = type { i8, [123 x i8] }
%U = type { i32, i32, i32, i32, i32 }
@@ -64,7 +67,30 @@ define void @test2() {
ret void
}
+define void @test2_addrspacecast() {
+ %A = alloca %T
+ %B = alloca %T
+ %a = addrspacecast %T* %A to i8 addrspace(1)*
+ %b = addrspacecast %T* %B to i8 addrspace(1)*
+
+; CHECK-LABEL: @test2_addrspacecast(
+
+; %A alloca is deleted
+; This doesn't exactly match what test2 does, because folding the type
+; cast into the alloca doesn't work for the addrspacecast yet.
+; CHECK-NEXT: alloca %T
+; CHECK-NEXT: addrspacecast
+
+; use @G instead of %A
+; CHECK-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %{{.*}},
+ call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %b, i8 addrspace(1)* %a, i64 124, i32 4, i1 false)
+ call void @bar_as1(i8 addrspace(1)* %b)
+ ret void
+}
+
declare void @bar(i8*)
+declare void @bar_as1(i8 addrspace(1)*)
;; Should be able to eliminate the alloca.
@@ -78,11 +104,22 @@ define void @test3() {
ret void
}
+define void @test3_addrspacecast() {
+ %A = alloca %T
+ %a = bitcast %T* %A to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%T* @G to i8 addrspace(1)*), i64 124, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test3_addrspacecast(
+; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T* @G, i64 0, i32 0))
+ ret void
+}
+
+
define void @test4() {
%A = alloca %T
%a = bitcast %T* %A to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
- call void @baz(i8* byval %a)
+ call void @baz(i8* byval %a)
; CHECK-LABEL: @test4(
; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
ret void
@@ -94,7 +131,7 @@ define void @test5() {
%a = bitcast %T* %A to i8*
call void @llvm.lifetime.start(i64 -1, i8* %a)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
- call void @baz(i8* byval %a)
+ call void @baz(i8* byval %a)
; CHECK-LABEL: @test5(
; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
ret void
@@ -135,6 +172,18 @@ define void @test8() {
ret void
}
+
+define void @test8_addrspacecast() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test8_addrspacecast(
+; CHECK: llvm.memcpy
+; CHECK: bar
+ ret void
+}
+
define void @test9() {
%A = alloca %U, align 4
%a = bitcast %U* %A to i8*
@@ -144,3 +193,13 @@ define void @test9() {
; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U]* @H, i64 0, i64 1) to i8*))
ret void
}
+
+define void @test9_addrspacecast() {
+ %A = alloca %U, align 4
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test9_addrspacecast(
+; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U]* @H, i64 0, i64 1) to i8*))
+ ret void
+}
diff --git a/test/Transforms/InstCombine/overflow-mul.ll b/test/Transforms/InstCombine/overflow-mul.ll
new file mode 100644
index 0000000..04019ae
--- /dev/null
+++ b/test/Transforms/InstCombine/overflow-mul.ll
@@ -0,0 +1,164 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
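+; Comparing the product of two zexts against UINT32_MAX tests exactly the i32
+; overflow condition, so these patterns fold to llvm.umul.with.overflow.i32.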
+; return mul(zext x, zext y) > MAX
+define i32 @pr4917_1(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_1(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ugt i64 %mul64, 4294967295
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; return mul(zext x, zext y) >= MAX+1
+define i32 @pr4917_1a(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_1a(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp uge i64 %mul64, 4294967296
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; mul(zext x, zext y) > MAX
+; mul(x, y) is used
+define i32 @pr4917_2(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_2(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ugt i64 %mul64, 4294967295
+; CHECK-DAG: [[VAL:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+ %mul32 = trunc i64 %mul64 to i32
+; CHECK-DAG: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+ %retval = select i1 %overflow, i32 %mul32, i32 111
+; CHECK: select i1 [[OVFL]], i32 [[VAL]]
+ ret i32 %retval
+}
+
+; return mul(zext x, zext y) > MAX
+; mul is used in non-truncate
+define i64 @pr4917_3(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_3(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK-NOT: umul.with.overflow.i32
+ %overflow = icmp ugt i64 %mul64, 4294967295
+ %retval = select i1 %overflow, i64 %mul64, i64 111
+ ret i64 %retval
+}
+
+; return mul(zext x, zext y) <= MAX
+define i32 @pr4917_4(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_4(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ule i64 %mul64, 4294967295
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+; CHECK: xor
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; return mul(zext x, zext y) < MAX+1
+define i32 @pr4917_4a(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_4a(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ult i64 %mul64, 4294967296
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+; CHECK: xor
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; the operands of the mul are zero-extended from different sizes
+define i32 @pr4917_5(i32 %x, i8 %y) nounwind {
+; CHECK-LABEL: @pr4917_5(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i8 %y to i64
+; CHECK: [[Y:%.*]] = zext i8 %y to i32
+ %mul64 = mul i64 %l, %r
+ %overflow = icmp ugt i64 %mul64, 4294967295
+ %mul32 = trunc i64 %mul64 to i32
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 [[Y]])
+; CHECK-DAG: [[VAL:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+; CHECK-DAG: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+ %retval = select i1 %overflow, i32 %mul32, i32 111
+; CHECK: select i1 [[OVFL]], i32 [[VAL]]
+ ret i32 %retval
+}
+
+; mul(zext x, zext y) != zext trunc mul
+define i32 @pr4918_1(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4918_1(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %part32 = trunc i64 %mul64 to i32
+ %part64 = zext i32 %part32 to i64
+ %overflow = icmp ne i64 %mul64, %part64
+; CHECK: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL:%.*]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; mul(zext x, zext y) == zext trunc mul
+define i32 @pr4918_2(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4918_2(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %part32 = trunc i64 %mul64 to i32
+ %part64 = zext i32 %part32 to i64
+ %overflow = icmp eq i64 %mul64, %part64
+; CHECK: extractvalue { i32, i1 } [[MUL]]
+ %retval = zext i1 %overflow to i32
+; CHECK: xor
+ ret i32 %retval
+}
+
+; zext trunc mul != mul(zext x, zext y)
+define i32 @pr4918_3(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4918_3(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %part32 = trunc i64 %mul64 to i32
+ %part64 = zext i32 %part32 to i64
+ %overflow = icmp ne i64 %part64, %mul64
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
diff --git a/test/Transforms/InstCombine/pr19420.ll b/test/Transforms/InstCombine/pr19420.ll
new file mode 100644
index 0000000..23fa0a4
--- /dev/null
+++ b/test/Transforms/InstCombine/pr19420.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
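+; Shifting a multiply by a constant folds the shift into the multiply:
+; (mul %in, -1) << 5 becomes mul %in, -32 below.
+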
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL
+; CHECK: mul <4 x i32> %in, <i32 0, i32 -32, i32 0, i32 -32>
+; CHECK-NEXT: ret
+define <4 x i32> @test_FoldShiftByConstant_CreateSHL(<4 x i32> %in) {
+ %mul.i = mul <4 x i32> %in, <i32 0, i32 -1, i32 0, i32 -1>
+ %vshl_n = shl <4 x i32> %mul.i, <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %vshl_n
+}
+
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL2
+; CHECK: mul <8 x i16> %in, <i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32>
+; CHECK-NEXT: ret
+define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) {
+ %mul.i = mul <8 x i16> %in, <i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1>
+ %vshl_n = shl <8 x i16> %mul.i, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ ret <8 x i16> %vshl_n
+}
+
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd
+; CHECK: mul <16 x i8> %in0, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
+; CHECK-NEXT: and <16 x i8> %vsra_n2, <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
+; CHECK-NEXT: ret
+define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
+ %vsra_n = ashr <16 x i8> %in0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ %tmp = add <16 x i8> %in0, %vsra_n
+ %vshl_n = shl <16 x i8> %tmp, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ ret <16 x i8> %vshl_n
+}
+
+
+define i32 @bar(i32 %x, i32 %y) {
+ %a = lshr i32 %x, 4
+ %b = add i32 %a, %y
+ %c = shl i32 %b, 4
+ ret i32 %c
+}
+
+define <2 x i32> @bar_v2i32(<2 x i32> %x, <2 x i32> %y) {
+ %a = lshr <2 x i32> %x, <i32 5, i32 5>
+ %b = add <2 x i32> %a, %y
+ %c = shl <2 x i32> %b, <i32 5, i32 5>
+ ret <2 x i32> %c
+}
+
+define i32 @foo(i32 %x, i32 %y) {
+ %a = lshr i32 %x, 4
+ %b = and i32 %a, 8
+ %c = add i32 %b, %y
+ %d = shl i32 %c, 4
+ ret i32 %d
+}
+
+define <2 x i32> @foo_v2i32(<2 x i32> %x, <2 x i32> %y) {
+ %a = lshr <2 x i32> %x, <i32 4, i32 4>
+ %b = and <2 x i32> %a, <i32 8, i32 8>
+ %c = add <2 x i32> %b, %y
+ %d = shl <2 x i32> %c, <i32 4, i32 4>
+ ret <2 x i32> %d
+}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 1458bde..2213be1 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1031,3 +1031,103 @@ define i32 @test67(i16 %x) {
; CHECK: lshr exact i32 %2, 1
; CHECK: xor i32 %3, 42
}
+
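+; The tests below fold nested min/max select patterns: when the outer bound
+; is redundant the outer select is removed, and otherwise the two constant
+; bounds combine into a single compare and select.
+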
+; SMIN(SMIN(X, 11), 92) -> SMIN(X, 11)
+define i32 @test68(i32 %x) {
+entry:
+ %cmp = icmp slt i32 11, %x
+ %cond = select i1 %cmp, i32 11, i32 %x
+ %cmp3 = icmp slt i32 92, %cond
+ %retval = select i1 %cmp3, i32 92, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test68(
+; CHECK: ret i32 %cond
+}
+
+; MIN(MIN(X, 24), 83) -> MIN(X, 24)
+define i32 @test69(i32 %x) {
+entry:
+ %cmp = icmp ult i32 24, %x
+ %cond = select i1 %cmp, i32 24, i32 %x
+ %cmp3 = icmp ult i32 83, %cond
+ %retval = select i1 %cmp3, i32 83, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test69(
+; CHECK: ret i32 %cond
+}
+
+; SMAX(SMAX(X, 75), 36) -> SMAX(X, 75)
+define i32 @test70(i32 %x) {
+entry:
+ %cmp = icmp slt i32 %x, 75
+ %cond = select i1 %cmp, i32 75, i32 %x
+ %cmp3 = icmp slt i32 %cond, 36
+ %retval = select i1 %cmp3, i32 36, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test70(
+; CHECK: ret i32 %cond
+}
+
+; MAX(MAX(X, 68), 47) -> MAX(X, 68)
+define i32 @test71(i32 %x) {
+entry:
+ %cmp = icmp ult i32 %x, 68
+ %cond = select i1 %cmp, i32 68, i32 %x
+ %cmp3 = icmp ult i32 %cond, 47
+ %retval = select i1 %cmp3, i32 47, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test71(
+; CHECK: ret i32 %cond
+}
+
+; SMIN(SMIN(X, 92), 11) -> SMIN(X, 11)
+define i32 @test72(i32 %x) {
+ %cmp = icmp sgt i32 %x, 92
+ %cond = select i1 %cmp, i32 92, i32 %x
+ %cmp3 = icmp sgt i32 %cond, 11
+ %retval = select i1 %cmp3, i32 11, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test72(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %x, 11
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 11, i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+; MIN(MIN(X, 83), 24) -> MIN(X, 24)
+define i32 @test73(i32 %x) {
+ %cmp = icmp ugt i32 %x, 83
+ %cond = select i1 %cmp, i32 83, i32 %x
+ %cmp3 = icmp ugt i32 %cond, 24
+ %retval = select i1 %cmp3, i32 24, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test73(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %x, 24
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 24, i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
+define i32 @test74(i32 %x) {
+ %cmp = icmp slt i32 %x, 36
+ %cond = select i1 %cmp, i32 36, i32 %x
+ %cmp3 = icmp slt i32 %cond, 75
+ %retval = select i1 %cmp3, i32 75, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test74(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp slt i32 %x, 75
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 75, i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+; MAX(MAX(X, 47), 68) -> MAX(X, 68)
+define i32 @test75(i32 %x) {
+ %cmp = icmp ult i32 %x, 47
+ %cond = select i1 %cmp, i32 47, i32 %x
+ %cmp3 = icmp ult i32 %cond, 68
+ %retval = select i1 %cmp3, i32 68, i32 %cond
+ ret i32 %retval
+; CHECK-LABEL: @test75(
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 68
+; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
+; CHECK-NEXT: ret i32 [[SEL]]
+}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index b1082f0..5586bb6 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -36,17 +36,52 @@ define i32 @test4(i8 %A) {
define i32 @test5(i32 %A) {
; CHECK-LABEL: @test5(
; CHECK: ret i32 undef
- %B = lshr i32 %A, 32 ;; shift all bits out
+ %B = lshr i32 %A, 32 ;; shift all bits out
ret i32 %B
}
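+
+; Shifting all bits out yields undef for splat vector shift amounts as well,
+; but not for non-splat vectors.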
+define <4 x i32> @test5_splat_vector(<4 x i32> %A) {
+; CHECK-LABEL: @test5_splat_vector(
+; CHECK: ret <4 x i32> undef
+ %B = lshr <4 x i32> %A, <i32 32, i32 32, i32 32, i32 32> ;; shift all bits out
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @test5_zero_vector(<4 x i32> %A) {
+; CHECK-LABEL: @test5_zero_vector(
+; CHECK-NEXT: ret <4 x i32> %A
+ %B = lshr <4 x i32> %A, zeroinitializer
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @test5_non_splat_vector(<4 x i32> %A) {
+; CHECK-LABEL: @test5_non_splat_vector(
+; CHECK-NOT: ret <4 x i32> undef
+ %B = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+ ret <4 x i32> %B
+}
+
define i32 @test5a(i32 %A) {
; CHECK-LABEL: @test5a(
; CHECK: ret i32 undef
- %B = shl i32 %A, 32 ;; shift all bits out
+ %B = shl i32 %A, 32 ;; shift all bits out
ret i32 %B
}
+define <4 x i32> @test5a_splat_vector(<4 x i32> %A) {
+; CHECK-LABEL: @test5a_splat_vector(
+; CHECK: ret <4 x i32> undef
+ %B = shl <4 x i32> %A, <i32 32, i32 32, i32 32, i32 32> ;; shift all bits out
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @test5a_non_splat_vector(<4 x i32> %A) {
+; CHECK-LABEL: @test5a_non_splat_vector(
+; CHECK-NOT: ret <4 x i32> undef
+ %B = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+ ret <4 x i32> %B
+}
+
define i32 @test5b() {
; CHECK-LABEL: @test5b(
; CHECK: ret i32 -1
@@ -82,7 +117,7 @@ define i32 @test6a(i32 %A) {
define i32 @test7(i8 %A) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: ret i32 -1
- %shift.upgrd.3 = zext i8 %A to i32
+ %shift.upgrd.3 = zext i8 %A to i32
%B = ashr i32 -1, %shift.upgrd.3 ;; Always equal to -1
ret i32 %B
}
@@ -232,7 +267,7 @@ define i1 @test16(i32 %X) {
; CHECK-NEXT: and i32 %X, 16
; CHECK-NEXT: icmp ne i32
; CHECK-NEXT: ret i1
- %tmp.3 = ashr i32 %X, 4
+ %tmp.3 = ashr i32 %X, 4
%tmp.6 = and i32 %tmp.3, 1
%tmp.7 = icmp ne i32 %tmp.6, 0
ret i1 %tmp.7
@@ -344,6 +379,20 @@ define i32 @test25(i32 %tmp.2, i32 %AA) {
ret i32 %tmp.6
}
+define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) {
+; CHECK-LABEL: @test25_vector(
+; CHECK: %tmp.3 = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
+; CHECK-NEXT: shl <2 x i32> %tmp.3, <i32 17, i32 17>
+; CHECK-NEXT: add <2 x i32> %tmp.51, %AA
+; CHECK-NEXT: and <2 x i32> %x2, <i32 -131072, i32 -131072>
+; CHECK-NEXT: ret <2 x i32>
+ %x = lshr <2 x i32> %AA, <i32 17, i32 17>
+ %tmp.3 = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
+ %tmp.5 = add <2 x i32> %tmp.3, %x
+ %tmp.6 = shl <2 x i32> %tmp.5, <i32 17, i32 17>
+ ret <2 x i32> %tmp.6
+}
+
;; handle casts between shifts.
define i32 @test26(i32 %A) {
; CHECK-LABEL: @test26(
@@ -365,12 +414,12 @@ define i1 @test27(i32 %x) nounwind {
%z = trunc i32 %y to i1
ret i1 %z
}
-
+
define i8 @test28(i8 %x) {
entry:
; CHECK-LABEL: @test28(
; CHECK: icmp slt i8 %x, 0
-; CHECK-NEXT: br i1
+; CHECK-NEXT: br i1
%tmp1 = lshr i8 %x, 7
%cond1 = icmp ne i8 %tmp1, 0
br i1 %cond1, label %bb1, label %bb2
@@ -476,7 +525,7 @@ entry:
%ins = or i128 %tmp23, %tmp27
%tmp45 = lshr i128 %ins, 64
ret i128 %tmp45
-
+
; CHECK-LABEL: @test36(
; CHECK: %tmp231 = or i128 %B, %A
; CHECK: %ins = and i128 %tmp231, 18446744073709551615
@@ -492,7 +541,7 @@ entry:
%tmp45 = lshr i128 %ins, 64
%tmp46 = trunc i128 %tmp45 to i64
ret i64 %tmp46
-
+
; CHECK-LABEL: @test37(
; CHECK: %tmp23 = shl nuw nsw i128 %tmp22, 32
; CHECK: %ins = or i128 %tmp23, %A
@@ -780,3 +829,32 @@ bb11: ; preds = %bb8
bb12: ; preds = %bb11, %bb8, %bb
ret void
}
+
+define i32 @test64(i32 %a) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: ret i32 undef
+ %b = ashr i32 %a, 32 ; shift all bits out
+ ret i32 %b
+}
+
+define <4 x i32> @test64_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test64_splat_vector
+; CHECK-NEXT: ret <4 x i32> undef
+ %b = ashr <4 x i32> %a, <i32 32, i32 32, i32 32, i32 32> ; shift all bits out
+ ret <4 x i32> %b
+}
+
+define <4 x i32> @test64_non_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test64_non_splat_vector
+; CHECK-NOT: ret <4 x i32> undef
+ %b = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2> ; shift all bits out
+ ret <4 x i32> %b
+}
+
+define <2 x i65> @test_65(<2 x i64> %t) {
+; CHECK-LABEL: @test_65
+ %a = zext <2 x i64> %t to <2 x i65>
+ %sext = shl <2 x i65> %a, <i65 33, i65 33>
+ %b = ashr <2 x i65> %sext, <i65 33, i65 33>
+ ret <2 x i65> %b
+}
diff --git a/test/Transforms/InstCombine/strlen-1.ll b/test/Transforms/InstCombine/strlen-1.ll
index 4fa5b4f..4a3caf2 100644
--- a/test/Transforms/InstCombine/strlen-1.ll
+++ b/test/Transforms/InstCombine/strlen-1.ll
@@ -5,6 +5,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@hello = constant [6 x i8] c"hello\00"
+@longer = constant [7 x i8] c"longer\00"
@null = constant [1 x i8] zeroinitializer
@null_hello = constant [7 x i8] c"\00hello\00"
@nullstring = constant i8 0
@@ -85,6 +86,17 @@ define i1 @test_simplify8() {
; CHECK-NEXT: ret i1 false
}
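+
+; strlen of a select between two constant strings folds to a select between
+; their lengths.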
+define i32 @test_simplify9(i1 %x) {
+; CHECK-LABEL: @test_simplify9
+ %hello = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %longer = getelementptr [7 x i8]* @longer, i32 0, i32 0
+ %s = select i1 %x, i8* %hello, i8* %longer
+ %l = call i32 @strlen(i8* %s)
+; CHECK-NEXT: select i1 %x, i32 5, i32 6
+ ret i32 %l
+; CHECK-NEXT: ret
+}
+
; Check cases that shouldn't be simplified.
define i32 @test_no_simplify1() {
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index d12412a..41d2b29 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i16 @test1(float %f) {
entry:
@@ -209,4 +210,369 @@ define <4 x float> @test_select(float %f, float %g) {
ret <4 x float> %ret
}
+; Two redundant insertqi calls should be combined into one
+; CHECK: define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
+; CHECK-NOT: insertqi
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
+ ret <2 x i64> %2
+}
+
+; The result of this insert is the second arg, since the top 64 bits of
+; the result are undefined, and we copy the bottom 64 bits from the
+; second arg
+; CHECK: define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> %i
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+ ret <2 x i64> %1
+}
+
+; Test the various range overlaps and orderings that can occur between two insertqi calls
+; CHECK: define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
+; CHECK: ret <2 x i64> %[[RES]]
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
+; CHECK: ret <2 x i64> %[[RES]]
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
+; CHECK: ret <2 x i64> %[[RES]]
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
+; CHECK: ret <2 x i64> %[[RES]]
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
+; CHECK: ret <2 x i64> %[[RES]]
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
+; CHECK: ret <2 x i64> %[[RES]]
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
+; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
+ ret <2 x i64> %2
+}
+
+; CHECK: define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
+; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
+ ret <2 x i64> %2
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
+
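+; A vpermilvar call with a constant selector folds to a plain shufflevector.
+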
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
+define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps(
+; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+ ret <4 x float> %a
+}
+
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
+define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_256(
+; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i32>)
+define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd(
+; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 2, i32 0>)
+ ret <2 x double> %a
+}
+
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>)
+define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_256(
+; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 1, i32 2, i32 0>)
+ ret <4 x double> %a
+}
+
+define <4 x float> @test_vpermilvar_ps_zero(<4 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_zero(
+; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
+ ret <4 x float> %a
+}
+
+define <8 x float> @test_vpermilvar_ps_256_zero(<8 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_256_zero(
+; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
+ ret <8 x float> %a
+}
+
+define <2 x double> @test_vpermilvar_pd_zero(<2 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_zero(
+; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> zeroinitializer)
+ ret <2 x double> %a
+}
+
+define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_256_zero(
+; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> zeroinitializer)
+ ret <4 x double> %a
+}
+
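+; The tests below constant-fold chains of SSE2/AVX2 vector shift intrinsics:
+; a shift count of 1 folds to a nonzero constant vector, while a count of 128
+; (at least the element width) folds to zero.
+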
+define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_1
+; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+}
+
+define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_1
+; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+}
+
+define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_psrl_1
+; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
+}
+
+define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_psrl_1
+; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
+}
+
+define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_psrl_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_psrl_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index a409a91..fc0f8bd 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -244,4 +244,164 @@ define <4 x i8> @test16b(i8 %ele) {
%tmp1 = shl <8 x i8> %tmp0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i8> %tmp2
-}
\ No newline at end of file
+}
+
+; If the composition of two shuffles is the identity, both shuffles can be removed.
+define <4 x i32> @shuffle_17ident(<4 x i32> %v) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17ident(
+; CHECK-NOT: shufflevector
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %shuffle2 = shufflevector <4 x i32> %shuffle, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+ ret <4 x i32> %shuffle2
+}
+
+; The swizzle can be moved to after the operation.
+define <4 x i32> @shuffle_17and(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17and(
+; CHECK-NOT: shufflevector
+; CHECK: and <4 x i32> %v1, %v2
+; CHECK: shufflevector
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = and <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17add(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17add(
+; CHECK-NOT: shufflevector
+; CHECK: add <4 x i32> %v1, %v2
+; CHECK: shufflevector
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17addnsw(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17addnsw(
+; CHECK-NOT: shufflevector
+; CHECK: add nsw <4 x i32> %v1, %v2
+; CHECK: shufflevector
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add nsw <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17addnuw(
+; CHECK-NOT: shufflevector
+; CHECK: add nuw <4 x i32> %v1, %v2
+; CHECK: shufflevector
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add nuw <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17fsub(
+; CHECK-NOT: shufflevector
+; CHECK: fsub <4 x float> %v1, %v2
+; CHECK: shufflevector
+ %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = fsub <4 x float> %t1, %t2
+ ret <4 x float> %r
+}
+
+define <4 x i32> @shuffle_17addconst(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_17addconst(
+; CHECK-NOT: shufflevector
+; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = add <4 x i32> %v1, <i32 4, i32 1, i32 2, i32 3>
+; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = shufflevector <4 x i32> [[VAR1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK: ret <4 x i32> [[VAR2]]
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add <4 x i32> %t1, <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
+; CHECK-LABEL: @shuffle_17add2(
+; CHECK-NOT: shufflevector
+; CHECK: [[VAR:%[a-zA-Z0-9.]+]] = shl <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
+; CHECK: ret <4 x i32> [[VAR]]
+ %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %t2 = add <4 x i32> %t1, %t1
+ %r = shufflevector <4 x i32> %t2, <4 x i32> zeroinitializer,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17mulsplat(<4 x i32> %v) {
+; CHECK-LABEL: @shuffle_17mulsplat(
+; CHECK-NOT: shufflevector
+; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = mul <4 x i32> %v, %v
+; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = shufflevector <4 x i32> [[VAR1]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK: ret <4 x i32> [[VAR2]]
+ %s1 = shufflevector <4 x i32> %v,
+ <4 x i32> zeroinitializer,
+ <4 x i32> zeroinitializer
+ %m1 = mul <4 x i32> %s1, %s1
+ %s2 = shufflevector <4 x i32> %m1,
+ <4 x i32> zeroinitializer,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %s2
+}
+
+; Do not reorder a shuffle and a binop if the shuffles' LHS operands have different sizes.
+define <2 x i32> @pr19717(<4 x i32> %in0, <2 x i32> %in1) {
+; CHECK-LABEL: @pr19717(
+; CHECK: shufflevector
+; CHECK: shufflevector
+; CHECK: mul
+ %shuffle = shufflevector <4 x i32> %in0, <4 x i32> %in0, <2 x i32> zeroinitializer
+ %shuffle4 = shufflevector <2 x i32> %in1, <2 x i32> %in1, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %shuffle4
+ ret <2 x i32> %mul
+}
+
+define <4 x i16> @pr19717a(<8 x i16> %in0, <8 x i16> %in1) {
+; CHECK-LABEL: @pr19717a(
+; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = mul <8 x i16> %in0, %in1
+; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = shufflevector <8 x i16> [[VAR1]], <8 x i16> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+; CHECK: ret <4 x i16> [[VAR2]]
+ %shuffle = shufflevector <8 x i16> %in0, <8 x i16> %in0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ %shuffle1 = shufflevector <8 x i16> %in1, <8 x i16> %in1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ %mul = mul <4 x i16> %shuffle, %shuffle1
+ ret <4 x i16> %mul
+}
+
+define <8 x i8> @pr19730(<16 x i8> %in0) {
+; CHECK-LABEL: @pr19730(
+; CHECK: shufflevector
+ %shuffle = shufflevector <16 x i8> %in0, <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %shuffle1 = shufflevector <8 x i8> %shuffle, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %shuffle1
+}
+
+define i32 @pr19737(<4 x i32> %in0) {
+; CHECK-LABEL: @pr19737(
+; CHECK: [[VAR:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> %in0, i32 0
+; CHECK: ret i32 [[VAR]]
+ %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %neg.i = xor <4 x i32> %shuffle.i, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and.i = and <4 x i32> %in0, %neg.i
+ %rv = extractelement <4 x i32> %and.i, i32 0
+ ret i32 %rv
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index ee6be04..105e244 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -757,3 +757,129 @@ define <4 x i8> @vectorselectfold2(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: @vectorselectfold
; CHECK-NEXT: ret <4 x i8> %a
}
+
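+; 0 - zext(%a) with nsw is never positive, so the signed comparisons below
+; fold to constants.
+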
+define i1 @compare_always_true_slt(i16 %a) {
+ %1 = zext i16 %a to i32
+ %2 = sub nsw i32 0, %1
+ %3 = icmp slt i32 %2, 1
+ ret i1 %3
+
+; CHECK-LABEL: @compare_always_true_slt
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @compare_always_true_sle(i16 %a) {
+ %1 = zext i16 %a to i32
+ %2 = sub nsw i32 0, %1
+ %3 = icmp sle i32 %2, 0
+ ret i1 %3
+
+; CHECK-LABEL: @compare_always_true_sle
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @compare_always_false_sgt(i16 %a) {
+ %1 = zext i16 %a to i32
+ %2 = sub nsw i32 0, %1
+ %3 = icmp sgt i32 %2, 0
+ ret i1 %3
+
+; CHECK-LABEL: @compare_always_false_sgt
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @compare_always_false_sge(i16 %a) {
+ %1 = zext i16 %a to i32
+ %2 = sub nsw i32 0, %1
+ %3 = icmp sge i32 %2, 1
+ ret i1 %3
+
+; CHECK-LABEL: @compare_always_false_sge
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @compare_always_false_eq(i16 %a) {
+ %1 = zext i16 %a to i32
+ %2 = sub nsw i32 0, %1
+ %3 = icmp eq i32 %2, 1
+ ret i1 %3
+
+; CHECK-LABEL: @compare_always_false_eq
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @compare_always_false_ne(i16 %a) {
+ %1 = zext i16 %a to i32
+ %2 = sub nsw i32 0, %1
+ %3 = icmp ne i32 %2, 1
+ ret i1 %3
+
+; CHECK-LABEL: @compare_always_false_ne
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @compare_dividend(i32 %a) {
+ %div = sdiv i32 2, %a
+ %cmp = icmp eq i32 %div, 3
+ ret i1 %cmp
+
+; CHECK-LABEL: @compare_dividend
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @lshr_ugt_false(i32 %a) {
+ %shr = lshr i32 1, %a
+ %cmp = icmp ugt i32 %shr, 1
+ ret i1 %cmp
+; CHECK-LABEL: @lshr_ugt_false
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @exact_lshr_ugt_false(i32 %a) {
+ %shr = lshr exact i32 30, %a
+ %cmp = icmp ult i32 %shr, 15
+ ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_ugt_false
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @lshr_sgt_false(i32 %a) {
+ %shr = lshr i32 1, %a
+ %cmp = icmp sgt i32 %shr, 1
+ ret i1 %cmp
+; CHECK-LABEL: @lshr_sgt_false
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @ashr_sgt_false(i32 %a) {
+ %shr = ashr i32 -30, %a
+ %cmp = icmp sgt i32 %shr, -1
+ ret i1 %cmp
+; CHECK-LABEL: @ashr_sgt_false
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @exact_ashr_sgt_false(i32 %a) {
+ %shr = ashr exact i32 -30, %a
+ %cmp = icmp sgt i32 %shr, -15
+ ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_sgt_false
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @nonnull_arg(i32* nonnull %i) {
+ %cmp = icmp eq i32* %i, null
+ ret i1 %cmp
+; CHECK-LABEL: @nonnull_arg
+; CHECK: ret i1 false
+}
+
+declare nonnull i32* @returns_nonnull_helper()
+define i1 @returns_nonnull() {
+ %call = call nonnull i32* @returns_nonnull_helper()
+ %cmp = icmp eq i32* %call, null
+ ret i1 %cmp
+; CHECK-LABEL: @returns_nonnull
+; CHECK: ret i1 false
+}
+
diff --git a/test/Transforms/InstSimplify/dead-code-removal.ll b/test/Transforms/InstSimplify/dead-code-removal.ll
new file mode 100644
index 0000000..e181f3b
--- /dev/null
+++ b/test/Transforms/InstSimplify/dead-code-removal.ll
@@ -0,0 +1,15 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
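+; The phi and sub in the self-looping block feed only each other, so they are
+; dead and should be deleted.
+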
+define void @foo() nounwind {
+ br i1 undef, label %1, label %4
+
+; <label>:1 ; preds = %1, %0
+; CHECK-NOT: phi
+; CHECK-NOT: sub
+ %2 = phi i32 [ %3, %1 ], [ undef, %0 ]
+ %3 = sub i32 0, undef
+ br label %1
+
+; <label>:4 ; preds = %0
+ ret void
+}
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index 47cf3f0..16bfe2a 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -1,10 +1,17 @@
-; RUN: opt < %s -internalize -internalize-public-api-list main -S | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-list main -S | FileCheck %s
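+; Everything except the public symbol @main should be internalized, including
+; the aliases.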
@A = global i32 0
+; CHECK: @A = internal global i32 0
+
@B = alias i32* @A
-@C = alias i32* @B
+; CHECK: @B = alias internal i32* @A
+
+@C = alias i32* @A
+; CHECK: @C = alias internal i32* @A
define i32 @main() {
%tmp = load i32* @C
ret i32 %tmp
}
+
+; CHECK: define i32 @main() {
diff --git a/test/Transforms/Internalize/local-visibility.ll b/test/Transforms/Internalize/local-visibility.ll
new file mode 100644
index 0000000..c24d4b7
--- /dev/null
+++ b/test/Transforms/Internalize/local-visibility.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -internalize -S | FileCheck %s
+; Internalized symbols should have default visibility.
+
+; CHECK: @global = global i32 0
+@global = global i32 0
+@llvm.used = appending global [1 x i32*] [i32* @global]
+
+; CHECK: @hidden.variable = internal global i32 0
+@hidden.variable = hidden global i32 0
+; CHECK: @protected.variable = internal global i32 0
+@protected.variable = protected global i32 0
+
+; CHECK: @hidden.alias = alias internal i32* @global
+@hidden.alias = hidden alias i32* @global
+; CHECK: @protected.alias = alias internal i32* @global
+@protected.alias = protected alias i32* @global
+
+; CHECK: define internal void @hidden.function() {
+define hidden void @hidden.function() {
+ ret void
+}
+; CHECK: define internal void @protected.function() {
+define protected void @protected.function() {
+ ret void
+}
diff --git a/test/Transforms/JumpThreading/phi-eq.ll b/test/Transforms/JumpThreading/phi-eq.ll
index 40d3c7e..e05d5ee 100644
--- a/test/Transforms/JumpThreading/phi-eq.ll
+++ b/test/Transforms/JumpThreading/phi-eq.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -jump-threading | llvm-dis | FileCheck %s
+; RUN: opt < %s -jump-threading -S | FileCheck %s
; Test whether two consecutive switches with identical structures assign the
; proper value to the proper variable. This is really testing
; Instruction::isIdenticalToWhenDefined, as previously that function was
diff --git a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
index e91d141..0534a0b 100644
--- a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
+++ b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -loop-simplify -disable-output
+; RUN: opt < %s -loop-simplify -disable-output
; PR1752
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
target triple = "i686-pc-mingw32"
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
new file mode 100644
index 0000000..6642d28
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
index 9a175ad..9a175ad 100644
--- a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll
index 10b2c3a..10b2c3a 100644
--- a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll b/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll
new file mode 100644
index 0000000..217896e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=cyclone -debug-only=loop-reduce < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; LSR used to fail here due to a bug in the ReqRegs test.
+; CHECK: The chosen solution requires
+; CHECK-NOT: No Satisfactory Solution
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+define void @do_integer_add(i64 %iterations, i8* nocapture readonly %cookie) {
+entry:
+ %N = bitcast i8* %cookie to i32*
+ %0 = load i32* %N, align 4
+ %add = add nsw i32 %0, 57
+ %cmp56 = icmp eq i64 %iterations, 0
+ br i1 %cmp56, label %while.end, label %for.cond.preheader.preheader
+
+for.cond.preheader.preheader: ; preds = %entry
+ br label %for.cond.preheader
+
+while.cond.loopexit: ; preds = %for.body
+ %add21.lcssa = phi i32 [ %add21, %for.body ]
+ %dec58 = add i64 %dec58.in, -1
+ %cmp = icmp eq i64 %dec58, 0
+ br i1 %cmp, label %while.end.loopexit, label %for.cond.preheader
+
+for.cond.preheader: ; preds = %for.cond.preheader.preheader, %while.cond.loopexit
+ %dec58.in = phi i64 [ %dec58, %while.cond.loopexit ], [ %iterations, %for.cond.preheader.preheader ]
+ %a.057 = phi i32 [ %add21.lcssa, %while.cond.loopexit ], [ %add, %for.cond.preheader.preheader ]
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.cond.preheader
+ %a.154 = phi i32 [ %a.057, %for.cond.preheader ], [ %add21, %for.body ]
+ %i.053 = phi i32 [ 1, %for.cond.preheader ], [ %inc, %for.body ]
+ %inc = add nsw i32 %i.053, 1
+ %add2 = shl i32 %a.154, 1
+ %add3 = add nsw i32 %add2, %i.053
+ %add4 = shl i32 %add3, 1
+ %add5 = add nsw i32 %add4, %i.053
+ %add6 = shl i32 %add5, 1
+ %add7 = add nsw i32 %add6, %i.053
+ %add8 = shl i32 %add7, 1
+ %add9 = add nsw i32 %add8, %i.053
+ %add10 = shl i32 %add9, 1
+ %add11 = add nsw i32 %add10, %i.053
+ %add12 = shl i32 %add11, 1
+ %add13 = add nsw i32 %add12, %i.053
+ %add14 = shl i32 %add13, 1
+ %add15 = add nsw i32 %add14, %i.053
+ %add16 = shl i32 %add15, 1
+ %add17 = add nsw i32 %add16, %i.053
+ %add18 = shl i32 %add17, 1
+ %add19 = add nsw i32 %add18, %i.053
+ %add20 = shl i32 %add19, 1
+ %add21 = add nsw i32 %add20, %i.053
+ %exitcond = icmp eq i32 %inc, 1001
+ br i1 %exitcond, label %while.cond.loopexit, label %for.body
+
+while.end.loopexit: ; preds = %while.cond.loopexit
+ %add21.lcssa.lcssa = phi i32 [ %add21.lcssa, %while.cond.loopexit ]
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %a.0.lcssa = phi i32 [ %add, %entry ], [ %add21.lcssa.lcssa, %while.end.loopexit ]
+ tail call void @use_int(i32 %a.0.lcssa)
+ ret void
+}
+
+declare void @use_int(i32)
diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index 5d728b5..756ea82 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
;
; LSR should only check for valid address modes when the IV user is a
; memory address.
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index ab7f20f..f4edf09 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
+; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 -addr-sink-using-gep=1 %s -o - | FileCheck %s -check-prefix=A9
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg
deleted file mode 100644
index a499579..0000000
--- a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-config.suffixes = ['.ll']
-
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index e42b67f..937791d 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X32
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
diff --git a/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll b/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
index 90051e3..16bb508 100644
--- a/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
+++ b/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
@@ -1,5 +1,9 @@
; Check that this test makes INDVAR and related stuff dead.
-; RUN: opt < %s -loop-reduce -S | grep phi | count 2
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; CHECK: phi
+; CHECK: phi
+; CHECK-NOT: phi
declare i1 @pred()
diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll
index 15867cb..a2b04c7 100644
--- a/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/test/Transforms/LoopUnroll/X86/partial.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -S -loop-unroll -mcpu=nehalem -x86-use-partial-unrolling=1 | FileCheck %s
-; RUN: opt < %s -S -loop-unroll -mcpu=core -x86-use-partial-unrolling=1 | FileCheck -check-prefix=CHECK-NOUNRL %s
+; RUN: opt < %s -S -loop-unroll -mcpu=nehalem | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -mcpu=core -unroll-runtime=0 | FileCheck -check-prefix=CHECK-NOUNRL %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -76,5 +76,52 @@ for.end: ; preds = %vector.body
ret void
}
+define zeroext i16 @test1(i16* nocapture readonly %arr, i32 %n) #0 {
+entry:
+ %cmp25 = icmp eq i32 %n, 0
+ br i1 %cmp25, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i16* %arr, i64 %indvars.iv
+ %0 = load i16* %arrayidx, align 2
+ %add = add i16 %0, %reduction.026
+ %sext = mul i64 %indvars.iv, 12884901888
+ %idxprom3 = ashr exact i64 %sext, 32
+ %arrayidx4 = getelementptr inbounds i16* %arr, i64 %idxprom3
+ %1 = load i16* %arrayidx4, align 2
+ %add7 = add i16 %add, %1
+ %sext28 = mul i64 %indvars.iv, 21474836480
+ %idxprom10 = ashr exact i64 %sext28, 32
+ %arrayidx11 = getelementptr inbounds i16* %arr, i64 %idxprom10
+ %2 = load i16* %arrayidx11, align 2
+ %add14 = add i16 %add7, %2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %reduction.0.lcssa = phi i16 [ 0, %entry ], [ %add14, %for.body ]
+ ret i16 %reduction.0.lcssa
+
+; This loop is too large to be partially unrolled (size=16)
+
+; CHECK-LABEL: @test1
+; CHECK: br
+; CHECK: br
+; CHECK: br
+; CHECK: br
+; CHECK-NOT: br
+
+; CHECK-NOUNRL-LABEL: @test1
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL-NOT: br
+}
+
attributes #0 = { nounwind uwtable }
diff --git a/test/Transforms/LoopUnroll/loop-remarks.ll b/test/Transforms/LoopUnroll/loop-remarks.ll
new file mode 100644
index 0000000..ff3ac17
--- /dev/null
+++ b/test/Transforms/LoopUnroll/loop-remarks.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s
+
+; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations
+; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4
+
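+; A plausible C equivalent of @sum below (hypothetical reconstruction; @baz is
+; an external function):
+;
+;   int sum() {
+;     int s = 0;
+;     for (int i = 0; i < 16; ++i)
+;       s += baz(i + 4);
+;     return s;
+;   }
+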
+define i32 @sum() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %s.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %add = add nsw i32 %i.05, 4
+ %call = tail call i32 @baz(i32 %add) #2
+ %add1 = add nsw i32 %call, %s.06
+ %inc = add nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 16
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add1
+}
+
+declare i32 @baz(i32)
diff --git a/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll b/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
new file mode 100644
index 0000000..9962c3d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-vectorize -mtriple=aarch64-none-linux-gnu -mattr=+neon -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
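+; A plausible C equivalent of @array_add (hypothetical reconstruction). The
+; four vector loads and two vector stores in the CHECK lines come from
+; vectorizing by 4 and interleaving (unrolling) by 2:
+;
+;   int *array_add(const int *restrict a, const int *restrict b, int *c,
+;                  int size) {
+;     for (int i = 0; i < size; ++i)
+;       c[i] = a[i] + b[i];
+;     return c;
+;   }
+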
+; Function Attrs: nounwind
+define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
+;CHECK-LABEL: array_add
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+entry:
+ %cmp10 = icmp sgt i32 %size, 0
+ br i1 %cmp10, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx4 = getelementptr inbounds i32* %c, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %size
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret i32* %c
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll b/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
new file mode 100644
index 0000000..f8eb3ed
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-vectorize -mtriple=arm64-none-linux-gnu -mattr=+neon -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Function Attrs: nounwind
+define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
+;CHECK-LABEL: array_add
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+entry:
+ %cmp10 = icmp sgt i32 %size, 0
+ br i1 %cmp10, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx4 = getelementptr inbounds i32* %c, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %size
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret i32* %c
+}
diff --git a/test/Transforms/LoopVectorize/ARM64/gather-cost.ll b/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
index bb28538..bb28538 100644
--- a/test/Transforms/LoopVectorize/ARM64/gather-cost.ll
+++ b/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
diff --git a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
new file mode 100644
index 0000000..f1d1f88
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg b/test/Transforms/LoopVectorize/ARM64/lit.local.cfg
deleted file mode 100644
index de86e54..0000000
--- a/test/Transforms/LoopVectorize/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
-
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 224823b..9e4e989 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1,13 +1,13 @@
-; RUN: opt < %s -mcpu=corei7 -O1 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1
-; RUN: opt < %s -mcpu=corei7 -O2 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O2
-; RUN: opt < %s -mcpu=corei7 -O3 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3
-; RUN: opt < %s -mcpu=corei7 -Os -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Os
-; RUN: opt < %s -mcpu=corei7 -Oz -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Oz
-; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC
-; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC
-; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC2
-; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC2
-; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3DIS
+; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
+; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
+; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
+; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
+; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
+; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
+; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
+; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
+; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
+; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
; This file tests the llvm.vectorizer.pragma forcing vectorization even when
; optimization levels are too low, or when vectorization is disabled.
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
new file mode 100644
index 0000000..84ffb27
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: LV: Loop hints: force=enabled
+; CHECK: LV: Loop hints: force=?
+; No more loops in the module
+; CHECK-NOT: LV: Loop hints: force=
+; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
+; CHECK: 1 loop-vectorize - Number of loops vectorized
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;
+; The source code for the test:
+;
+; #include <math.h>
+; void foo(float* restrict A, float * restrict B, int size)
+; {
+; for (int i = 0; i < size; ++i) A[i] = sinf(B[i]);
+; }
+;
+
+;
+; This loop will be vectorized even though the scalar cost is lower than any of the vector costs, because vectorization is explicitly forced in the metadata.
+;
+
+define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) {
+entry:
+ %cmp6 = icmp sgt i32 %size, 0
+ br i1 %cmp6, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+ %call = tail call float @llvm.sin.f32(float %0)
+ %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %size
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !1
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+!1 = metadata !{metadata !1, metadata !2}
+!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+
+;
+; This function will not be vectorized, as the scalar cost is lower than any of the vector costs.
+;
+
+define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) {
+entry:
+ %cmp6 = icmp sgt i32 %size, 0
+ br i1 %cmp6, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %call = tail call float @llvm.sin.f32(float %0)
+ %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %size
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !3
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+declare float @llvm.sin.f32(float) nounwind readnone
+
+; Dummy metadata
+!3 = metadata !{metadata !3}
+
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
new file mode 100644
index 0000000..1b979e5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S -vectorizer-min-trip-count=21 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: LV: Loop hints: force=enabled
+; CHECK: LV: Loop hints: force=?
+; No more loops in the module
+; CHECK-NOT: LV: Loop hints: force=
+; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
+; CHECK: 1 loop-vectorize - Number of loops vectorized
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;
+; The source code for the test:
+;
+; void foo(float* restrict A, float* restrict B)
+; {
+; for (int i = 0; i < 20; ++i) A[i] += B[i];
+; }
+;
+
+;
+; This loop will be vectorized even though the trip count is below the threshold, because vectorization is explicitly forced in the metadata.
+;
+define void @vectorized(float* noalias nocapture %A, float* noalias nocapture readonly %B) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+ %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+ %add = fadd fast float %0, %1
+ store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 20
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+!1 = metadata !{metadata !1, metadata !2}
+!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+
+;
+; This loop will not be vectorized, as the trip count is below the threshold.
+;
+define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture readonly %B) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %add = fadd fast float %0, %1
+ store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 20
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
+
+for.end:
+ ret void
+}
+
+!3 = metadata !{metadata !3}
+
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
new file mode 100644
index 0000000..685d034
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+
+; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
+; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
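+; A plausible C equivalent of @foo (hypothetical reconstruction; the remark
+; locations and the !15 debug metadata point at the loop on line 17 of the
+; original source):
+;
+;   int foo(int n) {
+;     int diff = 0;
+;     char cb[16], cc[16];
+;     for (int i = 0; i < 16; ++i)
+;       diff += cb[i] - cc[i];
+;     ibar(&diff);
+;     return 0;
+;   }
+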
+define i32 @foo(i32 %n) #0 {
+entry:
+ %diff = alloca i32, align 4
+ %cb = alloca [16 x i8], align 16
+ %cc = alloca [16 x i8], align 16
+ store i32 0, i32* %diff, align 4, !dbg !10, !tbaa !11
+ br label %for.body, !dbg !15
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ], !dbg !19
+ %arrayidx = getelementptr inbounds [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19
+ %0 = load i8* %arrayidx, align 1, !dbg !19, !tbaa !21
+ %conv = sext i8 %0 to i32, !dbg !19
+ %arrayidx2 = getelementptr inbounds [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19
+ %1 = load i8* %arrayidx2, align 1, !dbg !19, !tbaa !21
+ %conv3 = sext i8 %1 to i32, !dbg !19
+ %sub = sub i32 %conv, %conv3, !dbg !19
+ %add = add nsw i32 %sub, %add8, !dbg !19
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !15
+ %exitcond = icmp eq i64 %indvars.iv.next, 16, !dbg !15
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !15
+
+for.end: ; preds = %for.body
+ store i32 %add, i32* %diff, align 4, !dbg !19, !tbaa !11
+ call void @ibar(i32* %diff) #2, !dbg !22
+ ret i32 0, !dbg !23
+}
+
+declare void @ibar(i32*) #1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [./vectorization-remarks.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."}
+!2 = metadata !{}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
+!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
+!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = metadata !{metadata !"clang version 3.5.0 "}
+!10 = metadata !{i32 8, i32 3, metadata !4, null} ; [ DW_TAG_imported_declaration ]
+!11 = metadata !{metadata !12, metadata !12, i64 0}
+!12 = metadata !{metadata !"int", metadata !13, i64 0}
+!13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0}
+!14 = metadata !{metadata !"Simple C/C++ TBAA"}
+!15 = metadata !{i32 17, i32 8, metadata !16, null}
+!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 17, i32 8, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!17 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 8, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!18 = metadata !{i32 786443, metadata !1, metadata !4, i32 17, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!19 = metadata !{i32 18, i32 5, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 27, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!21 = metadata !{metadata !13, metadata !13, i64 0}
+!22 = metadata !{i32 20, i32 3, metadata !4, null}
+!23 = metadata !{i32 21, i32 3, metadata !4, null}
diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
index 0ec8010..e53c120 100644
--- a/test/Transforms/LoopVectorize/store-shuffle-bug.ll
+++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
@@ -19,18 +19,13 @@ entry:
; CHECK-LABEL: @t(
; CHECK: vector.body:
-; CHECK: load <4 x i32>
-; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = shufflevector
-; CHECK: load <4 x i32>
-; CHECK: [[VAR2:%[a-zA-Z0-9]+]] = shufflevector
+; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = load <4 x i32>
; CHECK: [[VAR3:%[a-zA-Z0-9]+]] = add nsw <4 x i32> [[VAR2]], [[VAR1]]
-; CHECK: [[VAR4:%[a-zA-Z0-9]+]] = shufflevector <4 x i32> [[VAR3]]
-; CHECK: store <4 x i32> [[VAR4]]
-; CHECK: load <4 x i32>
-; CHECK: [[VAR5:%[a-zA-Z0-9]+]] = shufflevector
-; CHECK-NOT: add nsw <4 x i32> [[VAR4]], [[VAR5]]
-; CHECK-NOT: add nsw <4 x i32> [[VAR5]], [[VAR4]]
-; CHECK: add nsw <4 x i32> [[VAR3]], [[VAR5]]
+; CHECK: store <4 x i32> [[VAR3]]
+; CHECK: [[VAR4:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; CHECK: add nsw <4 x i32> [[VAR3]], [[VAR4]]
+; CHECK-NOT: shufflevector
for.body:
%indvars.iv = phi i64 [ 93, %entry ], [ %indvars.iv.next, %for.body ]
diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
new file mode 100644
index 0000000..dc3df7a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Loop from "rotated"
+; CHECK: LV: Loop hints: force=enabled
+; Loop from "nonrotated"
+; CHECK: LV: Loop hints: force=enabled
+; No more loops in the module
+; CHECK-NOT: LV: Loop hints: force=
+; In total only 1 loop should be rotated.
+; CHECK: 1 loop-rotate
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; See http://reviews.llvm.org/D3348 for details.
+
+;
+; Test #1
+;
+; Ensure that "llvm.vectorizer.enable" metadata was not lost prior to LoopVectorize pass.
+; In past LoopRotate was clearing that metadata.
+;
+; The source C code is:
+; void rotated(float *a, int size)
+; {
+; int t = 0;
+; #pragma omp simd
+; for (int i = 0; i < size; ++i) {
+; a[i] = a[i-5] * a[i+2];
+; ++t;
+; }
+;}
+
+define void @rotated(float* nocapture %a, i64 %size) {
+entry:
+ %cmp1 = icmp sgt i64 %size, 0
+ br i1 %cmp1, label %for.header, label %for.end
+
+for.header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %cmp2 = icmp sgt i64 %indvars.iv, %size
+ br i1 %cmp2, label %for.end, label %for.body
+
+for.body:
+
+ %0 = add nsw i64 %indvars.iv, -5
+ %arrayidx = getelementptr inbounds float* %a, i64 %0
+ %1 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+ %2 = add nsw i64 %indvars.iv, 2
+ %arrayidx2 = getelementptr inbounds float* %a, i64 %2
+ %3 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+ %mul = fmul float %1, %3
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !1
+
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.header, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+!1 = metadata !{metadata !1, metadata !2}
+!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+
+;
+; Test #2
+;
+; Ensure that "llvm.vectorizer.enable" metadata was not lost even
+; if loop was not rotated (see http://reviews.llvm.org/D3348#comment-4).
+;
+define i32 @nonrotated(i32 %a) {
+entry:
+ br label %loop_cond
+loop_cond:
+ %indx = phi i32 [ 1, %entry ], [ %inc, %loop_inc ]
+ %cmp = icmp ne i32 %indx, %a
+ br i1 %cmp, label %return, label %loop_inc
+loop_inc:
+ %inc = add i32 %indx, 1
+ br label %loop_cond, !llvm.loop !3
+return:
+ ret i32 0
+}
+
+!3 = metadata !{metadata !3, metadata !4}
+!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll
new file mode 100644
index 0000000..92ec24f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/vect.stats.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+;
+; We have two loops: one is vectorizable and the other is not.
+;
+
+; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
+; CHECK: 1 loop-vectorize - Number of loops vectorized
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @vectorized(float* nocapture %a, i64 %size) {
+entry:
+ %cmp1 = icmp sgt i64 %size, 0
+ br i1 %cmp1, label %for.header, label %for.end
+
+for.header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %cmp2 = icmp sgt i64 %indvars.iv, %size
+ br i1 %cmp2, label %for.end, label %for.body
+
+for.body:
+
+ %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4
+ %mul = fmul float %0, %0
+ store float %mul, float* %arrayidx, align 4
+
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.header
+
+for.end:
+ ret void
+}
+
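+; @not_vectorized reads a[i-5] and a[i+2] while writing a[i], so iterations
+; are coupled through memory and the vectorizer has to reject the loop.
+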
+define void @not_vectorized(float* nocapture %a, i64 %size) {
+entry:
+ %cmp1 = icmp sgt i64 %size, 0
+ br i1 %cmp1, label %for.header, label %for.end
+
+for.header:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %cmp2 = icmp sgt i64 %indvars.iv, %size
+ br i1 %cmp2, label %for.end, label %for.body
+
+for.body:
+
+ %0 = add nsw i64 %indvars.iv, -5
+ %arrayidx = getelementptr inbounds float* %a, i64 %0
+ %1 = load float* %arrayidx, align 4
+ %2 = add nsw i64 %indvars.iv, 2
+ %arrayidx2 = getelementptr inbounds float* %a, i64 %2
+ %3 = load float* %arrayidx2, align 4
+ %mul = fmul float %1, %3
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4
+
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.header
+
+for.end:
+ ret void
+}
\ No newline at end of file
diff --git a/test/Transforms/MergeFunc/crash.ll b/test/Transforms/MergeFunc/crash.ll
index 0897ba2..3475e28 100644
--- a/test/Transforms/MergeFunc/crash.ll
+++ b/test/Transforms/MergeFunc/crash.ll
@@ -8,9 +8,9 @@ target triple = "i386-pc-linux-gnu"
%.qux.2585 = type { i32, i32, i8* }
@g2 = external unnamed_addr constant [9 x i8], align 1
-@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+@g3 = internal unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
-define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+define internal i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
br label %1
; <label>:1
@@ -20,26 +20,26 @@ define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) al
ret i32 undef
}
-define internal hidden i32 @func10(%.qux.2496* nocapture %this) align 2 {
+define internal i32 @func10(%.qux.2496* nocapture %this) align 2 {
%1 = getelementptr inbounds %.qux.2496* %this, i32 0, i32 1, i32 1
%2 = load i32* %1, align 4
ret i32 %2
}
-define internal hidden i8* @func29(i32* nocapture %this) align 2 {
+define internal i8* @func29(i32* nocapture %this) align 2 {
ret i8* getelementptr inbounds ([9 x i8]* @g2, i32 0, i32 0)
}
-define internal hidden i32* @func33(%.qux.2585* nocapture %this) align 2 {
+define internal i32* @func33(%.qux.2585* nocapture %this) align 2 {
ret i32* undef
}
-define internal hidden i32* @func34(%.qux.2585* nocapture %this) align 2 {
+define internal i32* @func34(%.qux.2585* nocapture %this) align 2 {
%1 = getelementptr inbounds %.qux.2585* %this, i32 0
ret i32* undef
}
-define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
+define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
%1 = getelementptr inbounds %.qux.2585* %this, i32 0, i32 2
%2 = load i8** %1, align 4
ret i8* %2
diff --git a/test/Transforms/MergeFunc/inttoptr-address-space.ll b/test/Transforms/MergeFunc/inttoptr-address-space.ll
index 0d834bc..2e5e2fc 100644
--- a/test/Transforms/MergeFunc/inttoptr-address-space.ll
+++ b/test/Transforms/MergeFunc/inttoptr-address-space.ll
@@ -6,10 +6,10 @@ target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
%.qux.2585 = type { i32, i32, i8* }
@g2 = external addrspace(1) constant [9 x i8], align 1
-@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585 addrspace(1)*)* @func35 to i8*)]
+@g3 = internal unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585 addrspace(1)*)* @func35 to i8*)]
-define internal hidden i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 {
+define internal i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 {
bb:
%tmp = getelementptr inbounds %.qux.2496 addrspace(1)* %this, i32 0, i32 1, i32 1
%tmp1 = load i32 addrspace(1)* %tmp, align 4
@@ -17,7 +17,7 @@ bb:
}
; Check for pointer bitwidth equal assertion failure
-define internal hidden i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
+define internal i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
bb:
; CHECK-LABEL: @func35(
; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)*
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
index 6a69e3f..86c18a0 100644
--- a/test/Transforms/MergeFunc/inttoptr.ll
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -8,9 +8,9 @@ target triple = "i386-pc-linux-gnu"
%.qux.2585 = type { i32, i32, i8* }
@g2 = external unnamed_addr constant [9 x i8], align 1
-@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+@g3 = internal unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
-define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+define internal i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
bb:
br label %bb1
@@ -21,30 +21,30 @@ bb2: ; preds = %bb1
ret i32 undef
}
-define internal hidden i32 @func10(%.qux.2496* nocapture %this) align 2 {
+define internal i32 @func10(%.qux.2496* nocapture %this) align 2 {
bb:
%tmp = getelementptr inbounds %.qux.2496* %this, i32 0, i32 1, i32 1
%tmp1 = load i32* %tmp, align 4
ret i32 %tmp1
}
-define internal hidden i8* @func29(i32* nocapture %this) align 2 {
+define internal i8* @func29(i32* nocapture %this) align 2 {
bb:
ret i8* getelementptr inbounds ([9 x i8]* @g2, i32 0, i32 0)
}
-define internal hidden i32* @func33(%.qux.2585* nocapture %this) align 2 {
+define internal i32* @func33(%.qux.2585* nocapture %this) align 2 {
bb:
ret i32* undef
}
-define internal hidden i32* @func34(%.qux.2585* nocapture %this) align 2 {
+define internal i32* @func34(%.qux.2585* nocapture %this) align 2 {
bb:
%tmp = getelementptr inbounds %.qux.2585* %this, i32 0
ret i32* undef
}
-define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
+define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
bb:
; CHECK-LABEL: @func35(
; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
diff --git a/test/Transforms/MergeFunc/mergefunc-struct-return.ll b/test/Transforms/MergeFunc/mergefunc-struct-return.ll
new file mode 100644
index 0000000..d2cbe43
--- /dev/null
+++ b/test/Transforms/MergeFunc/mergefunc-struct-return.ll
@@ -0,0 +1,40 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+; This test makes sure that the mergefunc pass uses extractvalue and insertvalue
+; to convert the struct result type, as struct types cannot be bitcast.
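+; (As the CHECK lines in @fn2 show, @fn2 is expected to become a thunk that
+; calls @fn1 and rebuilds its %kv2 result from @fn1's %kv1 result field by
+; field.)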
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+
+%kv1 = type { i32*, i32* }
+%kv2 = type { i8*, i8* }
+
+declare void @noop()
+
+define %kv1 @fn1() {
+; CHECK-LABEL: @fn1(
+ %tmp = alloca %kv1
+ %v1 = getelementptr %kv1* %tmp, i32 0, i32 0
+ store i32* null, i32** %v1
+ %v2 = getelementptr %kv1* %tmp, i32 0, i32 0
+ store i32* null, i32** %v2
+ call void @noop()
+ %v3 = load %kv1* %tmp
+ ret %kv1 %v3
+}
+
+define %kv2 @fn2() {
+; CHECK-LABEL: @fn2(
+; CHECK: %1 = tail call %kv1 @fn1()
+; CHECK: %2 = extractvalue %kv1 %1, 0
+; CHECK: %3 = bitcast i32* %2 to i8*
+; CHECK: %4 = insertvalue %kv2 undef, i8* %3, 0
+ %tmp = alloca %kv2
+ %v1 = getelementptr %kv2* %tmp, i32 0, i32 0
+ store i8* null, i8** %v1
+ %v2 = getelementptr %kv2* %tmp, i32 0, i32 0
+ store i8* null, i8** %v2
+ call void @noop()
+
+ %v3 = load %kv2* %tmp
+ ret %kv2 %v3
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
new file mode 100644
index 0000000..c420349
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'AArch64' in targets:
+ config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll b/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll
index 3d6da12..3d6da12 100644
--- a/test/Transforms/SLPVectorizer/ARM64/mismatched-intrinsics.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/mismatched-intrinsics.ll
diff --git a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg b/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg
deleted file mode 100644
index 84ac981..0000000
--- a/test/Transforms/SLPVectorizer/ARM64/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-targets = set(config.root.targets_to_build.split())
-if not 'ARM64' in targets:
- config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/X86/align.ll b/test/Transforms/SLPVectorizer/X86/align.ll
new file mode 100644
index 0000000..f586573
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/align.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Simple 3-pair chain with loads and stores
+; CHECK: test1
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+ %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
+; CHECK: %[[V0:[0-9]+]] = load <2 x double>* %[[V2:[0-9]+]], align 8
+ %i0 = load double* %a
+ %i1 = load double* %b
+ %mul = fmul double %i0, %i1
+ %store1 = getelementptr inbounds [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1
+ %store2 = getelementptr inbounds [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 2
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+; CHECK: store <2 x double> %[[V1:[0-9]+]], <2 x double>* %[[V2:[0-9]+]], align 8
+ store double %mul, double* %store1
+ store double %mul5, double* %store2, align 16
+; CHECK: ret
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/call.ll b/test/Transforms/SLPVectorizer/X86/call.ll
new file mode 100644
index 0000000..83d45c0
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/call.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+declare double @sin(double)
+declare double @cos(double)
+declare double @pow(double, double)
+declare double @exp2(double)
+declare i64 @round(i64)
+
+
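+; Each test below performs two scalar libm calls on adjacent elements, which
+; the SLP vectorizer should merge into one call to the matching LLVM vector
+; intrinsic. A plausible C equivalent of @sin_libm (hypothetical
+; reconstruction):
+;
+;   void sin_libm(double *a, double *b, double *c) {
+;     c[0] = sin(a[0] * b[0]);
+;     c[1] = sin(a[1] * b[1]);
+;   }
+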
+; CHECK: sin_libm
+; CHECK: call <2 x double> @llvm.sin.v2f64
+; CHECK: ret void
+define void @sin_libm(double* %a, double* %b, double* %c) {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %call = tail call double @sin(double %mul) nounwind readnone
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %call5 = tail call double @sin(double %mul5) nounwind readnone
+ store double %call, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %call5, double* %arrayidx5, align 8
+ ret void
+}
+
+; CHECK: cos_libm
+; CHECK: call <2 x double> @llvm.cos.v2f64
+; CHECK: ret void
+define void @cos_libm(double* %a, double* %b, double* %c) {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %call = tail call double @cos(double %mul) nounwind readnone
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %call5 = tail call double @cos(double %mul5) nounwind readnone
+ store double %call, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %call5, double* %arrayidx5, align 8
+ ret void
+}
+
+; CHECK: pow_libm
+; CHECK: call <2 x double> @llvm.pow.v2f64
+; CHECK: ret void
+define void @pow_libm(double* %a, double* %b, double* %c) {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %call = tail call double @pow(double %mul,double %mul) nounwind readnone
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %call5 = tail call double @pow(double %mul5,double %mul5) nounwind readnone
+ store double %call, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %call5, double* %arrayidx5, align 8
+ ret void
+}
+
+
+; CHECK: exp2_libm
+; CHECK: call <2 x double> @llvm.exp2.v2f64
+; CHECK: ret void
+define void @exp2_libm(double* %a, double* %b, double* %c) {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %call = tail call double @exp2(double %mul) nounwind readnone
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %call5 = tail call double @exp2(double %mul5) nounwind readnone
+ store double %call, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %call5, double* %arrayidx5, align 8
+ ret void
+}
+
+
+; Negative test case
+; CHECK: round_custom
+; CHECK-NOT: load <4 x i64>
+; CHECK: ret void
+define void @round_custom(i64* %a, i64* %b, i64* %c) {
+entry:
+ %i0 = load i64* %a, align 8
+ %i1 = load i64* %b, align 8
+ %mul = mul i64 %i0, %i1
+ %call = tail call i64 @round(i64 %mul) nounwind readnone
+ %arrayidx3 = getelementptr inbounds i64* %a, i64 1
+ %i3 = load i64* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds i64* %b, i64 1
+ %i4 = load i64* %arrayidx4, align 8
+ %mul5 = mul i64 %i3, %i4
+ %call5 = tail call i64 @round(i64 %mul5) nounwind readnone
+ store i64 %call, i64* %c, align 8
+ %arrayidx5 = getelementptr inbounds i64* %c, i64 1
+ store i64 %call5, i64* %arrayidx5, align 8
+ ret void
+}
+
+
+; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) #0
+
+; CHECK: attributes #0 = { nounwind readnone }
+
diff --git a/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
new file mode 100644
index 0000000..f4f112f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -0,0 +1,175 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@A = common global [2000 x double] zeroinitializer, align 16
+@B = common global [2000 x double] zeroinitializer, align 16
+@C = common global [2000 x float] zeroinitializer, align 16
+@D = common global [2000 x float] zeroinitializer, align 16
+
+; Currently SCEV isn't smart enough to figure out that the accesses
+; A[3*i], A[3*i+1] and A[3*i+2] are consecutive; in the future that
+; will hopefully be fixed. For now, check that this isn't vectorized.
+; CHECK-LABEL: foo_3double
+; CHECK-NOT: x double>
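+; A plausible C equivalent (hypothetical reconstruction):
+;   void foo_3double(int u) {
+;     A[3*u] += B[3*u]; A[3*u+1] += B[3*u+1]; A[3*u+2] += B[3*u+2];
+;   }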
+; Function Attrs: nounwind ssp uwtable
+define void @foo_3double(i32 %u) #0 {
+entry:
+ %u.addr = alloca i32, align 4
+ store i32 %u, i32* %u.addr, align 4
+ %mul = mul nsw i32 %u, 3
+ %idxprom = sext i32 %mul to i64
+ %arrayidx = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom
+ %0 = load double* %arrayidx, align 8
+ %arrayidx4 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom
+ %1 = load double* %arrayidx4, align 8
+ %add5 = fadd double %0, %1
+ store double %add5, double* %arrayidx, align 8
+ %add11 = add nsw i32 %mul, 1
+ %idxprom12 = sext i32 %add11 to i64
+ %arrayidx13 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom12
+ %2 = load double* %arrayidx13, align 8
+ %arrayidx17 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom12
+ %3 = load double* %arrayidx17, align 8
+ %add18 = fadd double %2, %3
+ store double %add18, double* %arrayidx13, align 8
+ %add24 = add nsw i32 %mul, 2
+ %idxprom25 = sext i32 %add24 to i64
+ %arrayidx26 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom25
+ %4 = load double* %arrayidx26, align 8
+ %arrayidx30 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom25
+ %5 = load double* %arrayidx30, align 8
+ %add31 = fadd double %4, %5
+ store double %add31, double* %arrayidx26, align 8
+ ret void
+}
+
+; SCEV should be able to tell that the accesses A[C2*i], A[C2*i + 1], ...,
+; A[C2*i + C1] are consecutive, if C2 is a power of 2 and C2 > C1 > 0.
+; Thus, the following code should be vectorized.
+; CHECK-LABEL: foo_2double
+; CHECK: x double>
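+; A plausible C equivalent (hypothetical reconstruction); here C2 = 2, C1 = 1:
+;   void foo_2double(int u) {
+;     A[2*u] += B[2*u]; A[2*u+1] += B[2*u+1];
+;   }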
+; Function Attrs: nounwind ssp uwtable
+define void @foo_2double(i32 %u) #0 {
+entry:
+ %u.addr = alloca i32, align 4
+ store i32 %u, i32* %u.addr, align 4
+ %mul = mul nsw i32 %u, 2
+ %idxprom = sext i32 %mul to i64
+ %arrayidx = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom
+ %0 = load double* %arrayidx, align 8
+ %arrayidx4 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom
+ %1 = load double* %arrayidx4, align 8
+ %add5 = fadd double %0, %1
+ store double %add5, double* %arrayidx, align 8
+ %add11 = add nsw i32 %mul, 1
+ %idxprom12 = sext i32 %add11 to i64
+ %arrayidx13 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom12
+ %2 = load double* %arrayidx13, align 8
+ %arrayidx17 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom12
+ %3 = load double* %arrayidx17, align 8
+ %add18 = fadd double %2, %3
+ store double %add18, double* %arrayidx13, align 8
+ ret void
+}
+
+; Similar to the previous test, but with a different data type.
+; CHECK-LABEL: foo_4float
+; CHECK: x float>
+; Function Attrs: nounwind ssp uwtable
+define void @foo_4float(i32 %u) #0 {
+entry:
+ %u.addr = alloca i32, align 4
+ store i32 %u, i32* %u.addr, align 4
+ %mul = mul nsw i32 %u, 4
+ %idxprom = sext i32 %mul to i64
+ %arrayidx = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom
+ %0 = load float* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom
+ %1 = load float* %arrayidx4, align 4
+ %add5 = fadd float %0, %1
+ store float %add5, float* %arrayidx, align 4
+ %add11 = add nsw i32 %mul, 1
+ %idxprom12 = sext i32 %add11 to i64
+ %arrayidx13 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom12
+ %2 = load float* %arrayidx13, align 4
+ %arrayidx17 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom12
+ %3 = load float* %arrayidx17, align 4
+ %add18 = fadd float %2, %3
+ store float %add18, float* %arrayidx13, align 4
+ %add24 = add nsw i32 %mul, 2
+ %idxprom25 = sext i32 %add24 to i64
+ %arrayidx26 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom25
+ %4 = load float* %arrayidx26, align 4
+ %arrayidx30 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom25
+ %5 = load float* %arrayidx30, align 4
+ %add31 = fadd float %4, %5
+ store float %add31, float* %arrayidx26, align 4
+ %add37 = add nsw i32 %mul, 3
+ %idxprom38 = sext i32 %add37 to i64
+ %arrayidx39 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom38
+ %6 = load float* %arrayidx39, align 4
+ %arrayidx43 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom38
+ %7 = load float* %arrayidx43, align 4
+ %add44 = fadd float %6, %7
+ store float %add44, float* %arrayidx39, align 4
+ ret void
+}
+
+; Similar to the previous tests, but now we are dealing with an AddRec SCEV.
+; CHECK-LABEL: foo_loop
+; CHECK: x double>
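+; A plausible C equivalent (hypothetical reconstruction):
+;   int foo_loop(double *A, int n) {
+;     double sum = 0;
+;     for (int i = 0; i < n; ++i)
+;       sum += 7.0 * A[2*i] + 7.0 * A[2*i+1];
+;     return (int)sum;
+;   }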
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo_loop(double* %A, i32 %n) #0 {
+entry:
+ %A.addr = alloca double*, align 8
+ %n.addr = alloca i32, align 4
+ %sum = alloca double, align 8
+ %i = alloca i32, align 4
+ store double* %A, double** %A.addr, align 8
+ store i32 %n, i32* %n.addr, align 4
+ store double 0.000000e+00, double* %sum, align 8
+ store i32 0, i32* %i, align 4
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %0 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %1 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add7, %for.body ]
+ %mul = mul nsw i32 %0, 2
+ %idxprom = sext i32 %mul to i64
+ %arrayidx = getelementptr inbounds double* %A, i64 %idxprom
+ %2 = load double* %arrayidx, align 8
+ %mul1 = fmul double 7.000000e+00, %2
+ %add = add nsw i32 %mul, 1
+ %idxprom3 = sext i32 %add to i64
+ %arrayidx4 = getelementptr inbounds double* %A, i64 %idxprom3
+ %3 = load double* %arrayidx4, align 8
+ %mul5 = fmul double 7.000000e+00, %3
+ %add6 = fadd double %mul1, %mul5
+ %add7 = fadd double %1, %add6
+ store double %add7, double* %sum, align 8
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %i, align 4
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ %split = phi double [ %add7, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %.lcssa = phi double [ %split, %for.cond.for.end_crit_edge ], [ 0.000000e+00, %entry ]
+ %conv = fptosi double %.lcssa to i32
+ ret i32 %conv
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.5.0 "}
diff --git a/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll b/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
new file mode 100644
index 0000000..ed22574
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; We will keep trying to vectorize the basic block even after we have already found a vectorized store.
+; CHECK: test1
+; CHECK: store <2 x double>
+; CHECK: ret
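+; The function ends with a hand-written <4 x i32> store; the scalar double
+; computation before it should still be turned into the <2 x double> store
+; checked above.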
+define void @test1(double* %a, double* %b, double* %c, double* %d) {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ %0 = bitcast double* %a to <4 x i32>*
+ %1 = load <4 x i32>* %0, align 8
+ %2 = bitcast double* %b to <4 x i32>*
+ %3 = load <4 x i32>* %2, align 8
+ %4 = mul <4 x i32> %1, %3
+ %5 = bitcast double* %d to <4 x i32>*
+ store <4 x i32> %4, <4 x i32>* %5, align 8
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll
index bbfd6f2..d2ad7eb 100644
--- a/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -217,3 +217,33 @@ return: ; preds = %entry, %if.end
ret i32 0
}
+%class.B.53.55 = type { %class.A.52.54, double }
+%class.A.52.54 = type { double, double, double }
+
+@a = external global double, align 8
+
+define void @PR19646(%class.B.53.55* %this) {
+entry:
+ br i1 undef, label %if.end13, label %if.end13
+
+sw.epilog7: ; No predecessors!
+ %.in = getelementptr inbounds %class.B.53.55* %this, i64 0, i32 0, i32 1
+ %0 = load double* %.in, align 8
+ %add = fadd double undef, 0.000000e+00
+ %add6 = fadd double %add, %0
+ %1 = load double* @a, align 8
+ %add8 = fadd double %1, 0.000000e+00
+ %_dy = getelementptr inbounds %class.B.53.55* %this, i64 0, i32 0, i32 2
+ %2 = load double* %_dy, align 8
+ %add10 = fadd double %add8, %2
+ br i1 undef, label %if.then12, label %if.end13
+
+if.then12: ; preds = %sw.epilog7
+ %3 = load double* undef, align 8
+ br label %if.end13
+
+if.end13: ; preds = %if.then12, %sw.epilog7, %entry
+ %x.1 = phi double [ 0.000000e+00, %if.then12 ], [ %add6, %sw.epilog7 ], [ undef, %entry ], [ undef, %entry ]
+ %b.0 = phi double [ %3, %if.then12 ], [ %add10, %sw.epilog7 ], [ undef, %entry], [ undef, %entry ]
+ unreachable
+}
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 7537ea3..9eda29f 100644
--- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -195,11 +195,35 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
ret <4 x float> %rb
}
+; Make sure that vectorization happens even if insertelement operations
+; must be rescheduled. The case here is from compiling Julia.
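+; Here each insertelement immediately follows its scalar fadd, so the
+; build-vector sequence is interleaved with the computation and has to be
+; rescheduled before the single <4 x float> fadd can be formed.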
+define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @reschedule_extract(
+; CHECK: %1 = fadd <4 x float> %a, %b
+ %a0 = extractelement <4 x float> %a, i32 0
+ %b0 = extractelement <4 x float> %b, i32 0
+ %c0 = fadd float %a0, %b0
+ %v0 = insertelement <4 x float> undef, float %c0, i32 0
+ %a1 = extractelement <4 x float> %a, i32 1
+ %b1 = extractelement <4 x float> %b, i32 1
+ %c1 = fadd float %a1, %b1
+ %v1 = insertelement <4 x float> %v0, float %c1, i32 1
+ %a2 = extractelement <4 x float> %a, i32 2
+ %b2 = extractelement <4 x float> %b, i32 2
+ %c2 = fadd float %a2, %b2
+ %v2 = insertelement <4 x float> %v1, float %c2, i32 2
+ %a3 = extractelement <4 x float> %a, i32 3
+ %b3 = extractelement <4 x float> %b, i32 3
+ %c3 = fadd float %a3, %b3
+ %v3 = insertelement <4 x float> %v2, float %c3, i32 3
+ ret <4 x float> %v3
+}
+
; Check that cost model for vectorization takes credit for
; instructions that are erased.
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; ZEROTHRESH-LABEL: @take_credit(
-; ZEROTHRESH-CHECK: %1 = fadd <4 x float> %a, %b
+; ZEROTHRESH: %1 = fadd <4 x float> %a, %b
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
%c0 = fadd float %a0, %b0
@@ -219,4 +243,40 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %v3
}
+; Make sure we handle multiple trees that feed one build vector correctly.
+define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
+entry:
+ %t0 = fadd double %w , 0.000000e+00
+ %t1 = fadd double %x , 1.000000e+00
+ %t2 = fadd double %y , 2.000000e+00
+ %t3 = fadd double %z , 3.000000e+00
+ %t4 = fmul double %t0, 1.000000e+00
+ %i1 = insertelement <4 x double> undef, double %t4, i32 3
+ %t5 = fmul double %t1, 1.000000e+00
+ %i2 = insertelement <4 x double> %i1, double %t5, i32 2
+ %t6 = fmul double %t2, 1.000000e+00
+ %i3 = insertelement <4 x double> %i2, double %t6, i32 1
+ %t7 = fmul double %t3, 1.000000e+00
+ %i4 = insertelement <4 x double> %i3, double %t7, i32 0
+ ret <4 x double> %i4
+}
+; CHECK-LABEL: @multi_tree
+; CHECK-DAG: %[[V0:.+]] = insertelement <2 x double> undef, double %w, i32 0
+; CHECK-DAG: %[[V1:.+]] = insertelement <2 x double> %[[V0]], double %x, i32 1
+; CHECK-DAG: %[[V2:.+]] = fadd <2 x double> %[[V1]], <double 0.000000e+00, double 1.000000e+00>
+; CHECK-DAG: %[[V3:.+]] = insertelement <2 x double> undef, double %y, i32 0
+; CHECK-DAG: %[[V4:.+]] = insertelement <2 x double> %[[V3]], double %z, i32 1
+; CHECK-DAG: %[[V5:.+]] = fadd <2 x double> %[[V4]], <double 2.000000e+00, double 3.000000e+00>
+; CHECK-DAG: %[[V6:.+]] = fmul <2 x double> <double 1.000000e+00, double 1.000000e+00>, %[[V2]]
+; CHECK-DAG: %[[V7:.+]] = extractelement <2 x double> %[[V6]], i32 0
+; CHECK-DAG: %[[I1:.+]] = insertelement <4 x double> undef, double %[[V7]], i32 3
+; CHECK-DAG: %[[V8:.+]] = extractelement <2 x double> %[[V6]], i32 1
+; CHECK-DAG: %[[I2:.+]] = insertelement <4 x double> %[[I1]], double %[[V8]], i32 2
+; CHECK-DAG: %[[V9:.+]] = fmul <2 x double> <double 1.000000e+00, double 1.000000e+00>, %[[V5]]
+; CHECK-DAG: %[[V10:.+]] = extractelement <2 x double> %[[V9]], i32 0
+; CHECK-DAG: %[[I3:.+]] = insertelement <4 x double> %[[I2]], double %[[V10]], i32 1
+; CHECK-DAG: %[[V11:.+]] = extractelement <2 x double> %[[V9]], i32 1
+; CHECK-DAG: %[[I4:.+]] = insertelement <4 x double> %[[I3]], double %[[V11]], i32 0
+; CHECK: ret <4 x double> %[[I4]]
+
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index 2b7ee75..30c5093 100644
--- a/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -71,5 +71,49 @@ entry:
ret void
}
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
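+
+; Four scalar bswap(a[i] + b[i]) computations on adjacent elements should be
+; merged into one <4 x i32> add plus a single @llvm.bswap.v4i32 call (see the
+; CHECK lines at the end of the function).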
+define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_bswap_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.bswap.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
diff --git a/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll b/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
new file mode 100644
index 0000000..b250735
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/non-vectorizable-intrinsic.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx--nvidiacl"
+
+; CTLZ cannot be vectorized currently because the second argument is a scalar
+; for both the scalar and vector forms of the intrinsic. In the future it
+; should be possible to vectorize such functions.
+; This test causes an assert if LLVM tries to vectorize CTLZ.
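+;
+; For reference, both forms of the intrinsic take the i1 flag as a scalar (an
+; illustrative sketch of the signatures, not extra test input):
+;   declare i8       @llvm.ctlz.i8(i8, i1)
+;   declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1)
+; so the vectorizer cannot simply widen every operand.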
+
+define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
+entry:
+ %0 = extractelement <2 x i8> %x, i32 0
+ %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
+ %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
+ %1 = extractelement <2 x i8> %x, i32 1
+ %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
+ %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
+ ret <2 x i8> %vecinit2
+}
+
+define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
+entry:
+ %0 = extractelement <2 x i8> %x, i32 0
+ %1 = extractelement <2 x i8> %x, i32 1
+ %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
+ %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
+ %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
+ %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
+ ret <2 x i8> %vecinit2
+}
+
+declare i8 @llvm.ctlz.i8(i8, i1) #3
+
+attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/SLPVectorizer/X86/value-bug.ll b/test/Transforms/SLPVectorizer/X86/value-bug.ll
new file mode 100644
index 0000000..64d2ae1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/value-bug.ll
@@ -0,0 +1,80 @@
+; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev3-linux-gnu"
+
+; We used to crash on this example because we were building a constant
+; expression during vectorization and the vectorizer expects instructions
+; as elements of the vectorized tree.
+; CHECK-LABEL: @test
+; PR19621
+
+define void @test() {
+bb279:
+ br label %bb283
+
+bb283:
+ %Av.sroa.8.0 = phi float [ undef, %bb279 ], [ %tmp315, %exit ]
+ %Av.sroa.5.0 = phi float [ undef, %bb279 ], [ %tmp319, %exit ]
+ %Av.sroa.3.0 = phi float [ undef, %bb279 ], [ %tmp307, %exit ]
+ %Av.sroa.0.0 = phi float [ undef, %bb279 ], [ %tmp317, %exit ]
+ br label %bb284
+
+bb284:
+ %tmp7.i = fpext float %Av.sroa.3.0 to double
+ %tmp8.i = fsub double %tmp7.i, undef
+ %tmp9.i = fsub double %tmp8.i, undef
+ %tmp17.i = fpext float %Av.sroa.8.0 to double
+ %tmp19.i = fsub double %tmp17.i, undef
+ %tmp20.i = fsub double %tmp19.i, undef
+ br label %bb21.i
+
+bb21.i:
+ br i1 undef, label %bb22.i, label %exit
+
+bb22.i:
+ %tmp24.i = fadd double undef, %tmp9.i
+ %tmp26.i = fadd double undef, %tmp20.i
+ br label %bb32.i
+
+bb32.i:
+ %xs.0.i = phi double [ %tmp24.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
+ %ys.0.i = phi double [ %tmp26.i, %bb22.i ], [ 0.000000e+00, %bb32.i ]
+ br i1 undef, label %bb32.i, label %bb21.i
+
+exit:
+ %tmp303 = fpext float %Av.sroa.0.0 to double
+ %tmp304 = fmul double %tmp303, undef
+ %tmp305 = fadd double undef, %tmp304
+ %tmp306 = fadd double %tmp305, undef
+ %tmp307 = fptrunc double %tmp306 to float
+ %tmp311 = fpext float %Av.sroa.5.0 to double
+ %tmp312 = fmul double %tmp311, 0.000000e+00
+ %tmp313 = fadd double undef, %tmp312
+ %tmp314 = fadd double %tmp313, undef
+ %tmp315 = fptrunc double %tmp314 to float
+ %tmp317 = fptrunc double undef to float
+ %tmp319 = fptrunc double undef to float
+ br label %bb283
+}
+
+; Make sure that we properly handle constant-folded vectorized trees. The
+; vectorizer starts at the pair (%t2, %t3) and will constant-fold the tree.
+; The code that handles insertelement instructions must handle this.
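+; Concretely, %t0 folds to 1.0 and %t1 to 2.0, so the vectorized pair becomes
+; the constant <double 1.000000e+00, double 2.000000e+00> that the CHECK lines
+; below extract from (our reading of the folding, shown for reference).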
+define <4 x double> @constant_folding() {
+entry:
+ %t0 = fadd double 1.000000e+00 , 0.000000e+00
+ %t1 = fadd double 1.000000e+00 , 1.000000e+00
+ %t2 = fmul double %t0, 1.000000e+00
+ %i1 = insertelement <4 x double> undef, double %t2, i32 1
+ %t3 = fmul double %t1, 1.000000e+00
+ %i2 = insertelement <4 x double> %i1, double %t3, i32 0
+ ret <4 x double> %i2
+}
+
+; CHECK-LABEL: @constant_folding
+; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
+; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
+; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
+; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
+; CHECK: ret <4 x double> %[[V3]]
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
new file mode 100644
index 0000000..40532cd
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
@@ -0,0 +1,4 @@
+targets = set(config.root.targets_to_build.split())
+if 'NVPTX' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
new file mode 100644
index 0000000..850fc4c
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
+; RUN: opt < %s -S -separate-const-offset-from-gep -gvn -dce | FileCheck %s --check-prefix=IR
+
+; Verifies the SeparateConstOffsetFromGEP pass.
+; The following code computes
+; *output = array[x][y] + array[x][y+1] + array[x+1][y] + array[x+1][y+1]
+;
+; We expect SeparateConstOffsetFromGEP to transform it to
+;
+; float *base = &array[x][y];
+; *output = base[0] + base[1] + base[32] + base[33];
+;
+; so the backend can emit PTX that uses fewer virtual registers.
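+;
+; Offset arithmetic, for reference: a row holds 32 floats = 128 bytes, so
+; base[1], base[32], and base[33] sit at +4, +128, and +132 bytes from
+; base[0], exactly the PTX offsets checked below.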
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+@array = internal addrspace(3) constant [32 x [32 x float]] zeroinitializer, align 4
+
+define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+ %0 = zext i32 %y to i64
+ %1 = zext i32 %x to i64
+ %2 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+ %3 = addrspacecast float addrspace(3)* %2 to float*
+ %4 = load float* %3, align 4
+ %5 = fadd float %4, 0.000000e+00
+ %6 = add i32 %y, 1
+ %7 = zext i32 %6 to i64
+ %8 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
+ %9 = addrspacecast float addrspace(3)* %8 to float*
+ %10 = load float* %9, align 4
+ %11 = fadd float %5, %10
+ %12 = add i32 %x, 1
+ %13 = zext i32 %12 to i64
+ %14 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
+ %15 = addrspacecast float addrspace(3)* %14 to float*
+ %16 = load float* %15, align 4
+ %17 = fadd float %11, %16
+ %18 = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
+ %19 = addrspacecast float addrspace(3)* %18 to float*
+ %20 = load float* %19, align 4
+ %21 = fadd float %17, %20
+ store float %21, float* %output, align 4
+ ret void
+}
+
+; PTX-LABEL: sum_of_array(
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[0-9]+]] = getelementptr inbounds [32 x [32 x float]] addrspace(3)* @array, i64 0, i32 %x, i32 %y
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
new file mode 100644
index 0000000..2e50f5f
--- /dev/null
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -0,0 +1,137 @@
+; RUN: opt < %s -separate-const-offset-from-gep -dce -S | FileCheck %s
+
+; Several unit tests for -separate-const-offset-from-gep. The transformation
+; heavily relies on TargetTransformInfo, so we put these tests under
+; target-specific folders.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; target triple is necessary; otherwise TargetTransformInfo rejects any
+; addressing mode.
+target triple = "nvptx64-unknown-unknown"
+
+%struct.S = type { float, double }
+
+@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
+@float_2d_array = global [32 x [32 x float]] zeroinitializer, align 4
+
+; We should not extract any struct field indices, because fields in a struct
+; may have different types.
+define double* @struct(i32 %i) {
+entry:
+ %add = add nsw i32 %i, 5
+ %idxprom = sext i32 %add to i64
+ %p = getelementptr inbounds [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+ ret double* %p
+}
+; CHECK-LABEL: @struct
+; CHECK: getelementptr [1024 x %struct.S]* @struct_array, i64 0, i32 %i, i32 1
+
+; We should be able to trace into sext/zext if it's directly used as a GEP
+; index.
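+; For reference: %i1.ext = sext(%i + 1) and %j2.ext = zext(%j + 2), so the
+; extracted constant offset is 1 * 32 + 2 = 34 elements (one row plus two
+; columns), matching the "i64 34" in the CHECK lines below.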
+define float* @sext_zext(i32 %i, i32 %j) {
+entry:
+ %i1 = add i32 %i, 1
+ %j2 = add i32 %j, 2
+ %i1.ext = sext i32 %i1 to i64
+ %j2.ext = zext i32 %j2 to i64
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i1.ext, i64 %j2.ext
+ ret float* %p
+}
+; CHECK-LABEL: @sext_zext
+; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i32 %i, i32 %j
+; CHECK: getelementptr float* %{{[0-9]+}}, i64 34
+
+; We should be able to trace into sext/zext if it can be distributed to both
+; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
+define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
+ %b1 = add nsw i32 %b, 1
+ %b2 = sext i32 %b1 to i64
+ %i = add i64 %a, %b2
+ %d1 = add nuw i32 %d, 1
+ %d2 = zext i32 %d1 to i64
+ %j = add i64 %c, %d2
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
+ ret float* %p
+}
+; CHECK-LABEL: @ext_add_no_overflow
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR]], i64 33
+
+; Similar to @ext_add_no_overflow, we should be able to trace into sext/zext if
+; its operand is an "or" instruction.
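+; For reference: %b2 = %b1 | 1 and %b3 = %b1 | 2 only set bits known clear in
+; %b1, so the extracted constant offset is 1 * 32 + 2 = 34 elements.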
+define float* @ext_or(i64 %a, i32 %b) {
+entry:
+ %b1 = shl i32 %b, 2
+ %b2 = or i32 %b1, 1
+ %b3 = or i32 %b1, 2
+ %b2.ext = sext i32 %b2 to i64
+ %b3.ext = sext i32 %b3 to i64
+ %i = add i64 %a, %b2.ext
+ %j = add i64 %a, %b3.ext
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
+ ret float* %p
+}
+; CHECK-LABEL: @ext_or
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[0-9]+}}, i64 %{{[0-9]+}}
+; CHECK: getelementptr float* [[BASE_PTR]], i64 34
+
+; We should treat "or" with no common bits (%k) as "add", and leave "or" with
+; potentially common bits (%l) as is.
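+; For reference: %j = %i << 2 has its low two bits clear, so "or %j, 3" acts
+; as "add %j, 3" and contributes the constant offset 3 * 32 = 96 below, while
+; bit 2 of %j may already be set, so "or %j, 4" must be left alone.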
+define float* @or(i64 %i) {
+entry:
+ %j = shl i64 %i, 2
+ %k = or i64 %j, 3 ; no common bits
+ %l = or i64 %j, 4 ; potentially common bits
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %k, i64 %l
+ ret float* %p
+}
+; CHECK-LABEL: @or
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %j, i64 %l
+; CHECK: getelementptr float* [[BASE_PTR]], i64 96
+
+; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
+; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
+; affected.
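+; For reference: the extracted row offset is 5 * 32 = 160 elements (the
+; "i64 160" below), while "%b5 = add i64 %b, 5" must survive to feed the
+; store.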
+define float* @expr(i64 %a, i64 %b, i64* %out) {
+entry:
+ %b5 = add i64 %b, 5
+ %i = add i64 %b5, %a
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 0
+ store i64 %b5, i64* %out
+ ret float* %p
+}
+; CHECK-LABEL: @expr
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %0, i64 0
+; CHECK: getelementptr float* [[BASE_PTR]], i64 160
+; CHECK: store i64 %b5, i64* %out
+
+; Verifies we handle "sub" correctly.
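+; For reference: (i - 5) lowers the row offset by 5 * 32 = 160 elements and
+; (5 - j) raises the column offset by 5, giving the extracted constant
+; -160 + 5 = -155 checked below.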
+define float* @sub(i64 %i, i64 %j) {
+ %i2 = sub i64 %i, 5 ; i - 5
+ %j2 = sub i64 5, %j ; 5 - j
+ %p = getelementptr inbounds [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i2, i64 %j2
+ ret float* %p
+}
+; CHECK-LABEL: @sub
+; CHECK: %[[j2:[0-9]+]] = sub i64 0, %j
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
+; CHECK: getelementptr float* [[BASE_PTR]], i64 -155
+
+%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
+
+; Verifies we can emit correct uglygep if the address is not naturally aligned.
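+; For reference: %struct.Packed is 3*4 + 8*8 = 76 bytes, so stepping from
+; element (i, j) to element (i+1, j+3) of the i64 member moves
+; 76 + 3*8 = 100 bytes, the uglygep offset checked below.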
+define i64* @packed_struct(i32 %i, i32 %j) {
+entry:
+ %s = alloca [1024 x %struct.Packed], align 16
+ %add = add nsw i32 %j, 3
+ %idxprom = sext i32 %add to i64
+ %add1 = add nsw i32 %i, 1
+ %idxprom2 = sext i32 %add1 to i64
+ %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed]* %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom
+ ret i64* %arrayidx3
+}
+; CHECK-LABEL: @packed_struct
+; CHECK: [[BASE_PTR:%[0-9]+]] = getelementptr [1024 x %struct.Packed]* %s, i64 0, i32 %i, i32 1, i32 %j
+; CHECK: [[CASTED_PTR:%[0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
+; CHECK: %uglygep = getelementptr i8* [[CASTED_PTR]], i64 100
+; CHECK: bitcast i8* %uglygep to i64*
diff --git a/test/Transforms/SimplifyCFG/extract-cost.ll b/test/Transforms/SimplifyCFG/extract-cost.ll
new file mode 100644
index 0000000..9c86725
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/extract-cost.ll
@@ -0,0 +1,22 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
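+; Speculating the "if.end" block only requires re-reading a field of the
+; already-computed overflow pair, so the extractvalue should be considered
+; cheap and the phi folded into a select (our reading of what the checks
+; below require).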
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
+
+define i32 @f(i32 %a, i32 %b) #0 {
+entry:
+ %uadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %uadd, 1
+ br i1 %cmp, label %return, label %if.end
+
+if.end: ; preds = %entry
+ %0 = extractvalue { i32, i1 } %uadd, 0
+ br label %return
+
+return: ; preds = %entry, %if.end
+ %retval.0 = phi i32 [ %0, %if.end ], [ 0, %entry ]
+ ret i32 %retval.0
+
+; CHECK-LABEL: @f(
+; CHECK-NOT: phi
+; CHECK: select
+}
diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll
index 35420ab..341736d 100644
--- a/test/Transforms/TailCallElim/basic.ll
+++ b/test/Transforms/TailCallElim/basic.ll
@@ -143,3 +143,34 @@ cond_false:
call void @noarg()
ret i32* null
}
+
+; Don't tail call if a byval arg is captured.
+define void @test9(i32* byval %a) {
+; CHECK-LABEL: define void @test9(
+; CHECK: {{^ *}}call void @use(
+ call void @use(i32* %a)
+ ret void
+}
+
+%struct.X = type { i8* }
+
+declare void @ctor(%struct.X*)
+define void @test10(%struct.X* noalias sret %agg.result, i1 zeroext %b) {
+; CHECK-LABEL: @test10
+entry:
+ %x = alloca %struct.X, align 8
+ br i1 %b, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void @ctor(%struct.X* %agg.result)
+; CHECK: tail call void @ctor
+ br label %return
+
+if.end:
+ call void @ctor(%struct.X* %x)
+; CHECK: call void @ctor
+ br label %return
+
+return:
+ ret void
+}
diff --git a/test/Verifier/aliasing-chain.ll b/test/Verifier/aliasing-chain.ll
deleted file mode 100644
index ae0b77f..0000000
--- a/test/Verifier/aliasing-chain.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-; CHECK: Aliasing chain should end with function or global variable
-; Test that alising chain does not create a cycle
-
-@b1 = alias i32* @c1
-@c1 = alias i32* @b1
diff --git a/test/Verifier/bitcast-alias-address-space.ll b/test/Verifier/bitcast-alias-address-space.ll
deleted file mode 100644
index d9794d9..0000000
--- a/test/Verifier/bitcast-alias-address-space.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
-
-; CHECK: error: invalid cast opcode for cast from 'i32 addrspace(2)*' to 'i32 addrspace(1)*'
-
-target datalayout = "e-p:32:32:32-p1:16:16:16-p2:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n8:16:32"
-
-
-@data = addrspace(2) global i32 27
-
-@illegal_alias_data = alias bitcast (i32 addrspace(2)* @data to i32 addrspace(1)*)
diff --git a/test/Verifier/global-ctors.ll b/test/Verifier/global-ctors.ll
new file mode 100644
index 0000000..76570c5
--- /dev/null
+++ b/test/Verifier/global-ctors.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+@llvm.global_ctors = appending global [1 x { i32, void()*, i8 } ] [
+ { i32, void()*, i8 } { i32 65535, void ()* null, i8 0 }
+]
+; CHECK: wrong type for intrinsic global variable
+
+@llvm.global_dtors = appending global [1 x { i32, void()*, i8*, i8 } ] [
+ { i32, void()*, i8*, i8 } { i32 65535, void ()* null, i8* null, i8 0}
+]
+; CHECK: wrong type for intrinsic global variable
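+
+; For reference (our understanding of the verifier): the accepted element
+; types are { i32, void ()* } and, with an associated pointer,
+; { i32, void ()*, i8* }; the i8 and i8*-plus-i8 variants above are rejected.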
diff --git a/test/Verifier/inalloca3.ll b/test/Verifier/inalloca3.ll
new file mode 100644
index 0000000..c09ce10
--- /dev/null
+++ b/test/Verifier/inalloca3.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+
+declare void @doit(i64* inalloca %a)
+
+define void @a() {
+entry:
+ %a = alloca [2 x i32]
+ %b = bitcast [2 x i32]* %a to i64*
+ call void @doit(i64* inalloca %b)
+; CHECK: inalloca argument for call has mismatched alloca
+ ret void
+}
diff --git a/test/Verifier/musttail-invalid.ll b/test/Verifier/musttail-invalid.ll
new file mode 100644
index 0000000..e5f9a40
--- /dev/null
+++ b/test/Verifier/musttail-invalid.ll
@@ -0,0 +1,82 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; Each musttail call should fail to validate.
+
+declare x86_stdcallcc void @cc_mismatch_callee()
+define void @cc_mismatch() {
+; CHECK: mismatched calling conv
+ musttail call x86_stdcallcc void @cc_mismatch_callee()
+ ret void
+}
+
+declare void @more_parms_callee(i32)
+define void @more_parms() {
+; CHECK: mismatched parameter counts
+ musttail call void @more_parms_callee(i32 0)
+ ret void
+}
+
+declare void @mismatched_intty_callee(i8)
+define void @mismatched_intty(i32) {
+; CHECK: mismatched parameter types
+ musttail call void @mismatched_intty_callee(i8 0)
+ ret void
+}
+
+declare void @mismatched_vararg_callee(i8*, ...)
+define void @mismatched_vararg(i8*) {
+; CHECK: mismatched varargs
+ musttail call void (i8*, ...)* @mismatched_vararg_callee(i8* null)
+ ret void
+}
+
+; We would make this an implicit sret parameter, which would disturb the
+; tail call.
+declare { i32, i32, i32 } @mismatched_retty_callee(i32)
+define void @mismatched_retty(i32) {
+; CHECK: mismatched return types
+ musttail call { i32, i32, i32 } @mismatched_retty_callee(i32 0)
+ ret void
+}
+
+declare void @mismatched_byval_callee({ i32 }*)
+define void @mismatched_byval({ i32 }* byval %a) {
+; CHECK: mismatched ABI impacting function attributes
+ musttail call void @mismatched_byval_callee({ i32 }* %a)
+ ret void
+}
+
+declare void @mismatched_inreg_callee(i32 inreg)
+define void @mismatched_inreg(i32 %a) {
+; CHECK: mismatched ABI impacting function attributes
+ musttail call void @mismatched_inreg_callee(i32 inreg %a)
+ ret void
+}
+
+declare void @mismatched_sret_callee(i32* sret)
+define void @mismatched_sret(i32* %a) {
+; CHECK: mismatched ABI impacting function attributes
+ musttail call void @mismatched_sret_callee(i32* sret %a)
+ ret void
+}
+
+declare void @mismatched_alignment_callee(i32* byval align 8)
+define void @mismatched_alignment(i32* byval align 4 %a) {
+; CHECK: mismatched ABI impacting function attributes
+ musttail call void @mismatched_alignment_callee(i32* byval align 8 %a)
+ ret void
+}
+
+declare i32 @not_tail_pos_callee()
+define i32 @not_tail_pos() {
+; CHECK: musttail call must be precede a ret with an optional bitcast
+ %v = musttail call i32 @not_tail_pos_callee()
+ %w = add i32 %v, 1
+ ret i32 %w
+}
+
+define void @inline_asm() {
+; CHECK: cannot use musttail call with inline asm
+ musttail call void asm "ret", ""()
+ ret void
+}
diff --git a/test/Verifier/musttail-valid.ll b/test/Verifier/musttail-valid.ll
new file mode 100644
index 0000000..815d77a
--- /dev/null
+++ b/test/Verifier/musttail-valid.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as %s -o /dev/null
+
+; Should assemble without error.
+
+declare void @similar_param_ptrty_callee(i8*)
+define void @similar_param_ptrty(i32*) {
+ musttail call void @similar_param_ptrty_callee(i8* null)
+ ret void
+}
+
+declare i8* @similar_ret_ptrty_callee()
+define i32* @similar_ret_ptrty() {
+ %v = musttail call i8* @similar_ret_ptrty_callee()
+ %w = bitcast i8* %v to i32*
+ ret i32* %w
+}
diff --git a/test/Verifier/sret.ll b/test/Verifier/sret.ll
new file mode 100644
index 0000000..1ddbf1f
--- /dev/null
+++ b/test/Verifier/sret.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @a(i32* sret %a, i32* sret %b)
+; CHECK: Cannot have multiple 'sret' parameters!
+
+declare void @b(i32* %a, i32* %b, i32* sret %c)
+; CHECK: Attribute 'sret' is not on first or second parameter!
diff --git a/test/lit.cfg b/test/lit.cfg
index df1850a..2815a61 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -95,6 +95,11 @@ for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
if symbolizer in os.environ:
config.environment[symbolizer] = os.environ[symbolizer]
+# Propagate options for sanitizers.
+for options in ['ASAN_OPTIONS']:
+ if options in os.environ:
+ config.environment[options] = os.environ[options]
+
###
import os
@@ -211,10 +216,8 @@ config.substitutions.append( ('%exeext', config.llvm_exe_ext) )
NOJUNK = r"(?<!\.|-|\^|/)"
for pattern in [r"\bbugpoint\b(?!-)",
- NOJUNK + r"\bclang\b(?!-)",
NOJUNK + r"\bllc\b",
r"\blli\b",
- r"\bllvm-PerfectShuffle\b",
r"\bllvm-ar\b",
r"\bllvm-as\b",
r"\bllvm-bcanalyzer\b",
@@ -241,8 +244,6 @@ for pattern in [r"\bbugpoint\b(?!-)",
NOJUNK + r"\bopt\b",
r"\bFileCheck\b",
r"\bFileUpdate\b",
- r"\bc-index-test\b",
- r"\bfpcmp\b",
r"\bobj2yaml\b",
r"\byaml2obj\b",
# Handle these specially as they are strings searched
diff --git a/test/tools/llvm-cov/Inputs/range_based_for.gcda b/test/tools/llvm-cov/Inputs/range_based_for.gcda
new file mode 100644
index 0000000..df51888
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/range_based_for.gcda
Binary files differ
diff --git a/test/tools/llvm-cov/Inputs/range_based_for.gcno b/test/tools/llvm-cov/Inputs/range_based_for.gcno
new file mode 100644
index 0000000..5f30acf
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/range_based_for.gcno
Binary files differ
diff --git a/test/tools/llvm-cov/Inputs/test_long_file_names.output b/test/tools/llvm-cov/Inputs/test_long_file_names.output
new file mode 100644
index 0000000..e09f4cb
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_long_file_names.output
@@ -0,0 +1,8 @@
+File 'srcdir/./nested_dir/../test.h'
+Lines executed:100.00% of 1
+srcdir/./nested_dir/../test.h:creating 'test_paths.cpp##test.h.gcov'
+
+File 'srcdir/./nested_dir/../test.cpp'
+Lines executed:84.21% of 38
+srcdir/./nested_dir/../test.cpp:creating 'test_paths.cpp##test.cpp.gcov'
+
diff --git a/test/tools/llvm-cov/Inputs/test_long_paths.output b/test/tools/llvm-cov/Inputs/test_long_paths.output
new file mode 100644
index 0000000..376ee5b
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_long_paths.output
@@ -0,0 +1,8 @@
+File 'srcdir/./nested_dir/../test.h'
+Lines executed:100.00% of 1
+srcdir/./nested_dir/../test.h:creating 'srcdir#^#test_paths.cpp##srcdir#nested_dir#^#test.h.gcov'
+
+File 'srcdir/./nested_dir/../test.cpp'
+Lines executed:84.21% of 38
+srcdir/./nested_dir/../test.cpp:creating 'srcdir#^#test_paths.cpp##srcdir#nested_dir#^#test.cpp.gcov'
+
diff --git a/test/tools/llvm-cov/Inputs/test_missing.cpp.gcov b/test/tools/llvm-cov/Inputs/test_missing.cpp.gcov
new file mode 100644
index 0000000..1c138e4
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_missing.cpp.gcov
@@ -0,0 +1,77 @@
+ -: 0:Source:srcdir/./nested_dir/../test.cpp
+ -: 0:Graph:test_paths.gcno
+ -: 0:Data:test_paths.gcda
+ -: 0:Runs:3
+ -: 0:Programs:1
+ -: 1:/*EOF*/
+ -: 2:/*EOF*/
+ -: 3:/*EOF*/
+ -: 4:/*EOF*/
+ -: 5:/*EOF*/
+ -: 6:/*EOF*/
+ -: 7:/*EOF*/
+ -: 8:/*EOF*/
+ -: 9:/*EOF*/
+12884901888: 10:/*EOF*/
+ -: 11:/*EOF*/
+ #####: 12:/*EOF*/
+ -: 13:/*EOF*/
+ -: 14:/*EOF*/
+ #####: 15:/*EOF*/
+ -: 16:/*EOF*/
+ -: 17:/*EOF*/
+ -: 18:/*EOF*/
+ 3: 19:/*EOF*/
+ 3: 20:/*EOF*/
+ -: 21:/*EOF*/
+ -: 22:/*EOF*/
+ -: 23:/*EOF*/
+ #####: 24:/*EOF*/
+ #####: 25:/*EOF*/
+ -: 26:/*EOF*/
+ -: 27:/*EOF*/
+ 12: 28:/*EOF*/
+ 12: 29:/*EOF*/
+ 12: 30:/*EOF*/
+ -: 31:/*EOF*/
+ -: 32:/*EOF*/
+ 21: 33:/*EOF*/
+ 36: 34:/*EOF*/
+ 18: 35:/*EOF*/
+ 3: 36:/*EOF*/
+ -: 37:/*EOF*/
+ -: 38:/*EOF*/
+ 3: 39:/*EOF*/
+ -: 40:/*EOF*/
+ 3: 41:/*EOF*/
+ 3: 42:/*EOF*/
+ 3: 43:/*EOF*/
+ 3: 44:/*EOF*/
+ 3: 45:/*EOF*/
+ 3: 46:/*EOF*/
+ #####: 47:/*EOF*/
+ #####: 48:/*EOF*/
+ -: 49:/*EOF*/
+ -: 50:/*EOF*/
+ 66: 51:/*EOF*/
+ 30: 52:/*EOF*/
+ -: 53:/*EOF*/
+ 6: 54:/*EOF*/
+ 6: 55:/*EOF*/
+ -: 56:/*EOF*/
+ -: 57:/*EOF*/
+ 3: 58:/*EOF*/
+ 3: 59:/*EOF*/
+ -: 60:/*EOF*/
+ 9: 61:/*EOF*/
+ 9: 62:/*EOF*/
+ -: 63:/*EOF*/
+ 12: 64:/*EOF*/
+ 12: 65:/*EOF*/
+ 30: 66:/*EOF*/
+ -: 67:/*EOF*/
+ 3: 68:/*EOF*/
+25769803782: 69:/*EOF*/
+12884901888: 70:/*EOF*/
+ -: 71:/*EOF*/
+ 3: 72:/*EOF*/
diff --git a/test/tools/llvm-cov/Inputs/test_missing.h.gcov b/test/tools/llvm-cov/Inputs/test_missing.h.gcov
new file mode 100644
index 0000000..d500e86
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_missing.h.gcov
@@ -0,0 +1,6 @@
+ -: 0:Source:srcdir/./nested_dir/../test.h
+ -: 0:Graph:test_paths.gcno
+ -: 0:Data:test_paths.gcda
+ -: 0:Runs:3
+ -: 0:Programs:1
+ 6: 1:/*EOF*/
diff --git a/test/tools/llvm-cov/Inputs/test_missing.output b/test/tools/llvm-cov/Inputs/test_missing.output
new file mode 100644
index 0000000..ada0c36
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_missing.output
@@ -0,0 +1,8 @@
+File 'srcdir/./nested_dir/../test.h'
+Lines executed:100.00% of 1
+srcdir/./nested_dir/../test.h:creating 'test.h.gcov'
+
+File 'srcdir/./nested_dir/../test.cpp'
+Lines executed:84.21% of 38
+srcdir/./nested_dir/../test.cpp:creating 'test.cpp.gcov'
+
diff --git a/test/tools/llvm-cov/Inputs/test_no_output.output b/test/tools/llvm-cov/Inputs/test_no_output.output
new file mode 100644
index 0000000..74286b9
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/test_no_output.output
@@ -0,0 +1,6 @@
+File 'test.cpp'
+Lines executed:84.21% of 38
+
+File './test.h'
+Lines executed:100.00% of 1
+
diff --git a/test/tools/llvm-cov/lit.local.cfg b/test/tools/llvm-cov/lit.local.cfg
index f738810..56c6f1f 100644
--- a/test/tools/llvm-cov/lit.local.cfg
+++ b/test/tools/llvm-cov/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.test', '.m']
+config.suffixes = ['.test', '.m', '.cpp']
diff --git a/test/tools/llvm-cov/llvm-cov.test b/test/tools/llvm-cov/llvm-cov.test
index 19d3e5d..2345f8d 100644
--- a/test/tools/llvm-cov/llvm-cov.test
+++ b/test/tools/llvm-cov/llvm-cov.test
@@ -31,6 +31,15 @@ RUN: llvm-cov -o objdir/test test.c | diff -u test_no_options.output -
RUN: diff -aub test_objdir.cpp.gcov test.cpp.gcov
RUN: diff -aub test_objdir.h.gcov test.h.gcov
+# With gcov output disabled
+RUN: llvm-cov -n test.c | diff -u test_no_output.output -
+
+# Missing source files. This test is fragile, as it depends on being
+# run before we copy some sources into place in the next test.
+RUN: llvm-cov test_paths.cpp 2>/dev/null | diff -u test_missing.output -
+RUN: diff -aub test_missing.cpp.gcov test.cpp.gcov
+RUN: diff -aub test_missing.h.gcov test.h.gcov
+
# Preserve paths. This mangles the output filenames.
RUN: mkdir -p %t/srcdir/nested_dir
RUN: cp test.cpp test.h %t/srcdir
@@ -43,6 +52,16 @@ RUN: llvm-cov test_paths.cpp | diff -u test_no_preserve_paths.output -
RUN: diff -aub test_paths.cpp.gcov test.cpp.gcov
RUN: diff -aub test_paths.h.gcov test.h.gcov
+# Long file names.
+RUN: llvm-cov -l test_paths.cpp | diff -u test_long_file_names.output -
+RUN: diff -aub test_paths.cpp.gcov test_paths.cpp##test.cpp.gcov
+RUN: diff -aub test_paths.h.gcov test_paths.cpp##test.h.gcov
+
+# Long file names and preserve paths.
+RUN: llvm-cov -lp -gcno test_paths.gcno -gcda test_paths.gcda srcdir/../test_paths.cpp | diff -u test_long_paths.output -
+RUN: diff -aub test_paths.cpp.gcov srcdir#^#test_paths.cpp##srcdir#nested_dir#^#test.cpp.gcov
+RUN: diff -aub test_paths.h.gcov srcdir#^#test_paths.cpp##srcdir#nested_dir#^#test.h.gcov
+
# Function summaries. This changes stdout, but not the gcov files.
RUN: llvm-cov test.c -f | diff -u test_-f.output -
RUN: diff -aub test_no_options.cpp.gcov test.cpp.gcov
diff --git a/test/tools/llvm-cov/range_based_for.cpp b/test/tools/llvm-cov/range_based_for.cpp
new file mode 100644
index 0000000..61f60f6
--- /dev/null
+++ b/test/tools/llvm-cov/range_based_for.cpp
@@ -0,0 +1,29 @@
+// Make sure that compiler-added local variables (whose line number is zero)
+// don't crash llvm-cov.
+
+// We need shell for cd
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: cd %t
+// RUN: cp %s %p/Inputs/range_based_for.gc* .
+
+// RUN: llvm-cov range_based_for.cpp | FileCheck %s --check-prefix=STDOUT
+// STDOUT: File 'range_based_for.cpp'
+// STDOUT: Lines executed:100.00% of 5
+// STDOUT: range_based_for.cpp:creating 'range_based_for.cpp.gcov'
+
+// RUN: FileCheck %s --check-prefix=GCOV < %t/range_based_for.cpp.gcov
+// GCOV: -: 0:Runs:1
+// GCOV: -: 0:Programs:1
+
+int main(int argc, const char *argv[]) { // GCOV: 1: [[@LINE]]:int main(
+ int V[] = {1, 2}; // GCOV: 1: [[@LINE]]: int V[]
+ for (int &I : V) { // GCOV: 10: [[@LINE]]: for (
+ } // GCOV: 2: [[@LINE]]: }
+ return 0; // GCOV: 1: [[@LINE]]: return
+} // GCOV: -: [[@LINE]]:}
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-objdump/Inputs/file-aux-record.yaml b/test/tools/llvm-objdump/Inputs/file-aux-record.yaml
new file mode 100644
index 0000000..d19afaf
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/file-aux-record.yaml
@@ -0,0 +1,21 @@
+header: !Header
+ Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+ Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
+sections:
+symbols:
+ - !Symbol
+ Name: .file
+ Value: 0
+ SectionNumber: 65534
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_FILE
+ File: eighteen-chars.obj
+ - !Symbol
+ Name: '@comp.id'
+ Value: 13485607
+ SectionNumber: 65535
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+
diff --git a/test/tools/llvm-objdump/Inputs/file.obj.coff-arm b/test/tools/llvm-objdump/Inputs/file.obj.coff-arm
new file mode 100755
index 0000000..a333a87
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/file.obj.coff-arm
Binary files differ
diff --git a/test/tools/llvm-objdump/coff-file.test b/test/tools/llvm-objdump/coff-file.test
new file mode 100644
index 0000000..75d02b8
--- /dev/null
+++ b/test/tools/llvm-objdump/coff-file.test
@@ -0,0 +1,6 @@
+RUN: llvm-objdump -t %p/Inputs/file.obj.coff-arm | FileCheck %s
+
+CHECK: .file
+CHECK-NEXT: AUX /Users/compnerd/work/llvm/test/tools/llvm-readobj/Inputs/file.asm
+CHECK-NEXT: [{{[ 0-9]+}}]
+
diff --git a/test/tools/llvm-objdump/coff-non-null-terminated-file.test b/test/tools/llvm-objdump/coff-non-null-terminated-file.test
new file mode 100644
index 0000000..125994f
--- /dev/null
+++ b/test/tools/llvm-objdump/coff-non-null-terminated-file.test
@@ -0,0 +1,5 @@
+RUN: yaml2obj %p/Inputs/file-aux-record.yaml | llvm-objdump -t - | FileCheck %s
+
+CHECK: .file
+CHECK: AUX eighteen-chars.obj{{$}}
+
diff --git a/test/tools/llvm-profdata/Inputs/no-counts.profdata b/test/tools/llvm-profdata/Inputs/no-counts.profdata
new file mode 100644
index 0000000..5c1fa15
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/no-counts.profdata
@@ -0,0 +1,3 @@
+no_counts
+0
+0
diff --git a/test/tools/llvm-profdata/errors.test b/test/tools/llvm-profdata/errors.test
index 6ccb084..28262ef 100644
--- a/test/tools/llvm-profdata/errors.test
+++ b/test/tools/llvm-profdata/errors.test
@@ -1,13 +1,16 @@
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo4-1.profdata -o /dev/null 2>&1 | FileCheck %s --check-prefix=HASH
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo4-1.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=HASH
HASH: foo4-1.profdata: foo: Function hash mismatch
-RUN: llvm-profdata merge %p/Inputs/overflow.profdata %p/Inputs/overflow.profdata -o /dev/null 2>&1 | FileCheck %s --check-prefix=OVERFLOW
+RUN: llvm-profdata merge %p/Inputs/overflow.profdata %p/Inputs/overflow.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=OVERFLOW
OVERFLOW: overflow.profdata: overflow: Counter overflow
RUN: not llvm-profdata show %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
-RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
+RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.profdata: Malformed profile data
RUN: not llvm-profdata show %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
-RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
+RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=BAD-HASH
BAD-HASH: error: {{.*}}bad-hash.profdata: Malformed profile data
+
+RUN: not llvm-profdata show %p/Inputs/no-counts.profdata 2>&1 | FileCheck %s --check-prefix=NO-COUNTS
+NO-COUNTS: error: {{.*}}no-counts.profdata: Malformed profile data
diff --git a/test/tools/llvm-profdata/raw-two-profiles.test b/test/tools/llvm-profdata/raw-two-profiles.test
new file mode 100644
index 0000000..3260836
--- /dev/null
+++ b/test/tools/llvm-profdata/raw-two-profiles.test
@@ -0,0 +1,64 @@
+RUN: printf '\201rforpl\377' > %t-foo.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\3\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
+
+RUN: printf '\3\0\0\0' >> %t-foo.profraw
+RUN: printf '\1\0\0\0' >> %t-foo.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
+
+RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf 'foo' >> %t-foo.profraw
+
+RUN: printf '\201rforpl\377' > %t-bar.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\3\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
+
+RUN: printf '\3\0\0\0' >> %t-bar.profraw
+RUN: printf '\2\0\0\0' >> %t-bar.profraw
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
+
+RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf 'bar' >> %t-bar.profraw
+
+Versions of the profiles that are padded to eight-byte alignment.
+RUN: cat %t-foo.profraw > %t-foo-padded.profraw
+RUN: printf '\0\0\0\0\0' >> %t-foo-padded.profraw
+RUN: cat %t-bar.profraw > %t-bar-padded.profraw
+RUN: printf '\0\0\0\0\0' >> %t-bar-padded.profraw
+
+RUN: cat %t-foo.profraw %t-bar.profraw > %t-nopad.profraw
+RUN: cat %t-foo-padded.profraw %t-bar.profraw > %t-pad-between.profraw
+RUN: cat %t-foo-padded.profraw %t-bar-padded.profraw > %t-pad.profraw
+
+RUN: llvm-profdata show %t-nopad.profraw -all-functions -counts | FileCheck %s
+RUN: llvm-profdata show %t-pad-between.profraw -all-functions -counts | FileCheck %s
+RUN: llvm-profdata show %t-pad.profraw -all-functions -counts | FileCheck %s
+
+CHECK: Counters:
+CHECK: foo:
+CHECK: Hash: 0x0000000000000001
+CHECK: Counters: 1
+CHECK: Function count: 19
+CHECK: Block counts: []
+CHECK: bar:
+CHECK: Hash: 0x0000000000000002
+CHECK: Counters: 2
+CHECK: Function count: 55
+CHECK: Block counts: [65]
+CHECK: Functions shown: 2
+CHECK: Total functions: 2
+CHECK: Maximum function count: 55
+CHECK: Maximum internal block count: 65
diff --git a/test/tools/llvm-profdata/simple.test b/test/tools/llvm-profdata/simple.test
index 97dda5a..18741dd 100644
--- a/test/tools/llvm-profdata/simple.test
+++ b/test/tools/llvm-profdata/simple.test
@@ -1,5 +1,7 @@
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3-2.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3
-RUN: llvm-profdata merge %p/Inputs/foo3-2.profdata %p/Inputs/foo3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3-2.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
+RUN: llvm-profdata merge %p/Inputs/foo3-2.profdata %p/Inputs/foo3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
FOO3: foo:
FOO3: Counters: 3
FOO3: Function count: 8
@@ -8,8 +10,10 @@ FOO3: Total functions: 1
FOO3: Maximum function count: 8
FOO3: Maximum internal block count: 7
-RUN: llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/foo4-2.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO4
-RUN: llvm-profdata merge %p/Inputs/foo4-2.profdata %p/Inputs/foo4-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO4
+RUN: llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/foo4-2.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
+RUN: llvm-profdata merge %p/Inputs/foo4-2.profdata %p/Inputs/foo4-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
FOO4: foo:
FOO4: Counters: 4
FOO4: Function count: 18
@@ -18,8 +22,10 @@ FOO4: Total functions: 1
FOO4: Maximum function count: 18
FOO4: Maximum internal block count: 48
-RUN: llvm-profdata merge %p/Inputs/foo3bar3-1.profdata %p/Inputs/foo3bar3-2.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
-RUN: llvm-profdata merge %p/Inputs/foo3bar3-2.profdata %p/Inputs/foo3bar3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
+RUN: llvm-profdata merge %p/Inputs/foo3bar3-1.profdata %p/Inputs/foo3bar3-2.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
+RUN: llvm-profdata merge %p/Inputs/foo3bar3-2.profdata %p/Inputs/foo3bar3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
FOO3BAR3: foo:
FOO3BAR3: Counters: 3
FOO3BAR3: Function count: 19
@@ -32,7 +38,8 @@ FOO3BAR3: Total functions: 2
FOO3BAR3: Maximum function count: 36
FOO3BAR3: Maximum internal block count: 50
-RUN: llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3EMPTY
+RUN: llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3EMPTY
FOO3EMPTY: foo:
FOO3EMPTY: Counters: 3
FOO3EMPTY: Function count: 1
@@ -41,7 +48,8 @@ FOO3EMPTY: Total functions: 1
FOO3EMPTY: Maximum function count: 1
FOO3EMPTY: Maximum internal block count: 3
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3
FOO3FOO3BAR3: foo:
FOO3FOO3BAR3: Counters: 3
FOO3FOO3BAR3: Function count: 3
@@ -54,7 +62,8 @@ FOO3FOO3BAR3: Total functions: 2
FOO3FOO3BAR3: Maximum function count: 7
FOO3FOO3BAR3: Maximum internal block count: 13
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata | llvm-profdata show - -all-functions -counts | FileCheck %s --check-prefix=DISJOINT
+RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=DISJOINT
DISJOINT: foo:
DISJOINT: Counters: 3
DISJOINT: Function count: 1
diff --git a/test/tools/llvm-readobj/Inputs/dynamic-table-exe.x86 b/test/tools/llvm-readobj/Inputs/dynamic-table-exe.x86
new file mode 100755
index 0000000..4edbe58
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/dynamic-table-exe.x86
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/file-aux-record.yaml b/test/tools/llvm-readobj/Inputs/file-aux-record.yaml
new file mode 100644
index 0000000..d19afaf
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/file-aux-record.yaml
@@ -0,0 +1,21 @@
+header: !Header
+ Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+ Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
+sections:
+symbols:
+ - !Symbol
+ Name: .file
+ Value: 0
+ SectionNumber: 65534
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_FILE
+ File: eighteen-chars.obj
+ - !Symbol
+ Name: '@comp.id'
+ Value: 13485607
+ SectionNumber: 65535
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+
diff --git a/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml b/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml
new file mode 100644
index 0000000..8d8f684
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml
@@ -0,0 +1,21 @@
+header: !Header
+ Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+ Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
+sections:
+symbols:
+ - !Symbol
+ Name: .file
+ Value: 0
+ SectionNumber: 65534
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_FILE
+ File: first-section-has-eighteen-characters.asm
+ - !Symbol
+ Name: '@comp.id'
+ Value: 13485607
+ SectionNumber: 65535
+ SimpleType: IMAGE_SYM_TYPE_NULL
+ ComplexType: IMAGE_SYM_DTYPE_NULL
+ StorageClass: IMAGE_SYM_CLASS_STATIC
+
diff --git a/test/tools/llvm-readobj/coff-file-sections-reading.test b/test/tools/llvm-readobj/coff-file-sections-reading.test
new file mode 100644
index 0000000..5c44c16
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-file-sections-reading.test
@@ -0,0 +1,18 @@
+RUN: yaml2obj %p/Inputs/file-multiple-aux-records.yaml | llvm-readobj -t - | FileCheck %s
+
+CHECK: Symbols [
+CHECK: Symbol {
+CHECK: Name: .file
+CHECK: Value: 0
+CHECK: Section: (65534)
+CHECK: BaseType: Null (0x0)
+CHECK: ComplexType: Null (0x0)
+CHECK: StorageClass: File (0x67)
+CHECK: AuxSymbolCount: 3
+CHECK: AuxFileRecord {
+CHECK: FileName: first-section-has-eighteen-characters.asm
+CHECK: }
+CHECK-NOT: AuxFileRecord {
+CHECK: }
+CHECK: ]
+
diff --git a/test/tools/llvm-readobj/coff-non-null-terminated-file.test b/test/tools/llvm-readobj/coff-non-null-terminated-file.test
new file mode 100644
index 0000000..8bd88f3
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-non-null-terminated-file.test
@@ -0,0 +1,20 @@
+RUN: yaml2obj %p/Inputs/file-aux-record.yaml | llvm-readobj -t - | FileCheck %s
+
+CHECK: Symbols [
+CHECK: Symbol {
+CHECK: Name: .file
+CHECK: Value: 0
+CHECK: StorageClass: File
+CHECK: AuxSymbolCount: 1
+CHECK: AuxFileRecord {
+CHECK: FileName: eighteen-chars.obj{{$}}
+CHECK: }
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: @comp.id
+CHECK: Value: 13485607
+CHECK: StorageClass: Static
+CHECK: AuxSymbolCount: 0
+CHECK: }
+CHECK: ]
+
diff --git a/test/tools/llvm-readobj/dynamic.test b/test/tools/llvm-readobj/dynamic.test
index 6a5fe95..08f29fc 100644
--- a/test/tools/llvm-readobj/dynamic.test
+++ b/test/tools/llvm-readobj/dynamic.test
@@ -21,7 +21,7 @@ ELF-MIPS: 0x00000011 REL 0x518
ELF-MIPS: 0x00000012 RELSZ 16 (bytes)
ELF-MIPS: 0x00000013 RELENT 8 (bytes)
ELF-MIPS: 0x70000001 MIPS_RLD_VERSION 1
-ELF-MIPS: 0x70000005 MIPS_FLAGS 0x2
+ELF-MIPS: 0x70000005 MIPS_FLAGS NOTPOT
ELF-MIPS: 0x70000006 MIPS_BASE_ADDRESS 0x0
ELF-MIPS: 0x7000000A MIPS_LOCAL_GOTNO 10
ELF-MIPS: 0x70000011 MIPS_SYMTABNO 19
@@ -55,7 +55,7 @@ ELF-MIPS-EXE: 0x70000016 MIPS_RLD_MAP 0x410880
ELF-MIPS-EXE: 0x00000015 DEBUG 0x0
ELF-MIPS-EXE: 0x00000003 PLTGOT 0x410890
ELF-MIPS-EXE: 0x70000001 MIPS_RLD_VERSION 1
-ELF-MIPS-EXE: 0x70000005 MIPS_FLAGS 0x2
+ELF-MIPS-EXE: 0x70000005 MIPS_FLAGS NOTPOT
ELF-MIPS-EXE: 0x70000006 MIPS_BASE_ADDRESS 0x400000
ELF-MIPS-EXE: 0x7000000A MIPS_LOCAL_GOTNO 5
ELF-MIPS-EXE: 0x70000011 MIPS_SYMTABNO 8
@@ -70,3 +70,44 @@ ELF-MIPS-EXE: 0x6FFFFFFF VERNEEDNUM 1
ELF-MIPS-EXE: 0x6FFFFFF0 VERSYM 0x4003D8
ELF-MIPS-EXE: 0x00000000 NULL 0x0
ELF-MIPS-EXE: ]
+
+RUN: llvm-readobj -dynamic-table %p/Inputs/dynamic-table-exe.x86 \
+RUN: | FileCheck %s -check-prefix ELF-X86-EXE
+
+ELF-X86-EXE: Format: ELF32-i386
+ELF-X86-EXE: Arch: i386
+ELF-X86-EXE: AddressSize: 32bit
+ELF-X86-EXE: LoadName:
+ELF-X86-EXE: DynamicSection [ (30 entries)
+ELF-X86-EXE: Tag Type Name/Value
+ELF-X86-EXE: 0x00000001 NEEDED SharedLibrary (libstdc++.so.6)
+ELF-X86-EXE: 0x00000001 NEEDED SharedLibrary (libgcc_s.so.1)
+ELF-X86-EXE: 0x00000001 NEEDED SharedLibrary (libc.so.6)
+ELF-X86-EXE: 0x0000000C INIT 0x62C
+ELF-X86-EXE: 0x0000000D FINI 0x920
+ELF-X86-EXE: 0x00000019 INIT_ARRAY 0x19FC
+ELF-X86-EXE: 0x0000001B INIT_ARRAYSZ 4 (bytes)
+ELF-X86-EXE: 0x0000001A FINI_ARRAY 0x1A00
+ELF-X86-EXE: 0x0000001C FINI_ARRAYSZ 4 (bytes)
+ELF-X86-EXE: 0x00000004 HASH 0x18C
+ELF-X86-EXE: 0x6FFFFEF5 GNU_HASH 0x1E4
+ELF-X86-EXE: 0x00000005 STRTAB 0x328
+ELF-X86-EXE: 0x00000006 SYMTAB 0x218
+ELF-X86-EXE: 0x0000000A STRSZ 408 (bytes)
+ELF-X86-EXE: 0x0000000B SYMENT 16 (bytes)
+ELF-X86-EXE: 0x00000015 DEBUG 0x0
+ELF-X86-EXE: 0x00000003 PLTGOT 0x1B30
+ELF-X86-EXE: 0x00000002 PLTRELSZ 64 (bytes)
+ELF-X86-EXE: 0x00000014 PLTREL REL
+ELF-X86-EXE: 0x00000017 JMPREL 0x5EC
+ELF-X86-EXE: 0x00000011 REL 0x564
+ELF-X86-EXE: 0x00000012 RELSZ 136 (bytes)
+ELF-X86-EXE: 0x00000013 RELENT 8 (bytes)
+ELF-X86-EXE: 0x00000016 TEXTREL
+ELF-X86-EXE: 0x0000001E FLAGS TEXTREL
+ELF-X86-EXE: 0x6FFFFFFE VERNEED 0x4E4
+ELF-X86-EXE: 0x6FFFFFFF VERNEEDNUM 3
+ELF-X86-EXE: 0x6FFFFFF0 VERSYM 0x4C0
+ELF-X86-EXE: 0x6FFFFFFA RELCOUNT 6
+ELF-X86-EXE: 0x00000000 NULL 0x0
+ELF-X86-EXE: ]